Ligand SASA in Protein Pocket
An example of computing Ligand SASA (solvent-accessible surface area) in protein pocket using RDKit
Motivation
Solvent-accessible surface area (SASA) is an important descriptor in ligand binding. The extent of ligand SASA value decrease upon binding indicates whether the ligand is deeply buried or not upon binding to the pocket. RDKit provides SASA value calculation, which is based on FreeSASA package.
%matplotlib inline
import matplotlib.pyplot as plt
from io import BytesIO
import pandas as pd
import numpy as np
from rdkit.Chem import PandasTools
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import DataStructs
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import rdRGroupDecomposition
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions #Only needed if modifying defaults
DrawingOptions.bondLineWidth=1.8
IPythonConsole.ipython_useSVG=True
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.warning')
import rdkit
import py3Dmol
print(rdkit.__version__)
Let's use ABL2-Imatinib complex (PDB:3GVU) for an example. I first downloaded the file and removed the water and the bound ligands.
prot = Chem.MolFromPDBFile('files/3gvu.pdb')
lig = Chem.MolFromMolFile('files/STI.sdf')
viewer = py3Dmol.view(width=300, height=300)
viewer.addModel(Chem.MolToPDBBlock(prot), 'mol')
viewer.addModel(Chem.MolToPDBBlock(lig), 'pdb')
viewer.setStyle({'chain': 'A'}, {'cartoon':{'color':'spectrum'}})
viewer.setStyle({'resn': 'UNL'}, {'stick':{}})
viewer.zoomTo({'resn': 'UNL'})
viewer.show()
Let's define a function that compute SASA. The code is taken from here
from rdkit.Chem import rdFreeSASA
# compute ligand SASA
lig_h = Chem.AddHs(lig, addCoords=True)
# Get Van der Waals radii (angstrom)
ptable = Chem.GetPeriodicTable()
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in lig_h.GetAtoms()]
# Compute solvent accessible surface area
lig_sasa = rdFreeSASA.CalcSASA(lig_h, radii)
comp = Chem.CombineMols(prot, lig)
comp_h = Chem.AddHs(comp, addCoords=True)
# Get Van der Waals radii (angstrom)
ptable = Chem.GetPeriodicTable()
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in comp_h.GetAtoms()]
# Compute solvent accessible surface area
comp_sasa = rdFreeSASA.CalcSASA(comp_h, radii)
print(lig_sasa, comp_sasa)
Note that comp_sasa
is the overall SASA of both protein and ligand. We want to compute the SASA of ligand only while in the binding pocket. RDKit stores the per-atom SASA values in the atom object.
comp_lig = Chem.GetMolFrags(comp_h, asMols=True)[-1] # ligand is the last component
lig_sasa_free = 0
for a in lig_h.GetAtoms():
lig_sasa_free += float(a.GetProp("SASA"))
lig_sasa_bound = 0
for a in comp_lig.GetAtoms():
lig_sasa_bound += float(a.GetProp("SASA"))
print("Ligand SASA (free) =", lig_sasa_free)
print("Ligand SASA (bound) =", lig_sasa_bound)
print("Ligand SASA difference =", lig_sasa_free - lig_sasa_bound)
from rdkit.Chem import rdFreeSASA
def compute_sasa(mol):
# Get Van der Waals radii (angstrom)
ptable = Chem.GetPeriodicTable()
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]
# Compute solvent accessible surface area
sasa = rdFreeSASA.CalcSASA(mol, radii)
return sasa
def compute_ligand_sasa_pocket(prot, lig):
# compute complex SASA
comp = Chem.CombineMols(prot, lig)
comp_h = Chem.AddHs(comp, addCoords=True)
comp_sasa = compute_sasa(comp_h)
# compute ligand SASA in pocket
comp_lig = Chem.GetMolFrags(comp_h, asMols=True, sanitizeFrags=False)[-1] # ligand is the last component
lig_sasa_bound = sum([float(a.GetProp("SASA")) for a in comp_lig.GetAtoms()])
return lig_sasa_bound
sasa_free = compute_sasa(lig)
sasa_bound = compute_ligand_sasa_pocket(prot, lig)
print("Ligand SASA (free) =", sasa_free)
print("Ligand SASA (bound) =", sasa_bound)
print("Ligand SASA difference =", sasa_free - sasa_bound)
Let's measure the timing of this function call. I suspect this is not particularly fast because the code needs to compute SASA of all protein atoms as well. Perhaps we can make this faster by only computing SASA within certain distance from the ligand atoms.
%timeit compute_ligand_sasa_pocket(prot, lig)
def compute_ligand_sasa_pocket_cutoff(prot, lig, cutoff=8):
lig_conf = lig.GetConformer()
lig_xyz = lig_conf.GetPositions()
prot_conf = prot.GetConformer()
prot_xyz = conf.GetPositions()
# minimum distance between protein atoms and ligand atoms
r = np.min(np.linalg.norm(prot_xyz[:, np.newaxis, :] - lig_xyz[np.newaxis, :, :], axis=2), axis=1)
indices = np.argwhere(r > cutoff).flatten()
mol = Chem.RWMol(prot)
for idx in sorted(indices, reverse=True):
mol.RemoveAtom(int(idx))
return compute_ligand_sasa_pocket(mol, lig)
sasa_free = compute_sasa(lig)
sasa_bound = compute_ligand_sasa_pocket_cutoff(prot, lig, 5)
print("Ligand SASA (free) =", sasa_free)
print("Ligand SASA (bound) =", sasa_bound)
print("Ligand SASA difference =", sasa_free - sasa_bound)
With the cutoff distance of 5 Å, we get slightly larger SASA value in the bound state. We can improve this by increasing the cutoff distance.
cutoff_values = [5, 6, 7, 8, 9, 10]
sasa_bound_values = [compute_ligand_sasa_pocket_cutoff(prot, lig, c) for c in cutoff_values]
plt.plot(cutoff_values, sasa_bound_values)
plt.xlabel('Cutoff')
plt.ylabel('SASA')
plt.show()
The SASA value converged after 7 Å cutoff. Let's compute the function call speed using 8 Å as cutoff.
%timeit compute_ligand_sasa_pocket_cutoff(prot, lig, 8)
In my laptop, I got 323 ms (without cutoff) vs 229 ms (with cutoff), which represents about 40% speedup.