Motivation

Solvent-accessible surface area (SASA) is an important descriptor in ligand binding. The extent of ligand SASA value decrease upon binding indicates whether the ligand is deeply buried or not upon binding to the pocket. RDKit provides SASA value calculation, which is based on FreeSASA package.

%matplotlib inline
import matplotlib.pyplot as plt

from io import BytesIO
import pandas as pd
import numpy as np
from rdkit.Chem import PandasTools

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import DataStructs
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import rdRGroupDecomposition

from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions #Only needed if modifying defaults

DrawingOptions.bondLineWidth=1.8
IPythonConsole.ipython_useSVG=True
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.warning')
import rdkit
import py3Dmol
print(rdkit.__version__)
2020.03.2

Example

Let's use ABL2-Imatinib complex (PDB:3GVU) for an example. I first downloaded the file and removed the water and the bound ligands.

prot = Chem.MolFromPDBFile('files/3gvu.pdb')
lig = Chem.MolFromMolFile('files/STI.sdf')
viewer = py3Dmol.view(width=300, height=300)
viewer.addModel(Chem.MolToPDBBlock(prot), 'mol')
viewer.addModel(Chem.MolToPDBBlock(lig), 'pdb')
viewer.setStyle({'chain': 'A'}, {'cartoon':{'color':'spectrum'}})
viewer.setStyle({'resn': 'UNL'}, {'stick':{}})
viewer.zoomTo({'resn': 'UNL'})
viewer.show()

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
jupyter labextension install jupyterlab_3dmol

Let's define a function that compute SASA. The code is taken from here

from rdkit.Chem import rdFreeSASA

# compute ligand SASA
lig_h = Chem.AddHs(lig, addCoords=True)

# Get Van der Waals radii (angstrom)
ptable = Chem.GetPeriodicTable()
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in lig_h.GetAtoms()]

# Compute solvent accessible surface area
lig_sasa = rdFreeSASA.CalcSASA(lig_h, radii)
comp = Chem.CombineMols(prot, lig)
comp_h = Chem.AddHs(comp, addCoords=True)

# Get Van der Waals radii (angstrom)
ptable = Chem.GetPeriodicTable()
radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in comp_h.GetAtoms()]

# Compute solvent accessible surface area
comp_sasa = rdFreeSASA.CalcSASA(comp_h, radii)
print(lig_sasa, comp_sasa)
849.5659988403745 14213.44708409188

Note that comp_sasa is the overall SASA of both protein and ligand. We want to compute the SASA of ligand only while in the binding pocket. RDKit stores the per-atom SASA values in the atom object.

comp_lig = Chem.GetMolFrags(comp_h, asMols=True)[-1] # ligand is the last component

lig_sasa_free = 0
for a in lig_h.GetAtoms():
    lig_sasa_free += float(a.GetProp("SASA"))
    
lig_sasa_bound = 0
for a in comp_lig.GetAtoms():
    lig_sasa_bound += float(a.GetProp("SASA"))
print("Ligand SASA (free) =", lig_sasa_free)
print("Ligand SASA (bound) =", lig_sasa_bound)
print("Ligand SASA difference =", lig_sasa_free - lig_sasa_bound)
Ligand SASA (free) = 849.5659988403745
Ligand SASA (bound) = 68.71464272335984
Ligand SASA difference = 780.8513561170147

Reduce computation time

Let's put above in a functional call and measure the timing.

from rdkit.Chem import rdFreeSASA

def compute_sasa(mol):
    # Get Van der Waals radii (angstrom)
    ptable = Chem.GetPeriodicTable()
    radii = [ptable.GetRvdw(atom.GetAtomicNum()) for atom in mol.GetAtoms()]

    # Compute solvent accessible surface area
    sasa = rdFreeSASA.CalcSASA(mol, radii)
    return sasa

def compute_ligand_sasa_pocket(prot, lig):
    # compute complex SASA
    comp = Chem.CombineMols(prot, lig)
    comp_h = Chem.AddHs(comp, addCoords=True)
    comp_sasa = compute_sasa(comp_h)

    # compute ligand SASA in pocket
    comp_lig = Chem.GetMolFrags(comp_h, asMols=True, sanitizeFrags=False)[-1] # ligand is the last component
        
    lig_sasa_bound = sum([float(a.GetProp("SASA")) for a in comp_lig.GetAtoms()])
    return lig_sasa_bound
sasa_free = compute_sasa(lig)
sasa_bound = compute_ligand_sasa_pocket(prot, lig)

print("Ligand SASA (free) =", sasa_free)
print("Ligand SASA (bound) =", sasa_bound)
print("Ligand SASA difference =", sasa_free - sasa_bound)
Ligand SASA (free) = 767.8208065148369
Ligand SASA (bound) = 68.71464272335984
Ligand SASA difference = 699.1061637914771

Let's measure the timing of this function call. I suspect this is not particularly fast because the code needs to compute SASA of all protein atoms as well. Perhaps we can make this faster by only computing SASA within certain distance from the ligand atoms.

%timeit compute_ligand_sasa_pocket(prot, lig)
323 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
def compute_ligand_sasa_pocket_cutoff(prot, lig, cutoff=8):
    lig_conf = lig.GetConformer()
    lig_xyz = lig_conf.GetPositions()

    prot_conf = prot.GetConformer()
    prot_xyz = conf.GetPositions()

    # minimum distance between protein atoms and ligand atoms
    r = np.min(np.linalg.norm(prot_xyz[:, np.newaxis, :] - lig_xyz[np.newaxis, :, :], axis=2), axis=1)

    indices = np.argwhere(r > cutoff).flatten()
    mol = Chem.RWMol(prot)
    for idx in sorted(indices, reverse=True):
        mol.RemoveAtom(int(idx))

    return compute_ligand_sasa_pocket(mol, lig)
sasa_free = compute_sasa(lig)
sasa_bound = compute_ligand_sasa_pocket_cutoff(prot, lig, 5)

print("Ligand SASA (free) =", sasa_free)
print("Ligand SASA (bound) =", sasa_bound)
print("Ligand SASA difference =", sasa_free - sasa_bound)
Ligand SASA (free) = 767.8208065148369
Ligand SASA (bound) = 89.35880016654482
Ligand SASA difference = 678.4620063482921

With the cutoff distance of 5 Å, we get slightly larger SASA value in the bound state. We can improve this by increasing the cutoff distance.

cutoff_values = [5, 6, 7, 8, 9, 10]
sasa_bound_values = [compute_ligand_sasa_pocket_cutoff(prot, lig, c) for c in cutoff_values]

plt.plot(cutoff_values, sasa_bound_values)
plt.xlabel('Cutoff')
plt.ylabel('SASA')
plt.show()
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> 2021-02-05T11:44:10.373090 image/svg+xml Matplotlib v3.3.2, https://matplotlib.org/

The SASA value converged after 7 Å cutoff. Let's compute the function call speed using 8 Å as cutoff.

%timeit compute_ligand_sasa_pocket_cutoff(prot, lig, 8)
229 ms ± 20.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

In my laptop, I got 323 ms (without cutoff) vs 229 ms (with cutoff), which represents about 40% speedup.