Source code for neurosnap.algos.alphafold
"""
Variables, functions, and classes associated with AlphaFold2.
"""
import numpy as np
from typing import Optional, Dict
[docs]
def score_af2m_binding(af2m_dict: dict, binder_len: int, target_len: Optional[int] = None) -> Dict[str, float]:
    """Calculate binding scores from AlphaFold2 multimer prediction results.

    The binder is assumed to be the first part of the sequence up to `binder_len`,
    with the target being the remainder, unless otherwise specified.

    Adapted from: https://github.com/hgbrian/biomodals/blob/990c010e711c1e8a7221294e0370c6f37927eae6/modal_alphafold.py#L33

    Parameters:
        af2m_dict: Parsed AlphaFold2 multimer JSON file. Must provide a ``"plddt"``
            entry (indexed per residue) and a ``"pae"`` entry (indexed as
            ``[residue, residue]``).
        binder_len: Length of the binder protein sequence. Must be positive and
            smaller than the number of residues in ``"plddt"``.
        target_len: Length of the target protein sequence. When ``None`` the
            target is everything after the binder. When given, it must be
            positive and ``binder_len + target_len`` must not exceed the number
            of residues.

    Returns:
        A dictionary containing the following keys:
            - plddt_binder: Average pLDDT score for the binder.
            - plddt_target: Average pLDDT score for the target.
            - pae_binder: Average PAE score within the binder.
            - pae_target: Average PAE score within the target.
            - ipae: Average PAE score for the binder-target interaction.

    Raises:
        ValueError: If `binder_len` or `target_len` describe an empty or
            out-of-range region of the prediction.
    """
    plddt_array = np.asarray(af2m_dict["plddt"])
    pae_array = np.asarray(af2m_dict["pae"])
    n_residues = len(plddt_array)

    # Reject lengths that would otherwise produce empty slices (NaN means),
    # negative-index slices, or silently truncated target regions.
    if not 0 < binder_len < n_residues:
        raise ValueError(
            f"binder_len must be between 1 and {n_residues - 1} (got {binder_len})."
        )
    if target_len is None:
        target_end = None  # target runs to the end of the sequence
    else:
        target_end = binder_len + target_len
        if target_len <= 0 or target_end > n_residues:
            raise ValueError(
                f"target_len must be between 1 and {n_residues - binder_len} (got {target_len})."
            )

    binder = slice(None, binder_len)
    target = slice(binder_len, target_end)

    # pLDDT
    plddt_binder = np.mean(plddt_array[binder])
    plddt_target = np.mean(plddt_array[target])

    # PAE
    pae_binder = np.mean(pae_array[binder, binder])
    pae_target = np.mean(pae_array[target, target])
    # Interface PAE: average of the two off-diagonal (binder<->target) blocks.
    ipae = (np.mean(pae_array[binder, target]) + np.mean(pae_array[target, binder])) / 2

    return {
        "plddt_binder": float(plddt_binder),
        "plddt_target": float(plddt_target),
        "pae_binder": float(pae_binder),
        "pae_target": float(pae_target),
        "ipae": float(ipae),
    }