Source code for neurosnap.algos.alphafold

"""
Variables, functions, and classes for working with AlphaFold2 predictions.
"""

import numpy as np
from typing import Optional, Dict



[docs]
def score_af2m_binding(af2m_dict: Dict[str, object], binder_len: int, target_len: Optional[int] = None) -> Dict[str, float]:
  """Calculate binding scores from AlphaFold2 multimer prediction results.

  The binder is assumed to be the first part of the sequence up to `binder_len`,
  with the target being the remainder, unless otherwise specified.
  Adapted from: https://github.com/hgbrian/biomodals/blob/990c010e711c1e8a7221294e0370c6f37927eae6/modal_alphafold.py#L33

  Parameters:
    af2m_dict: Parsed contents of an AlphaFold2 multimer JSON file. Must provide a
      ``"plddt"`` entry (indexed per residue) and a ``"pae"`` entry (indexed as a
      residue-by-residue matrix).
    binder_len: Length of the binder protein sequence
    target_len: Length of the target protein sequence. When omitted, the target is
      everything that follows the binder.

  Returns:
    A dictionary containing the following keys:

      - plddt_binder: Average pLDDT score for the binder.
      - plddt_target: Average pLDDT score for the target.
      - pae_binder: Average PAE score within the binder.
      - pae_target: Average PAE score within the target.
      - ipae: Average PAE score for the binder-target interaction.

  Raises:
    KeyError: If ``"plddt"`` or ``"pae"`` is missing from `af2m_dict`.
    ValueError: If `binder_len` / `target_len` select an empty binder or target
      region, which would otherwise silently yield NaN scores.

  """
  target_end = (binder_len + target_len) if target_len is not None else None
  binder = slice(None, binder_len)
  target = slice(binder_len, target_end)

  plddt_array = np.array(af2m_dict["plddt"])
  pae_array = np.array(af2m_dict["pae"])

  # Every region that gets averaged, keyed by a human-readable label for error reporting.
  regions = {
    "binder pLDDT": plddt_array[binder],
    "target pLDDT": plddt_array[target],
    "binder PAE": pae_array[binder, binder],
    "target PAE": pae_array[target, target],
    "binder-to-target PAE": pae_array[binder, target],
    "target-to-binder PAE": pae_array[target, binder],
  }

  # np.mean of an empty selection only warns and returns NaN; fail loudly instead so
  # that a wrong length does not propagate as a NaN score.
  empty = [label for label, values in regions.items() if values.size == 0]
  if empty:
    raise ValueError(
      f"binder_len={binder_len} and target_len={target_len} select no values for: {', '.join(empty)}"
    )

  # The interface PAE averages both off-diagonal blocks (binder->target and target->binder).
  ipae = np.mean(
    [
      np.mean(regions["binder-to-target PAE"]),
      np.mean(regions["target-to-binder PAE"]),
    ]
  )

  return {
    "plddt_binder": float(np.mean(regions["binder pLDDT"])),
    "plddt_target": float(np.mean(regions["target pLDDT"])),
    "pae_binder": float(np.mean(regions["binder PAE"])),
    "pae_target": float(np.mean(regions["target PAE"])),
    "ipae": float(ipae),
  }