Source code for pyemap.custom_residues

## Copyright (c) 2017-2022, James Gayvert, Ruslan Tazhigulov, Ksenia Bravaya
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import networkx as nx
import numpy as np
from .data import SB_means, SB_std_dev, clusters, metal_ligands
from .structures import cleanup_bonding, remove_side_chains


def is_pi_bonded(cur_atom, next_atom):
    """Determines whether two atoms are pi bonded based on experimental bond lengths.

    Parameters
    ----------
    cur_atom: BioPython Atom object
    next_atom: BioPython Atom object
        The two atoms being considered

    Returns
    -------
    True: if within 3 standard deviations of equilibrium pi bond length
    False: Otherwise

    References
    ----------
    Haynes, W. M. (Ed.). (2014). CRC Handbook of Chemistry and Physics. CRC Press.
        Reference for equilibrium pi bond lengths and standard deviations

    """
    x1 = cur_atom.coord[0]
    y1 = cur_atom.coord[1]
    z1 = cur_atom.coord[2]
    x2 = next_atom.coord[0]
    y2 = next_atom.coord[1]
    z2 = next_atom.coord[2]
    v1 = np.array((x1, y1, z1))
    v2 = np.array((x2, y2, z2))
    bond = str(cur_atom.element.upper()) + str(next_atom.element.upper())
    cutoff = SB_means.get(bond)
    if cutoff:
        cutoff -= 2 * SB_std_dev.get(bond)
        return dist(v1, v2) <= cutoff
    else:
        return False


def dist(x, y):
    """Returns Euclidean distance between two 3 dimensional points.

    Parameters
    ----------
    x: 3D numpy array
        Point 1
    y: 3D numpy array
        Point 2

    Returns
    -------
    float:
        Euclidean distance between two points

    """
    return np.sqrt(np.sum((x - y)**2))


[docs] def find_conjugated_systems(atoms, res_names): """Finds conjugated systems within a BioPython residue object, and returns them as individual customized BioPython Residue objects. Parameters --------- atoms: array-like List of atoms in the residue res_names: arary-like List of already used names for custom residues Returns ------- custom_res_list: array-like List of customized :class:`Bio.PDB.Residue.Residue` """ # first let's create the chemical graph structure # only these elements will be considered in our search arom_atoms = ['O', 'P', 'N', 'C', 'S'] cust_graph = nx.Graph() for i in range(len(atoms)): for k in range(i, len(atoms)): if (not i == k) and is_pi_bonded(atoms[i], atoms[k]): if atoms[i].element in arom_atoms and atoms[k].element in arom_atoms: cust_graph.add_edge(i, k) cust_graph.nodes[i]["element"] = atoms[i].element cust_graph.nodes[i]["coords"] = atoms[i].coord cust_graph.nodes[k]["element"] = atoms[k].element cust_graph.nodes[k]["coords"] = atoms[k].coord # now we have a forest, let's get each individual tree (only take cycles or bigger than 10) subgraphs = [] all_subgraphs = (cust_graph.subgraph(c) for c in nx.connected_components(cust_graph)) for sub_g in all_subgraphs: graph = sub_g.copy() if nx.cycle_basis(graph): cleanup_bonding(graph) remove_side_chains(graph) subgraphs.append(graph) elif len(graph.nodes()) >= 10: subgraphs.append(graph) custom_res_list = [] count = 1 # iterate over all of the subgraphs for graph in subgraphs: res_name = str(atoms[0].parent.get_resname()) + str(atoms[0].get_full_id()[3][1]) + "(" + str( atoms[0].get_full_id()[2]) + ")" res_name = res_name.strip() if len(subgraphs) > 1: res_name += '-' + str(count) while res_name in res_names: count += 1 res_name = res_name.split('-')[0] + '-' + str(count) atm_list = [] for node in graph.nodes(): atm_list.append(atoms[node]) custom_res_list.append(create_custom_residue(atm_list, res_name)) res_names.append(res_name) return custom_res_list
def create_custom_residue(atm_list, res_name): """Generates customized BioPython residue object corresponding to an ETA moiety. Parameters ---------- atm_list: array-like List of atoms to be included res_name: str Name of custom residue Returns ------- custom_residue: BioPython Residue Object customized BioPython residue object corresponding to a conjugated system """ # copy residue, assign resname atm_list[0].parent.get_full_id() # initialize instance variables custom_res = atm_list[0].get_parent().copy() custom_res.resname = res_name custom_res.node_label = res_name # get serial numbers of aromatic atoms atm_serial_list = [] for atm in atm_list: atm_serial_list.append(atm.serial_number) # remove atoms not in aromatic ring by serial number atm_list = list(custom_res.get_atoms()) for k in range(0, len(atm_list)): if atm_list[k].serial_number not in atm_serial_list: custom_res.detach_child(atm_list[k].id) return custom_res
[docs] def process_custom_residues(non_standard_residue_list): """Identifies and returns customized Bio.PDB.Residue objects corresponding to electron transfer active moieties. Parameters --------- non_standard_residue_list: list of :class:`Bio.PDB.Residue.Residue` List of non-protein residues in the structure Returns ------ custom_res: list of :class:`Bio.PDB.Residue.Residue` List of customized BioPython residue objects corresponding to electron transfer active moieties that are not part of standard protein residues """ res_names = [] custom_res = [] for residue in non_standard_residue_list: atm_list = list(residue.get_atoms()) conj_systems = find_conjugated_systems(atm_list, res_names) if conj_systems: for system in conj_systems: res_names.append(system.resname) custom_res += conj_systems # Clusters for residue in non_standard_residue_list: if residue.resname in clusters or residue.resname.upper() in metal_ligands: atm_list = list(residue.get_atoms()) res_name = str(atm_list[0].parent.get_resname()) + str(atm_list[0].get_full_id()[3][1]) + "(" + str( atm_list[0].get_full_id()[2]) + ")" custom_res.append(create_custom_residue(atm_list, res_name)) return custom_res