## Copyright (c) 2017-2022, James Gayvert, Ruslan Tazhigulov, Ksenia Bravaya
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import networkx as nx
import numpy as np
from .data import SB_means, SB_std_dev, clusters, metal_ligands
from .structures import cleanup_bonding, remove_side_chains
def is_pi_bonded(cur_atom, next_atom):
"""Determines whether two atoms are pi bonded based on experimental bond lengths.
Parameters
----------
cur_atom: BioPython Atom object
next_atom: BioPython Atom object
The two atoms being considered
Returns
-------
True: if within 3 standard deviations of equilibrium pi bond length
False: Otherwise
References
----------
Haynes, W. M. (Ed.). (2014). CRC Handbook of Chemistry and Physics. CRC Press.
Reference for equilibrium pi bond lengths and standard deviations
"""
x1 = cur_atom.coord[0]
y1 = cur_atom.coord[1]
z1 = cur_atom.coord[2]
x2 = next_atom.coord[0]
y2 = next_atom.coord[1]
z2 = next_atom.coord[2]
v1 = np.array((x1, y1, z1))
v2 = np.array((x2, y2, z2))
bond = str(cur_atom.element.upper()) + str(next_atom.element.upper())
cutoff = SB_means.get(bond)
if cutoff:
cutoff -= 2 * SB_std_dev.get(bond)
return dist(v1, v2) <= cutoff
else:
return False
def dist(x, y):
"""Returns Euclidean distance between two 3 dimensional points.
Parameters
----------
x: 3D numpy array
Point 1
y: 3D numpy array
Point 2
Returns
-------
float:
Euclidean distance between two points
"""
return np.sqrt(np.sum((x - y)**2))
[docs]
def find_conjugated_systems(atoms, res_names):
"""Finds conjugated systems within a BioPython residue object, and returns them as individual customized BioPython
Residue objects.
Parameters
---------
atoms: array-like
List of atoms in the residue
res_names: arary-like
List of already used names for custom residues
Returns
-------
custom_res_list: array-like
List of customized :class:`Bio.PDB.Residue.Residue`
"""
# first let's create the chemical graph structure
# only these elements will be considered in our search
arom_atoms = ['O', 'P', 'N', 'C', 'S']
cust_graph = nx.Graph()
for i in range(len(atoms)):
for k in range(i, len(atoms)):
if (not i == k) and is_pi_bonded(atoms[i], atoms[k]):
if atoms[i].element in arom_atoms and atoms[k].element in arom_atoms:
cust_graph.add_edge(i, k)
cust_graph.nodes[i]["element"] = atoms[i].element
cust_graph.nodes[i]["coords"] = atoms[i].coord
cust_graph.nodes[k]["element"] = atoms[k].element
cust_graph.nodes[k]["coords"] = atoms[k].coord
# now we have a forest, let's get each individual tree (only take cycles or bigger than 10)
subgraphs = []
all_subgraphs = (cust_graph.subgraph(c) for c in nx.connected_components(cust_graph))
for sub_g in all_subgraphs:
graph = sub_g.copy()
if nx.cycle_basis(graph):
cleanup_bonding(graph)
remove_side_chains(graph)
subgraphs.append(graph)
elif len(graph.nodes()) >= 10:
subgraphs.append(graph)
custom_res_list = []
count = 1
# iterate over all of the subgraphs
for graph in subgraphs:
res_name = str(atoms[0].parent.get_resname()) + str(atoms[0].get_full_id()[3][1]) + "(" + str(
atoms[0].get_full_id()[2]) + ")"
res_name = res_name.strip()
if len(subgraphs) > 1:
res_name += '-' + str(count)
while res_name in res_names:
count += 1
res_name = res_name.split('-')[0] + '-' + str(count)
atm_list = []
for node in graph.nodes():
atm_list.append(atoms[node])
custom_res_list.append(create_custom_residue(atm_list, res_name))
res_names.append(res_name)
return custom_res_list
def create_custom_residue(atm_list, res_name):
"""Generates customized BioPython residue object corresponding to an ETA moiety.
Parameters
----------
atm_list: array-like
List of atoms to be included
res_name: str
Name of custom residue
Returns
-------
custom_residue: BioPython Residue Object
customized BioPython residue object corresponding to a conjugated system
"""
# copy residue, assign resname
atm_list[0].parent.get_full_id() # initialize instance variables
custom_res = atm_list[0].get_parent().copy()
custom_res.resname = res_name
custom_res.node_label = res_name
# get serial numbers of aromatic atoms
atm_serial_list = []
for atm in atm_list:
atm_serial_list.append(atm.serial_number)
# remove atoms not in aromatic ring by serial number
atm_list = list(custom_res.get_atoms())
for k in range(0, len(atm_list)):
if atm_list[k].serial_number not in atm_serial_list:
custom_res.detach_child(atm_list[k].id)
return custom_res
[docs]
def process_custom_residues(non_standard_residue_list):
"""Identifies and returns customized Bio.PDB.Residue objects corresponding to electron transfer active moieties.
Parameters
---------
non_standard_residue_list: list of :class:`Bio.PDB.Residue.Residue`
List of non-protein residues in the structure
Returns
------
custom_res: list of :class:`Bio.PDB.Residue.Residue`
List of customized BioPython residue objects corresponding to electron transfer active moieties that are not part of
standard protein residues
"""
res_names = []
custom_res = []
for residue in non_standard_residue_list:
atm_list = list(residue.get_atoms())
conj_systems = find_conjugated_systems(atm_list, res_names)
if conj_systems:
for system in conj_systems:
res_names.append(system.resname)
custom_res += conj_systems
# Clusters
for residue in non_standard_residue_list:
if residue.resname in clusters or residue.resname.upper() in metal_ligands:
atm_list = list(residue.get_atoms())
res_name = str(atm_list[0].parent.get_resname()) + str(atm_list[0].get_full_id()[3][1]) + "(" + str(
atm_list[0].get_full_id()[2]) + ")"
custom_res.append(create_custom_residue(atm_list, res_name))
return custom_res