## Copyright (c) 2017-2022, James Gayvert, Ruslan Tazhigulov, Ksenia Bravaya
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Processes parsed .pdb/.mmcif file, and generates a graph based on user selected options.
Collects requested residues and constructs a distance matrix, from which the graph is generated. Edges are filtered out based
on user specifications. Finally, the selected criteria for surface exposure is used to classify residues as buried or exposed.
Results are stored in the emap object which was passed in.
"""
import sys
import Bio.PDB
import networkx as nx
import numpy as np
from Bio.PDB.DSSP import DSSP
from Bio.PDB.ResidueDepth import get_surface, residue_depth
from scipy.spatial import distance_matrix
import math
from collections import OrderedDict
import warnings
from .data import res_name_to_char, side_chain_atoms, char_to_res_name
from .pyemap_exceptions import *
from .utils import validate_binary_params
# Monkey patches detach self to save original ID upon re-assignment to custom residue
def detach_parent(self):
if self.parent:
self.original_id = self.parent.full_id
self.parent = None
Bio.PDB.Atom.Atom.detach_parent = detach_parent
# monkey patches mangling of disordered atoms
def get_unpacked_list(self):
"""
Returns all atoms from the residue,
in case of disordered, keep only first alt loc and remove the alt-loc tag
"""
atom_list = self.get_list()
undisordered_atom_list = []
for atom in atom_list:
if atom.is_disordered():
atom.altloc = " "
undisordered_atom_list.append(atom)
else:
undisordered_atom_list.append(atom)
return undisordered_atom_list
Bio.PDB.Residue.Residue.get_unpacked_list = get_unpacked_list
[docs]
def pathways_model(dist, coef_alpha, exp_beta, r_offset):
"""Applies penalty function parameters and returns score.
:math:`\\epsilon =\\alpha \\exp(-\\beta(R-R_{offset}))`
Parameters
----------
dist: float
Actual distance in angstroms
coef_alpha,exp_beta,r_offset:float
Penalty function parameters
Returns
-------
mod_penalty: float
"""
penalty = coef_alpha * np.exp(-exp_beta * (dist - r_offset))
mod_penalty = -np.log10(penalty)
return mod_penalty
[docs]
def calculate_residue_depth(model, aromatic_residues, rd_cutoff):
"""Returns a list of surface exposed residues as determined by residue depth.
Parameters
----------
filename: str
Name of pdb file to be analyzed
aromatic_residues: list of :class:`Bio.PDB.Residue.Residue`
residues included in the analysis
rd_cutoff: float
Cutoff for buried/surface exposed
Returns
-------
surface_exposed_res: list of str
List of residue names corresponding to the surface exposed residues
"""
try:
surface = get_surface(model)
cutoff = rd_cutoff
surface_exposed_res = []
for residue in aromatic_residues:
depth = residue_depth(residue, surface)
if (depth <= cutoff):
surface_exposed_res.append(residue.node_label)
return surface_exposed_res
except Exception:
warnings.warn("Unable to calculate residue depth. Check that MSMS is installed.", RuntimeWarning, stacklevel=2)
return []
[docs]
def calculate_rsa(filename, model, node_list, rsa_cutoff):
"""Returns a list of surface exposed residues as determined by relative solvent accessibility.
Only standard protein residues are currently supported. Non-protein and user specified custom residues cannot be
classified as surface exposed using this criteria.
Parameters
---------
filename: str
Name of pdb file to be analyzed
model: :class:`Bio.PDB.Model.Model`
Model under analysis
node_list : list of str
List containing which standard residues are included in analysis
rsa_cutoff: float
Cutoff for buried/surface exposed
References
---------
Tien, M. Z.; Meyer, A. G.; Sydykova, D. K.; Spielman, S. J.; Wilke, C. O. PLoS ONE 2013, 8 (11).
Reference for relative solvent accessibility cutoff of 0.05, and for MaxASA values
"""
surface_exposed_res = []
try:
dssp = DSSP(model, filename, acc_array="Wilke")
for key in dssp.keys():
goal_str = dssp[key][1] + str(key[1][1]) + "(" + str(key[0]) + ")"
if goal_str in node_list and dssp[key][3] >= rsa_cutoff:
surface_exposed_res.append(goal_str)
except Exception:
warnings.warn("Unable to calculate solvent accessibility. Check that DSSP is installed.",
RuntimeWarning,
stacklevel=2)
return surface_exposed_res
def dist(x, y):
"""Returns Euclidean distance between two 3 dimensional points.
Parameters
----------
x: 3D numpy array of float
Point 1
y: 3D numpy array of float
Point 2
Returns
-------
float:
Euclidean distance between two points
"""
return np.sqrt(np.sum((x - y)**2))
def get_atom_list(res):
""" Recovers side chain atoms only of standard aromatic residues, returns all atoms for other residues.
Parameters
-----------
res: :class:`Bio.PDB.Residue.Residue`
A BioPython residue object
Returns
-------
atom_list: list of :class:`Bio.PDB.Atom.Atom`
List of atoms to be used in distance matrix calculation
"""
if res.resname in side_chain_atoms:
atom_list = []
sca = side_chain_atoms[res.resname]
for atm in res:
if atm.name in sca:
atom_list.append(atm)
return atom_list
else:
return res.get_atoms()
def get_full_atom_distance_matrix(residues):
atm_d = []
atoms_per_res = []
for res in residues:
res.get_full_id()
atoms = get_atom_list(res)
num_atoms = 0
for cur_atom in atoms:
num_atoms += 1
atm_d.append(np.array([cur_atom.coord[0], cur_atom.coord[1], cur_atom.coord[2]]))
atoms_per_res.append(num_atoms)
return distance_matrix(atm_d, atm_d), atoms_per_res
def closest_atom_dmatrix(residues):
"""Constructs distance matrix based on closest atom distance.
Parameters
----------
residues: list of :class:`Bio.PDB.Residue.Residue`
List of BioPython residues
coef_alpha,exp_beta,r_offset:float
Penalty funciton parameters
Returns
-------
distance_matrix: numpy.array of float
Distance matrix of residues
"""
dmat, atoms_per_res = get_full_atom_distance_matrix(residues)
distance_matrix = np.zeros((len(residues), len(residues)))
slice1_idx = 0
for i in range(0, len(residues)):
slice2_idx = slice1_idx + atoms_per_res[i]
slice3_idx = slice2_idx
for j in range(i + 1, len(residues)):
slice4_idx = slice3_idx + atoms_per_res[j]
my_slice = dmat[slice1_idx:slice2_idx, slice3_idx:slice4_idx]
min_val = np.min(my_slice)
distance_matrix[i][j] = min_val
distance_matrix[j][i] = min_val
slice3_idx = slice4_idx
slice1_idx = slice2_idx
return distance_matrix
def com_dmatrix(residues):
"""Constructs distance matrix based on distances between centers of mass.
Parameters
----------
residues: list of :class:`Bio.PDB.Residue.Residue`
List of residues to be included in analysis
Returns
-------
distance_matrix: numpy array of :class:`Bio.PDB.Residue.Residue`
Distance matrix of residues
"""
com_d = []
# Compute COMs (x0, y0, z0) of side-chains
for res in residues:
res.get_full_id()
atoms = get_atom_list(res)
x_wsum, y_wsum, z_wsum, mass_sum = 0.0, 0.0, 0.0, 0.0
for cur_atom in atoms:
mass = cur_atom.mass
x = cur_atom.coord[0]
y = cur_atom.coord[1]
z = cur_atom.coord[2]
x_wsum += x * mass
y_wsum += y * mass
z_wsum += z * mass
mass_sum += mass
com_x = x_wsum / mass_sum
com_y = y_wsum / mass_sum
com_z = z_wsum / mass_sum
com_d.append(np.array([com_x, com_y, com_z]))
return distance_matrix(com_d, com_d)
def process_standard_residues(standard_residue_list):
"""Generates customized Bio.PDB.Residue.Residue objects. Residues containing no side chain atoms will be removed.
Parameters
----------
standard_residue_list: list of :class:`Bio.PDB.Residue.Residue`
List of Bio.PDB.Residue.Residue objects corresponding to protein residues
Returns
-------
res_list: list of :class:`Bio.PDB.Residue.Residue`
"""
res_list = []
for res in standard_residue_list:
res_letter = res_name_to_char.get(res.resname)
chain = res.full_id[2]
resnum = res.full_id[3][1]
res.node_label = res_letter + str(resnum) + "(" + chain + ")"
valid = False
for atm in res:
sca = side_chain_atoms[res.resname]
if atm.name in sca:
valid = True
break
if valid:
res_list.append(res)
else:
warnings.warn("The record for residue " + res.node_label +
" did not contain any of the following side chain atoms: " + str(sca) +
" and therefore is not included in the graph.",
RuntimeWarning,
stacklevel=2)
return res_list
def create_user_res(serial_list, used_atoms, serial_dict, user_res_names):
"""Creates a customized BioPython Residue object corresponding to a user specified residue.
Parameters
----------
serial_list: list of int
List of atom serial numbers included in residue
used_atoms: list of :class:`Bio.PDB.Atom.Atom`
Atoms already included in analysis
serial_dict: dict of int, :class:`Bio.PDB.Atom.Atom`
Dictionary of serial numbers and :class:`Bio.PDB.Atom.Atom` objects
user_res_names: list of str
User residue names already included in the analysis.
Returns
-------
user_res: :class:`Bio.PDB.Residue.Residue`
Customized residue object corresponding to the atoms in serial_list
"""
source_res = serial_dict[serial_list[0]].parent
source_res.get_full_id()
user_res = source_res.copy()
for atm in list(source_res.get_atoms()):
user_res.detach_child(atm.id)
for serial_number in serial_list:
if serial_number in used_atoms:
message = "Invalid atom serial number range. Atom " + str(
serial_number) + " is already included in another residue."
raise PyeMapUserResidueException(message)
if serial_number not in serial_dict:
message = str(serial_number) + " is not a valid serial number."
raise PyeMapUserResidueException(message)
user_res.add(serial_dict[serial_number])
k = 1
name = "CUST"
name += "-"
while name + str(k) in user_res_names:
k += 1
user_res_names.append(name + str(k))
user_res.resname = name + str(k)
user_res.node_label = user_res.resname
return user_res
def get_standard_residues(all_residues, chain_list, res_names):
"""Returns list of standard aromatic residues.
Runs through every residue in the structure, keeping only the TRP, TYR
(optionally PHE and HIS if specified by user) residues on the chains
that were chosen by the user.
Parameters
----------
all_residues: iterator of :class:`Bio.PDB.Residue.Residue`
List of every residue in structure
chain_list: list of str
Chains to be included in analysis
res_names: list of str
Included amino acids specified by 3 letter code
Returns
-------
residue_list: list of :class:`Bio.PDB.Residue.Residue`
Residues to be included in analysis
"""
residue_list = []
for res in all_residues:
if res.resname in res_names and res.parent.id in chain_list:
res.get_full_id()
arom_res = res.copy()
arom_res.get_full_id()
residue_list.append(arom_res)
residue_list = process_standard_residues(residue_list)
return residue_list
def get_user_residues(custom, used_atoms, serial_dict):
"""Generates customized Bio.PDB.Residue.Residue objects from a custom atom string.
Users may not choose atoms that are already part of standard residues or eta moieties, nor those that are not
part of the selected chains.
Parameters
----------
custom: str
Specified by user to select atoms for custom residues
used_atoms: list of :class:`Bio.PDB.Atom.Atom`
Atoms already included in analysis
serial_dict: dict of int, :class:`Bio.PDB.Atom.Atom`
Dictionary of serial numbers and :class:`Bio.PDB.Atom.Atom` objects
Returns
-------
res_list: list of :class:`Bio.PDB.Residue.Residue`
Custom residues specified by user
Notes
-----
The custom atom string syntax is as follows:
Custom residues are specified based on PDB atom serial number. Ranges are specified with a dash (ex. 10-20).
Individual atoms may also be selected, separated by commas (ex. 1-10,15,17). To select multiple custom residues,
the user encloses the atom ranges with parentheses, and then separate them with commas. In the graph and
visualization, these residues are named CUS-1, CUS-2 etc. based on the order they were specified.
"""
user_res_names = []
custom = custom.strip()
if custom:
res_list = []
l1 = custom.split("),(")
for a in l1:
serial_number_list = []
# get rid of the (
atm_str = a[:]
atm_str = atm_str.replace("(", '')
atm_str = atm_str.replace(")", '')
l2 = atm_str.split(",")
for atm in l2:
if "-" in atm:
index = atm.index("-")
start_atm = int(atm[:index])
end_atm = atm[index + 1:]
end_atm = end_atm.replace(")", "")
end_atm = int(end_atm)
for i in range(start_atm, end_atm + 1):
serial_number_list.append(i)
else:
serial_number_list.append(int(atm))
if serial_number_list != []:
serial_number_list = sorted(list(OrderedDict.fromkeys(serial_number_list)))
new_res = create_user_res(serial_number_list, used_atoms, serial_dict, user_res_names)
else:
raise UserResidueException("Invalid atom serial number range. See the manual for proper syntax.")
res_list.append(new_res)
for atm in new_res:
used_atoms.append(atm.serial_number)
return res_list
return []
def finish_graph(G, surface_exposed_res):
"""Sets surface exposed residues as boxes in graph and removes disconnected vertices.
Parameters
----------
G: :class:`networkx.graph`
Graph object constructed from distance matrix of residues
surface_exposed_res: list of str
Names of surface exposed residues
"""
for goal in surface_exposed_res:
G.nodes[goal]['margin'] = '0.11'
G.nodes[goal]['shape'] = 'box'
# get rid of all disconnected nodes
all_nodes = list(G.nodes())
for node in all_nodes:
if G[node] == {}:
G.remove_node(node)
[docs]
def filter_by_percent(G, percent_edges, num_st_dev_edges, distance_cutoff, coef_alpha, exp_beta, r_offset):
included_edges = []
minval = min(dict(G.edges).items(), key=lambda x: x[1]['weight'])[1]['weight']
# should never happen, but just in case
if minval == 0:
minval = 1
for _, _, d in G.edges(data=True):
d['distance'] = d['weight']
d['weight'] = pathways_model(d['weight'], coef_alpha, exp_beta, r_offset)
d['len'] = d['weight'] / minval # scaling factor for prettier graphs
for node in G.nodes():
edge_length_per_node = []
weights = []
for neighbor in G[node]:
weights.append(G.edges[(node, neighbor)]['weight'])
weights = sorted(weights)
thresh_index = math.ceil(len(weights) * percent_edges / 100)
for neighbor in G[node]:
if weights.index(G.edges[(node, neighbor)]['weight']) <= thresh_index and \
G.edges[(node, neighbor)]['distance'] <= distance_cutoff:
edge_length_per_node.append(G.edges[(node, neighbor)]['weight'])
len_average, len_st_dev = 0.0, 0.0
if edge_length_per_node != []:
edge_length_per_node = np.array(edge_length_per_node, dtype='float64')
len_average = np.average(edge_length_per_node)
len_st_dev = np.std(edge_length_per_node)
for neighbor in G[node]:
if weights.index(G.edges[(node, neighbor)]['weight']) <= thresh_index and \
G.edges[(node, neighbor)]['distance'] <= distance_cutoff and \
np.round(G.edges[(node, neighbor)]['weight'], 8) <= np.round((len_average + num_st_dev_edges * len_st_dev), 8):
included_edges.append([node, neighbor])
excluded_edges = []
for edge in G.edges():
node1 = edge[0]
node2 = edge[1]
if ([node1, node2] not in included_edges) and ([node2, node1] not in included_edges):
excluded_edges.append(edge)
G.remove_edges_from(excluded_edges)
[docs]
def filter_by_degree(G, max_degree, distance_cutoff, coef_alpha, exp_beta, r_offset):
minval = min(dict(G.edges).items(), key=lambda x: x[1]['weight'])[1]['weight']
# should never happen, but just in case
if minval == 0:
minval = 1
remove_edges = []
# impose hard cutoff, collect other edge weights
for u, v, d in G.edges(data=True):
if d['weight'] > distance_cutoff:
remove_edges.append((u, v))
else:
d['distance'] = d['weight']
d['weight'] = pathways_model(d['weight'], coef_alpha, exp_beta, r_offset)
d['len'] = d['weight'] / minval # scaling factor for prettier graphs
G.remove_edges_from(remove_edges)
remove_edges = []
for node in G.nodes:
if G.degree(node) > max_degree:
for edge in sorted(list(G.edges(node)), key=lambda x: G.edges[x]['distance'])[max_degree:]:
if edge not in remove_edges and edge[::-1] not in remove_edges:
remove_edges.append(edge)
remove_edges = sorted(remove_edges, key=lambda x: G.edges[x]['distance'])
for u, v in remove_edges:
if G.degree(u) > max_degree or G.degree(v) > max_degree:
G.remove_edge(u, v)
[docs]
def create_graph(dmatrix, node_labels, edge_prune, coef_alpha, exp_beta, r_offset, distance_cutoff, percent_edges,
num_st_dev_edges, max_degree, eta_moieties):
"""Constructs the graph from the distance matrix and node labels.
Parameters
----------
dmatrix: numpy.array of float
Distance matrix of aromatic residues.
node_label: list of str
Labels for residues in the graph.
edge_prune: int
0 for degree, 1 for percent
residue_numbers:
res numbers
distance_cutoff,max_degree: float
Parameters that determine which edges are kept.
eta_moieties: list of str
Non standard residues that were automatically identified
Returns
-------
G: :class:`networkx.Graph`
Graph of aromatic residues in protein
References
----------
Gray, H. B.; Winkler, J. R. Long-Range Electron Transfer. Proc. Natl. Acad. Sci. U. S. A. 2005, 102 (10),
3534 LP-3539.
Reference for 20A filter on edges
"""
np.set_printoptions(threshold=sys.maxsize)
G = nx.from_numpy_array(dmatrix)
G = nx.relabel_nodes(G, node_labels)
if edge_prune == 'DEGREE':
filter_by_degree(G, max_degree, distance_cutoff, coef_alpha, exp_beta, r_offset)
elif edge_prune == 'PERCENT':
filter_by_percent(G, percent_edges, num_st_dev_edges, distance_cutoff, coef_alpha, exp_beta, r_offset)
else:
raise PyeMapGraphException("Invalid choice of edge_prune. Must be set to 'DEGREE' or 'PERCENT'.")
for name_node in G.nodes():
G.nodes[name_node]['style'] = 'filled'
G.nodes[name_node]['fontname'] = 'Helvetica-Bold'
G.nodes[name_node]['fontsize'] = 32
G.nodes[name_node]['shape'] = "oval"
G.nodes[name_node]['margin'] = '0.04'
G.nodes[name_node]['fontcolor'] = "#000000"
G.nodes[name_node]['color'] = '#708090'
G.nodes[name_node]['penwidth'] = 2.0
G.nodes[name_node]['label'] = name_node
if (name_node[1].isdigit()):
if name_node not in eta_moieties:
if 'Y' == name_node[0]:
G.nodes[name_node]['fillcolor'] = '#96c8f0'
elif 'W' == name_node[0]:
G.nodes[name_node]['fillcolor'] = '#f07878'
elif 'F' == name_node[0]:
G.nodes[name_node]['fillcolor'] = '#f09664'
elif 'H' == name_node[0]:
G.nodes[name_node]['fillcolor'] = '#c8f0c8'
else:
G.nodes[name_node]['fillcolor'] = '#708090'
else:
G.nodes[name_node]['fillcolor'] = '#FFC0CB'
else:
G.nodes[name_node]['fillcolor'] = '#FFC0CB'
for edge in G.edges():
name_node1, name_node2 = edge[0], edge[1]
G[name_node1][name_node2]['color'] = '#778899'
G[name_node1][name_node2]['penwidth'] = 1.5
G[name_node1][name_node2]['style'] = 'dashed'
return G
def store_params(emap, params):
params.pop('chains')
params.pop('eta_moieties')
params.pop('emap')
params.pop('include_residues')
emap._process_params = params
[docs]
def process(emap,
chains=None,
eta_moieties=None,
dist_def='COM',
sdef='RSA',
edge_prune='PERCENT',
include_residues=["Y", "W"],
custom="",
distance_cutoff=20,
max_degree=4,
percent_edges=1.0,
num_st_dev_edges=1.0,
rd_thresh=3.03,
rsa_thresh=0.2,
coef_alpha=1.0,
exp_beta=2.3,
r_offset=0.0):
"""Constructs emap graph theory model based on user specs, and saves it to the emap object.
Parameters
---------
emap: :class:`~pyemap.emap`
Object for storing state of emap analysis.
chains: list of str
List of strings corresponding to chains included in analysis
eta_moieties: list of str
List of strings corresponding to residue names of eta moieties
dist_def: str, optional
Definition of distance matrix. 'COM' for center of mass, 'CATM' for closest atom
sdef: str, optional
Algorithm to use for surface exposure. 'RD' for residue depth, 'RSA' for relative solvent accessibility
edge_prune: str, optional
Algorithm for pruning edges. 'DEGREE' for degree, 'PERCENT' for percent
include_residues: list of str
Included amino acids specified by 1 letter code
custom: str, optional
Custom atom string specified by user
distance_cutoff: float
Defines a pure distance threshold. PyeMap will only keep edges with distances less than or equal distance_cutoff.
max_degree: int, optional
Maximum degree of any vertex. Only used when edge_prune is set to 'DEGREE'.
percent_edges: float, optional
Percent of edges to keep for each node. Only used when edge_prune is set to 'PERCENT'.
num_st_dev_edges: float, optional
Number of standard deviations of edges to keep. Only used when edge_prune is set to 'PERCENT'.
rd_thresh: float, optional
Threshold for buried/surface exposed for residue depth
rsa_thresh: float, optional
Threshold for buried/surface exposed for relative solvent accessbility
coef_alpha,exp_beta,r_offset: float, optional
Penalty function parameters.
Raises
------
RuntimeError:
Not enough residues to construct a graph
"""
max_degree = int(max_degree)
dist_def, edge_prune, sdef = validate_binary_params(dist_def, edge_prune, sdef)
emap_params = locals().copy()
emap._reset_process()
pdb_file = emap.file_path
if chains is None:
chains = [emap.chains[0]]
if eta_moieties is None:
eta_moieties = []
for resname, moiety in emap.eta_moieties.items():
if moiety.get_full_id()[2] in chains:
eta_moieties.append(resname)
else:
for resname in eta_moieties:
if resname not in emap.eta_moieties:
raise PyeMapGraphException("Error: " + str(resname) + " is not a valid residue name.")
res_names = []
res_chars = []
for i, res in enumerate(include_residues):
if res.upper() in char_to_res_name:
res_names.append(char_to_res_name[res.upper()])
res_chars.append(res.upper())
elif res.upper() in res_name_to_char:
res_names.append(res.upper())
res_chars.append(res_name_to_char[res.upper()])
else:
raise PyeMapGraphException("Error: " + str(res) + " is not a valid 1-letter or 3-letter amino acid code.")
model = emap._structure[0]
all_residues = get_standard_residues(model.get_residues(), chains, res_names)
for resname in eta_moieties:
all_residues.append(emap.eta_moieties[resname])
used_atoms = []
for res in all_residues:
for atm in res:
used_atoms.append(atm.serial_number)
user_residues = []
if custom:
selection = Bio.PDB.Selection.unfold_entities(emap._structure, target_level='A')
serial_numbers = [atom.serial_number for atom in selection]
serial_dict = dict(zip(serial_numbers, selection))
user_residues = get_user_residues(custom, used_atoms, serial_dict)
all_residues += user_residues
if len(all_residues) < 2:
raise PyeMapGraphException("Not enough residues to construct a graph.")
node_labels = {}
for i in range(0, len(all_residues)):
node_labels[i] = all_residues[i].node_label
if dist_def == 'COM':
dmatrix = com_dmatrix(all_residues)
elif dist_def == 'CATM':
dmatrix = closest_atom_dmatrix(all_residues)
else:
raise PyeMapGraphException(
"Invalid choice of dist_def. Must be set to 'COM' (center of mass) or 'CATM'(closest atom).")
G = create_graph(dmatrix, node_labels, edge_prune, coef_alpha, exp_beta, r_offset, distance_cutoff, percent_edges,
num_st_dev_edges, max_degree, emap.eta_moieties.keys())
G.graph['pdb_id'] = emap.pdb_id
if len(G.edges()) == 0:
raise PyeMapGraphException("Not enough edges to construct a graph.")
# define surface exposed residues
if sdef is None:
warnings.warn("Protein surface will not be computed. All residues will be classified as buried...")
surface_exposed_res = []
else:
try:
if sdef == 'RD':
surface_exposed_res = calculate_residue_depth(model, all_residues, rd_thresh)
elif sdef == 'RSA':
surface_exposed_res = calculate_rsa(pdb_file, model, node_labels.values(), rsa_thresh)
else:
surface_exposed_res = []
warnings.warn("Invalid choice of surface definition. sdef must be set to 'RD' or 'RSA'.")
except Exception:
warnings.warn("Computing protein surface failed. All residues will be classified as buried...")
surface_exposed_res = []
finish_graph(G, surface_exposed_res)
for res in all_residues:
emap._add_residue(res)
for res in user_residues:
emap.user_residues[res.resname] = res
emap._store_initial_graph(G)
emap._include_residues = res_chars
store_params(emap, emap_params)
return emap