Source code for pyemap.emap

## Copyright (c) 2017-2022, James Gayvert, Ruslan Tazhigulov, Ksenia Bravaya
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from .custom_residues import is_pi_bonded
from .data import clusters, metal_ligands, res_name_to_smiles
import networkx as nx
from .utils import extract_resname
from networkx.drawing.nx_agraph import to_agraph
from collections import OrderedDict
from PIL import Image
import os
from shutil import copyfile
import tempfile
from .process_data import get_atom_list
import datetime
from pysmiles import write_smiles
from .pyemap_exceptions import *
from .utils import draw_mpl_graph
import warnings


[docs] class emap(): ''' Manages the data generated at all stages of PyeMap analysis. Attributes ---------- file_path: str Crystal structure file being analyzed by PyeMap. eta_moieties: dict of str: :class:`Bio.PDB.Residue.Residue` Non-protein eta moieties automatically identified at the parsing step. chain_list: list of str List of chains identified at the parsing step. sequences: dict of str, str Amino acid sequence for each chain in FASTA format residues: dict of str: :class:`Bio.PDB.Residue.Residue` Residues included in the graph after the process step. user_residues: dict of str: :class:`Bio.PDB.Residue.Residue` Custom residues specified by the user. init_graph: :class:`networkx.Graph` Graph generated after the process step. branches: dict of int: :class:`~pyemap.Branch` Branches found by PyeMap analysis paths: dict of str: :class:`~pyemap.ShortestPath` Paths found by PyeMap sorted by lowest to highest score. paths_graph: :class:`networkx.Graph` Graph generated after the shortest paths step. '''
[docs] def __init__(self, file_path, pdb_id, eta_moieties, chain_list, sequences): '''Initializes emap object. Parameters ---------- file_path: str Name of file eta_moieties: list of :class:`Bio.PDB.Residue.Residue` Customized residue objects generated for automatically detected eta moieties chain_list: list of str Chains identified by the parser sequences: dict of str:str Key is chain id, value is sequence in fasta format ''' self.file_path = file_path self.pdb_id = pdb_id self.residues = {} self.chains = chain_list self.sequences = sequences self.active_chains = {} self.eta_moieties = {} self.user_residues = {} self._include_residues = [] self._process_params = {} self.paths = OrderedDict() self.paths_graph = None self.init_graph = None self.branches = OrderedDict() for residue in eta_moieties: self._add_eta_moiety(residue)
def _store_initial_graph(self, graph): '''Stores graph representation of emap model Parameters ---------- graph: networkx.Graph Graph theory representation of emap model Notes ----- Attributes of nodes/edges store info on surface exposure, edge weights etc. ''' self.init_graph = graph.copy() def _store_paths(self, branches, yens=False): '''Stores pathways in emap object, and sets their ngl selection strings for visualization. Parameters --------- shortest_paths: list of pyemap.shortest_paths.Branch branches found by emap yens: boolean, optional True when target specified, False when only source is specified ''' shortest_paths = [] for br in branches: self.branches[br.branch_id] = br shortest_paths += br.paths shortest_paths = sorted(shortest_paths) for pt in shortest_paths: self.paths[pt.path_id] = pt self._visualize_pathway(pt, yens) def _store_paths_graph(self, graph): '''Stores graph representation of emap model with selected pathway(s) highlighted Parameters ---------- graph: networkx.Graph Graph theory representation of emap model Notes ----- Attributes of nodes/edges store info on surface exposure, edge weights etc. ''' self.paths_graph = graph.copy() def _reset_process(self): '''Returns emap object to state it was in after parsing. ''' self.residues = {} self.active_chains = {} self.user_residues = {} self.init_graph = None self.paths = OrderedDict() self.paths_graph = None self.branches = OrderedDict() self._include_residues = [] self._process_params = {} def _reset_paths(self): '''Returns emap object to state it was in after the process step. ''' self.paths = OrderedDict() self.paths_graph = None self.branches = OrderedDict() def _get_residue_graph(self, residue): atoms = list(get_atom_list(residue)) arom_atoms = ['O', 'P', 'N', 'C', 'S'] res_graph = nx.Graph() for i in range(len(atoms)): for k in range(i, len(atoms)): if (not i == k) and is_pi_bonded(atoms[i], atoms[k]): if atoms[i].element in arom_atoms and atoms[k].element in arom_atoms: res_graph.add_edge(i, k) res_graph.nodes[i]["element"] = atoms[i].element res_graph.nodes[i]["coords"] = atoms[i].coord res_graph.nodes[k]["element"] = atoms[k].element res_graph.nodes[k]["coords"] = atoms[k].coord return res_graph def _add_eta_moiety(self, residue): '''Gets the smiles string for an automatically identified non-protein eta moiety, and adds the residue to the eta_moieties dictionary. Parameters ---------- residue: Bio.PDB.Residue.Residue Customized residue object generated for automatically detected eta moiety ''' if extract_resname(residue) not in clusters + list(metal_ligands.keys()) and "CUST" not in residue.resname: res_graph = self._get_residue_graph(residue) try: smiles_str = write_smiles(res_graph) except Exception as e: smiles_str = "unknown" residue.smiles = smiles_str elif extract_resname(residue) in metal_ligands: atm_name = next(residue.get_atoms()).name atm_name = atm_name[0] + atm_name[1].lower() charge = metal_ligands[extract_resname(residue)] smiles_str = "[" + atm_name if charge > 0: smiles_str += '+{}'.format(charge) elif charge < 0: smiles_str += '-{}'.format(charge) smiles_str += ']' residue.smiles = smiles_str self.eta_moieties[residue.resname] = residue
[docs] def visualize_pathway_in_nglview(self,ptid,view): '''Visualize pathway in nglview widget Parameters ---------- ptid: str Pathway ID to be visualized view: :class:`nglview.widget.NGLWidget` NGL Viewer widget ''' pt = self.paths[ptid] for i in range(0,len(pt.selection_strs)): first_atm_select = pt.selection_strs[i][:pt.selection_strs[i].index(')')+1]+"]" view.add_representation('ball+stick',sele=pt.selection_strs[i],color=pt.color_list[i]) view.add_representation('label',color="black",sele=first_atm_select,labelText=pt.label_texts[i])
def _visualize_pathway(self, pathway, yens): colors = {"F": "orange", "Y": "blue", "W": "red", "H": "green"} selection_strs = [] color_list = [] labeled_atoms = [] label_texts = [] for res in pathway.get_path_as_list()[1:-1]: label_texts.append(res) try: if res not in self.eta_moieties: color_list.append(colors[res[0]]) labeled_atoms.append(".CA") else: color_list.append("pink") labeled_atoms.append(next(self.residues[res].get_atoms()).name) except KeyError: color_list.append("pink") labeled_atoms.append(next(self.residues[res].get_atoms()).name) selection_strs.append(self.residues[res].ngl_string) color_list[0] = "yellow" if yens: color_list[-1] = "turquoise" pathway.set_visualization(selection_strs, color_list, labeled_atoms, label_texts) def _add_residue(self, residue): residue.ngl_string = self._get_ngl_string(residue) if residue.resname in res_name_to_smiles: residue.smiles = res_name_to_smiles[residue.resname.upper()] self.residues[residue.node_label] = residue chain_id = residue.full_id[2] if chain_id in self.active_chains: self.active_chains[chain_id].append(residue) else: self.active_chains[chain_id] = [residue] def _get_ngl_string(self, residue): """Returns NGL selection string for residue Parameters ---------- residue: Bio.PDB.Residue.Residue object Returns ------- select_string: str NGL selection string for residue """ select_string = "" atm_list = list(residue.get_atoms()) first_atm = atm_list[0] if hasattr(first_atm, "original_id"): id = first_atm.original_id else: id = first_atm.full_id select_string += "(" + str(id[3][1]) + " and :" + str( id[2]) + " and ^" + residue.id[2] + " and ." + first_atm.name + ")" for i in range(1, len(atm_list)): atm = atm_list[i] if hasattr(atm, "original_id"): id = first_atm.original_id else: id = atm.full_id select_string += " or " select_string += "(" + str(id[3][1]) + " and :" + str( id[2]) + " and ^" + residue.id[2] + " and ." + atm.name + ")" return select_string
[docs] def residue_to_file(self, resname, dest="", size=(100, 100)): '''Saves image of residue to file in .svg format. Parameters ---------- resname: str Name of residue (node label) to be saved to file. dest: str, optional destination to save the image size: (float,float), optional dimensions of image saved to file ''' try: from rdkit import Chem from rdkit.Chem.Draw import rdMolDraw2D except (ModuleNotFoundError, ImportError) as e: raise ModuleNotFoundError( "RDKit is required for visualization of chemical structures. See https://www.rdkit.org/docs/Install.html" ) from e if resname in self.residues or resname in self.eta_moieties: if "CUST" in resname: raise KeyError("Not available for user defined residues.") elif resname[:3] in clusters: cluster_img_name = os.path.abspath( os.path.dirname(__file__)) + '/data/clusters/' + resname[:3] + '.svg' if dest: target_name = dest else: target_name = resname + ".svg" copyfile(cluster_img_name, target_name) else: try: try: mol = Chem.MolFromSmiles(self.eta_moieties[resname].smiles) except Exception: mol = Chem.MolFromSmiles(self.residues[resname].smiles) d2d = rdMolDraw2D.MolDraw2DSVG(size[0], size[1]) d2d.DrawMolecule(mol) d2d.FinishDrawing() if dest: with open(dest, 'w') as f: f.write(d2d.GetDrawingText()) else: with open(resname + '.svg', 'w') as f: f.write(d2d.GetDrawingText()) except Exception as e: raise PyeMapException("Could not draw residue: {}".format(resname)) from e else: raise KeyError("No record of any residue by that name.")
[docs] def residue_to_Image(self, resname, scale=1.0): '''Returns PIL image of chemical structure. Parameters ----------- resname: str Name of residue scale: float, optional Output scaling factor, default dimensions are (100,100) Returns -------- img: :class:`PIL.Image.Image` ''' try: from rdkit import Chem from rdkit.Chem.Draw import rdMolDraw2D except (ModuleNotFoundError, ImportError) as e: raise ModuleNotFoundError( "RDKit is required for visualization of chemical structures. See https://www.rdkit.org/docs/Install.html" ) from e if resname in self.residues or resname in self.eta_moieties: if "CUST" in resname: raise KeyError("Not available for user defined residues.") elif resname[:3] in clusters: dest = os.path.abspath( os.path.dirname(__file__)) + '/data/clusters/' + resname[:3] + '.png' img = Image.open(dest) return img try: try: mol = Chem.MolFromSmiles(self.eta_moieties[resname].smiles) except Exception: mol = Chem.MolFromSmiles(self.residues[resname].smiles) d2d = rdMolDraw2D.MolDraw2DCairo(100, 100) d2d.DrawMolecule(mol) d2d.FinishDrawing() dest = tempfile.NamedTemporaryFile(suffix=".png").name d2d.WriteDrawingText(dest) img = Image.open(dest) return img except Exception as e: raise PyeMapException("Could not draw residue: {}".format(resname)) from e else: raise KeyError("No record of any residue by that name.")
def _graph_to_file(self, G, dest=""): '''Saves image of graph generated by process step to file. Parameters ---------- dest: str Destination for writing to file. ''' if G: try: import pygraphviz agraph = to_agraph(G) agraph.graph_attr.update(ratio=1.0, overlap="rc", mode="ipsep", splines="true") if agraph.number_of_nodes() <= 200: try: agraph.layout(prog='neato', args="-Gepsilon=0.01 -Gmaxiter=50") except Exception as e: raise RuntimeError("There was a problem with Graphviz. See https://graphviz.gitlab.io/") from e else: try: agraph.layout(prog='dot') except Exception as e: raise RuntimeError("There was a problem with Graphviz. See https://graphviz.gitlab.io/") from e if dest: svg_fn = dest + '.svg' png_fn = dest + '.png' agraph.draw(svg_fn, prog='neato', args="-Gepsilon=0.01 -Gmaxiter=50") agraph.draw(png_fn, prog='neato', args="-Gepsilon=0.01 -Gmaxiter=50") else: png_fn = self.file_path[:-4] + "_graph.png" agraph.draw(png_fn, prog='neato', args="-Gepsilon=0.01 -Gmaxiter=50") except (ModuleNotFoundError, ImportError) as e: warnings.warn("Unable to import pygraphviz. See: http://pygraphviz.github.io/. Falling back on matplotlib...") import matplotlib.pyplot as plt draw_mpl_graph(G) if dest: svg_fn = dest + '.svg' png_fn = dest + '.png' plt.savefig(svg_fn) plt.savefig(png_fn) else: png_fn = self.file_path[:-4] + "_graph.png" plt.savefig(png_fn) plt.clf() else: raise RuntimeError("Nothing to draw.")
[docs] def init_graph_to_file(self, dest=""): '''Saves image of graph generated by process step to file. Parameters ---------- dest: str Destination for writing to file. ''' return self._graph_to_file(self.init_graph, dest)
[docs] def paths_graph_to_file(self, dest=""): '''Saves image of graph generated by pathways step to file. Parameters ---------- dest:str Destination for writing to file. ''' return self._graph_to_file(self.paths_graph, dest)
[docs] def get_surface_exposed_residues(self): '''Returns list of surface exposed residues. Returns ------- surface_exposed: list of str List of surface exposed residues identified by pyemap ''' if self.init_graph: surface_exposed = [] for n, d in self.init_graph.nodes(data=True): if d['shape'] == "box": surface_exposed.append(n) return surface_exposed else: raise RuntimeError("No graph found. Please run pyemap.process(my_emap) to generate the graph.")
def _report_header(self): full_str = "" full_str += "Generated:\n" + str(datetime.datetime.now()) + "\n" full_str += "Parameters:\n{}\n".format(str(self._process_params)) full_str += "Included residues:\n" full_str += str(self._include_residues) + "\n" full_str += "Active chains:\n" full_str += str(list(self.active_chains.keys())) + "\n" full_str += "Included non protein moieties:\n" full_str += str(list(self.eta_moieties.keys())) + "\n" custom_res_atms = [] custom_res_names = [] for key, val in self.user_residues.items(): custom_res_names.append(key) custom_res_atms.append([atm.serial_number for atm in val]) custom_res_dict = dict(zip(custom_res_names, custom_res_atms)) full_str += "User defined residues:\n{}\n".format(custom_res_dict) return full_str
[docs] def report(self, dest=""): '''Returns report of most probable pathways. Writes to file if destination is specified. Parameters ----------- dest: str, optional Destination for writing to file Returns -------- output: str Formatted report of pathways found Raises ------- RuntimeError Nothing to report ''' if self.branches: output = self._report_header() + "\nPathways:\n" for br in self.branches.values(): output += str(br) + "\n" if dest: fi = open(dest, "w") fi.write(output) return output else: return output else: raise RuntimeError("Nothing to report.")
def _graph_to_Image(self, G): '''Returns PIL image of graph Returns -------- img: :class:`PIL.Image.Image` ''' if G: fout = tempfile.NamedTemporaryFile(suffix=".png") try: import pygraphviz agraph = to_agraph(G) agraph.graph_attr.update(ratio=1.0, overlap="ipsep", mode="ipsep", splines="true") try: agraph.layout(prog='neato', args="-Gepsilon=0.01 -Gmaxiter=50") except Exception as e: raise RuntimeError("There was a problem with Graphviz. See https://graphviz.gitlab.io/") from e if agraph.number_of_nodes() <= 200: agraph.draw(fout.name, prog='neato') else: agraph.draw(fout.name, prog='dot') except (ModuleNotFoundError, ImportError) as e: import matplotlib.pyplot as plt warnings.warn('Unable to import pygraphviz. See: http://pygraphviz.github.io/. Falling back on matplotlib...') draw_mpl_graph(G) plt.savefig(fout) plt.clf() img = Image.open(fout.name) return img else: raise RuntimeError("Nothing to draw.")
[docs] def init_graph_to_Image(self): '''Returns PIL image of initial graph Returns -------- img: :class:`PIL.Image.Image` ''' return self._graph_to_Image(self.init_graph)
[docs] def paths_graph_to_Image(self): '''Returns PIL image of pathways graph Returns -------- img: :class:`PIL.Image.Image` ''' return self._graph_to_Image(self.paths_graph)