# Source code for pyemap.graph_mining.utils

## Copyright (c) 2017-2022, James Gayvert, Ruslan Tazhigulov, Ksenia Bravaya
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
#    contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from ..data import char_to_res_name
from pysmiles import write_smiles, read_smiles
import networkx as nx
import math


def extract_chain(resname):
    ''' Returns the text found between the first '(' and the first ')' of a name.

    Parameters
    -----------
    resname: str
        Name that may carry a parenthesised suffix, e.g. ``'W123(A)'``.

    Returns
    --------
    chain: str
        Text between the first '(' and the first ')', or an empty string if
        either parenthesis is missing or the lookup fails for any other reason.
    '''
    try:
        start = resname.index('(') + 1
        stop = resname.index(")")
        chain = resname[start:stop]
    except Exception:
        # Best effort: any failure (missing parenthesis, non-string input)
        # is reported as "no chain".
        chain = ''
    return chain


def get_edge_label(G, edge, edge_thresholds):
    ''' Returns the 1-based bin index of an edge's distance.

    The thresholds are visited in the order given; the label is one plus the
    number of thresholds visited before the first one that is strictly greater
    than the edge distance.

    Parameters
    -----------
    G: graph-like
        Object whose ``edges[edge]`` mapping holds a ``'distance'`` entry.
    edge: hashable
        Key of the edge in ``G.edges``.
    edge_thresholds: iterable of float
        Distance thresholds separating the bins.

    Returns
    --------
    label: int
        Bin index starting at 1. Falls back to 1 if iterating or comparing the
        thresholds raises.
    '''
    dist = G.edges[edge]['distance']
    thresholds_passed = 0
    try:
        for cutoff in edge_thresholds:
            if dist < cutoff:
                break
            thresholds_passed += 1
    except Exception:
        # Unusable thresholds (e.g. None or non-comparable values): use the first bin.
        return 1
    return thresholds_passed + 1



# [docs]
def write_graph_smiles(generic_subgraph):
    ''' Returns pseudo-SMILES string for supplied graph.

    Each node is temporarily given a unique placeholder element (``C0``,
    ``C1``, ...) so the graph can be serialized by :func:`pysmiles.write_smiles`;
    the placeholders are then swapped for the node labels.

    Parameters
    -----------
    generic_subgraph: :class:`networkx.Graph`
        Graph to be transformed into string representation. The :attr:`label` attribute
        of each node should be set to the 1-letter amino acid code or special character.
        The graph itself is not modified; a copy is used.

    Returns
    --------
    pseudosmiles: str
        String representation of graph of interest. When the string consists of
        nothing but the labels once brackets are removed (the linear case), the
        brackets are dropped.
    '''
    import re

    G = generic_subgraph.copy()
    element_dict = {}
    num_chars = 0
    for i, node in enumerate(G.nodes):
        placeholder = 'C' + str(i)
        label = G.nodes[node]['label']
        G.nodes[node]['element'] = placeholder
        element_dict[placeholder] = label
        num_chars += len(label)
    proper_smiles = write_smiles(G)
    # Substitute every placeholder in a single pass over whole tokens. Replacing
    # keys one after another would let 'C1' match inside 'C10'..'C19' for graphs
    # with more than ten nodes, and would rescan labels that were just inserted.
    # NOTE(review): relies on write_smiles emitting the placeholder text verbatim.
    proper_smiles = re.sub(r'C[0-9]+',
                           lambda match: element_dict.get(match.group(0), match.group(0)),
                           proper_smiles)
    # linear case: no ring-closure digits or branch parentheses remain
    without_brackets = proper_smiles.replace('[', '').replace(']', '')
    if len(without_brackets) == num_chars:
        return without_brackets
    return proper_smiles



def get_numerical_node_label(u, res_to_num_label):
    ''' Returns the numerical label for a residue name.

    Parameters
    -----------
    u: str
        Residue name; everything before its first digit is used as the residue code.
    res_to_num_label: dict
        Mapping from residue code to numerical label. Must contain ``'X'`` and
        ``'#'`` entries for the fallback cases.

    Returns
    --------
    result:
        ``res_to_num_label[code]`` when the code is a key of both
        ``char_to_res_name`` and ``res_to_num_label``; ``res_to_num_label['X']``
        when it is only a key of ``char_to_res_name``; ``res_to_num_label['#']``
        otherwise.
    '''
    code = strip_res_number(u)
    if code not in char_to_res_name:
        return res_to_num_label["#"]
    if code in res_to_num_label:
        return res_to_num_label[code]
    return res_to_num_label['X']


def strip_res_number(u):
    ''' Returns the part of a name that precedes its first digit.

    Parameters
    -----------
    u: str
        Name to be truncated, e.g. ``'W123(A)'``.

    Returns
    --------
    prefix: str or None
        Characters before the first digit, or ``None`` if the name contains no digit.
    '''
    for position, char in enumerate(u):
        if char.isdigit():
            return u[:position]
    return None


def node_match(node1, node2):
    ''' Returns True if both node attribute mappings carry equal ``'num_label'`` values. '''
    first_label = node1['num_label']
    second_label = node2['num_label']
    return first_label == second_label


def edge_match(edge1, edge2):
    ''' Returns True if both edge attribute mappings carry equal ``'num_label'`` values. '''
    first_label = edge1['num_label']
    second_label = edge2['num_label']
    return first_label == second_label


def get_graph_matcher(protein_graph, generic_subgraph):
    ''' Returns a networkx ``GraphMatcher`` for the two graphs.

    Nodes and edges are compared with :func:`node_match` and :func:`edge_match`.

    Parameters
    -----------
    protein_graph: :class:`networkx.Graph`
        First graph handed to the matcher.
    generic_subgraph: :class:`networkx.Graph`
        Second graph handed to the matcher.

    Returns
    --------
    matcher: :class:`networkx.algorithms.isomorphism.GraphMatcher`
    '''
    matcher_cls = nx.algorithms.isomorphism.GraphMatcher
    comparators = {'node_match': node_match, 'edge_match': edge_match}
    return matcher_cls(protein_graph, generic_subgraph, **comparators)


def set_defaults(kwargs):
    ''' Fills in any missing options with their default values.

    Parameters
    -----------
    kwargs: dict
        Options supplied by the caller. Modified in place: keys that are
        already present keep their values, missing keys receive the default.

    Returns
    --------
    kwargs: dict
        The same dictionary that was passed in.
    '''
    fallback_values = {
        'distance_cutoff': 20,
        'max_degree': 4,
        'dist_def': 'COM',
        'sdef': 'RSA',
        'edge_prune': 'PERCENT',
        'percent_edges': 1.0,
        'num_st_dev_edges': 1.0,
        'rd_thresh': 3.03,
        'rsa_thresh': 0.2,
        'coef_alpha': 1.0,
        'exp_beta': 2.3,
        'r_offset': 0.0,
    }
    for option, value in fallback_values.items():
        if option not in kwargs:
            kwargs[option] = value
    return kwargs


def make_pretty_subgraph(sg):
    ''' Sets drawing attributes on every node and edge of a graph.

    Parameters
    -----------
    sg: :class:`networkx.Graph`
        Graph to decorate. Every node must have a :attr:`label` attribute.
        Modified in place.

    Returns
    --------
    sg: :class:`networkx.Graph`
        The same graph object, with style attributes set.
    '''
    default_fill = '#FFC0CB'
    fill_by_first_letter = {
        'Y': '#96c8f0',
        'W': '#f07878',
        'F': '#f09664',
        'H': '#c8f0c8',
    }
    for name_node in sg.nodes():
        node_attrs = sg.nodes[name_node]
        node_attrs.update({
            'style': 'filled',
            'fontname': 'Helvetica-Bold',
            'fontsize': 14,
            'margin': '0.04',
            'fontcolor': "#000000",
            'color': '#708090',
            'penwidth': 2.0,
        })
        label = node_attrs['label']
        # Only a single character, or a character directly followed by a digit,
        # is coloured by its first letter; every other label gets the default fill.
        single_letter_code = (len(label) == 1) or (len(label) > 1 and label[1].isdigit())
        if single_letter_code:
            node_attrs['fillcolor'] = fill_by_first_letter.get(label[0], default_fill)
        else:
            node_attrs['fillcolor'] = default_fill
    for edge in sg.edges:
        edge_attrs = sg.edges[edge]
        try:
            dist = '{0:.2f}'.format(edge_attrs['distance'])
            # Edge length grows with the log of the rounded distance.
            edge_length = 1.0 + math.log10(float(dist))
            edge_attrs['len'] = edge_length
            edge_attrs['label'] = dist
        except Exception:
            # Edges without a usable distance simply get no length/label.
            pass
        edge_attrs.update({
            'fontname': 'Helvetica',
            'color': '#778899',
            'penwidth': 1.5,
            'style': 'dashed',
        })
    return sg


def nodes_and_edges_from_smiles(smiles_str, edge_thresholds=None, residue_categories=None):
    ''' Returns all possible combinations of nodes and edges based on graph string and edge thresholds and residue categories.

    Parameters
    ----------
    smiles_str: str
        Specification of graph. A string without any '[' is treated as a
        sequence of single-character nodes and each character is bracketed.
    edge_thresholds: list of float, optional
        Edge thresholds. Defaults to no thresholds, i.e. a single edge label.
    residue_categories: list of str, optional
        List of 1 letter amino acid codes used to fill in wildcard ('*') nodes.
        Defaults to an empty list.

    Returns
    -------
    node_combs: list of list of str
        One node-label list per way of filling the wildcard nodes. If there
        are no wildcards this is a single list; if there are wildcards but no
        residue categories it is empty.
    edge_combs: list of tuple of int
        Every assignment of the labels ``1 .. len(edge_thresholds) + 1`` to the edges.
    edges: list of tuple
        Edges of the parsed graph, in the order used by ``edge_combs``.
    '''
    from itertools import product

    # None instead of mutable [] defaults, so no list is shared between calls.
    if edge_thresholds is None:
        edge_thresholds = []
    if residue_categories is None:
        residue_categories = []

    if '[' not in smiles_str:
        smiles_str = ''.join('[{}]'.format(char) for char in smiles_str)
    # replace some problematic characters with placeholder element names
    smiles_str = smiles_str.replace('H', 'He').replace('#', 'Np')
    base_graph = read_smiles(smiles_str)

    placeholder_to_label = {'He': 'H', 'Np': '#'}
    node_list = []
    for node in base_graph.nodes:
        # Nodes without an 'element' attribute are treated as wildcards.
        element = base_graph.nodes[node].get('element', '*')
        node_list.append(placeholder_to_label.get(element, element))

    edges = list(base_graph.edges)
    edge_labels = range(1, len(edge_thresholds) + 2)
    edge_combs = list(product(edge_labels, repeat=len(edges)))

    wildcard_positions = [i for i, label in enumerate(node_list) if label == "*"]
    if not wildcard_positions:
        node_combs = [node_list]
    else:
        node_combs = []
        for comb in product(residue_categories, repeat=len(wildcard_positions)):
            filled = node_list.copy()
            for position, residue in zip(wildcard_positions, comb):
                filled[position] = residue
            node_combs.append(filled)
    return node_combs, edge_combs, edges