In [81]:
import json
import pandas as pd
import numpy as np
from yome import Session
from yome.models import *
from yome.util import to_df, report
import re
from sqlalchemy import or_, and_
from sqlalchemy.orm import aliased
import itertools as it
import seaborn as sns

In [17]:
from mpl_recipes import mpl_setup
%mpl_setup

Populating the interactive namespace from numpy and matplotlib


In [18]:
session = Session()

# Load putative transporter list from ecocyc

In [59]:
ec_putative_transporters = pd.read_table('../sources/ecocyc/ecocyc-putative-inner-membrane-transporters.tsv', header=0)

In [60]:
len(ec_putative_transporters)

163

In [61]:
ec_putative_transporters[ec_putative_transporters.duplicated(subset='primary_name')]

Unnamed: 0,primary_name,gene_product


# Get db Y-ome transporters

In [146]:
transporter_summary_html_strings = [
    'is a member of the Multi Antimicrobial Extrusion (MATE) Family of transporters',
]
transporter_strings = [
    'transporter',
    'antiporter',
    'exporter',
    'permease',
    'transport protein',
    'PTS enzyme',
    'efflux pump',
]

In [147]:
Knowledgebase2 = aliased(Knowledgebase)
KnowledgebaseGene2 = aliased(KnowledgebaseGene)
KnowledgebaseFeature2 = aliased(KnowledgebaseFeature)

In [148]:
transporters = to_df(
    session.query(
        Gene.locus_id,
        KnowledgebaseGene.primary_name,
        KnowledgebaseFeature.feature,
        Knowledgebase2.name,
        KnowledgebaseGene2.annotation_quality
    )
    .join(KnowledgebaseGene, KnowledgebaseGene.gene_id == Gene.id)
    .join(Knowledgebase, Knowledgebase.id == KnowledgebaseGene.knowledgebase_id)
    .join(KnowledgebaseFeature, KnowledgebaseFeature.knowledgebase_gene_id == KnowledgebaseGene.id)
    .filter(or_(
        and_(
            or_(*[KnowledgebaseFeature.feature.ilike(f'%{x}%') for x in transporter_strings]),
            KnowledgebaseFeature.feature_type.notin_(['summary_html']),
        ),
        and_(
            or_(*[KnowledgebaseFeature.feature.ilike(f'%{x}%') for x in transporter_summary_html_strings]),
            KnowledgebaseFeature.feature_type == 'summary_html',
        ),
    ))
    .join(KnowledgebaseGene2, KnowledgebaseGene2.gene_id == Gene.id)
    .join(Knowledgebase2, Knowledgebase2.id == KnowledgebaseGene2.knowledgebase_id)
    .filter(Knowledgebase2.name == 'Y-ome')
    .filter(KnowledgebaseGene2.annotation_quality == 'low')
)

In [151]:
transporters_ar = (
    transporters
    .groupby('primary_name')
    .agg(lambda x: list(x))
    .drop(['locus_id', 'name', 'annotation_quality'], axis=1)
)

In [152]:
len(transporters_ar)

220

In [161]:
'ydaN' in transporters_ar.index

False

# Merge

In [153]:
merged = transporters_ar.merge(ec_putative_transporters, how='outer', left_index=True, right_on='primary_name')

In [154]:
merged.head()

Unnamed: 0,feature,primary_name,gene_product
162,[DUF1656 family putative inner membrane efflux...,aaeX,
162,[CP4-6 prophage; ABC transporter ATP-binding p...,afuC,
162,"[L-asparagine transporter, transporter, L-aspa...",ansP,
0,[putative major facilitator superfamily transp...,araJ,putative major facilitator superfamily transpo...
162,[putative ABC transporter periplasmic binding ...,artI,


In [168]:
print(len(merged[~merged.gene_product.isnull()]))

163


In [169]:
print(len(merged[~merged.feature.isnull()]))

220


In [170]:
len(merged[~merged.gene_product.isnull() & ~merged.feature.isnull()])

124

In [171]:
len(merged[merged.gene_product.isnull() & merged.feature.isnull()])

0

# set up for export

NOTES
- b3682 / glvB excluded as pseudogene
- b1342 / ydaN evidence from EcoCyc makes this not in the Y-ome "Assay of unpurified protein [Worlock02]"
- might want to pull some out like b4662 / sgrT

In [180]:
yome_transporters = (
    transporters
    .groupby('locus_id')
    .agg(lambda x: list(x))
    .drop(['name', 'annotation_quality'], axis=1)
    .reset_index()
    .rename(columns={'locus_id': 'locus_tag', 'feature': 'matched_features'})
)
yome_transporters.primary_name = yome_transporters.primary_name.apply(lambda x: x[0])
yome_transporters.matched_features = yome_transporters.matched_features.apply(lambda x: '; '.join(x))
yome_transporters.head()

Unnamed: 0,locus_tag,primary_name,matched_features
0,b0007,yaaJ,putative alanine/glycine:cation symporter fami...
1,b0045,yaaU,putative major facilitator superfamily transpo...
2,b0106,hofC,Protein transport protein HofC homolog
3,b0107,hofB,Protein transport protein HofB homolog
4,b0127,yadG,putative ABC transporter ATP-binding protein Y...


In [181]:
yome_transporters.to_csv('../yome_transporters.tsv', sep='\t', index=False)