Example Python script for retrieving genome coordinates with protein API

In [1]:
import requests, sys
import ijson
import pandas as pd
import json






class ParseCoordinatesJson():

   
    def __init__(self,url,geneName,taxId,pageSize):
        self.coordinatesURL = url
        self.geneName = geneName
        self.taxId = taxId
        self.pageSize = pageSize
      
      
      
    def processGene(self,gene):
        for g in gene:
            if(g["type"] == "primary"):
                return g["value"]  
        return "No_Primary"
    
    
    
    def processExon(self,exon):
        exonDat = []
        exonDat.append(exon["id"])
        exonDat.append(exon["genomeLocation"]["begin"]["position"])
        exonDat.append(exon["genomeLocation"]["end"]["position"])
        exonDat.append(exon["proteinLocation"]["begin"]["position"])
        exonDat.append(exon["proteinLocation"]["end"]["position"]) 
        return exonDat
    
    
    
      
    def parseGenomeJson(self,coordinates): 
        pData = []

        for row in coordinates:
            accession = row["accession"]
            geneName = self.processGene(row["gene"])
            taxId = row["taxid"]
            chromosome = row["gnCoordinate"][0]["genomicLocation"]["chromosome"]
            chrStart = row["gnCoordinate"][0]["genomicLocation"]["start"]
            chrEnd = row["gnCoordinate"][0]["genomicLocation"]["end"]
            ensg = row["gnCoordinate"][0]["ensemblGeneId"]
            enst = row["gnCoordinate"][0]["ensemblTranscriptId"]
            ensp = row["gnCoordinate"][0]["ensemblTranslationId"]
            
            if(row["gnCoordinate"][0]["genomicLocation"]["reverseStrand"]):
                strand = "-" 
            else:
                strand ="+" 

            exons = row["gnCoordinate"][0]["genomicLocation"]["exon"]
            for e in exons:
                processedRow = []
                processedRow.append(accession)
                processedRow.append(geneName)
                processedRow.append(taxId)
                processedRow.append(ensg)
                processedRow.append(enst)
                processedRow.append(ensp)
                processedRow.append(chromosome)
                processedRow.append(chrStart)
                processedRow.append(chrEnd)
                processedRow.append(strand)
                processedRow.append(len(exons))  
                processedRow.extend(self.processExon(e))
                pData.append(processedRow)
             
        return pData
    
    
    
    
    def processFeature(self,feat):
        featDat = []
        featDat.append(feat["genomeLocation"]["begin"]["position"])
        featDat.append(feat["genomeLocation"]["end"]["position"])
        featDat.append(feat["location"]["begin"]["position"])
        featDat.append(feat["location"]["end"]["position"])
        
        evidence = feat["evidence"]
        mergedEvid = []
        for e in evidence:
            if("dbReference" in e):
                evidVal = e["dbReference"]["type"] + " " + e["dbReference"]["id"] + " " + e["code"]
            else:    
                evidVal = e["code"] 
            mergedEvid.append(evidVal)
        featDat.append(mergedEvid)
        return featDat
    

    def parseFeatureJson(self,coordinates,featureType): 
        
        fData = []


       
        #print(coordinates)
        
        for row in coordinates:
            
            
            accession = row["accession"]
            geneName = self.processGene(row["gene"])
            taxId = row["taxid"]
            chromosome = row["gnCoordinate"][0]["genomicLocation"]["chromosome"]
            chrStart = row["gnCoordinate"][0]["genomicLocation"]["start"]
            chrEnd = row["gnCoordinate"][0]["genomicLocation"]["end"]
            ensg = row["gnCoordinate"][0]["ensemblGeneId"]
            enst = row["gnCoordinate"][0]["ensemblTranscriptId"]
            ensp = row["gnCoordinate"][0]["ensemblTranslationId"]
            
            if(row["gnCoordinate"][0]["genomicLocation"]["reverseStrand"]):
                strand = "-" 
            else:
                strand ="+" 
            
            
            #print(row) 
            features = row["gnCoordinate"][0]["feature"]
            
            fcnt = 1
            for f in features:
                
                # Only want to get the specified featureType (annotation Type)
                if(f["type"] != featureType):
                    continue
                
                processedRow = []
                
                processedRow.append(accession)
                processedRow.append(geneName)
                processedRow.append(taxId)
                processedRow.append(ensg)
                processedRow.append(enst)
                processedRow.append(ensp)
                processedRow.append(chromosome)
                processedRow.append(chrStart)
                processedRow.append(chrEnd)
                processedRow.append(strand)
                processedRow.append(featureType)
                processedRow.append("feature #" + str(fcnt))
                fcnt = fcnt + 1
                
                processedRow.extend(self.processFeature(f))  
                                
            
                fData.append(processedRow)
             
        return fData 


    # Carries out the REST get request and returns a combined list of all the results    
    def jsonRequest(self):
        
        # Format get URL
        geneArg = "gene=" + self.geneName
        taxIdArg = "taxid=" + self.taxId
        urlFormat = "format=json"
        startCount = 0
        resultsCount = startCount
        maxResultsPerPage = self.pageSize
        startAt = "from=" + str(startCount)
        resultsPerPage = "size=" + str(maxResultsPerPage)
        entries = []
        
        #
        fullUrl = self.coordinatesURL + "?" + geneArg + "&" + taxIdArg + "&" \
        + startAt + "&" + resultsPerPage + "&" + urlFormat 
        #print(fullUrl)
        responseFail = 0
               
        while True:
            response = requests.get(fullUrl)
            if(self.checkResponse(response)):
                responseFail = 0
                jsonResponse = json.loads(response.text)
                entries.extend(jsonResponse)
                resultsCount += maxResultsPerPage
                
                # Check if we need to do another page
                if response.headers["x-pagination-totalrecords"] is None or (resultsCount >= int(response.headers["x-pagination-totalrecords"])):
                    break
                if int(response.headers["x-pagination-totalrecords"]) % self.pageSize == 0:
                    print(response.headers)
                
                # Update page offsets and reformat get URL
                startAt = "from=" + str(resultsCount)
                fullUrl = self.coordinatesURL + "?" + geneArg + "&" + taxIdArg + "&" \
                + startAt + "&" + resultsPerPage + "&" + urlFormat 
                
            # Stop requesting from server if have 10 fails
            elif(responseFail == 10):
                break
            # Record a failed response from server
            else:
                responseFail += 1
        print("total results count = " + response.headers["x-pagination-totalrecords"])     
        response.close()
        return entries
    


    # Checks the server response code is correct 
    def checkResponse(self,resp):
        if(resp.status_code == 200):
            return True
        else:
            return False     
    
    
    
if __name__ == '__main__':
    
    coordinatesURL = "https://www.ebi.ac.uk/proteins/api/coordinates"
    geneName = "fgfr2"
    taxId = "9606"
    pageSize = 100
   
    main = ParseCoordinatesJson(coordinatesURL,geneName,taxId,pageSize)
    fgfr2Data = main.jsonRequest()
   
    genomeMapping = main.parseGenomeJson(fgfr2Data)
      
    for gm in genomeMapping:
        print(gm)
        
    feature = "disulfide bond"   
    featureMapping = main.parseFeatureJson(fgfr2Data,feature)   
    
    for fm in featureMapping:
        print(fm) 

    exit()    

total results count = 12
['A0A087WY21', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613324', 'ENSP00000481464', '10', 121564495, 121593764, '-', 3, 'ENSE00003724297', 121593709, 121593764, 1, 19]
['A0A087WY21', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613324', 'ENSP00000481464', '10', 121564495, 121593764, '-', 3, 'ENSE00003488412', 121565438, 121565704, 19, 108]
['A0A087WY21', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613324', 'ENSP00000481464', '10', 121564495, 121593764, '-', 3, 'ENSE00003743246', 121564495, 121564579, 108, 136]
['D2CGD1', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613048', 'ENSP00000484154', '10', 121479860, 121593817, '-', 16, 'ENSE00003567716', 121593709, 121593817, 1, 37]
['D2CGD1', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613048', 'ENSP00000484154', '10', 121479860, 121593817, '-', 16, 'ENSE00003637083', 121564502, 121564579, 37, 63]
['D2CGD1', 'FGFR2', 9606, 'ENSG00000066468', 'ENST00000613048', 'ENSP00000484154', '10', 121479860, 121593817, '-', 16, 

In [7]:
%%HTML
<html class="no-js" lang="">
    <head>
        <meta charset="utf-8">
        <meta http-equiv="x-ua-compatible" content="ie=edge">
        <title>protvista-track-demo</title>
        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">

        <link rel="apple-touch-icon" href="apple-touch-icon.png">
        <!-- Place favicon.ico in the root directory -->
        <style>
        svg {
          background: #def;
        }
        div.rule {
          display: flex;
          justify-content: space-evenly;
        }
        div.rule div{
          background: #ccc;
          flex-grow: 1;
          text-align: center;
        }
        div.rule div:nth-child(even){
          background: #eee;
        }
        </style>

    </head>
    <body>
      <table>
        <tr>
          <th>FROM</th><th>Track</th><th>TO</th>
        </tr>
        <tr>
        <tr>
          <td>1</td>
          <td width="100%">
            <protvista-track
              id= "track5"
              length="3"
              displaystart="1" displayend="3"
              highlightstart="2" highlightend="2"
              layout="non-overlapping"
              shape="roundRectangle"
              height="420px">
            </protvista-track>
            <div class="rule">
              <div>1</div>
              <div>2</div>
              <div>3</div>
            </div>
          </td>
          <td>3</td>
        </tr>
      </table>
      <script src="https://d3js.org/d3.v4.min.js" charset="utf-8" defer></script>

      <!-- Required for IE11 -->
      <script src="../node_modules/babel-polyfill/dist/polyfill.min.js"></script>
      <!-- Web component polyfill (only loads what it needs) -->
      <script src="../node_modules/@webcomponents/webcomponentsjs/webcomponents-lite.js" charset="utf-8"></script>
      <!-- Required to polyfill modern browsers as code is ES5 for IE... -->
      <script src="../node_modules/@webcomponents/webcomponentsjs/custom-elements-es5-adapter.js" charset="utf-8"></script>

      <script src="../node_modules/protvista-zoomable/dist/protvista-zoomable.js" charset="utf-8" defer></script>
      <script src="../dist/protvista-track.js" charset="utf-8" defer></script>

      <script>
      window.onload = () => {
        const data = [
          {accession:'feature1', start: 10, end: 40, color: '#342ea2'},
          {accession:'feature2', locations: [{ fragments:[{start: 50, end: 160}]}], color: '#A42ea2'},
          {accession:'feature3', locations: [{
            fragments:[{start: 155, end: 155}]},
            {fragments:[{start: 158, end: 158}]}
          ], color: '#A4Aea2'},
          {accession:'feature4', locations: [
            { fragments:[
              {start: 200, end: 204},
              {start: 206, end: 210}
            ]}
          ]},
        ];
        // document.querySelector("#track1").data = data;
        // document.querySelector("#track1b").data = data;
        // document.querySelector("#track1c").data = data;
        // document.querySelector("#track2").data = data;
        // document.querySelector("#track3").data = data;
        // document.querySelector("#track4").data = [
        //   {accession:'feature1', start: 1, end: 2, color: '#3e3'},
        //   {accession:'feature1', start: 2, end: 3, color: '#e33'},
        // ];
        document.querySelector("#track5").data = [
          {accession:'feature1', start: 1, end: 1, color: '#339'},
          {accession:'feature2', start: 2, end: 2, color: '#33d'},
          {accession:'feature3', start: 3, end: 3, color: '#336'},
          {accession:'feature4', start: 1, end: 2, color: '#73d'},
          {accession:'feature5', start: 2, end: 3, color: '#63e'},
          {accession:'feature6', start: 2, end: 2, color: '#35d', shape: 'rectangle'},
          {accession:'feature7', start: 2, end: 2, color: '#3a3', shape: 'bridge'},
          {accession:'feature8', start: 1, end: 2, color: '#3a3', shape: 'bridge'},
          {accession:'feature9', start: 2, end: 2, color: '#33d', shape: 'diamond'},
          {accession:'feature10', start: 2, end: 3, color: '#33d', shape: 'diamond'},
          {accession:'feature11', start: 2, end: 2, color: '#3a3', shape: 'chevron'},
          {accession:'feature12', start: 1, end: 2, color: '#3a3', shape: 'chevron'},
          {accession:'feature13', start: 2, end: 2, color: '#d3d', shape: 'catFace'},
          {accession:'feature14', start: 2, end: 3, color: '#d3d', shape: 'catFace'},
          {accession:'feature15', start: 2, end: 2, color: '#3a3', shape: 'triangle'},
          {accession:'feature16', start: 1, end: 2, color: '#3a3', shape: 'triangle'},
          {accession:'feature17', start: 2, end: 2, color: '#d3d', shape: 'wave'},
          {accession:'feature18', start: 2, end: 3, color: '#d3d', shape: 'wave'},
          {accession:'feature15', start: 2, end: 2, color: '#3a3', shape: 'hexagon'},
          {accession:'feature16', start: 1, end: 2, color: '#3a3', shape: 'hexagon'},
          {accession:'feature17', start: 2, end: 2, color: '#d3d', shape: 'pentagon'},
          {accession:'feature18', start: 2, end: 3, color: '#d3d', shape: 'pentagon'},
          {accession:'feature19', start: 2, end: 2, color: '#3a3', shape: 'circle'},
          {accession:'feature20', start: 1, end: 2, color: '#3a3', shape: 'circle'},
          {accession:'feature21', start: 2, end: 2, color: '#d3d', shape: 'arrow'},
          {accession:'feature22', start: 2, end: 3, color: '#d3d', shape: 'arrow'},
          {accession:'feature19', start: 2, end: 2, color: '#3a3', shape: 'doubleBar'},
          {accession:'feature20', start: 1, end: 2, color: '#3a3', shape: 'doubleBar'},
        ];
      }
      </script>
    </body>
</html>

FROM,Track,TO
,,
1.0,1  2  3,3.0


In [18]:
%%HTML
<!doctype html>
<html class="no-js" lang="">
    <head>
        <meta charset="utf-8">
        <meta http-equiv="x-ua-compatible" content="ie=edge">
        <title>protvista-track-demo</title>
        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <link type="text/css" rel="stylesheet" href="../styles/protvistaConfig.css"/>
    </head>
    <body>
      <table>
        <tr>
          <th>FROM</th><th>Track</th><th>TO</th>
        </tr>
        <tr>
          <td>1</td>
          <td width="100%">
            <protvista-track id="track1" length="770" displaystart="1" displayend="770" tooltip-event="click">
              <protvista-feature-adapter id="adapter1">
                <data-loader>
                  <source src="https://www.ebi.ac.uk/proteins/api/features/P05067?categories=PTM" />
                </data-loader>
              </protvista-feature-adapter>
            </protvista-track>
          </td>
          <td>770</td>
        </tr>
      </table>
      <script src="https://d3js.org/d3.v4.min.js" charset="utf-8" defer></script>
      <!-- Required for IE11 -->
      <script src="../node_modules/babel-polyfill/dist/polyfill.min.js"></script>
      <!-- Web component polyfill (only loads what it needs) -->
      <script src="../node_modules/@webcomponents/webcomponentsjs/webcomponents-lite.js" charset="utf-8"></script>
      <!-- Required to polyfill modern browsers as code is ES5 for IE... -->
      <script src="../node_modules/@webcomponents/webcomponentsjs/custom-elements-es5-adapter.js" charset="utf-8"></script>

      <script src="../node_modules/data-loader/dist/index.js" charset="utf-8" defer></script>
      <script src="../node_modules/protvista-uniprot-entry-adapter/dist/ProtVistaUniProtEntryAdapter.js" charset="utf-8" defer></script>
      <script src="../node_modules/protvista-feature-adapter/dist/ProtVistaFeatureAdapter.js" charset="utf-8" defer></script>
      <script src="../node_modules/protvista-zoomable/dist/protvista-zoomable.js" charset="utf-8" defer></script>
      <script src="../dist/protvista-track.js" charset="utf-8" defer></script>
    </body>
</html>

FROM,Track,TO
1,,770


In [None]:
from IPython.core.display import HTML

def _set_css_style(css_file_path):
   """
   Read the custom CSS file and load it into Jupyter.
   Pass the file path to the CSS file.
   """

   styles = open(css_file_path, "r").read()
   s = '<style>%s</style>' % styles     
   return HTML(s)

In [22]:
%%javascript
<script src="https://d3js.org/d3.v4.min.js" charset="utf-8" defer></script>
 <!-- Required for IE11 -->
      <script src="https://cdn.jsdelivr.net/npm/babel-polyfill/dist/polyfill.min.js"></script>
      <!-- Web component polyfill (only loads what it needs) -->
      <script src="https://cdn.jsdelivr.net/npm/@webcomponents/webcomponentsjs/webcomponents-lite.js" charset="utf-8"></script>
      <!-- Required to polyfill modern browsers as code is ES5 for IE... -->
      <script src="https://cdn.jsdelivr.net/npm/@webcomponents/webcomponentsjs/custom-elements-es5-adapter.js" charset="utf-8"></script>

      <script src="https://cdn.jsdelivr.net/npm/protvista-navigation@latest/dist/protvista-navigation.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-zoomable@latest/dist/protvista-zoomable.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-tooltip@0.0.2/dist/protvista-tooltip.min.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-track@latest/dist/protvista-track.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-interpro-track@latest/dist/protvista-interpro-track.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-sequence@latest/dist/protvista-sequence.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-variation@latest/dist/protvista-variation.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-variation-filter@latest/dist/protvista-variation-filter.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/data-loader/dist/index.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/uniprot-entry-data-adapter/dist/UniProtEntryDataAdapter.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-feature-adapter/dist/ProtVistaFeatureAdapter.js" charset="utf-8" defer></script>
      <script src="https://cdn.jsdelivr.net/npm/protvista-manager@latest/dist/protvista-manager.js" charset="utf-8" defer></script>
      <script>
      </script>
     

<IPython.core.display.Javascript object>

In [24]:
%%HTML
<!doctype html>
<html class="no-js" lang="">
    <head>
        <meta charset="utf-8">
        <meta http-equiv="x-ua-compatible" content="ie=edge">
        <title>protvista-manager-demo</title>
        <meta name="description" content="">
        <meta name="viewport" content="width=device-width, initial-scale=1">

        <link rel="apple-touch-icon" href="apple-touch-icon.png">
        <!-- Place favicon.ico in the root directory -->

    </head>
    <body>
      <protvista-manager attributes="length displaystart displayend highlightstart highlightend variantfilters" id="example">
        <protvista-navigation length="223" displaystart="1" displayend="158" highlightStart="10" highlightEnd="45"></protvista-navigation>
        <protvista-sequence id="seq1" length="223" displaystart="1" displayend="158"></protvista-sequence>
        <!-- <protvista-track id="track1" length="223" displaystart="1" displayend="158" highlightstart="10" highlightend="45"></protvista-track>
        <protvista-track id="track2" length="223" displaystart="1" displayend="158" highlightstart="10" highlightend="45" layout="non-overlapping"></protvista-track> -->
        <protvista-track id="track1" length="770" displaystart="1" displayend="770">
          <protvista-feature-adapter id="adapter1">
            <data-loader>
              <source src="https://www.ebi.ac.uk/proteins/api/features/P05067?categories=PTM" />
            </data-loader>
          </protvista-feature-adapter>
        </protvista-track>
        <protvista-track id= "track2" length="223" displaystart="1" displayend="158" highlightstart="10" highlightend="45" layout="non-overlapping"></protvista-track>
        <protvista-interpro-track id="track3" length="223" displaystart="1" displayend="158" highlightstart="10" highlightend="45"></protvista-interpro-track>
        <protvista-variation filters="disease" length="223" displaystart="1" displayend="158" highlightstart="10" highlightend="45">
          <data-loader>
            <source src="https://www.ebi.ac.uk/proteins/api/variation/P05067" />
          </data-loader>
        </protvista-variation>
        <protvista-variation-filter></protvista-variation-filter>
      </protvista-manager>

      
    </body>
</html>

In [26]:
%%HTML
<html class="no-js" lang="">

<head>
    <meta charset="utf-8">
    <meta http-equiv="x-ua-compatible" content="ie=edge">
    <title></title>
    <meta name="description" content="">
    <meta name="viewport" content="width=device-width, initial-scale=1">

    <link rel="apple-touch-icon" href="apple-touch-icon.png">
    <link rel="stylesheet" type="text/css" href="https://cdnjs.cloudflare.com/ajax/libs/foundation/6.4.3/css/foundation.min.css">
    <!-- Place favicon.ico in the root directory -->
</head>

<body>
    <div>
        <protvista-structure accession="P06493" highlightstart="290" highlightend="310"></protvista-structure>

        <!-- Required for IE11 -->
        <script src="../node_modules/babel-polyfill/dist/polyfill.min.js" defer></script>
        <!-- Web component polyfill (only loads what it needs) -->
        <script src="../node_modules/@webcomponents/webcomponentsjs/webcomponents-lite.js" charset="utf-8" defer></script>
        <!-- Required to polyfill modern browsers as code is ES5 for IE... -->
        <script src="../node_modules/@webcomponents/webcomponentsjs/custom-elements-es5-adapter.js" charset="utf-8" defer></script>

        <link rel="stylesheet" href="../dist/css/LiteMol-plugin.min.css" type="text/css" defer/>
        <script src="../dist/protvista-structure.js" charset="utf-8" defer></script>
</body>

</html>