# Week 3 Deployment

In [1]:
!pip install fastapi uvicorn nest_asyncio pyngrok matplotlib networkx

Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.12


In [3]:
!pip install pyranges

Collecting pyranges
  Downloading pyranges-0.1.4-py3-none-any.whl.metadata (3.7 kB)
Collecting ncls>=0.0.63 (from pyranges)
  Downloading ncls-0.0.70-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting sorted_nearest>=0.0.33 (from pyranges)
  Downloading sorted_nearest-0.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (964 bytes)
Downloading pyranges-0.1.4-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m72.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ncls-0.0.70-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m89.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sorted_nearest-0.0.39-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m120.0 MB/s[0m eta [36m0:00:00

In [4]:
import os
import tarfile
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import re
import pyranges as pr

In [5]:
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import FileResponse
import tempfile
import matplotlib.pyplot as plt
import networkx as nx

# Replace with your actual function logic

def get_coexp_network_updated(query, iMARGI_files, freq):
    df = pd.read_csv(iMARGI_files, sep='\t', comment='#', header=None)
    gene_names = pd.read_csv("./genes_df.csv")
    genes = gene_names[['Chromosome','Start','End','gene_name']]

    # if feeding the function with all 10 columns
    if df.shape[1] == 10:

        # assign column names
        df.columns = [
            "RNA_chr", "RNA_start", "RNA_end",
            "DNA_chr", "DNA_start", "DNA_end",
            "name", "score", "RNA_strand", "DNA_strand"
        ]


        # left join RNA names with hg38 reference
        rna_annot = genes.rename(columns={
        'Chromosome':'RNA_chr',
        'Start'     :'RNA_start',
        'End'       :'RNA_end',
        'gene_name' :'RNA_gene_name'})

        df = df.merge(
            rna_annot,
            how = 'left',
            on = ['RNA_chr', 'RNA_start', 'RNA_end']
        )

        # doing the same for DNA names
        dna_annot = genes.rename(columns={
            'Chromosome':'DNA_chr',
            'Start'     :'DNA_start',
            'End'       :'DNA_end',
            'gene_name' :'DNA_gene_name'})

        df = df.merge(
            dna_annot,
            how = 'left',
            on = ['DNA_chr', 'DNA_start', 'DNA_end']
        )


        # when query is DNA_coordinate
        if query[:3]== 'chr':

            query_type = 'coordinate'
            query_chr = query.split(':')[0]
            coord = query.split(':')[1].replace(',','').replace('–', '-')
            query_start, query_end = map(int, coord.split('-'))

            # getting the df with all the DNA interactions
            # that match the query RNA coordinate
            query_df = df[(df['RNA_chr'] == query_chr) &
            (df['RNA_start'] <= query_end) &
            (df['RNA_end'] >= query_start)
            ]

            # add 'DNA_coord'
            query_df['DNA_coord'] = (
                query_df['DNA_chr'].astype(str)
                + ':'
                + query_df['DNA_start'].astype(str)
                + '-'
                + query_df['DNA_end'].astype(str)
            )

            # due to lack of time/effort
            # for now only checking the
            # occurances of different 'DNA_start' to showcase vc

            vc = query_df[['RNA_chr', 'DNA_coord']].value_counts().reset_index(name='count')
            vc = vc.sort_values(by='count', ascending=False).head(100)

            G = nx.Graph()
            G.add_node(query)

            for _, row in vc.iterrows():
                dna_label = row['DNA_coord']
                G.add_node(dna_label, count=row['count'])
                G.add_edge(query, dna_label, weight=row['count'])

            plt.figure(figsize=(14, 12))
            pos = nx.spring_layout(G, k=0.6, seed=42)

            node_sizes = []
            node_colors = []

            for node in G.nodes():
                if node == query:
                    node_sizes.append(800)
                    node_colors.append('red')
                else:
                    count = G.nodes[node].get("count", 1)
                    node_sizes.append(100 + count * 10)
                    # Highlight if count > frequency
                    if count > freq:
                        node_colors.append('orange')  # Highlight color
                    else:
                        node_colors.append('skyblue')  # Default

        # when query is RNA_gene_name
        else:
            if query not in df['RNA_gene_name'].values:
                print(f'Sorry, gene name {query} not found in dataset.')

            else:
                query_df = df[df['RNA_gene_name'] == query][['RNA_gene_name', 'DNA_gene_name']]


                vc = query_df.value_counts().reset_index(name='count')
                vc = vc.sort_values(by='count', ascending=False).head(100)

                G = nx.Graph()
                G.add_node(query)

                for _, row in vc.iterrows():
                    dna_label = row['DNA_gene_name']
                    G.add_node(dna_label, count=row['count'])
                    G.add_edge(query, dna_label, weight=row['count'])

                plt.figure(figsize=(14, 12))
                pos = nx.spring_layout(G, k=0.6, seed=42)

                node_sizes = []
                node_colors = []

                for node in G.nodes():
                    if node == query:
                        node_sizes.append(800)
                        node_colors.append('red')
                    else:
                        count = G.nodes[node].get("count", 1)
                        node_sizes.append(100 + count * 10)
                        # Highlight if count > frequency
                        if count > freq:
                            node_colors.append('orange')  # Highlight color
                        else:
                            node_colors.append('skyblue')


        # Draw the graph
        plt.figure(figsize=(14, 12))
        pos = nx.spring_layout(G, k=0.6, seed=42)

        nx.draw(
            G, pos,
            with_labels=True,
            node_size=node_sizes,
            node_color=node_colors,
            edge_color='gray',
            font_size=8
        )

        plt.title(f"{query} Interaction Network (Highlighting count > {freq})", fontsize=16)
        plt.axis('off')

        return plt.gcf()

    # if feeding the dataset with gene info already
    else:

        # iMARGI datasets with more than 10 cols
        # usually contains the additional info
        # this is to locate the targeted columns
        # if the datasets contain more/less than 10 columns


        gene_info_cols = []
        # save the column names that contain
        # addtional DNA/RNA info into a list

        for col in df.columns:
            sample_values = df[col].dropna().astype(str).head(20)

            match_ratio = sum(
                bool(re.match(r'^(?:[^|]*\|){2,}[^|]*$', val)) for val in sample_values
            ) / len(sample_values)

            # setting the matchin pattern to be
            # at least 70% matching
            if match_ratio > 0.7:
                gene_info_cols.append(col)


        # rename the columns
        df.rename(
            columns=dict(zip(gene_info_cols, ["RNA_gene_info", "DNA_gene_info"])),
            inplace=True
            )


        # the first 6 columns usually
        # follow this pattern
        df.columns = [
            "RNA_chr", "RNA_start", "RNA_end",
            "DNA_chr", "DNA_start", "DNA_end",
            *df.columns[6:].astype(str)
            ]

        # create a new df with the preferred columns
        df = df[["RNA_chr", "RNA_start", "RNA_end",
            "DNA_chr", "DNA_start", "DNA_end",
            "RNA_gene_info", "DNA_gene_info"]]

        # filter rows where both gene info fields have exactly 2 pipe characters (i.e., 3 parts)

        df_rna_filtered = df[df['RNA_gene_info'].str.count(r'\|')==2]

        # now do the same for DNA
        df_all_filtered = df_rna_filtered[df_rna_filtered['DNA_gene_info'].str.count(r'\|') == 2]

        df = df_all_filtered

        # Split RNA_gene_info into 3 new columns
        df[['RNA_gene_id', 'RNA_gene_name', 'RNA_gene_type']] = df['RNA_gene_info'].str.split('|', expand=True)

        # Split DNA_gene_info into 3 new columns
        df[['DNA_gene_id', 'DNA_gene_name', 'DNA_gene_type']] = df['DNA_gene_info'].str.split('|', expand=True)


        # if the query matches dna_coordinates
        if query[:3]== 'chr':
            query_type = 'coordinate'
            query_chr = query.split(':')[0]
            coord = query.split(':')[1].replace(',','').replace('–', '-')
            query_start, query_end = map(int, coord.split('-'))

            # getting the df with all the DNA interactions
            # that match the query RNA coordinate
            query_df = df[(df['RNA_chr'] == query_chr) &
            (df['RNA_start'] <= query_end) &
            (df['RNA_end'] >= query_start)
            ]

            vc = query_df['DNA_gene_name'].value_counts().reset_index(name='count')
            vc = vc.sort_values(by='count', ascending=False).head(100)

            G = nx.Graph()
            G.add_node(query)

            for _, row in vc.iterrows():
                dna_label = f"{row['DNA_gene_name']}"
                G.add_node(dna_label, count=row['count'])

                G.add_edge(query, dna_label, weight=row['count'])

            plt.figure(figsize=(14, 12))
            pos = nx.spring_layout(G, k=0.6, seed=42)

            node_sizes = []
            node_colors = []

            for node in G.nodes():
                if node == query:
                    node_sizes.append(800)
                    node_colors.append('red')
                else:
                    count = G.nodes[node].get("count", 1)
                    node_sizes.append(100 + count * 10)
                    # Highlight if count > frequency
                    if count > freq:
                        node_colors.append('orange')  # Highlight color
                    else:
                        node_colors.append('skyblue')  # Default


        # if the query is RNA_gene_name
        else:
            if query not in df['RNA_gene_name'].values:
                print(f'Sorry, gene name {query} not found in dataset.')

            else:

                query_df = df[df['RNA_gene_name'] == query][['DNA_gene_name']]


                vc = query_df.value_counts().reset_index(name='count')
                vc = vc.sort_values(by='count', ascending=False).head(100)

                G = nx.Graph()
                G.add_node(query)

                for _, row in vc.iterrows():
                    dna_label = row['DNA_gene_name']
                    G.add_node(dna_label, count=row['count'])
                    G.add_edge(query, dna_label, weight=row['count'])

                plt.figure(figsize=(14, 12))
                pos = nx.spring_layout(G, k=0.6, seed=42)

                node_sizes = []
                node_colors = []

                for node in G.nodes():
                    if node == query:
                        node_sizes.append(800)
                        node_colors.append('red')
                    else:
                        count = G.nodes[node].get("count", 1)
                        node_sizes.append(100 + count * 10)
                        # Highlight if count > frequency
                        if count > freq:
                            node_colors.append('orange')  # Highlight color
                        else:
                            node_colors.append('skyblue')





        # Draw the graph
        plt.figure(figsize=(14, 12))
        pos = nx.spring_layout(G, k=0.6, seed=42)

        nx.draw(
            G, pos,
            with_labels=True,
            node_size=node_sizes,
            node_color=node_colors,
            edge_color='gray',
            font_size=8
        )

        plt.title(f"{query} Interaction Network (Highlighting count > {freq})", fontsize=16)
        plt.axis('off')

        return plt.gcf()




In [7]:
!ngrok config add-authtoken 30ZLq86f381eckwFAmLZAyQKBZV_4YxLjbngtusfgnzGbEueC

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [10]:
from pyngrok import ngrok

ngrok.set_auth_token("30ZLq86f381eckwFAmLZAyQKBZV_4YxLjbngtusfgnzGbEueC")

In [16]:
ngrok_tunnel = ngrok.connect(8000)
print("🌐 Public URL:", ngrok_tunnel.public_url)

🌐 Public URL: https://763c865d10af.ngrok-free.app


In [14]:


app = FastAPI()

@app.post("/run")
async def run_analysis(
    file: UploadFile,
    query: str = Form(...),
    freq: int = Form(...)
):
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name

    fig = get_coexp_network_updated(query, tmp_path, int(freq))
    output_path = "/tmp/network.png"
    fig.savefig(output_path)
    plt.close(fig)
    return FileResponse(output_path, media_type="image/png")


nest_asyncio.apply()
uvicorn.run(app, port=8000)

INFO:     Started server process [3720]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [3720]


In [13]:
#print out the server name

print("the server web is https://f384df7c1d87.ngrok-free.app/")

the server web is https://f384df7c1d87.ngrok-free.app/


In [17]:
public_url = ngrok.connect(8000)
print("🔗 Public URL:", public_url)

# Start the server
nest_asyncio.apply()
uvicorn.run(app, port=8000)

🔗 Public URL: NgrokTunnel: "https://3ae80a7d756b.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [3720]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-13' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:69> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 580, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 67, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
 

INFO:     64.52.136.117:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "POST / HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "GET /run HTTP/1.1" 405 Method Not Allowed
INFO:     64.52.136.117:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     64.52.136.117:0 - "POST /run HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py",

INFO:     64.52.136.117:0 - "POST /run HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py", line 186, in __call__
    raise exc
  File "/usr/local/lib/python3.11/dist-packages/starlette/middleware/errors.py",