# postgres info

In [None]:
psql -h localhost -p 5432 -U postgres -d postgres

# get SQL statement

In [None]:
table_name = "protmodcon"  # Replace with your table name
columns = ",\n  ".join([f"ADD COLUMN ann{i} text" for i in range(1, 101)])
sql = f"ALTER TABLE {table_name}\n  {columns};"
print(sql)

# Insert protein_id_position_AA

In [None]:
# Create csv on servers
import json
import pandas as pd

# Load your data
with open('protein_id_position_AA.json', 'r') as f:
    protein_id_position_AA = json.load(f)

with open('protein_id_annotation_position.json', 'r') as f:
    protein_id_annotation_position = json.load(f)

rows = []

# From protein_id_position_AA.json (using AA as annotation)
for key, aa in protein_id_position_AA.items():
    protein_id, position = key.rsplit('_', 1)
    rows.append({'protein_id': protein_id, 'position': position, 'annotation': aa})

# From protein_id_annotation_position.json (using actual annotation)
for protein_id, annotations in protein_id_annotation_position.items():
    for annotation, positions in annotations.items():
        for position in positions:
            rows.append({'protein_id': protein_id, 'position': str(position), 'annotation': annotation})

# Optional: Deduplicate
df = pd.DataFrame(rows)
df = df.drop_duplicates(subset=['protein_id', 'position', 'annotation'])

# Export to CSV or use as needed
df.to_csv('protmodcon.csv', index=False)
print(df.head())

In [12]:
import csv
import psycopg2
from psycopg2.extras import execute_values
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Update these with your actual credentials
DB_NAME = "postgres"
DB_USER = "postgres"
DB_PASSWORD = "postgrespassword"
DB_HOST = "localhost"
CSV_FILE = "protmodcon.csv"  # Path to your CSV file

def load_csv_to_postgres(csv_file):
    # Read CSV
    with open(csv_file, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        rows = [ (row['protein_id'], row['position'], row['annotation']) for row in reader ]

    logger.info(f"Read {len(rows)} rows from {csv_file}")

    # Connect to PostgreSQL
    conn = psycopg2.connect(
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        host=DB_HOST
    )
    cur = conn.cursor()

    # Bulk insert
    query = """
        INSERT INTO protmodcon (protein_id, position, annotation)
        VALUES %s
        ON CONFLICT (protein_id, position, annotation) DO NOTHING
    """
    execute_values(cur, query, rows, page_size=1000)
    conn.commit()
    cur.close()
    conn.close()
    logger.info(f"Inserted {len(rows)} rows into PostgreSQL table protmodcon.")

#if __name__ == "__main__":
#    load_csv_to_postgres(CSV_FILE)

2025-05-27 10:44:40,026 - INFO - Read 28210695 rows from protmodcon.csv
2025-05-27 10:53:49,197 - INFO - Inserted 28210695 rows into PostgreSQL table protmodcon.


In [16]:
pw = '?upHL4O#ROMLfa4Exuqo!hLWrLbr87if'

In [20]:
import requests

url = "http://localhost:8080/v1/graphql"

# Query for Tyrosine (Y)
query_tyrosine = '''
query {
  protmodcon(where: { annotation: { _eq: "S" } }) {
    protein_id
    position
  }
}
'''

# Query for Phospho ([21]Phospho)
query_phospho = '''
query {
  protmodcon(where: { annotation: { _eq: "[21]Phospho" } }) {
    protein_id
    position
  }
}
'''

headers = {
    'x-hasura-admin-secret': pw
}

def run_query(query):
    response = requests.post(url, json={'query': query}, headers=headers)
    response.raise_for_status()
    print(response.json())
    return response.json()['data']['protmodcon']

# Fetch both result sets
tyrosine_rows = run_query(query_tyrosine)
phospho_rows = run_query(query_phospho)

# Create sets of (protein_id, position) pairs
tyrosine_keys = set((row['protein_id'], row['position']) for row in tyrosine_rows)
phospho_keys = set((row['protein_id'], row['position']) for row in phospho_rows)

# Compute the intersection
intersection = tyrosine_keys & phospho_keys

print(f"Number of Tyrosine residues that are phosphorylated: {len(intersection)}")

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Number of Tyrosine residues that are phosphorylated: 67034
