<a href="https://colab.research.google.com/github/vprobon/iLIR-ML-data/blob/main/fetch_AF2disorder_mobidb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Copy the LIRcentral canonical-ID list (must already be in the working directory)
# to 'uniprot_ids.txt', the file name the fetch cell reads, then print its contents.
!cp LIRcentral_22Apr2024-UniqueIDs-Canonical.txt  uniprot_ids.txt
!cat uniprot_ids.txt

In [None]:
import requests
import json
import csv

# Function to read UniProt IDs from a file
def read_uniprot_ids(file_path):
    """Read UniProt IDs from a text file, one ID per line.

    Leading/trailing whitespace is stripped from every line and lines that
    are empty after stripping are dropped, so a trailing blank line or stray
    spaces never turn into an empty or padded ID in the request URL.

    Args:
        file_path: Path of the text file to read.

    Returns:
        List of non-empty ID strings, in file order.
    """
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file.read().splitlines())
        return [uniprot_id for uniprot_id in stripped_lines if uniprot_id]

# Function to query MobiDB for AlphaFold-disorder results
def query_mobidb(uniprot_id, timeout=30):
    """Download the MobiDB JSON record for one UniProt ID.

    Args:
        uniprot_id: Accession sent as the ``acc`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole run.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status, network failure, timeout, or a
        body that is not valid JSON).
    """
    try:
        # Let requests build the query string so the ID is URL-encoded.
        response = requests.get(
            "https://mobidb.bio.unipd.it/api/download",
            params={"acc": uniprot_id, "format": "json"},
            timeout=timeout,
        )
        if response.status_code != 200:
            return None
        return response.json()
    except (requests.RequestException, ValueError) as error:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException subclass. Report and carry on
        # so one bad ID does not abort a long batch.
        print(f"Request error for UniProt ID {uniprot_id}: {error}")
        return None

# Function to extract prediction-disorder-alphafold values from JSON
def extract_alphafold_disorder(json_data):
    """Return the 'prediction-disorder-alphafold' entry of a MobiDB record.

    Args:
        json_data: Decoded JSON container returned by the MobiDB query.

    Returns:
        The value stored under 'prediction-disorder-alphafold', or ``None``
        when that key is not present.
    """
    key = 'prediction-disorder-alphafold'
    return json_data[key] if key in json_data else None

# Function to save all results to a single CSV file
def save_all_results(file_path, results):
    """Write every collected prediction to one CSV file.

    The file starts with the header row 'UniProt ID', 'AlphaFold Disorder
    Predictions', followed by one row per entry of ``results``. Each value is
    written as a single cell using the csv module's default string
    conversion.

    Args:
        file_path: Destination path; an existing file is overwritten.
        results: Mapping of UniProt ID to its extracted prediction value.
    """
    header = ['UniProt ID', 'AlphaFold Disorder Predictions']
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows([accession, predictions] for accession, predictions in results.items())

def main(uniprot_ids_file, output_file):
    """Fetch AlphaFold-disorder predictions for a list of IDs and save them.

    Reads the IDs from ``uniprot_ids_file``, queries MobiDB once per ID,
    keeps the IDs for which a non-empty prediction value was found, and
    writes them all to ``output_file`` at the end. Progress and failures are
    printed as the loop runs.

    Args:
        uniprot_ids_file: Path of the text file listing UniProt IDs.
        output_file: Path of the CSV file to create.
    """
    collected = {}
    for accession in read_uniprot_ids(uniprot_ids_file):
        # IDs containing a hyphen are skipped without a query
        # (presumably isoform accessions such as 'P12345-2' -- TODO confirm).
        if '-' in accession:
            continue

        print(f"Querying MobiDB for UniProt ID: {accession}")
        record = query_mobidb(accession)
        if not record:
            print(f"Failed to retrieve results for UniProt ID: {accession}")
            continue

        predictions = extract_alphafold_disorder(record)
        if not predictions:
            print(f"No AlphaFold disorder values for UniProt ID: {accession}")
            continue

        collected[accession] = predictions

    save_all_results(output_file, collected)
    print(f"All results saved to {output_file}")

# Example usage
# Input and output locations for this run; both are relative to the working directory.
uniprot_ids_file = 'uniprot_ids.txt'  # Replace with your file path containing UniProt IDs
output_file = 'alphafold_disorder_predictions.csv'  # Output file to save results
# Runs the whole fetch immediately: one MobiDB request per ID, then a single CSV write.
main(uniprot_ids_file, output_file)


Querying MobiDB for UniProt ID: A0A3Q9JIX6
No AlphaFold disorder values for UniProt ID: A0A3Q9JIX6
Querying MobiDB for UniProt ID: A1Z995
Querying MobiDB for UniProt ID: A5HBY5
No AlphaFold disorder values for UniProt ID: A5HBY5
Querying MobiDB for UniProt ID: B4F9B1
Querying MobiDB for UniProt ID: B9EIS5
No AlphaFold disorder values for UniProt ID: B9EIS5
Querying MobiDB for UniProt ID: C4R8D7
Querying MobiDB for UniProt ID: D0NBE6
Querying MobiDB for UniProt ID: G5EC37
Querying MobiDB for UniProt ID: H6WNF3
No AlphaFold disorder values for UniProt ID: H6WNF3
Querying MobiDB for UniProt ID: M1BJF6
Querying MobiDB for UniProt ID: O13709
Querying MobiDB for UniProt ID: O14641
Querying MobiDB for UniProt ID: O15040
Querying MobiDB for UniProt ID: O15111
Querying MobiDB for UniProt ID: O15392
Querying MobiDB for UniProt ID: O15553
Querying MobiDB for UniProt ID: O60238
Querying MobiDB for UniProt ID: O75143
Querying MobiDB for UniProt ID: O75164
Querying MobiDB for UniProt ID: O75376
Quer

In [None]:
# Count the lines written to the CSV (the header row is included in the count).
!wc -l alphafold_disorder_predictions.csv
# Preview the last records. `tail` needs the file name: without one it reads stdin
# and prints nothing from the CSV. Each row holds a whole prediction value in one
# cell, so the preview is limited to 3 rows of at most 200 characters.
!tail -n 3 alphafold_disorder_predictions.csv | cut -c 1-200

123 alphafold_disorder_predictions.csv
