In [1]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time
from tqdm import tqdm
from abnumber import Chain

In [2]:
# Input locations: the sample humanization results (CSV) and the humanization
# pair data (Excel workbook).
# NOTE(review): both are absolute, machine-specific paths; the notebook will not
# run elsewhere without editing them.
sample_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/sample_humanization_result.csv'
humanized_fpath = '/data/home/waitma/antibody_proj/antidiff/data/lab_data/humanization_pair_data.xlsx'

In [3]:
# Load both inputs into DataFrames: the CSV with read_csv, the workbook with read_excel.
sample_df = pd.read_csv(sample_fpath)
humanized_df = pd.read_excel(humanized_fpath)

In [4]:
# Convert the scraped result tables into plain nested lists of cell text.
def regular_order_table(out_of_order_table):
    """Turn parsed HTML tables into nested lists of cell text.

    Args:
        out_of_order_table: iterable of table elements. Each element must
            expose ``find_all('tr')``, and each row must expose
            ``find_all(['th', 'td'])`` returning cells with a ``.text``
            attribute.

    Returns:
        A list with one entry per table (at most two). Each entry is a list
        of rows, and each row is a list of the cell texts in document order.
    """
    parsed_tables = [
        [
            [cell.text for cell in tr.find_all(['th', 'td'])]
            for tr in table.find_all('tr')
        ]
        for table in out_of_order_table
    ]
    # Every table is parsed, but only the first two are used downstream
    # (the results page is expected to carry three).
    return parsed_tables[:2]

# Extract the data of interest: we only want to know whether each sequence can be viewed as human.
def extract_human_data(regular_table):
    """Pick the row classified as ``'HUMAN'`` out of each score table.

    Args:
        regular_table: list of tables as produced by ``regular_order_table``;
            each table is a list of rows and each row a list of cell strings
            whose last cell is the classification.

    Returns:
        A flat list holding, for every table in order, the cells of its
        ``'HUMAN'`` row. If several rows qualify, the last one wins. A table
        without such a row (including a table with no rows at all)
        contributes ``[None, None, None, None]`` so positions stay aligned.
    """
    extracted_data = []
    for table_data in regular_table:
        # Placeholder used when no row of this table is classified as HUMAN.
        human_row = [None, None, None, None]
        for row in table_data:
            # A row without cells has no classification; indexing it would
            # raise IndexError, so it is skipped.
            if row and row[-1] == 'HUMAN':
                human_row = row
        extracted_data.extend(human_row)
    return extracted_data

# Define request process.
def get_predict_result(job_name, h_seq, l_seq, wait_seconds=15, timeout=60):
    """Submit one sequence pair to the SAbPred ``humab`` form and scrape the result.

    The function posts the form, waits ``wait_seconds`` for the server to
    finish, fetches the page the POST was redirected to, and extracts the
    ``'HUMAN'`` rows of the first two result tables.

    Args:
        job_name: value sent as the ``jobname_score`` form field.
        h_seq: value sent as ``h_sequence_score``; echoed back in the result.
        l_seq: value sent as ``l_sequence_score``; echoed back in the result.
        wait_seconds: pause between submitting and fetching the result page.
        timeout: per-request timeout in seconds handed to ``requests``.

    Returns:
        ``extracted_values + [h_seq, l_seq]``. When the result page does not
        answer with status 200, or holds no result tables yet, the extracted
        part is empty and the list is just ``[h_seq, l_seq]``.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeouts.
    """
    # Url path
    humab_url = 'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab'

    data = {
        'h_sequence_score': h_seq,
        'l_sequence_score': l_seq,
        'jobname_score': job_name
    }
    # A timeout stops one stalled request from blocking the whole batch.
    response = requests.post(humab_url, data=data, timeout=timeout)
    result_url = response.url
    print(result_url)

    # The job runs server-side; give it time before reading the result page.
    time.sleep(wait_seconds)

    # Get the result page.
    result_response = requests.get(result_url, timeout=timeout)

    # Defined up front so the non-200 branch returns a well-formed (empty)
    # result instead of failing on an unbound name.
    extract_data = []
    if result_response.status_code == 200:
        soup = BeautifulSoup(result_response.text, 'html.parser')
        tables = soup.find_all('table', {'class': 'table table-results'})

        predict_table = regular_order_table(tables)
        print(predict_table)
        extract_data = extract_human_data(predict_table)
        print(extract_data)
    else:
        print('May be url has problem or need larger sleep time.')

    sequence_list = [h_seq, l_seq]
    return extract_data + sequence_list
    

In [6]:
# Normalise the column names, then keep only the rows whose 'Specific' value is
# 'humanization' (with a fresh 0..n-1 index) and display them.
# The 'Specific' and 'name' entries map a name to itself and are therefore
# no-ops; only 'HSEQ' and 'LSEQ' are actually renamed (to lower case).
new_columns_name = {
    'Specific': 'Specific',
    'name': 'name',
    'HSEQ': 'hseq',
    'LSEQ': 'lseq'
}

# NOTE: this rebinds sample_df, so later cells see the renamed columns.
sample_df = sample_df.rename(columns=new_columns_name)
sample_human_df = sample_df[sample_df['Specific'] == 'humanization'].reset_index(drop=True)
sample_human_df

Unnamed: 0,Specific,name,hseq,lseq,Unnamed: 4
0,humanization,3A3human_0,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...,
1,humanization,MAK195human_0,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...,
2,humanization,31#human_0,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...,
3,humanization,56#human_0,QVQLQESGPRLVEPSETLSLTCTVSGYSITSDYDWGWIRQPPGRGL...,DTLLTQSPDHLSVTLGERATISCRASQNIGTSLNWYQQKPGQSPKR...,
4,humanization,74#human_0,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...,
...,...,...,...,...,...
311,humanization,149G11human_0,EVRLVQSGAEVKKPGESLTISCKISGYTFTNYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATFNCKSSQNLLYNSNQKSYLAWYQQKP...,
312,humanization,mu515H7human_0,EVQLVESGGGLVQPGGSLRLSCATSGFTFTDNYMDWVRQAPGKGLE...,IVLTQSPDSLAVSLGERATINCKSSQSLFNSRTRKNYLTWYQQKPG...,
313,humanization,mu16B5human_0,QVRLVQSGAEVKKPGASVGVSCKASGFNIKDIYIHWVRQVPGQGPE...,DIVMTQFPNSLAVSLGERATINCKSSQSLLNSRTRKNYLSWYQKKA...,
314,humanization,21B12human_0,QVQLVQSGSELKKPGASVKVSCKASGYTFTNYGLNWVRQAPGQGLE...,DIVMTQSPDSLAVSLGERATIDCKSSQSLLYSSNQKNYLTWYQQKP...,


In [7]:
# Load the filtered humanization results stored in the same checkpoint directory
# as the sample CSV, and display them.
# NOTE(review): absolute, machine-specific path.
filter_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/filter_humanization_result.csv'
filter_human_df = pd.read_csv(filter_fpath)
filter_human_df

Unnamed: 0,Specific,name,hseq,lseq,Unnamed: 4
0,humanization,3A3human_0,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...,
1,humanization,31#human_0,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...,
2,humanization,74#human_0,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...,
3,humanization,SC73.38human_0,QVHLVQSGAEVKKPGASVKVSCKASGYSFTGYTIHWVRQAPGQGLE...,EIVMTQSPATLSVSPGETATLSCRASQSISNNLVWYQQKPDQAPRL...,
4,humanization,SC73.39human_0,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...,
...,...,...,...,...,...
111,humanization,RK22human_0,EVQLVQSGVEVKKPGESLKISCKGSGYTFTSYWIGWVRQMPGKGLE...,DIVVTQSPDSLAVSLGERATINCKSSQSLLNSANQKNYLAWYQQKP...,
112,humanization,49G8human_0,QVQLVQSGSEVKKPGASVKVSCQASGYTFADYEIHWMRQAPGQGPE...,DVVVTQSPLSLPVTLGQPASISCRSSQSIVHSNGNTYLDWFQQRPG...,
113,humanization,mu515H7human_0,EVQLVESGGGLVQPGGSLRLSCATSGFTFTDNYMDWVRQAPGKGLE...,IVLTQSPDSLAVSLGERATINCKSSQSLFNSRTRKNYLTWYQQKPG...,
114,humanization,mu16B5human_0,QVRLVQSGAEVKKPGASVGVSCKASGFNIKDIYIHWVRQVPGQGPE...,DIVMTQFPNSLAVSLGERATINCKSSQSLLNSRTRKNYLSWYQKKA...,


In [10]:
# Spot check: parse the first `lseq` entry with the IMGT scheme and show its chain type.
# Relies on the earlier rename that produced the lower-case `lseq` column.
Chain(sample_df['lseq'][0], scheme='imgt').chain_type

'K'

In [12]:
# Query the humanness predictor for every humanization sample and collect the
# results in `humab_df`; positions of samples that never produced a complete
# result are collected in `Not_successful_index`.
HUMAB_COLUMNS = ['Raw_name', 'H-V-gene', 'H-Score', 'H-Threshold', 'H-Classification',
                 'L-V-gene', 'L-Score', 'L-Threshold', 'L-Classification', 'H_seq', 'L_seq']
MAX_RETRIES = 10          # attempts per sample before giving up
RETRY_DELAY_SECONDS = 5   # pause after a failed request
# get_predict_result returns every column except the leading 'Raw_name'.
EXPECTED_RESULT_LEN = len(HUMAB_COLUMNS) - 1

humab_records = []
Not_successful_index = []
try:
    for i, (_, line) in enumerate(tqdm(sample_human_df.iterrows(), total=len(sample_human_df))):
        h_seq = line['hseq']
        l_seq = line['lseq']

        # Samples whose light chain is typed 'L' (lambda) are skipped.
        l_chain_type = Chain(l_seq, scheme='imgt').chain_type
        if l_chain_type == 'L':
            continue

        job_name = line['Specific'] + '_' + str(i)

        # Reset per sample: otherwise a sample whose attempts all fail would
        # silently reuse the previous sample's result.
        data = None
        for _ in range(MAX_RETRIES):
            try:
                data = get_predict_result(job_name, h_seq, l_seq)
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C still stops the run.
                time.sleep(RETRY_DELAY_SECONDS)
                continue
            if len(data) == EXPECTED_RESULT_LEN:
                break

        if data is not None and len(data) == EXPECTED_RESULT_LEN:
            humab_records.append([line['name']] + data)
        else:
            Not_successful_index.append(i)
finally:
    # Build the frame once, and do it even if the loop is interrupted, so the
    # results gathered so far remain available.
    humab_df = pd.DataFrame(humab_records, columns=HUMAB_COLUMNS)
    
    

0it [00:00, ?it/s]

https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0820480


1it [00:20, 20.36s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.005', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.735', '0.575', 'HUMAN'], ['HV4', '0.000', '0.565', 'NOT HUMAN'], ['HV5', '0.000', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.900', '0.720', 'HUMAN'], ['KV2', '0.005', '0.720', 'NOT HUMAN'], ['KV3', '0.085', '0.720', 'NOT HUMAN'], ['KV4', '0.020', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.735', '0.575', 'HUMAN', 'KV1', '0.900', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0036930
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0762731
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0662848


2it [01:45, 58.23s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.140', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.975', '0.575', 'HUMAN'], ['HV4', '0.020', '0.565', 'NOT HUMAN'], ['HV5', '0.015', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.010', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.730', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.010', '0.720', 'NOT HUMAN'], ['KV4', '0.030', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.975', '0.575', 'HUMAN', 'KV1', '0.730', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0733558


3it [02:05, 41.00s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.670', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.185', '0.575', 'NOT HUMAN'], ['HV4', '0.075', '0.565', 'NOT HUMAN'], ['HV5', '1.000', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.080', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.010', '0.720', 'NOT HUMAN'], ['KV2', '0.125', '0.720', 'NOT HUMAN'], ['KV3', '0.085', '0.720', 'NOT HUMAN'], ['KV4', '0.985', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV5', '1.000', '0.520', 'HUMAN', 'KV4', '0.985', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0127961
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0439387
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0569298


4it [03:02, 47.17s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.025', '0.725', 'NOT HUMAN'], ['HV2', '0.030', '0.835', 'NOT HUMAN'], ['HV3', '0.040', '0.575', 'NOT HUMAN'], ['HV4', '0.995', '0.565', 'HUMAN'], ['HV5', '0.080', '0.520', 'NOT HUMAN'], ['HV6', '0.030', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.560', '0.720', 'NOT HUMAN'], ['KV2', '0.105', '0.720', 'NOT HUMAN'], ['KV3', '0.810', '0.720', 'HUMAN'], ['KV4', '0.295', '0.655', 'NOT HUMAN'], ['KV5', '0.025', '0.815', 'NOT HUMAN'], ['KV6', '0.190', '0.475', 'NOT HUMAN']]]
['HV4', '0.995', '0.565', 'HUMAN', 'KV3', '0.810', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0582062


5it [03:20, 36.74s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.540', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.240', '0.575', 'NOT HUMAN'], ['HV4', '0.120', '0.565', 'NOT HUMAN'], ['HV5', '1.000', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.070', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.175', '0.720', 'NOT HUMAN'], ['KV2', '0.180', '0.720', 'NOT HUMAN'], ['KV3', '0.125', '0.720', 'NOT HUMAN'], ['KV4', '0.950', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV5', '1.000', '0.520', 'HUMAN', 'KV4', '0.950', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0755411
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0352182
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0545713


6it [04:20, 44.82s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.385', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.210', '0.575', 'NOT HUMAN'], ['HV4', '0.010', '0.565', 'NOT HUMAN'], ['HV5', '0.055', '0.520', 'NOT HUMAN'], ['HV6', '0.005', '0.930', 'NOT HUMAN'], ['HV7', '0.770', '0.720', 'HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.940', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.075', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.015', '0.475', 'NOT HUMAN']]]
['HV7', '0.770', '0.720', 'HUMAN', 'KV1', '0.940', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0285580


7it [04:40, 36.45s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.995', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.210', '0.575', 'NOT HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.135', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.205', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.865', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.030', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.005', '0.815', 'NOT HUMAN'], ['KV6', '0.060', '0.475', 'NOT HUMAN']]]
['HV1', '0.995', '0.725', 'HUMAN', 'KV1', '0.865', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0212814
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0510133
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0135733


8it [05:41, 44.51s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '1.000', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.175', '0.575', 'NOT HUMAN'], ['HV4', '0.015', '0.565', 'NOT HUMAN'], ['HV5', '0.225', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.245', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.315', '0.720', 'NOT HUMAN'], ['KV2', '0.030', '0.720', 'NOT HUMAN'], ['KV3', '1.000', '0.720', 'HUMAN'], ['KV4', '0.065', '0.655', 'NOT HUMAN'], ['KV5', '0.005', '0.815', 'NOT HUMAN'], ['KV6', '0.080', '0.475', 'NOT HUMAN']]]
['HV1', '1.000', '0.725', 'HUMAN', 'KV3', '1.000', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0217295


9it [06:00, 36.53s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.995', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.135', '0.575', 'NOT HUMAN'], ['HV4', '0.010', '0.565', 'NOT HUMAN'], ['HV5', '0.170', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.230', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.025', '0.720', 'NOT HUMAN'], ['KV2', '0.145', '0.720', 'NOT HUMAN'], ['KV3', '0.050', '0.720', 'NOT HUMAN'], ['KV4', '0.985', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV1', '0.995', '0.725', 'HUMAN', 'KV4', '0.985', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0880493
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0671881
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0339657


10it [06:58, 43.15s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.645', '0.725', 'NOT HUMAN'], ['HV2', '0.005', '0.835', 'NOT HUMAN'], ['HV3', '0.265', '0.575', 'NOT HUMAN'], ['HV4', '0.050', '0.565', 'NOT HUMAN'], ['HV5', '1.000', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.090', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.035', '0.720', 'NOT HUMAN'], ['KV2', '0.090', '0.720', 'NOT HUMAN'], ['KV3', '0.080', '0.720', 'NOT HUMAN'], ['KV4', '0.940', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV5', '1.000', '0.520', 'HUMAN', 'KV4', '0.940', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0701860


11it [07:18, 35.91s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.660', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.375', '0.575', 'NOT HUMAN'], ['HV4', '0.060', '0.565', 'NOT HUMAN'], ['HV5', '1.000', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.100', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.025', '0.720', 'NOT HUMAN'], ['KV2', '0.070', '0.720', 'NOT HUMAN'], ['KV3', '0.035', '0.720', 'NOT HUMAN'], ['KV4', '0.925', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV5', '1.000', '0.520', 'HUMAN', 'KV4', '0.925', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0500595
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0764032
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0500377


12it [08:15, 42.21s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.015', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.980', '0.575', 'HUMAN'], ['HV4', '0.010', '0.565', 'NOT HUMAN'], ['HV5', '0.000', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.870', '0.720', 'HUMAN'], ['KV2', '0.085', '0.720', 'NOT HUMAN'], ['KV3', '0.250', '0.720', 'NOT HUMAN'], ['KV4', '0.330', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.030', '0.475', 'NOT HUMAN']]]
['HV3', '0.980', '0.575', 'HUMAN', 'KV1', '0.870', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0141001


13it [08:34, 35.31s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.995', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.075', '0.575', 'NOT HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.080', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.275', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '1.000', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.005', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV1', '0.995', '0.725', 'HUMAN', 'KV1', '1.000', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0784031
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0480596
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0525127


14it [09:31, 41.82s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.000', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '1.000', '0.575', 'HUMAN'], ['HV4', '0.000', '0.565', 'NOT HUMAN'], ['HV5', '0.000', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.025', '0.720', 'NOT HUMAN'], ['KV2', '0.070', '0.720', 'NOT HUMAN'], ['KV3', '0.030', '0.720', 'NOT HUMAN'], ['KV4', '1.000', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV3', '1.000', '0.575', 'HUMAN', 'KV4', '1.000', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0483575


15it [09:50, 35.10s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.000', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.945', '0.575', 'HUMAN'], ['HV4', '0.000', '0.565', 'NOT HUMAN'], ['HV5', '0.000', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.020', '0.720', 'NOT HUMAN'], ['KV2', '0.090', '0.720', 'NOT HUMAN'], ['KV3', '0.045', '0.720', 'NOT HUMAN'], ['KV4', '1.000', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.945', '0.575', 'HUMAN', 'KV4', '1.000', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0105593
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0946746
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0046732


16it [10:54, 43.66s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.000', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.980', '0.575', 'HUMAN'], ['HV4', '0.000', '0.565', 'NOT HUMAN'], ['HV5', '0.000', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.015', '0.720', 'NOT HUMAN'], ['KV2', '1.000', '0.720', 'HUMAN'], ['KV3', '0.005', '0.720', 'NOT HUMAN'], ['KV4', '0.020', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.980', '0.575', 'HUMAN', 'KV2', '1.000', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0314620


17it [11:22, 38.85s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.020', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.920', '0.575', 'HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.005', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.995', '0.720', 'HUMAN'], ['KV2', '0.005', '0.720', 'NOT HUMAN'], ['KV3', '0.000', '0.720', 'NOT HUMAN'], ['KV4', '0.005', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.920', '0.575', 'HUMAN', 'KV1', '0.995', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0337526
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0868816
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0046540


18it [12:21, 45.11s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.005', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.955', '0.575', 'HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.005', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.825', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.015', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.010', '0.475', 'NOT HUMAN']]]
['HV3', '0.955', '0.575', 'HUMAN', 'KV1', '0.825', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0231441


19it [12:40, 37.24s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.620', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.355', '0.575', 'NOT HUMAN'], ['HV4', '0.090', '0.565', 'NOT HUMAN'], ['HV5', '0.965', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.125', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.045', '0.720', 'NOT HUMAN'], ['KV2', '0.880', '0.720', 'HUMAN'], ['KV3', '0.040', '0.720', 'NOT HUMAN'], ['KV4', '0.050', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.010', '0.475', 'NOT HUMAN']]]
['HV5', '0.965', '0.520', 'HUMAN', 'KV2', '0.880', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0791376
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0635896
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0607670


20it [13:38, 43.49s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.985', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.160', '0.575', 'NOT HUMAN'], ['HV4', '0.025', '0.565', 'NOT HUMAN'], ['HV5', '0.335', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.310', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.820', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.040', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV1', '0.985', '0.725', 'HUMAN', 'KV1', '0.820', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0587213


21it [13:58, 36.26s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.540', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.245', '0.575', 'NOT HUMAN'], ['HV4', '0.070', '0.565', 'NOT HUMAN'], ['HV5', '0.985', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.050', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.945', '0.720', 'HUMAN'], ['KV2', '0.010', '0.720', 'NOT HUMAN'], ['KV3', '0.210', '0.720', 'NOT HUMAN'], ['KV4', '0.030', '0.655', 'NOT HUMAN'], ['KV5', '0.005', '0.815', 'NOT HUMAN'], ['KV6', '0.030', '0.475', 'NOT HUMAN']]]
['HV5', '0.985', '0.520', 'HUMAN', 'KV1', '0.945', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0314702
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0904921
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0747296


22it [14:55, 42.53s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.010', '0.725', 'NOT HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.990', '0.575', 'HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.005', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.000', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.995', '0.720', 'HUMAN'], ['KV2', '0.005', '0.720', 'NOT HUMAN'], ['KV3', '0.095', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV3', '0.990', '0.575', 'HUMAN', 'KV1', '0.995', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0818180


23it [15:18, 36.65s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.920', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.090', '0.575', 'NOT HUMAN'], ['HV4', '0.000', '0.565', 'NOT HUMAN'], ['HV5', '0.115', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.205', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.915', '0.720', 'HUMAN'], ['KV2', '0.005', '0.720', 'NOT HUMAN'], ['KV3', '0.025', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV1', '0.920', '0.725', 'HUMAN', 'KV1', '0.915', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0309271
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0079973
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0291702


24it [16:15, 42.92s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.980', '0.725', 'HUMAN'], ['HV2', '0.075', '0.835', 'NOT HUMAN'], ['HV3', '0.195', '0.575', 'NOT HUMAN'], ['HV4', '0.070', '0.565', 'NOT HUMAN'], ['HV5', '0.200', '0.520', 'NOT HUMAN'], ['HV6', '0.100', '0.930', 'NOT HUMAN'], ['HV7', '0.200', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.105', '0.720', 'NOT HUMAN'], ['KV2', '0.590', '0.720', 'NOT HUMAN'], ['KV3', '0.525', '0.720', 'NOT HUMAN'], ['KV4', '0.305', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.010', '0.475', 'NOT HUMAN']]]
['HV1', '0.980', '0.725', 'HUMAN', None, None, None, None]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0375312


25it [16:38, 36.98s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.995', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.210', '0.575', 'NOT HUMAN'], ['HV4', '0.005', '0.565', 'NOT HUMAN'], ['HV5', '0.130', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.180', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.870', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.010', '0.720', 'NOT HUMAN'], ['KV4', '0.000', '0.655', 'NOT HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV1', '0.995', '0.725', 'HUMAN', 'KV1', '0.870', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0108547
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0124030
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0777423


26it [17:38, 43.81s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '1.000', '0.725', 'HUMAN'], ['HV2', '0.000', '0.835', 'NOT HUMAN'], ['HV3', '0.155', '0.575', 'NOT HUMAN'], ['HV4', '0.025', '0.565', 'NOT HUMAN'], ['HV5', '0.075', '0.520', 'NOT HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.165', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.030', '0.720', 'NOT HUMAN'], ['KV2', '0.135', '0.720', 'NOT HUMAN'], ['KV3', '0.045', '0.720', 'NOT HUMAN'], ['KV4', '0.990', '0.655', 'HUMAN'], ['KV5', '0.000', '0.815', 'NOT HUMAN'], ['KV6', '0.005', '0.475', 'NOT HUMAN']]]
['HV1', '1.000', '0.725', 'HUMAN', 'KV4', '0.990', '0.655', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0793375


27it [17:58, 36.67s/it]

[[['V-gene', 'Score', 'Threshold', 'Classification'], ['HV1', '0.330', '0.725', 'NOT HUMAN'], ['HV2', '0.005', '0.835', 'NOT HUMAN'], ['HV3', '0.150', '0.575', 'NOT HUMAN'], ['HV4', '0.065', '0.565', 'NOT HUMAN'], ['HV5', '0.980', '0.520', 'HUMAN'], ['HV6', '0.000', '0.930', 'NOT HUMAN'], ['HV7', '0.130', '0.720', 'NOT HUMAN']], [['V-gene', 'Score', 'Threshold', 'Classification'], ['KV1', '0.960', '0.720', 'HUMAN'], ['KV2', '0.000', '0.720', 'NOT HUMAN'], ['KV3', '0.115', '0.720', 'NOT HUMAN'], ['KV4', '0.005', '0.655', 'NOT HUMAN'], ['KV5', '0.005', '0.815', 'NOT HUMAN'], ['KV6', '0.000', '0.475', 'NOT HUMAN']]]
['HV5', '0.980', '0.520', 'HUMAN', 'KV1', '0.960', '0.720', 'HUMAN']
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0564821
[]
[]
https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231208_0974774
[]
[]


27it [18:45, 41.67s/it]

KeyboardInterrupt



In [53]:
# Preview the first rows of the collected results.
humab_df.head()

Unnamed: 0,Raw_name,H-V-gene,H-Score,H-Threshold,H-Classification,L-V-gene,L-Score,L-Threshold,L-Classification,H_seq,L_seq
0,3A3human_0,HV3,0.735,0.575,HUMAN,KV1,0.9,0.72,HUMAN,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,HV3,0.975,0.575,HUMAN,KV1,0.73,0.72,HUMAN,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,74#human_0,HV5,1.0,0.52,HUMAN,KV4,0.95,0.655,HUMAN,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
3,11E6human_0,HV7,0.77,0.72,HUMAN,KV1,0.94,0.72,HUMAN,QVQLVQSESELKKPGASVRISCMASGYTFTNFGLNWVRQAPGQGFE...,AVQLFQSPSSVSASVGDRVTITCRASQNVGTAIAWYQQIKGKAPKP...
4,SC73.39human_0,HV1,0.995,0.725,HUMAN,KV4,0.985,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...


In [47]:
# Keep the samples whose heavy AND light chains are both classified as HUMAN.
# The two masks are combined element-wise with `&`; Python's `and` cannot be
# applied to whole Series.
human_h = humab_df['H-Classification']
human_l = humab_df['L-Classification']
both_h_l_index = human_h.eq('HUMAN') & human_l.eq('HUMAN')
both_h_l_df = humab_df.loc[both_h_l_index]

In [49]:
# Samples whose heavy (resp. light) chain is classified as HUMAN. Each mask
# looks at one chain only, so despite the "only_" prefix these frames also
# contain the rows where both chains are HUMAN.
only_h_df = humab_df.loc[human_h.eq('HUMAN')]
only_l_df = humab_df.loc[human_l.eq('HUMAN')]

In [51]:
# Display the samples whose heavy chain is classified as HUMAN.
only_h_df

Unnamed: 0,Raw_name,H-V-gene,H-Score,H-Threshold,H-Classification,L-V-gene,L-Score,L-Threshold,L-Classification,H_seq,L_seq
0,3A3human_0,HV3,0.735,0.575,HUMAN,KV1,0.900,0.720,HUMAN,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,HV3,0.975,0.575,HUMAN,KV1,0.730,0.720,HUMAN,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,74#human_0,HV5,1.000,0.520,HUMAN,KV4,0.950,0.655,HUMAN,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
3,11E6human_0,HV7,0.770,0.720,HUMAN,KV1,0.940,0.720,HUMAN,QVQLVQSESELKKPGASVRISCMASGYTFTNFGLNWVRQAPGQGFE...,AVQLFQSPSSVSASVGDRVTITCRASQNVGTAIAWYQQIKGKAPKP...
4,SC73.39human_0,HV1,0.995,0.725,HUMAN,KV4,0.985,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...
...,...,...,...,...,...,...,...,...,...,...,...
123,B-E29human_0,HV3,0.995,0.575,HUMAN,KV2,0.970,0.720,HUMAN,EEQVLESGGGFVQPGGSLRLSCGASEFTFSNYAMSWARQAPGKGLE...,DIVMTQTPLSSPVTLGRPASISCTSSQSIVDITGNTYLSWLQQRPG...
124,8G8human_0,HV7,0.940,0.720,HUMAN,,,,,QVQLVQSGSELKNPGASVKVSCKAFGYTFTNYGINWVRQAPGQGLE...,LPVLTQPPAVSASLGASIKLTCTLSSQHSTYTIEWSQQRPERSPRF...
125,ABC_G1D02human_0,HV1,0.995,0.725,HUMAN,KV1,0.960,0.720,HUMAN,QVQLVQSGAEVKEPGASVTLSCQASGYAFTNYFIHWVRQAPGQGLE...,DIQMTQSPSTLSAFVGDRVTITCRASSSISSNYLAWYQQKPGKAPK...
126,ABC_G1D03human_0,HV1,1.000,0.725,HUMAN,KV4,0.995,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCQASGYTFTDHNLHWFRRAPGQGLE...,DIVMIQSPDSLAVSLGERATISCKSSQTLLYSSDQKNYLAWYQQKP...


In [54]:
# Display the samples whose light chain is classified as HUMAN.
only_l_df

Unnamed: 0,Raw_name,H-V-gene,H-Score,H-Threshold,H-Classification,L-V-gene,L-Score,L-Threshold,L-Classification,H_seq,L_seq
0,3A3human_0,HV3,0.735,0.575,HUMAN,KV1,0.900,0.720,HUMAN,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,HV3,0.975,0.575,HUMAN,KV1,0.730,0.720,HUMAN,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,74#human_0,HV5,1.000,0.520,HUMAN,KV4,0.950,0.655,HUMAN,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
3,11E6human_0,HV7,0.770,0.720,HUMAN,KV1,0.940,0.720,HUMAN,QVQLVQSESELKKPGASVRISCMASGYTFTNFGLNWVRQAPGQGFE...,AVQLFQSPSSVSASVGDRVTITCRASQNVGTAIAWYQQIKGKAPKP...
4,SC73.39human_0,HV1,0.995,0.725,HUMAN,KV4,0.985,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...
...,...,...,...,...,...,...,...,...,...,...,...
122,18V4Fhuman_0,HV3,0.965,0.575,HUMAN,LV7,0.945,0.755,HUMAN,QVPLVESGGGVVQPGKSLRLACAASGFSLTGYGMHWVRQAPGKGLE...,QTVVTQEPSLTVSPGGTVTFTCASSTGAVTTSNYPSWFQQKLGQAP...
123,B-E29human_0,HV3,0.995,0.575,HUMAN,KV2,0.970,0.720,HUMAN,EEQVLESGGGFVQPGGSLRLSCGASEFTFSNYAMSWARQAPGKGLE...,DIVMTQTPLSSPVTLGRPASISCTSSQSIVDITGNTYLSWLQQRPG...
125,ABC_G1D02human_0,HV1,0.995,0.725,HUMAN,KV1,0.960,0.720,HUMAN,QVQLVQSGAEVKEPGASVTLSCQASGYAFTNYFIHWVRQAPGQGLE...,DIQMTQSPSTLSAFVGDRVTITCRASSSISSNYLAWYQQKPGKAPK...
126,ABC_G1D03human_0,HV1,1.000,0.725,HUMAN,KV4,0.995,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCQASGYTFTDHNLHWFRRAPGQGLE...,DIVMIQSPDSLAVSLGERATISCKSSQTLLYSSDQKNYLAWYQQKP...


In [48]:
# Display the samples whose heavy and light chains are both classified as HUMAN.
both_h_l_df

Unnamed: 0,Raw_name,H-V-gene,H-Score,H-Threshold,H-Classification,L-V-gene,L-Score,L-Threshold,L-Classification,H_seq,L_seq
0,3A3human_0,HV3,0.735,0.575,HUMAN,KV1,0.900,0.720,HUMAN,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,HV3,0.975,0.575,HUMAN,KV1,0.730,0.720,HUMAN,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,74#human_0,HV5,1.000,0.520,HUMAN,KV4,0.950,0.655,HUMAN,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
3,11E6human_0,HV7,0.770,0.720,HUMAN,KV1,0.940,0.720,HUMAN,QVQLVQSESELKKPGASVRISCMASGYTFTNFGLNWVRQAPGQGFE...,AVQLFQSPSSVSASVGDRVTITCRASQNVGTAIAWYQQIKGKAPKP...
4,SC73.39human_0,HV1,0.995,0.725,HUMAN,KV4,0.985,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...
...,...,...,...,...,...,...,...,...,...,...,...
122,18V4Fhuman_0,HV3,0.965,0.575,HUMAN,LV7,0.945,0.755,HUMAN,QVPLVESGGGVVQPGKSLRLACAASGFSLTGYGMHWVRQAPGKGLE...,QTVVTQEPSLTVSPGGTVTFTCASSTGAVTTSNYPSWFQQKLGQAP...
123,B-E29human_0,HV3,0.995,0.575,HUMAN,KV2,0.970,0.720,HUMAN,EEQVLESGGGFVQPGGSLRLSCGASEFTFSNYAMSWARQAPGKGLE...,DIVMTQTPLSSPVTLGRPASISCTSSQSIVDITGNTYLSWLQQRPG...
125,ABC_G1D02human_0,HV1,0.995,0.725,HUMAN,KV1,0.960,0.720,HUMAN,QVQLVQSGAEVKEPGASVTLSCQASGYAFTNYFIHWVRQAPGQGLE...,DIQMTQSPSTLSAFVGDRVTITCRASSSISSNYLAWYQQKPGKAPK...
126,ABC_G1D03human_0,HV1,1.000,0.725,HUMAN,KV4,0.995,0.655,HUMAN,QVQLVQSGAEVKKPGASVKVSCQASGYTFTDHNLHWFRRAPGQGLE...,DIVMIQSPDSLAVSLGERATISCKSSQTLLYSSDQKNYLAWYQQKP...


In [56]:
# Report, for each subset, its share of all rows in humab_df.
n_total = len(humab_df)
rate_report = (
    ('H humanization rate: {}', only_h_df),
    ('L humanization rate: {}', only_l_df),
    ('Both H and L humanization rate: {}', both_h_l_df),
)
for template, subset in rate_report:
    print(template.format(len(subset) / n_total))

H humanization rate: 0.9767441860465116
L humanization rate: 0.8914728682170543
Both H and L humanization rate: 0.875968992248062


In [19]:
def save_pairs(heavy_chains, light_chains, path):
    """Write heavy and light chains to a single FASTA file.

    The heavy chain(s) are written first with description 'VH', followed by
    the light chain(s) with description 'VL'.

    Args:
        heavy_chains: Chain (or chains) passed to ``Chain.to_fasta``.
        light_chains: Chain (or chains) passed to ``Chain.to_fasta``.
        path: Destination file path; opened in 'w' mode, so an existing file
            is overwritten.
    """
    # The `with open(...)` line had been commented out, which left `f`
    # undefined (NameError on every call) and `path` unused. Opening the file
    # here also guarantees the handle is closed once both writes are done.
    with open(path, 'w') as f:
        Chain.to_fasta(heavy_chains, f, description='VH')
        Chain.to_fasta(light_chains, f, description='VL')

In [57]:
# Test for 3D structure: export one mouse pair and one human pair as FASTA.
save_fasta_mouse_fpath = '/data/home/waitma/antibody_proj/antidiff/data/tfold_test/mouse_chain.fasta'
save_fasta_human_fpath = '/data/home/waitma/antibody_proj/antidiff/data/tfold_test/human_chain.fasta'

# Row 0 of sample_df is used as the mouse pair and row 1 as the human pair.
# NOTE(review): presumably rows alternate original/humanized sample - confirm.
mouse_row = sample_df.iloc[0]
human_row = sample_df.iloc[1]

# Number both chains of each pair with the IMGT scheme.
mouse_h = Chain(mouse_row['hseq'], scheme='imgt')
mouse_l = Chain(mouse_row['lseq'], scheme='imgt')
mouse_h.name = mouse_l.name = 'mouse0'

# NOTE(review): human_h / human_l are also the names of the classification
# Series used by the filtering cells; after this cell they refer to Chain
# objects instead, so re-running those cells out of order will break.
human_h = Chain(human_row['hseq'], scheme='imgt')
human_l = Chain(human_row['lseq'], scheme='imgt')
human_h.name = human_l.name = 'human0'

for heavy, light, fasta_path in (
    (mouse_h, mouse_l, save_fasta_mouse_fpath),
    (human_h, human_l, save_fasta_human_fpath),
):
    save_pairs(heavy, light, fasta_path)

In [24]:
# Print the heavy chain and then the light chain, one per line.
print(human_h, human_l, sep='\n')

EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLEWVGRIRLKSYNYATEYAASVKGRFTISRDDSRSVVYLEMSSLKTEDTATYFCTDWDGAYWGPGTLVTVSS
DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKVLIYSASTLESGVPFRFSGSGSGTDFTLTISSLQPEDSATYYCQQHYSTPFTFGPGTKVDV


In [28]:
# Show the URL held by `response` (a Hu-mAb result page, per the output below).
# NOTE(review): `response` is not assigned in any visible cell - presumably it
# comes from a cell that was deleted or edited; confirm before a fresh re-run.
response.url

'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab_results/20231123_0990646'

In [29]:
# Fetch the result page. The explicit timeout keeps a stalled server from
# hanging the kernel indefinitely (requests applies no timeout by default).
result_test = requests.get(response.url, timeout=30)

In [31]:
# Inspect the raw HTML body of the fetched result page.
result_test.text

'<!DOCTYPE html>\n<html>\n\n<head>\n  <meta charset="utf-8">\n  <meta name="viewport" content="width=device-width, initial-scale=1">\n  \n  <!-- PAGE settings -->\n  <title>SAbPred: Hu-mAb Results\n</title>\n  <link href="/webapps/sabdab-sabpred/static/img/favicon.png" rel="shortcut icon" type="image/x-icon" />\n\n  <!-- CSS dependencies -->\n  <link rel="stylesheet" href="/webapps/sabdab-sabpred/static/css/wireframe.css">\n  <link rel="stylesheet" href="/webapps/sabdab-sabpred/static/css/google-fonts.css">\n  <!--<link href="https://fonts.googleapis.com/css?family=Comfortaa|Josefin+Slab:100,400|Source+Code+Pro" rel="stylesheet">-->\n  \n  <link href="/webapps/sabdab-sabpred/static/jsav/JSAVcustom.css" rel="stylesheet">\n  <link href="/webapps/sabdab-sabpred/static/Welcome_files/jquery-ui.css" rel="stylesheet">\n  <script src="/webapps/sabdab-sabpred/static/Welcome_files/jquery.js"></script> <!-- Must be loaded before JSAV -->\n  <script type="text/javascript" src="/webapps/sabdab-sabp

In [33]:
# Parse the result page HTML with BeautifulSoup's 'html.parser' backend.
soup = BeautifulSoup(result_test.text, 'html.parser')

In [40]:
# Collect every result table on the page and flatten each one into a list of
# rows, where a row is the list of its header/data cell texts.
tables = soup.find_all('table', {'class': 'table table-results'})
all_table_data = [
    [
        [cell.text for cell in row.find_all(['th', 'td'])]
        for row in table.find_all('tr')
    ]
    for table in tables
]

In [53]:
# Show the raw <table> elements matched on the result page.
tables

[<table class="table table-results" style="text-align: center;">
 <tr class="head"><th>V-gene</th><th>Score</th><th>Threshold</th><th>Classification</th></tr>
 <tr>
 <td>HV1</td>
 <td>0.005</td>
 <td>0.725</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 <tr>
 <td>HV2</td>
 <td>0.000</td>
 <td>0.835</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 <tr>
 <td>HV3</td>
 <td>0.815</td>
 <td>0.575</td>
 <td style="background-color: #52d9a1;">HUMAN</td>
 </tr>
 <tr>
 <td>HV4</td>
 <td>0.000</td>
 <td>0.565</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 <tr>
 <td>HV5</td>
 <td>0.000</td>
 <td>0.520</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 <tr>
 <td>HV6</td>
 <td>0.000</td>
 <td>0.930</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 <tr>
 <td>HV7</td>
 <td>0.005</td>
 <td>0.720</td>
 <td style="background-color: #ed8282;">NOT HUMAN</td>
 </tr>
 </table>,
 <table class="table table-results" 

In [59]:
# Run extract_human_data on the first two parsed tables; the result is one flat
# list holding the HUMAN row of each table in turn.
extract_human_data(all_table_data[:2])

['HV3', '0.815', '0.575', 'HUMAN', 'KV1', '0.725', '0.720', 'HUMAN']

In [43]:
# For each of the first two tables, pair its header row with every row whose
# last cell (the classification) is 'HUMAN'.
extracted_data = [
    (rows[0], [candidate for candidate in rows if candidate[-1] == 'HUMAN'])
    for rows in all_table_data[:2]
]

In [44]:
# Show the (header, HUMAN rows) pairs extracted from the first two tables.
extracted_data

[(['V-gene', 'Score', 'Threshold', 'Classification'],
  [['HV3', '0.815', '0.575', 'HUMAN']]),
 (['V-gene', 'Score', 'Threshold', 'Classification'],
  [['KV1', '0.725', '0.720', 'HUMAN']])]

In [55]:
import pandas as pd

# Sample data: one (header, HUMAN rows) pair per chain, heavy chain first.
data = [
    (['V-gene', 'Score', 'Threshold', 'Classification'], [['HV3', '0.815', '0.575', 'HUMAN']]),
    (['V-gene', 'Score', 'Threshold', 'Classification'], [['KV1', '0.725', '0.720', 'HUMAN']])
]

# Column layout of the result frame: heavy-chain columns, light-chain columns,
# then a 'Type' column recording which chain a row came from.
columns = ['H-V-gene', 'H-Score', 'H-Threshold', 'H-Classification',
           'L-V-gene', 'L-Score', 'L-Threshold', 'L-Classification', 'Type']

# HUMAN rows of the heavy and light chain tables.
H_test_data = data[0][1]
L_test_data = data[1][1]

# Build one record per row. The previous loop was left half-deleted: a dangling
# indented `for` (IndentationError) over the undefined names `rows` and
# `new_header`. Records are gathered first and the frame is created once,
# instead of calling pd.concat for every row.
records = []
for prefix, header, rows in (('H-', data[0][0], H_test_data),
                             ('L-', data[1][0], L_test_data)):
    # Prefix the generic header with the chain tag, e.g. 'V-gene' -> 'H-V-gene'.
    new_header = [prefix + column for column in header]
    for row in rows:
        row_data = dict(zip(new_header, row))
        row_data['Type'] = new_header[0]
        records.append(row_data)

# Columns a record does not provide (the other chain's) are filled with NaN.
df = pd.DataFrame(records, columns=columns)

# Display the DataFrame.
print(df)

  H-V-gene H-Score H-Threshold H-Classification L-V-gene L-Score L-Threshold  \
0      HV3   0.815       0.575            HUMAN      NaN     NaN         NaN   
1      NaN     NaN         NaN              NaN      KV1   0.725       0.720   

  L-Classification      Type  
0              NaN  H-V-gene  
1            HUMAN  L-V-gene  


In [11]:
# Scratch check: walk 0..9 but only print the values up to and including 5.
for i in range(10):
    if i <= 5:
        print(i)

0
1
2
3
4
5
