In [2]:
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import time
from tqdm import tqdm
import re
from abnumber import Chain
import json
from urllib.parse import urlencode
import concurrent.futures

import seaborn as sns
import matplotlib.pyplot as plt

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

#%matplotlib inline


In [12]:
# Input CSV locations. These are absolute paths on the original author's
# machine and must be adjusted before re-running elsewhere.
#   sample_fpath    - sampled humanization results (judging by the file name).
#   humanized_fpath - lab humanization pair data (judging by the file name).
sample_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/sample_humanization_result.csv'
humanized_fpath = '/data/home/waitma/antibody_proj/antidiff/data/lab_data/humanization_pair_data_filter.csv'

In [13]:
# Load both CSV files into DataFrames.
sample_df = pd.read_csv(sample_fpath)
humanized_df = pd.read_csv(humanized_fpath)

In [5]:
def regular_order_table(out_of_order_table):
    """Convert parsed HTML tables into nested lists of cell text.

    Args:
        out_of_order_table: iterable of table elements. Each must provide
            ``find_all('tr')`` returning rows, and each row must provide
            ``find_all(['th', 'td'])`` returning cells with a ``text``
            attribute (the interface BeautifulSoup tags expose).

    Returns:
        A list with one entry per table (at most two); each entry is a list
        of rows, each row a list of cell strings.
    """
    parsed_tables = [
        [
            [cell.text for cell in tr.find_all(['th', 'td'])]
            for tr in table.find_all('tr')
        ]
        for table in out_of_order_table
    ]
    # Only the first two tables are used; per the original note the page has three.
    return parsed_tables[:2]

def extract_human_data(regular_table):
    """Collect, per table, the row whose last cell is ``'HUMAN'``.

    Args:
        regular_table: list of tables, each a list of rows, each row a list
            of cell strings.

    Returns:
        A flat list: for every table, the cells of the last row ending in
        ``'HUMAN'``, or four ``None`` placeholders when the table has no such
        row (including when the table is empty).
    """
    extracted_data = []
    for table_data in regular_table:
        human_row = [None, None, None, None]
        for row in table_data:
            # Guard against rows without cells; row[-1] would raise IndexError.
            if row and row[-1] == 'HUMAN':
                human_row = row
        extracted_data.extend(human_row)
    return extracted_data

def get_predict_result(job_name, h_seq, l_seq, wait_seconds=15, timeout=60):
    """Submit a heavy/light sequence pair to the OPIG ``humab`` web app and scrape the result page.

    Args:
        job_name: value sent as the ``jobname_score`` form field.
        h_seq: heavy-chain sequence, sent as ``h_sequence_score``.
        l_seq: light-chain sequence, sent as ``l_sequence_score``.
        wait_seconds: pause between submitting the job and fetching the
            result page (defaults to the original fixed 15 seconds).
        timeout: per-request timeout in seconds passed to ``requests``.

    Returns:
        A list made of the cells extracted by ``extract_human_data`` followed
        by ``h_seq`` and ``l_seq``. When the result page does not answer with
        HTTP 200, the extracted part is eight ``None`` placeholders.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    # Url path
    humab_url = 'https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabpred/humab'

    data = {
        'h_sequence_score': h_seq,
        'l_sequence_score': l_seq,
        'jobname_score': job_name
    }
    response = requests.post(humab_url, data=data, timeout=timeout)
    # The POST is redirected to the job's result page; keep its final URL.
    result_url = response.url
    print(result_url)

    # The server needs a moment to finish the job before the page is complete.
    time.sleep(wait_seconds)

    # Get the result page.
    result_response = requests.get(result_url, timeout=timeout)

    # Fallback so the return below never hits an unbound name: two tables x
    # four cells, the same width extract_human_data pads with when a table
    # has no HUMAN row.
    extract_data = [None] * 8
    if result_response.status_code == 200:
        soup = BeautifulSoup(result_response.text, 'html.parser')
        tables = soup.find_all('table', {'class': 'table table-results'})

        predict_table = regular_order_table(tables)
        print(predict_table)
        extract_data = extract_human_data(predict_table)
        print(extract_data)
    else:
        print('May be url has problem or need larger sleep time.')

    return extract_data + [h_seq, l_seq]
    

In [6]:
# Normalise the sample frame's column names (HSEQ/LSEQ -> hseq/lseq), then
# keep only the rows tagged as humanization and renumber them from zero.
new_columns_name = {
    'Specific': 'Specific',
    'name': 'name',
    'HSEQ': 'hseq',
    'LSEQ': 'lseq'
}

sample_df = sample_df.rename(columns=new_columns_name)
sample_human_df = (
    sample_df
    .loc[sample_df['Specific'] == 'humanization']
    .reset_index(drop=True)
)
sample_human_df.head()

Unnamed: 0,Specific,name,hseq,lseq,Unnamed: 4
0,humanization,3A3human_0,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...,
1,humanization,MAK195human_0,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...,
2,humanization,31#human_0,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...,
3,humanization,56#human_0,QVQLQESGPRLVEPSETLSLTCTVSGYSITSDYDWGWIRQPPGRGL...,DTLLTQSPDHLSVTLGERATISCRASQNIGTSLNWYQQKPGQSPKR...,
4,humanization,74#human_0,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...,


In [7]:
# Load the filtered humanization results from CSV (absolute path on the
# original author's machine) and display the frame.
filter_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/filter_humanization_result.csv'
filter_human_df = pd.read_csv(filter_fpath)
filter_human_df

Unnamed: 0,Specific,name,hseq,lseq,Unnamed: 4
0,humanization,3A3human_0,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...,
1,humanization,31#human_0,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...,
2,humanization,74#human_0,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...,
3,humanization,SC73.38human_0,QVHLVQSGAEVKKPGASVKVSCKASGYSFTGYTIHWVRQAPGQGLE...,EIVMTQSPATLSVSPGETATLSCRASQSISNNLVWYQQKPDQAPRL...,
4,humanization,SC73.39human_0,QVQLVQSGAEVKKPGASVKVSCRTSGYSFTNYNFAWVRQAPRQGLE...,DIVMTQSPDSLAVSLGERATIKCKSSQSLLNSSNQKNYLAWYQQKP...,
...,...,...,...,...,...
111,humanization,RK22human_0,EVQLVQSGVEVKKPGESLKISCKGSGYTFTSYWIGWVRQMPGKGLE...,DIVVTQSPDSLAVSLGERATINCKSSQSLLNSANQKNYLAWYQQKP...,
112,humanization,49G8human_0,QVQLVQSGSEVKKPGASVKVSCQASGYTFADYEIHWMRQAPGQGPE...,DVVVTQSPLSLPVTLGQPASISCRSSQSIVHSNGNTYLDWFQQRPG...,
113,humanization,mu515H7human_0,EVQLVESGGGLVQPGGSLRLSCATSGFTFTDNYMDWVRQAPGKGLE...,IVLTQSPDSLAVSLGERATINCKSSQSLFNSRTRKNYLTWYQQKPG...,
114,humanization,mu16B5human_0,QVRLVQSGAEVKKPGASVGVSCKASGFNIKDIYIHWVRQVPGQGPE...,DIVMTQFPNSLAVSLGERATINCKSSQSLLNSRTRKNYLSWYQKKA...,


In [18]:
T20_REGEX = re.compile('<td>T20 Score:</td><td>([0-9.]+)</td>')
T20_URL = 'https://sam.curiaglobal.com/t20/cgi-bin/blast.py'


def get_t20_online(seq, max_attempts=5, timeout=60):
    """Query the online T20 service for one variable-domain sequence.

    Args:
        seq: amino-acid sequence; it is numbered with ``abnumber.Chain``
            (IMGT scheme) to decide which chain type to send.
        max_attempts: number of HTTP attempts before giving up.
        timeout: per-request timeout in seconds passed to ``requests``.

    Returns:
        ``(score, chain_type)`` where ``score`` is the float parsed from the
        "T20 Score" cell and ``chain_type`` is ``'vh'``, ``'vl'`` or ``'vk'``;
        ``(None, None)`` when the page contains no T20 score.

    Raises:
        RuntimeError: when no attempt returned a non-empty successful
            response. (The original called ``sys.exit`` without importing
            ``sys``; an ordinary exception lets callers retry or skip.)
    """
    chain = Chain(seq, scheme='imgt')
    if chain.chain_type == 'H':
        chain_type = 'vh'
    elif chain.chain_type == 'L':
        chain_type = 'vl'
    else:
        chain_type = 'vk'

    # Let requests build and percent-encode the query string.
    query = {'chain': chain_type, 'region': 1, 'output': 3, 'seqs': seq}

    html = None
    for attempt in range(max_attempts):
        try:
            response = requests.get(T20_URL, params=query, timeout=timeout)
            if response.ok:
                html = response.text
                break
        except Exception as e:
            print(e)
        # Back off only when another attempt will actually follow.
        if attempt + 1 < max_attempts:
            time.sleep(0.5 + attempt * 5)
            print('Retry', attempt + 1)
    if not html:
        raise RuntimeError(f'T20 request failed after {max_attempts} attempts')

    matches = T20_REGEX.findall(html)
    # Short pause between consecutive calls to the service.
    time.sleep(1)
    if not matches:
        # Show the page so the reason for the missing score can be inspected.
        print(html)
        return None, None
    return float(matches[0]), chain_type

def get_pair_data_t20(h_seq, l_seq):
    """Score a heavy/light pair with ``get_t20_online``.

    Returns:
        ``[heavy_score, heavy_gene, light_score, light_gene, h_seq, l_seq]``.
        Both scores are also printed.
    """
    heavy_score, heavy_gene = get_t20_online(h_seq)
    light_score, light_gene = get_t20_online(l_seq)
    print(heavy_score, light_score)
    pair_record = [heavy_score, heavy_gene, light_score, light_gene]
    pair_record += [h_seq, l_seq]
    return pair_record

# Column names of the one-row frames produced by process_line.
T20_RESULT_COLUMNS = ['Raw_name', 'H_Score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq']


def _pick_column(row, candidates):
    """Return the value stored under the first label of ``candidates`` found in ``row``."""
    for label in candidates:
        if label in row:
            return row[label]
    raise KeyError(f'none of {candidates} found in row')


def process_line(line, max_retries=10, retry_delay=5):
    """Score one ``DataFrame.iterrows()`` item and return it as a one-row frame.

    Args:
        line: ``(index, row)`` pair as yielded by ``DataFrame.iterrows()``.
            The row must hold ``name`` plus the sequences under either
            ``h_seq``/``l_seq`` or ``hseq``/``lseq`` (both spellings occur in
            this notebook's frames).
        max_retries: how many times the T20 lookup is attempted.
        retry_delay: seconds to wait after a failed attempt.

    Returns:
        A one-row ``DataFrame`` with columns ``T20_RESULT_COLUMNS``, or
        ``None`` when every attempt raised.

    Raises:
        KeyError: when the row lacks the name or sequence columns.
    """
    row = line[1]
    h_seq = _pick_column(row, ('h_seq', 'hseq'))
    l_seq = _pick_column(row, ('l_seq', 'lseq'))
    name = row['name']

    last_error = None
    for _ in range(max_retries):
        try:
            data = get_pair_data_t20(h_seq, l_seq)
        except Exception as exc:  # not a bare except: Ctrl-C must still interrupt
            last_error = exc
            time.sleep(retry_delay)
            continue
        return pd.DataFrame([[name] + data], columns=T20_RESULT_COLUMNS)

    # All attempts failed: report once and let the caller record the miss.
    print(f'Giving up on {name}: {last_error}')
    return None

In [9]:
# T20 scores for the sampled humanization sequences.
# The commented-out block below is the computation that scores every row of
# sample_human_df in parallel; it is disabled here and the scores are instead
# read back from the CSV at save_fpath.
# t20_df = pd.DataFrame(columns=['Raw_name', 'H_Score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq'])

# with concurrent.futures.ProcessPoolExecutor() as executor:
#     results = list(tqdm(executor.map(process_line, sample_human_df.iterrows()), total=len(sample_human_df)))

# t20_df = pd.concat([result for result in results if result is not None], ignore_index=True)
# Not_successful_index = [i for i, result in enumerate(results) if result is None]


save_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/t20_score_result.csv'
t20_df = pd.read_csv(save_fpath)
t20_df

Unnamed: 0,Raw_name,H_Score,H_gene,L_score,L-gene,H_seq,L_seq
0,3A3human_0,75.71437,vh,82.64157,vk,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,78.24797,vh,82.66367,vk,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,31#human_0,86.16677,vh,86.81427,vk,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...
3,56#human_0,78.79037,vh,72.63897,vk,QVQLQESGPRLVEPSETLSLTCTVSGYSITSDYDWGWIRQPPGRGL...,DTLLTQSPDHLSVTLGERATISCRASQNIGTSLNWYQQKPGQSPKR...
4,74#human_0,78.24797,vh,85.30977,vk,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
...,...,...,...,...,...,...,...
311,149G11human_0,77.30777,vh,88.30367,vk,EVRLVQSGAEVKKPGESLTISCKISGYTFTNYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATFNCKSSQNLLYNSNQKSYLAWYQQKP...
312,mu515H7human_0,86.54000,vh,79.33637,vk,EVQLVESGGGLVQPGGSLRLSCATSGFTFTDNYMDWVRQAPGKGLE...,IVLTQSPDSLAVSLGERATINCKSSQSLFNSRTRKNYLTWYQQKPG...
313,mu16B5human_0,63.97447,vh,76.68147,vk,QVRLVQSGAEVKKPGASVGVSCKASGFNIKDIYIHWVRQVPGQGPE...,DIVMTQFPNSLAVSLGERATINCKSSQSLLNSRTRKNYLSWYQKKA...
314,21B12human_0,87.03397,vh,90.53160,vk,QVQLVQSGSELKKPGASVKVSCKASGYTFTNYGLNWVRQAPGQGLE...,DIVMTQSPDSLAVSLGERATIDCKSSQSLLYSSNQKNYLTWYQQKP...


In [10]:
# Empty frame that only declares the column layout of the T20 results for the
# filtered humanization set.
t20_filter_df = pd.DataFrame(columns=['Raw_name', 'H_Score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq'])
# The parallel scoring of filter_human_df and the CSV export are disabled below.
# with concurrent.futures.ProcessPoolExecutor() as executor:
#     results = list(tqdm(executor.map(process_line, filter_human_df.iterrows()), total=len(filter_human_df)))

# t20_filter_df = pd.concat([result for result in results if result is not None], ignore_index=True)
# Not_successful_index = [i for i, result in enumerate(results) if result is None]
# save_filter_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/t20_score_filter_result.csv'
# t20_filter_df.to_csv(save_filter_fpath, index=False)

                        

                

In [16]:
# Keep every lab record whose type is not 'mouse' and renumber rows from zero.
lab_human_df = (
    humanized_df
    .loc[humanized_df['type'].ne('mouse')]
    .reset_index(drop=True)
)
lab_human_df

Unnamed: 0,type,name,order_name,h_seq,l_seq
0,humanized,h3A3-5,0_humanized,EVQLVESGGGLVQPGGSLRLSCAASGFTFSNFWMDWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQDVSTDVAWYQQKPGKAPKL...
1,humanized,AB240,1_humanized,EVQLQESGPGLVKPSETLSLTCTVSGGSISDYGVNWIRQPPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQAVSSAVAWYQQKPGKAPKL...
2,humanized,31HZ,2_humanized,QVQLVQSGAEVKKPGSSVKVSCKASGYTFTDYWMNWVRLAPGQGLE...,DVVITQSPDSLAVSLGERATINCKSSQSLLNTNSQKNYLAWYQQKP...
3,humanized,56HZ,3_humanized,QVQLQESGPGLVKPSQTLSLTCTVSGYFNSITSDYDWHWIRHHPGK...,DIVLTQSPATLSLSPGERATLSCRASQNIGTSIHWYQHKPGQSPRL...
4,humanized,74HZ,4_humanized,QVQLVQSGAEVKKPGSSVKVSCKASGYTFINYWMNWVRQAPGQGLE...,DIVITQSPDSLAVSLGERATINCKSSQTLLNSNTQKNYLAWYQQKP...
...,...,...,...,...,...
344,humanized,149G11-VH1VL1,350_humanized,QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYWIHWVRQAPGQGLE...,DIVMTQSPDSLAVSLGERATINCKSSQNLLYNSNQKSYLAWYQQKP...
345,humanized,hz515H7 VH1 D76N VL2.1,352_humanized,EVQLVESGGGLVQPGRSLRLSCTASGFTFTDNYMSWVRQAPGKGLE...,DIVMTQSPDSLAVSLGERATMSCKSSQSLFNSRTRKNYLAWYQQKP...
346,humanized,hu16B5,353_humanized,QVQLVQSGAEVKKPGATVKISCKVSGFNIKDIYMHWVQQAPGKGLE...,DIVMTQSPDSLAVSLGERATINCKSSQSLLNSRTRKNYLAWYQQKP...
347,humanized,hu21B12,354_humanized,QVQLVQSGSELKKPGASVKVSCKASGYTFTNYGMHWVRQAPGQGLE...,DIVMTQSPDSLAVSLGERATINCKSSQSLLYSSNQKNYLAWYQQKP...


In [26]:
# Score every lab humanized pair in parallel. process_line returns a one-row
# frame on success and None on failure, so results lines up with lab_human_df.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = list(tqdm(executor.map(process_line, lab_human_df.iterrows()), total=len(lab_human_df)))

successful_rows = [result for result in results if result is not None]
# Positions (in lab_human_df order) of the rows that could not be scored.
Not_successful_index = [i for i, result in enumerate(results) if result is None]

save_lab_fpath = '/data/home/waitma/antibody_proj/antidiff/data/lab_data/t20_score_lab_filter_result.csv'
if successful_rows:
    t20_lab_df = pd.concat(successful_rows, ignore_index=True)
    t20_lab_df.to_csv(save_lab_fpath, index=False)
else:
    # pd.concat raises ValueError on an empty list; fall back to an empty
    # frame with the result columns and leave any existing CSV untouched.
    t20_lab_df = pd.DataFrame(columns=['Raw_name', 'H_Score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq'])
    print('No sequence pair could be scored; nothing was written to', save_lab_fpath)

  0%|                                                                                                                           | 0/349 [00:00<?, ?it/s]

Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVQSGSELKKPGASVKVSCKASGYTFTNFGMNWVRQAPGQGLEWMGYINTNTGESIYSEEFKGRFVFSLDTSVSTAYLQICSLKAEDTAVYYCARSRMVTAYGMDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVQSGAEVKKPGASVKVSCKASGYTFTSYRMHWVRQAPGQGLEWIGYINPSTGYTEYNQKFKDKATMTADKSISTAYMELSSLRSEDTAVYYCARGGGVFDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))

Retry Retry1 
1
Retry 1
84.15977 92.92047
76.1296 79.72227
79.9586 87.89727


  0%|▎                                                                                                                  | 1/349 [00:07<44:39,  7.70s/it]

82.54177 89.27887
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVQSGAEVKKPGESLKISCKGSGYSFTGYTMNWVRQMPGKGLEWMGLINPYNGGTTYNQKFKGQVTISADKSISTAYLQWSSLKASDTAMYYCARDWDYYFDVWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
78.1256 87.07967
83.29067 87.14957


  1%|▋                                                                                                                  | 2/349 [00:09<24:30,  4.24s/it]

82.39327 82.66367
81.41677 88.08417
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIVLTQSPATLSLSPGERATLSCRASQNIGTSIHWYQHKPGQSPRLLIKFASESISGIPARFSGSGSGTDFTLTISSLEPEDFAVYYCQQSKSWPTYTFGGGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
83.34757 80.39827
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGYTFTSYGLSWVRQAPGQGLEWMGEIFPGSGNSNYNENFKGRVTMTTDTSTSTAYMELRSLRSDDTAVYYCARGGFDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
75.16397 85.0456
Retry 2
Retry 1
83.49597 89.77887
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSSLSASVGDRVTITCKSSQSLLNSGNQKNYL

  1%|█▎                                                                                                                 | 4/349 [00:20<28:29,  4.95s/it]

80.60487 88.36457
84.29177 84.01877


  5%|█████▉                                                                                                            | 18/349 [00:23<04:41,  1.17it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAASGFTFSDAWMDWVRQAPGKGLEWVGEIRSKVNNHETYYAESVKGRFTISRDDSKNSLYLQMNSLKTEDTAVYYCARNDYFDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
83.63647 83.36457
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSSLSASVGDRVTITCKASQNVGTSVAWYQQKPGKVPKALLYSASYRFSGVPSRFSGSGSGTDFTLTISSLQPEDVATYYCQQYNSYPLTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
81.11577 89.95337
79.65817 81.84687
80.1236 88.45797
Retry 1
84.87187 83.55147
75.65047 83.87857
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGF

  5%|██████▏                                                                                                           | 19/349 [00:35<10:01,  1.82s/it]

77.22227 78.09737
74.91457 82.6068783.7856
 83.41127
83.31827 76.18817
82.05887 87.83197
Retry 2
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAVSGFVFSRYWMSWVRQAPGKGLEWIGEINPDSSTINYTSSLKDRFTISRDNAKNSLYLQMNSLRAEDTAVYYCASLITTEDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIVMTQSPDSLAVSLGERATINCRASKSVSTSGYVYMHWYQQKPGQPPKLLIYLASYLESGVPDRFSGSGSGTDFTLTISSVQAEDVAVYYCQHSRDLTFPFGGGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSSLSASVGDRVTITCKAS

  9%|█████████▊                                                                                                        | 30/349 [01:00<10:49,  2.04s/it]

83.43227 90.04
Retry 2
82.13687 82.19637
83.22317 80.81087
Retry 3
80.8126 88.57147
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKVSGYTFTDYEMHWVRQAPGKGLEWMGVIDPETGGTAYNQKFKGRVTLTADTSTDTAYMELSSLRSEDTAVYYCTRGTTVVGLDYWGQGTTLTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
79.58687 84.76647
79.29177 78.91897


 11%|█████████████                                                                                                     | 40/349 [01:05<07:13,  1.40s/it]

Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTPGEPASISCRSSQSLVSSKGNTYLHWYLQKPGQSPQFLIYKVSNRFSGVPDRFSGSGSGTDFTLKISRVEAEDVGVYFCSQSTHFPRTFGGGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAASGFTFSTFGMHWVRQAPGKGLEWVSYITSGNSPIYFTDTVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARSSYYGNSMDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
83.81157 83.13087
Retry 1
77.6056 81.07487
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSAMSASVGDRVTITCRASHEISGYLSWFQQKPGGVIKRLINAASTLASGVPSRFSGSRSGTEFTLTISSLQPEDFA

 16%|█████████████████▉                                                                                                | 55/349 [01:22<06:14,  1.27s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTPGEPASISCRSSQSLVSSKGNTYLHWYLQKPGQSPQFLIYKVSNRFSGVPDRFSGSGSGTDFTLKISRVEAEDVGVYFCSQSTHFPRTFGGGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSAMSASVGDRVTITCRASHEISGYLSWFQQKPGGVIKRLINAASTLASGVPSRFSGSRSGTEFTLTISSLQPEDFATYYCLQYSSYPWTFGGGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
77.58477 76.17127
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVLMTQTPLSSPVTLGQPASISCRSSQSIIHSNGNTYLEWYLQKPGQSPQLLIYKVSNRFSGVPDRFTGSGSGTDFTLKISRVEAEDVGVYYCFQGSHVPWTFGQGTKLEIK (Caused 

 18%|████████████████████▉                                                                                             | 64/349 [01:35<06:17,  1.33s/it]

82.09687 86.12157
80.33337 86.63557
84.57637 80.88797


 19%|█████████████████████▌                                                                                            | 66/349 [01:36<06:00,  1.27s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKVSGYTFTDYEMHWVRQAPGKGLEWMGVIDPETGGTAYNQKFKGRVTLTADTSTDTAYMELSSLRSEDTAVYYCTRGTTVVGLDYWGQGTTLTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
83.29177 89.01877
86.99197 84.50457
79.54177 79.06547
85.69677 87.52347
87.69237 87.28977
79.78997 84.01877
68.23287 75.5146
69.04 67.38327
83.04 80.49117
84.52997 78.62837
77.70837 81.56257
76.53857 89.76647
76.13457 83.17767
('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Retry 1
73.29067 81.63557
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSSLSASVGDRVTITCHASQDISSYIVWYQQKPGKAPKLLIYHGTNLEDGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCVHYAQFPYTFGQGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: 

 19%|██████████████████████▏                                                                                           | 68/349 [01:58<10:42,  2.29s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QIQLVQSGPELKKPGSSVKISCKASGYTFTNYGMNWVRQAPGKGLKWMGWINTYTGEPTYADDFKGRFTITAETSTSTLYLQLNNLRSEDTATYFCVRFMSKGDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAASGLIFRSYGMSWVRQAPGKGLEWVATINSGGTYTYYPDSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCANSYSGAMDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
81.96587 74.95547
Retry 1
Retry 1
81.70947 85.13517
Retry 2
79.09847 87.32147
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGSELKKPGASVKVSCKASGYTFTTAGMQWVQKSPGQGLEWMGWINSHS

 30%|█████████████████████████████████▋                                                                               | 104/349 [02:19<04:12,  1.03s/it]

Retry 2
35.42067 86.0286
Retry 2
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVKPGGSLRLSCAASGFTFSDYYMTWVRQAPGKGLEWVASINYDGRNTYYLDSLKSRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARGYYYYGSSPNYFDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 2
73.78157 66.17127


 35%|███████████████████████████████████████▏                                                                         | 121/349 [02:23<02:55,  1.30it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGYTFPIYTMHWVRQAPGQGLEWMGYINPSIGYANYNQKFRDRVTITADTSTSTAYMELSSLRSEDTAVYYCARGGYGDSLFAYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
75.29917 86.11617
82.47867 83.57147
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTPGEPASISCRSSQSLANSYGNTYLSWYLQKPGQSPQLLIYGISNRFSGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCLQGTHQPYTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVESGGGVVQPGRSLRLSCAVSGFSLTRFGVHWVRQAPGKGLEWVAVIWRGGSTDYNAAFMSRLTISKDNSKNTVYLQMN

 35%|███████████████████████████████████████▌                                                                         | 122/349 [02:41<04:40,  1.24s/it]

83.16247 83.08047
78.31937 81.10627


 42%|███████████████████████████████████████████████▉                                                                 | 148/349 [02:42<02:07,  1.57it/s]

84.20837 84.39257
('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLQESGPGLVKPSQTLSLTCTVSGDSITSGYWNWIRKHPGKGLEYMGYIRYSGSTYYSPSLKSRFTISRDTSKNQYSLKLSSVTAADTATYYCSNWAYWGQGTLVTVSA (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
84.07567 86.07487
Retry 1
Retry 1
72.30777 74.90657
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQLTQSPSSLSASVGDRVTITCTASSTVSSTYLHWYQQKPGKAPKLLIYSTSNLASGVPSRFSGSGSGTDYTLTISSLQPEDFATYYCHQYHRSPPTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
78.29067 88.41127
Retry 1
68.64 87.66367
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with ur

 42%|███████████████████████████████████████████████▉                                                                 | 148/349 [02:56<02:07,  1.57it/s]

Retry 1
82.54247 86.77577
85.55087 92.69917
81.36757 89.02657
76.66677 80.65227
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQVVQSGTELKKPGASVKMSCKASGYTFTSYWMHWVKQAPGQGLEWIGAIYPGNSDTSYNQKFKGKAKLTAVTSPNTAYMELSSLRSEDSAVYYCTTTTYGYDWFAYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
78.06037 81.38397
77.24587 82.81257
82.58337 81.9826
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIVMTQSPSFLSASVGDRVTITCKASQDVGTAVAWYQQKPGKAPKLLIYWASTRHTGVPDRFTGSGSGTDFTLTISSLQSEDFAVYFCHQHSSNPLTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQS

 43%|████████████████████████████████████████████████▌                                                                | 150/349 [03:15<04:53,  1.47s/it]

70.90167 87.80377


 50%|████████████████████████████████████████████████████████▎                                                        | 174/349 [03:16<02:17,  1.27it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLQESGPGLVKPSETLSLTCTVSGYTITSGYDWSWIRQPPGKGLEWIGYINYGGSTNYKPSLGSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARYNEYKSYIYDWYFDFWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
76.81037 85.5146
82.14887 84.10717
66.47067 75.56077
83.55377 85.04
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTLGQPASISCRSSQTIVHSDGNTYLEWYQQRPGQSPRLLIYKVSNRFSGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCFQGSHVPLTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGRSLRLSCTASGFTFTDNYMSWVRQAPGKGLEWVGFIRNKANGYTTEY

 50%|████████████████████████████████████████████████████████▎                                                        | 174/349 [03:26<02:17,  1.27it/s]

Retry 1
78.10927 76.42867
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLQESGPGLVKPSETLSLTCTVSGYTITSGYDWSWIRQPPGKGLEWIGYINYGGSTNYKPSLGSRVTISVDTSKNQFSLKLSSVTAADTAVYYCARYNEYKSYIYDWYFDFWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
70.47417 77.42997
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EIQLVQSGAEVKKPGASVKVSCKASGYTFTDYYINWVRQAPGQRLEWIGWIYPGNVNVKYNEKFKGRATLTVDTSASTGYMELSSLRSEDTAVYFCARSIGLRYFDNWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
77.07637 88.03577
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVQSGAEVKKPGASVKVSCKASGYAFITYLIEWVRQAPGQGLEWIGVIHPGSGNSHYNE

 52%|██████████████████████████████████████████████████████████▌                                                      | 181/349 [04:00<04:45,  1.70s/it]

84.62187 85.32717


 57%|████████████████████████████████████████████████████████████████▊                                                | 200/349 [04:01<02:36,  1.05s/it]

85.61987 85.46737
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYGMNWVRQAPGQGLEWMGWINTYTGESTYADDFEGRFVFSMDTSASTAYLQISSLKAEDTAVYYCARESLYDYYSMDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
78.29177 85.65427
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTLGQPASISCRASKSISKYLAWYQQKPGKAPKLLIYSGSTLQSGIPPRFSGSGYGTDFTLTINNIESEDAAYYFCQQHDESPYTFGEGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
82.54177 86.40197
70.08267 79.15897
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAASGYSITSDYAWNWVRQAPGKGLEWVGY

 57%|████████████████████████████████████████████████████████████████▊                                                | 200/349 [04:16<02:36,  1.05s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLQESGPGLVKPSENLSITCTVSGFSLSNYDISWIRQPPGKGLEWLGVIWTGGATNYNPAFKSRLTISRDNSKSQVSLKMSSLQAADTAVYYCVRDSNYRYDEPFTYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
78.30657 82.21247
84.79347 90.17867
80.72657 87.09827
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTPGEPASISCRSSQSIVHSNGNTYLQWYLQKPGQSPQLLIYKVSNRLYGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCFQGSHVPWTFGQGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
51.61167 84.71967
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTLGQPASISCRASKSISKYLAWYQQKPGKAPKLLIYSGSTL

 62%|██████████████████████████████████████████████████████████████████████▌                                          | 218/349 [04:26<02:33,  1.18s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLQESGPGLVKPSENLSITCTVSGFSLSNYDISWIRQPPGKGLEWLGVIWTGGATNYNPAFKSRLTISRDNSKSQVSLKMSSLQAADTAVYYCVRDSNYRYDEPFTYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QIQLVQSGAEVKKPGASVKISCKASGYSFTDYHMNWVRQAPGKGLEWIGNIDPYYGSPTYNHKFKGRVTLTVDTSTSTAYMELSSLRSEDTAVYYCANYGRGNSFPYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
72.17957 86.46027
Retry 1
77.8756 87.25667
82.8756 88.67267
Retry 2
Retry 3
75.17097 86.76997
89.10267 81.12157
85.51287 86.47327
80.8126 85.04
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=

 64%|███████████████████████████████████████████████████████████████████████▉                                         | 222/349 [04:34<02:39,  1.26s/it]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIQMTQSPSTLSASVGDRVTITCKASQDVGIVVAWYQQKPGKAPKLLIYWASIRHTGVPSRFSGSGSGTEFTLTISSLQPDDFATYYCQQYSNYPLYTFGQGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
84.27977 85.5146
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVKPGGSLRLSCAASGFTFSSYWMHWVRQAPGKGLEWVGEIIPINGHTNYNEKFKSRFTISLDNSKNTLYLQMGSLRAEDTAVYYCARGGYYYYGSRDYFDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
Retry 3
76.44637 78.08417
74.78637 84.86117
82.88147 85.09357
Retry 2
80.08337 83.13087
83.6256 83.64497
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&out

 64%|███████████████████████████████████████████████████████████████████████▉                                         | 222/349 [04:46<02:39,  1.26s/it]

76.54477 76.62167
76.93977 81.01857
82.95087 86.7296
73.14527 79.20567
84.53397 85.04
80.94027 68.9646
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLQESGPGLVKPSETLSLTCTVSGFSLSSYDISWIRQPPGKGLEWLGVIWTGGGTNYNSAFMSRLTISKDNSKSQVSLKLSSVTAADTAVYYCVRMDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Retry 1
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIVLTQSPSFLSASVGDRVTITCKASQGVGTAVGWYQQKPGKAPKLLIYWASTRHTGVPDRFSGSGSGTEFTLTISSLQPEDFAAYYCQQYSTYPFTFGGGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 3
Retry 1
75.58337 72.27277
HTTPSConnectionPool(host='sam.curiaglobal.com', port=4

 67%|███████████████████████████████████████████████████████████████████████████▊                                     | 234/349 [05:02<03:00,  1.57s/it]

79.11027 87.7576
82.24587 89.19647


 76%|█████████████████████████████████████████████████████████████████████████████████████▊                           | 265/349 [05:04<01:05,  1.28it/s]

78.84627 84.12047


 81%|██████████████████████████████████████████████████████████████████████████████████████████▉                      | 281/349 [05:04<00:38,  1.76it/s]

78.15137 92.40917
82.30777 86.91967
79.05987 83.70547


 81%|███████████████████████████████████████████████████████████████████████████████████████████▉                     | 284/349 [05:05<00:36,  1.78it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGSELKKPGASVKVSCKASGYTFTNYGMNWVRQAPGQGLKWMGWINNNNAEPTYAQDFRGRFVFSLDTSASTAYLQISSLKTEDTAVYYCARDVMDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVTLKESGPVLVKPTETLTLTCTVSGISLSSFAISWIRQPPGKALEWLAVIWTGGGTDYNSALKSRLTISKDTSKSQVVLTMTNMDPVDTATYYCASHWYLDVWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
Retry 1
81.79177 62.38327
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=QIVLTQSPAILSLSPGERATMSCTASSSVSSSYLHWYQQKPGKAPKLWIYSTSNLASGVPSRFSGSGSGTDYTLTISSLQAEDFATYYCHQYHRSPPTFGQGTK

 82%|████████████████████████████████████████████████████████████████████████████████████████████▌                    | 286/349 [05:13<00:47,  1.31it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVVMTQSPLSLPVTLGQPASISCRASESVDTSDNSFIHWYQQRPGQSPRLLIYRSSTLESGVPDRFSGSGSGTDFTLKISRVEAEDVGVYYCQQNYDVPWTFGQGTKVEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DIVMTQSPDSLAVSLGERATINCKSSESVDNYGISFLNWFQQKPGKAPKLLIYAASNQGSGVPDRFSGSGSGTDFTLTISSLQAEDVAVYFCQQSKEVPRTFGGGTKLEIK (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
Retry 1
78.84927 84.35197
79.50827 81.93697
84.87397 84.85987
83.3756 78.78387


 83%|██████████████████████████████████████████████████████████████████████████████████████████████▏                  | 291/349 [05:16<00:42,  1.35it/s]

78.97447 86.30847
86.44077 85.46737
78.57147 83.13087
76.165 78.55867


 84%|██████████████████████████████████████████████████████████████████████████████████████████████▊                  | 293/349 [05:18<00:41,  1.35it/s]

85.76927 82.85057
77.82057 71.57667


 86%|█████████████████████████████████████████████████████████████████████████████████████████████████▏               | 300/349 [05:20<00:30,  1.60it/s]

88.22037 78.03747
80.3756 75.98137


 87%|█████████████████████████████████████████████████████████████████████████████████████████████████▊               | 302/349 [05:21<00:27,  1.68it/s]

71.16387 83.36457


 87%|██████████████████████████████████████████████████████████████████████████████████████████████████▍              | 304/349 [05:21<00:24,  1.87it/s]

85.49597 89.51337


 87%|██████████████████████████████████████████████████████████████████████████████████████████████████▊              | 305/349 [05:22<00:24,  1.81it/s]

86.59667 91.65187
91.22887 83.83187


 88%|███████████████████████████████████████████████████████████████████████████████████████████████████▍             | 307/349 [05:23<00:21,  1.95it/s]

80.72657 87.09827


 88%|███████████████████████████████████████████████████████████████████████████████████████████████████▋             | 308/349 [05:24<00:22,  1.81it/s]

78.755 84.95547


 89%|████████████████████████████████████████████████████████████████████████████████████████████████████             | 309/349 [05:25<00:26,  1.54it/s]

79.04967 85.04467
71.54477 65.76587


 89%|████████████████████████████████████████████████████████████████████████████████████████████████████▋            | 311/349 [05:26<00:23,  1.62it/s]

HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=EVQLVESGGGLVQPGGSLRLSCAASGFTFSDYGIHWVRQAPGKGLEWVAYISRGSSTIYYADTVKGRFTISRDNAKNSLYLQMNSLRAEDTAVYYCARGGYDTRNAMDYWGQGTTVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
Retry 1
84.29177 84.01877
77.66397 81.89197
77.39677 82.70277
65.90167 84.10717
66.52547 71.91597
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGYTFASYYMHWMRQAPGQGLEWIGWINPGNVNTKYNEKFKGRATLTVDTSTNTAYMELSSLRSEDTAVYYCARSTYYRPLDYWGQGTLVTVSS (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
73.64417 Retry70.85597 
1
83.71797 87.54467
84.755 88.36287


 89%|█████████████████████████████████████████████████████████████████████████████████████████████████████            | 312/349 [05:32<01:01,  1.67s/it]

75.21377 81.77577
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vh&region=1&output=3&seqs=QVQLVQSGAEVKKPGASVKVSCKASGYTFTNYGMNWVRQARGQRLEWIGWINTYTGEPTYADDFKGRLTISKDTSKNQVVLTMTNMDPVDTATYYCTTYATSWYWGQG (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))
76.51267 84.15897
Retry 1
83.47117 85.14027
78.60477 77.83787
81.02467 87.94397
81.18647 92.56647
80.59837 87.25237
86.04177 92.34517
78.94967 84.11217
64.04557 77.82187
80.59837 81.58887
86.54477 86.30847
90.63567 91.91597
78.94077 82.92797
76.90687 82.80377
Retry 1
81.34157 87.94397
86.04177 92.34517
79.25627 86.20547
78.02427 87.94397
80.38467 84.73217
78.445 75.72737
HTTPSConnectionPool(host='sam.curiaglobal.com', port=443): Max retries exceeded with url: /t20/cgi-bin/blast.py?chain=vk&region=1&output=3&seqs=DVLMTQTPLTLSVTLGQPASISCKSSQSLLHSDGKTYLNWLQQRPGQSPKRLIYLVSKLDSGVPDRFTGSGSGTDFTLKISRVEAEDVGIYYCWQGTHLWTFGGGTKVEIK

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 349/349 [09:33<00:00,  1.64s/it]


In [None]:
# Overlay the density curves of the VH and VL T20 score columns on one figure.
for score_column, chain_label in (('H_Score', 'VH'), ('L_score', 'VL')):
    sns.kdeplot(t20_df[score_column], fill=True, label=chain_label)

plt.title("T20 Score Distributions")
plt.xlabel("Score")
plt.ylabel("Density")
plt.legend()
plt.show()

In [24]:
# Pattern that pulls the numeric Z-score out of the HTML page returned by the server.
SCORE_REGEX = re.compile('<h3>The Z-score value of the Query sequence is: (-?[0-9.]+)</h3>')
def get_z_score_online(seq, timeout=30):
    """Fetch the Z-score of a single antibody sequence from the shab web service.

    The chain type reported by ``Chain`` selects the reference database:
    'H' -> 'human_heavy', 'L' -> 'human_lambda', anything else -> 'human_kappa'.

    Args:
        seq: Amino-acid sequence of one variable domain.
        timeout: Seconds to wait for the server on each attempt. Without a
            timeout a stalled connection would block the calling worker forever.

    Returns:
        ``(z_score, chain_type)`` with ``z_score`` as a float, or ``(None, None)``
        when the server answered but the page contained no score (the page is
        printed for inspection in that case).

    Raises:
        ValueError: If none of the 5 attempts produced an OK response with a body.
        requests.exceptions.RequestException: Network errors (including
            timeouts) are not caught here; the caller decides whether to retry.
    """
    chain = Chain(seq, scheme='imgt')
    chain_type = 'human_heavy' if chain.chain_type == 'H' else ('human_lambda' if chain.chain_type == 'L' else 'human_kappa')
    # The endpoint and query do not change between attempts, so build them once.
    # Passing the query via `params` lets requests URL-encode the values.
    url = 'http://www.bioinf.org.uk/abs/shab/shab.cgi'
    query = {'aa_sequence': seq, 'DB': chain_type}
    html = None
    for retry in range(5):
        request = requests.get(url, params=query, timeout=timeout)
        # Pause after every request (longer after each failed attempt) so the
        # public server is not hammered by the parallel workers.
        time.sleep(0.5 + retry * 5)
        if request.ok:
            html = request.text
            break
        else:
            print('Retry', retry+1)
    if not html:
        raise ValueError('Z-score server is not accessible')
    matches = SCORE_REGEX.findall(html)
    if not matches:
        # The server replied, but not with a score page; show what came back.
        print(html)
        return None, None
    return float(matches[0]), chain_type

def get_pair_data_zscore(h_seq, l_seq):
    """Score a heavy/light pair and return one flat record.

    The heavy chain is queried first, then the light chain. The result is
    ``[h_score, h_db, l_score, l_db, h_seq, l_seq]``.
    """
    record = []
    for sequence in (h_seq, l_seq):
        z_score, database = get_z_score_online(sequence)
        record += [z_score, database]
    return record + [h_seq, l_seq]

# Column layout of the one-row frames built by process_z_score_line. Kept next
# to the function so it does not rely on a DataFrame defined in another cell.
ZSCORE_COLUMNS = ['Raw_name', 'H_score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq']

def process_z_score_line(line, max_retries=10, retry_delay=5):
    """Compute the Z-score record for one ``DataFrame.iterrows()`` item.

    Args:
        line: ``(index, row)`` pair; ``row`` must provide 'h_seq', 'l_seq'
            and 'name'.
        max_retries: Number of attempts before giving up.
        retry_delay: Seconds to sleep after a failed attempt.

    Returns:
        A one-row DataFrame with columns ``ZSCORE_COLUMNS``, or ``None`` when
        every attempt raised. A lookup that succeeds but yields no score is
        still returned as a row (with ``None`` scores), as before.
    """
    record = line[1]
    h_seq = record['h_seq']
    l_seq = record['l_seq']
    name = record['name']

    # `data` starts as None so that exhausting all attempts is reported as a
    # failure instead of crashing on an unbound local.
    data = None
    last_error = None
    for _ in range(max_retries):
        try:
            data = get_pair_data_zscore(h_seq, l_seq)
            break
        except Exception as err:  # best effort: network/parsing errors are retried
            last_error = err
            time.sleep(retry_delay)

    if data is None:
        print(f'Z-score lookup failed for {name}: {last_error!r}')
        return None
    return pd.DataFrame([[name] + data], columns=ZSCORE_COLUMNS)

In [23]:
# Empty template with the output column layout; it is also what gets saved
# when no row could be scored.
zscore_filter_df = pd.DataFrame(columns=['Raw_name', 'H_score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq'])

# Score every pair in parallel. executor.map yields results in input order, so
# positions in `results` correspond to row positions in filter_human_df.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = list(tqdm(executor.map(process_z_score_line, filter_human_df.iterrows()), total=len(filter_human_df)))

successful_frames = [result for result in results if result is not None]
# pd.concat raises ValueError on an empty list, so only concatenate when at
# least one row succeeded; otherwise keep the empty template.
if successful_frames:
    zscore_filter_df = pd.concat(successful_frames, ignore_index=True)
Not_successful_index_z_score = [i for i, result in enumerate(results) if result is None]

save_filter_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/z_score_filter_result.csv'
zscore_filter_df.to_csv(save_filter_fpath)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 116/116 [00:37<00:00,  3.11it/s]


In [25]:
# Empty template with the output column layout; it is also what gets saved
# when no row could be scored.
zscore_lab_df = pd.DataFrame(columns=['Raw_name', 'H_score', 'H_gene', 'L_score', 'L-gene', 'H_seq', 'L_seq'])

# Score every pair in parallel. executor.map yields results in input order, so
# positions in `results` correspond to row positions in lab_human_df.
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = list(tqdm(executor.map(process_z_score_line, lab_human_df.iterrows()), total=len(lab_human_df)))

successful_frames = [result for result in results if result is not None]
# pd.concat raises ValueError on an empty list, so only concatenate when at
# least one row succeeded; otherwise keep the empty template.
if successful_frames:
    zscore_lab_df = pd.concat(successful_frames, ignore_index=True)
Not_successful_index_z_score = [i for i, result in enumerate(results) if result is None]

save_lab_fpath = '/data/home/waitma/antibody_proj/antidiff/data/lab_data/z_score_lab_filter_result.csv'
zscore_lab_df.to_csv(save_lab_fpath)


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 349/349 [01:48<00:00,  3.22it/s]


In [10]:
# Load the stored Z-score results table into a DataFrame.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
save_fpath = '/data/home/waitma/antibody_proj/antidiff/checkpoints/batch_one_sample_2023_11_16__22_44_23/z_score_result.csv'
zscore_df = pd.read_csv(save_fpath)

In [11]:
# Display the loaded Z-score table as the cell output.
zscore_df

Unnamed: 0,Raw_name,H_Score,H_gene,L_score,L-gene,H_seq,L_seq
0,3A3human_0,-0.043,human_heavy,-0.023,human_kappa,EVQLVESGGGLVQPGGSLRLSCVASGFTFSNFWMNWVRQAPGKGLE...,DIQMTQSPSPLSASVGDRANITCRASQDVSTDLGWYQQKPGKAPKV...
1,MAK195human_0,0.792,human_heavy,-0.428,human_kappa,QVQLVESGGGVVQPGGSLRLSCAASGFSLTDYGVAWVRQPPGKGLE...,DIQMAQSPSSLSASVGDRVTITCRASQAVSSALTWYQQIPGKSPKL...
2,31#human_0,-0.142,human_heavy,-0.568,human_kappa,EVQLVQSGAEVKKPGESLKISCKASGYTFTDYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATISCKSSQSLLNTNSQKNYIAWYQQKP...
3,56#human_0,-1.133,human_heavy,-0.853,human_kappa,QVQLQESGPRLVEPSETLSLTCTVSGYSITSDYDWGWIRQPPGRGL...,DTLLTQSPDHLSVTLGERATISCRASQNIGTSLNWYQQKPGQSPKR...
4,74#human_0,-0.407,human_heavy,-0.267,human_kappa,EVQLVQSGAEVKKPGESLKISCQVSGYTFINYWIGWVRQMPGKGLE...,DIVMTQSPDSLTVSVGERATINCRSSQTLLNSNTQKNYVTWYQQKA...
...,...,...,...,...,...,...,...
311,149G11human_0,-0.847,human_heavy,-0.385,human_kappa,EVRLVQSGAEVKKPGESLTISCKISGYTFTNYWIGWVRQMPGKGLE...,DIVMTQSPDSLAVSLGERATFNCKSSQNLLYNSNQKSYLAWYQQKP...
312,mu515H7human_0,0.612,human_heavy,-0.864,human_kappa,EVQLVESGGGLVQPGGSLRLSCATSGFTFTDNYMDWVRQAPGKGLE...,IVLTQSPDSLAVSLGERATINCKSSQSLFNSRTRKNYLTWYQQKPG...
313,mu16B5human_0,-1.985,human_heavy,-1.884,human_kappa,QVRLVQSGAEVKKPGASVGVSCKASGFNIKDIYIHWVRQVPGQGPE...,DIVMTQFPNSLAVSLGERATINCKSSQSLLNSRTRKNYLSWYQKKA...
314,21B12human_0,-0.432,human_heavy,-0.390,human_kappa,QVQLVQSGSELKKPGASVKVSCKASGYTFTNYGLNWVRQAPGQGLE...,DIVMTQSPDSLAVSLGERATIDCKSSQSLLYSSNQKNYLTWYQQKP...


In [None]:
# Overlay the VH and VL Z-score densities on shared axes, one colour per chain.
for score_column, chain_label, shade in (
    ('H_Score', 'VH', 'gray'),
    ('L_score', 'VL', 'cyan'),
):
    sns.kdeplot(zscore_df[score_column], fill=True, label=chain_label, color=shade)

plt.title("Z-Score Distributions")
plt.xlabel("Score")
plt.ylabel("Density")
plt.legend()

In [35]:
# Show only the rows whose light chain was scored against the lambda database.
zscore_df.loc[zscore_df['L-gene'].eq('human_lambda')]

Unnamed: 0,Raw_name,H_Score,H_gene,L_score,L-gene,H_seq,L_seq
245,18V4Fhuman_0,0.233,human_heavy,-2.322,human_lambda,QVPLVESGGGVVQPGKSLRLACAASGFSLTGYGMHWVRQAPGKGLE...,QTVVTQEPSLTVSPGGTVTFTCASSTGAVTTSNYPSWFQQKLGQAP...
249,8G8human_0,-0.919,human_heavy,-3.229,human_lambda,QVQLVQSGSELKNPGASVKVSCKAFGYTFTNYGINWVRQAPGQGLE...,LPVLTQPPAVSASLGASIKLTCTLSSQHSTYTIEWSQQRPERSPRF...
258,2D9human_0,0.398,human_heavy,-0.975,human_lambda,EVQVLESGGGSVQPGGSLRLSCAASGISLSSFAESWVRQAPGKGLE...,QTVVTQEPSVSVSPGGTVTLTCGLSTGAVSTSNYLSWYQQTPGQAP...
263,SP34human_0,0.076,human_heavy,-2.273,human_lambda,EVQLVESGGGLVQPGGSLTLSCAASGFTFNTYAMQWVRQASGKGLE...,QTVVTQEPSLTVSPGGTVTLTCASSTGAVTTSNYPNWLQQKPGQAP...
308,anti-CD3 antiboyhuman_0,-0.112,human_heavy,-1.816,human_lambda,EVQLVESGGGLVQPGESLRLSCVASGFTFNTYAMHWVRQASGKGLE...,QTVVTQEPSVTVSPGGTVTLTCGLSTGAVTTSNYPNWFQQKPGQAP...
315,mu19F6human_0,1.179,human_heavy,-0.095,human_lambda,EVQLVESGGALVQPGGSLRLSCTASGFTFSKYVMVWVRQAPGKGLE...,NLVLTQPHSVSGSPGKTVTISCTRSSGDIGDSYVHWYRQRPGSSPI...


In [24]:
# Display `test_chain` as the cell output (it is assigned in a cell outside this excerpt).
test_chain

DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPPTFGQGTRLEIK
                          ^^^^^^                 ^^^                                    ^^^^^^^^^          