In [1]:
%reload_ext autoreload
%autoreload 2

import os
from pathlib import Path

import pandas as pd

from cpra.cpra_calculation import (
    _cpra_donor_filtering_single,
    _cpra_haplotype_frequency_single,
)
from cpra.pdf_extraction.v2503 import (
    extract_antibody_results,
    extract_patient_info,
    extract_pdf_tables,
    extract_unacceptable_antigens,
)

In [2]:
# Root folder holding the private report PDF and the reference CSVs read by the
# cells below. Set the CPRA_DATA_DIR environment variable to point the notebook
# at another location; when it is unset or empty, the original path is used so
# existing runs behave exactly as before.
data_dir = Path(
    os.environ.get("CPRA_DATA_DIR")
    or "/mnt/nfs/home/wenruiwu/projects/cpra/data_private"
)

# 01. Extract unacceptable antigens from LSA PDF (BFR)

In [3]:
# Keyword lists handed to the extraction helpers. The report's column labels
# are bilingual (e.g. "特异性_Specificity"), so some lists carry both forms.

# Header keywords for the antibody-result tables ("Specificity", "Result").
KW_ANTIBODY = [
    "特异性",
    "结果判读",
]

# Header keywords for the patient-information table
# ("Patient Name", "Gender", "Birth date").
KW_PATIENT_INFO = [
    "患者姓名",
    "患者性别",
    "出生日期",
]

# Keywords identifying the specificity column.
KW_SPECIFICITY = [
    "Specificity",
    "特异性",
]

# Keywords identifying the MFI column.
KW_MFI = [
    "MFI",
    "荧光中位值",
]

In [4]:
# Pull every table out of the LSA report PDF; the resulting list is the input
# for the antibody-result and patient-info extraction steps.
lsa_report_pdf = data_dir / "LZY_ZSDY_25LSA04176_20250325_25Y00961.pdf"
df_list = extract_pdf_tables(lsa_report_pdf)

Successfully opened PDF file: /mnt/nfs/home/wenruiwu/projects/cpra/data_private/LZY_ZSDY_25LSA04176_20250325_25Y00961.pdf
Total pages: 7

--- Processing page 1 ---
Found 3 table(s) on page 1.

--- Processing page 2 ---
Found 1 table(s) on page 2.

--- Processing page 3 ---
Found 1 table(s) on page 3.

--- Processing page 4 ---
Found 2 table(s) on page 4.

--- Processing page 5 ---
Found 1 table(s) on page 5.

--- Processing page 6 ---
Found 1 table(s) on page 6.

--- Processing page 7 ---
Found 1 table(s) on page 7.


In [5]:
# Build the antibody-result frame from the parsed tables.
df_antibody_results = extract_antibody_results(
    df_list,
    header_keywords=KW_ANTIBODY,
)

# Preview the first rows for each value of the "tag" column, class I first.
for hla_class in ("HLA_I", "HLA_II"):
    class_mask = df_antibody_results["tag"] == hla_class
    print(df_antibody_results[class_mask].head())

  特异性_Specificity 血清学_Serology 抗原表位_Epitope 荧光中位值_MFI 结果判读_Result    tag
0         A*02:03         A203         107W     16860          强阳  HLA_I
1         A*02:05           A2    152V,107W     16809          强阳  HLA_I
2         A*68:01          A68         152V     16733          强阳  HLA_I
3         A*02:02           A2    152V,107W     16642          强阳  HLA_I
4         A*01:01           A1         66NM     16546          强阳  HLA_I
           特异性_Specificity 血清学_Serology 抗原表位_Epitope 荧光中位值_MFI 结果判读_Result  \
96              DRB1*07:01          DR7          57V     21589          强阳   
97   DQA1*05:01 DQB1*02:02          DQ2         130R     21386          强阳   
98   DQA1*05:01 DQB1*03:01          DQ7         130R     21175          强阳   
99   DQA1*05:01 DQB1*02:01          DQ2         130R     21003          强阳   
100  DQA1*02:01 DQB1*02:02          DQ2         130R     20974          强阳   

        tag  
96   HLA_II  
97   HLA_II  
98   HLA_II  
99   HLA_II  
100  HLA_II  


In [6]:
# Build the patient-info frame from the parsed tables, using the patient header
# keywords to find the relevant table.
df_patient_info = extract_patient_info(df_list, header_keywords=KW_PATIENT_INFO)
# The full-frame print is left disabled — presumably so patient-identifying
# values never land in the saved output (TODO confirm). Only the column names
# are shown.
# print(df_patient_info)
print(df_patient_info.columns)

Index(['患者姓名_Patient Name', '患者性别_Gender', '出生日期_Birth date', '患者年龄_Age',
       '送检单位/个人_Sample Origin', '送检医生_Doctor Name', '联系电话_Contact No.',
       '临床诊断_Diagnosis', '病房/床号_Room/Bed No.', '检测目的_Purpose',
       '病历号_Medical Record No.', '移植日期_Transplant Date',
       '采样日期_Collection Date', '接收日期_Receiving Date', '样本编号_Sample ID',
       '样本类型_Sample Type', '检测项目_Test Name', '检测方法_Test Method'],
      dtype='object')


In [7]:
# MFI threshold forwarded to the extractor as `mfi_cutoff`.
# NOTE(review): whether the comparison is strict is decided inside
# extract_unacceptable_antigens — confirm there before changing this value.
mfi_threshold = 750

# Derive the list of unacceptable antigens from the antibody results.
unacceptable_antigens = extract_unacceptable_antigens(
    df_antibody_results,
    specificity_keywords=KW_SPECIFICITY,
    mfi_keywords=KW_MFI,
    mfi_cutoff=mfi_threshold,
)
print(unacceptable_antigens)

['A*02:03', 'A*02:05', 'A*68:01', 'A*02:02', 'A*01:01', 'A*68:02', 'B*15:02', 'B*51:01', 'A*25:01', 'B*78:01', 'A*02:01', 'B*15:18', 'B*57:01', 'B*18:01', 'B*08:01', 'B*14:01', 'B*15:01', 'B*15:16', 'A*32:01', 'B*15:13', 'B*59:01', 'A*69:01', 'B*07:02', 'B*14:02', 'B*54:01', 'B*50:01', 'B*53:01', 'B*35:01', 'A*80:01', 'B*49:01', 'B*42:01', 'B*39:01', 'B*35:08', 'B*45:01', 'B*15:03', 'B*38:01', 'B*56:01', 'A*29:02', 'B*81:01', 'B*27:08', 'B*58:01', 'B*44:03', 'B*15:12', 'B*41:01', 'B*52:01', 'C*08:01', 'B*82:02', 'B*44:02', 'B*67:01', 'B*37:01', 'B*55:01', 'B*27:03', 'C*06:02', 'B*27:05', 'A*23:01', 'C*08:02', 'A*11:02', 'A*29:01', 'A*24:02', 'A*24:03', 'B*40:01', 'C*05:01', 'A*33:01', 'B*07:03', 'C*18:01', 'B*40:02', 'A*33:03', 'C*04:03', 'B*47:01', 'A*36:01', 'A*34:02', 'C*15:02', 'C*17:01', 'C*02:02', 'C*07:02', 'C*12:02', 'A*31:01', 'C*07:01', 'B*73:01', 'B*48:01', 'A*66:02', 'A*30:01', 'C*04:01', 'C*14:02', 'A*03:01', 'C*16:01', 'C*03:03', 'C*03:04', 'A*74:01', 'A*66:01', 'C*01:02'

# 02. CPRA calculation

In [8]:
# Column-name mappings handed to the CPRA helpers. The values appear to be the
# actual column names in the input tables ("id"/"hla" in the donor HLA CSV,
# "freq" in the haplotype CSV) — TODO confirm the key semantics in the helpers.

# Not referenced by any cell visible in this part of the notebook.
COLUMN_UAS = {
    "recipient_id": "id",
    "unacceptable_antigen": "uas",
}

# Passed as `donor_hla_columns` to the donor-filtering calculation.
COLUMN_HLA = {
    "donor_id": "id",
    "hla": "hla",
}

# Passed as `haplotype_frequency_column` to the haplotype-frequency calculation.
COLUMN_HAPLOTYPE = {
    "frequency": "freq",
}

In [9]:
# Donor HLA table: one row per (id, hla) pair. The "id" field can hold several
# semicolon-separated identifiers (e.g. "391;392").
donor_hla_csv = data_dir / "250914_donor_hla.csv"
df_hla = pd.read_csv(donor_hla_csv)
print(df_hla.head())

        id      hla
0  391;392  A*02:03
1  314;316  A*11:01
2      717  A*02:01
3        7  A*02:06
4      713  A*02:07


In [10]:
# Haplotype table: one allele column per locus (A, B, C, DQB1, DRB1) plus a
# "freq" column holding the haplotype frequency.
haplotype_frequency_csv = data_dir / "250914_haplotype_frequency.csv"
df_haplotype = pd.read_csv(haplotype_frequency_csv)
print(df_haplotype.head())

         A        B        C        DQB1        DRB1      freq
0  A*33:03  B*58:01  C*03:02  DQB1*02:01  DRB1*03:01  0.039326
1  A*02:07  B*46:01  C*01:02  DQB1*03:03  DRB1*09:01  0.035915
2  A*11:01  B*15:02  C*08:01  DQB1*03:01  DRB1*12:02  0.025820
3  A*11:01  B*46:01  C*01:02  DQB1*03:03  DRB1*09:01  0.018996
4  A*02:03  B*38:02  C*07:02  DQB1*05:02  DRB1*16:02  0.014012


In [11]:
# Compute CPRA for this patient's unacceptable antigens in two ways and print
# both values side by side. The summary cell formats them with a "%" suffix,
# so they are presumably percentages on a 0-100 scale — TODO confirm.

# Estimate 1: based on the donor HLA table.
cpra_donor_filtering = _cpra_donor_filtering_single(
    unacceptable_antigens,
    donor_hla=df_hla,
    donor_hla_columns=COLUMN_HLA,
)

# Estimate 2: based on the haplotype-frequency table over the listed loci.
# NOTE(review): the loci are listed DRB1-then-DQB1 while the CSV columns are
# DQB1-then-DRB1; verify the helper selects columns by name, not position.
cpra_haplotype_frequency = _cpra_haplotype_frequency_single(
    unacceptable_antigens,
    haplotype_frequency=df_haplotype,
    loci=["A", "B", "C", "DRB1", "DQB1"],
    haplotype_frequency_column=COLUMN_HAPLOTYPE,
)
print(cpra_donor_filtering, cpra_haplotype_frequency)

100.0 100.0


# 03. Summary

In [12]:
# --- Patient section -------------------------------------------------------
print("===== Patient Information =====\n")

# Walk the field names alongside the first row of the patient frame, but print
# a "---" placeholder in place of each value so the row's contents are never
# written to the notebook output.
first_row = df_patient_info.values[0]
for field_name, _field_value in zip(df_patient_info.columns, first_row):
    print(f"{field_name}: ---")

# --- CPRA section ----------------------------------------------------------
print("\n===== CPRA =====\n")

# Both estimates are shown with two decimals and a percent sign.
print(f"Donor Filtering: {cpra_donor_filtering:.2f}%")
print(f"Haplotype Frequency: {cpra_haplotype_frequency:.2f}%\n")


===== Patient Information =====

患者姓名_Patient Name: ---
患者性别_Gender: ---
出生日期_Birth date: ---
患者年龄_Age: ---
送检单位/个人_Sample Origin: ---
送检医生_Doctor Name: ---
联系电话_Contact No.: ---
临床诊断_Diagnosis: ---
病房/床号_Room/Bed No.: ---
检测目的_Purpose: ---
病历号_Medical Record No.: ---
移植日期_Transplant Date: ---
采样日期_Collection Date: ---
接收日期_Receiving Date: ---
样本编号_Sample ID: ---
样本类型_Sample Type: ---
检测项目_Test Name: ---
检测方法_Test Method: ---

===== CPRA =====

Donor Filtering: 100.00%
Haplotype Frequency: 100.00%

