In [1]:
import pandas as pd
import requests
import gzip

#### PanelApp

In [None]:
panelapp_url = "https://panelapp.genomicsengland.co.uk/api/v1/panels/"
panel_list = requests.get(panelapp_url).json()["results"]

panelapp_ar_genes = set()
for panel in panel_list:
    panel_id = panel["id"]
    # print(f"Processing PanelApp panel {panel_id} - {panel['name']}")
    panel_detail_url = f"https://panelapp.genomicsengland.co.uk/api/v1/panels/{panel_id}/"
    detail = requests.get(panel_detail_url).json()
    for gene in detail.get("genes", []):
        # print(f"  Processing gene {gene['gene_data']['gene_symbol']}: {gene['mode_of_inheritance']}")
        if "mode_of_inheritance" in gene and "BIALLELIC" in gene["mode_of_inheritance"]:
            print(f"  Found biallelic gene {gene['gene_data']['gene_symbol']}")
            panelapp_ar_genes.add(gene["gene_data"]["gene_symbol"])

print(len(panelapp_ar_genes), "Autosomal recessive genes found in PanelApp")
panelapp_df = pd.DataFrame(sorted(panelapp_ar_genes), columns=["symbol"])
panelapp_df["source"] = "PanelApp"

In [9]:
import pandas as pd

# 1. データの読み込み
gencc_df = pd.read_csv("gencc-submissions.tsv", sep="\t")
hgnc_df = pd.read_csv("HGNC_BioMart_all_results.txt", sep="\t", dtype=str)

# 2. AR遺伝子だけ抽出（'inheritance'列に'Autosomal recessive'が含まれる行）
ar_genes = gencc_df[gencc_df['moi_title'].str.contains("Autosomal recessive", na=False)]

# 3. 遺伝子シンボルの一意なリストを取得
ar_symbols = ar_genes['gene_symbol'].dropna().unique()


In [12]:
hgnc_df.head()

Unnamed: 0,HGNC ID,Status,Approved symbol,Alias symbol,Previous symbol
0,HGNC:5,Approved,A1BG,,
1,HGNC:37133,Approved,A1BG-AS1,FLJ23569,NCRNA00181
2,HGNC:37133,Approved,A1BG-AS1,FLJ23569,A1BGAS
3,HGNC:37133,Approved,A1BG-AS1,FLJ23569,A1BG-AS
4,HGNC:24086,Approved,A1CF,ACF,


In [11]:

# 4. gene_symbol（GenCC）と symbol（HGNC）を照合してHGNC IDを付加
ar_with_hgnc = pd.DataFrame({'gene_symbol': ar_symbols})
merged_df = ar_with_hgnc.merge(hgnc_df[['symbol', 'hgnc_id']], left_on='gene_symbol', right_on='symbol', how='left')

# 5. 出力（必要な列だけ）
merged_df[['gene_symbol', 'hgnc_id']].to_csv("gencc_AR_genes_with_HGNC.tsv", sep="\t", index=False)


KeyError: "None of [Index(['symbol', 'hgnc_id'], dtype='object')] are in the [columns]"

In [None]:
# Define function for symbol conversion to Approved symbol from Alias or Previous symbol
fp_hgnc_all_results = "HGNC_BioMart/HGNC_BioMart_all_results.txt"
df_hgnc_biomart_data = pd.read_table(fp_hgnc_all_results, sep='\t', header=0)

approved_symbols = set(df_hgnc_biomart_data['Approved symbol'])
alias_symbols = set(df_hgnc_biomart_data['Alias symbol'])
prev_symbols = set(df_hgnc_biomart_data['Previous symbol'])

In [5]:
# 1. GenCCのデータ読み込み（公式TSV URLから直接でもOK）
gencc_file = "gencc-submissions.tsv"  # ローカルに保存したTSVファイル名
gencc_df = pd.read_csv(gencc_file, sep="\t")

# 2. autosomal recessive のみ抽出
ar_df = gencc_df[gencc_df["moi_title"].str.contains("Autosomal recessive", na=False)].copy()

# 3. 必要なカラムだけ抽出（例：gene_symbol, hgnc_id）
ar_df = ar_df[["gene_symbol", "hgnc_id"]].drop_duplicates()

# 4. HGNC ID を "HGNC:" 付きに整形（必要に応じて）
ar_df["hgnc_id"] = ar_df["hgnc_id"].apply(lambda x: f"HGNC:{int(x)}" if pd.notnull(x) else "")

# 5. 保存
ar_df.to_csv("gencc_ar_genes.tsv", sep="\t", index=False)


KeyError: "['hgnc_id'] not in index"