### サンプルごとのHN-scoreをカウントする

#### 必要なファイル

1. メタデータを記載したファイル
2. 発現比を計算したファイル 

&nbsp;

#### 基本的なアプローチ

- 発現比が計算されているファイルのカラム名(例：SRR22741460)をメタデータに記載されているサンプル名に置き換えることでサンプルごとにカウントする
- サンプルごとにカウントしたHN-scoreをもとに、HN-scoreを算出する

&nbsp;

#### バージョンチェック

- 以下のバージョンで実行
- `python==3.10.12`
- `polars==1.9.0`

In [1]:
import datetime
import polars as pl

In [2]:
now = datetime.datetime.now()
print(now)

2024-11-03 03:54:09.144141


In [3]:
# set the threshold
threshold = 5

In [4]:
# polars method: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.str.replace_all.html#polars-expr-str-replace-all

metadata = pl.read_csv(
    "../Data/Data_HNscore/HS_rice_meta-data.csv",
    separator=","
).with_columns([
    pl.col("Tissue") # サンプル名を処理しやすくするために、スペースなどをなるべく取り除く
        .str.replace_all(",", " ")  # , to space
        .str.replace_all(r"\s+", " ")  # replace continuous spaces with one space
        .str.replace_all(r"[()]", "")  # remove parentheses (  )
        .str.replace_all(r"\s+$", "")  # remove trailing spaces ('\s+' matches any whitespace character, and '$' matches the end of the string)
        .str.replace_all(" ", "-")  # replace remaining spaces with hyphen
    ]
)
display(metadata.head())

All-pair,Project-pair,Organism,Sub-species,Cultivar,Genotype,SRP accession,GEO Accession,Stress,Control,Library_Layout,Stress temperature (day/night) (℃),Control temperature (day/night)(℃),Time,Heat recovery,Treatment condition,Tissue,Period,Instrument,LibrarySelection,DOI,GSM_Pair_name,note,Technical Note
i64,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
1,1,"""O.sativa""","""ssp. japonica""","""Lagrue""","""WT""","""SRP413097""","""GSE220996""","""SRR22741460""","""SRR22741464""","""PAIRED""","""30/28""","""30/22.2""","""10h""",,"""green house""","""seed-endosperm-R6-caryopsis""","""R2 stage (booting) - R6 stage …","""Illumina HiSeq 2000""","""cDNA""","""10.1038/s41598-023-31399-w""",,"""LaGrue(HNT-tolerant), high-nig…",
2,2,"""O.sativa""","""ssp. japonica""","""Lagrue""","""WT""","""SRP413097""","""GSE220996""","""SRR22741461""","""SRR22741465""","""PAIRED""","""30/28""","""30/22.2""","""10h""",,"""green house""","""seed-endosperm-R6-caryopsis""","""R2 stage (booting) - R6 stage …","""Illumina HiSeq 2000""","""cDNA""","""10.1038/s41598-023-31399-w""",,"""LaGrue(HNT-tolerant), high-nig…",
3,3,"""O.sativa""","""ssp. japonica""","""Lagrue""","""WT""","""SRP413097""","""GSE220996""","""SRR22741462""","""SRR22741466""","""PAIRED""","""30/28""","""30/22.2""","""10h""",,"""green house""","""seed-endosperm-R6-caryopsis""","""R2 stage (booting) - R6 stage …","""Illumina HiSeq 2000""","""cDNA""","""10.1038/s41598-023-31399-w""",,"""LaGrue(HNT-tolerant), high-nig…",
4,4,"""O.sativa""","""ssp. japonica""","""Lagrue""","""WT""","""SRP413097""","""GSE220996""","""SRR22741463""","""SRR22741467""","""PAIRED""","""30/28""","""30/22.2""","""10h""",,"""green house""","""seed-endosperm-R6-caryopsis""","""R2 stage (booting) - R6 stage …","""Illumina HiSeq 2000""","""cDNA""","""10.1038/s41598-023-31399-w""",,"""LaGrue(HNT-tolerant), high-nig…",
5,5,"""O.sativa""","""ssp. japonica""","""Cypress""","""WT""","""SRP413097""","""GSE220996""","""SRR22741468""","""SRR22741472""","""PAIRED""","""30/28""","""30/22.2""","""10h""",,"""green house""","""seed-endosperm-R6-caryopsis""","""R2 stage (booting) - R6 stage …","""Illumina HiSeq 2000""","""cDNA""","""10.1038/s41598-023-31399-w""",,"""Cypress(HNT-sensitive), high-n…",


In [5]:
HNratio = pl.read_csv("../Data/Data_HNscore/HNratio_rice_240806_all.csv", separator=",")
display(HNratio)

GENEID,SRR22741460,SRR22741461,SRR22741462,SRR22741463,SRR22741468,SRR22741469,SRR22741470,SRR22741471,SRR10991576,SRR10991577,SRR10991578,SRR10991582,SRR10991583,SRR10991584,SRR8140273,SRR8140274,SRR8140275,SRR8140276,SRR8140277,SRR8140278,SRR8140285,SRR8140286,SRR8140287,SRR8140288,SRR8140289,SRR8140290,SRR10423442,SRR10423443,SRR10423444,SRR10423430,SRR10423431,SRR10423432,SRR10423433,SRR10423434,SRR10423435,SRR10423418,…,SRR15060432,SRR15060433,SRR15060434,SRR15060435,SRR15060436,SRR15060437,SRR15060438,SRR15060439,SRR15060440,SRR15060441,SRR15060442,SRR15060443,SRR15060444,SRR15060445,SRR15060446,SRR15060447,SRR15060448,SRR15060449,SRR15060450,SRR15060451,SRR15060452,SRR15060453,SRR15060454,SRR15060455,SRR15060456,SRR22854097,SRR22854098,SRR22854099,SRR22854094,SRR22854095,SRR22854096,SRR23051879,SRR23051878,SRR23051877,SRR23051876,SRR23051875,SRR23051874
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""EPlORYSAT000373610""",1.0,1.0,64.003051,64.326859,0.015755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,188.995833,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,200.462188,1.0,1.0,1.0,0.017064,1.0,70.185139,1.284229,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373621""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373643""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373795""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373851""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Os12g0641100""",0.330482,0.881209,0.498074,0.510708,0.808446,0.986463,1.23701,0.690745,1.967604,2.090501,1.667619,0.663144,0.915042,1.175697,0.290975,0.287567,0.243554,0.240388,0.361402,0.367039,0.361091,0.361422,0.356863,0.362518,0.45673,0.463282,5.218504,3.569581,3.710993,2.168091,1.36272,1.019475,2.594985,1.684937,1.135803,10.381417,…,1.254025,0.520597,1.459464,0.983362,0.992884,1.093461,1.780748,1.34706,1.17272,2.118824,1.495923,1.029656,0.80882,1.615215,1.139736,0.948756,0.962316,0.721113,1.061525,1.037891,1.287411,2.775175,0.525017,2.270865,0.577094,0.639495,0.560287,0.520431,0.792163,0.735383,0.738452,1.377318,0.974089,0.60175,0.558296,0.972543,1.624633
"""Os12g0641200""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.34581,1.337851,0.722855,0.202596,0.310684,0.482447,2.147339,2.66812,2.470606,2.420584,1.210811,1.68822,1.782365,2.02234,0.864469,0.57654,2.820844,3.900592,1.0,1.0,1.363146,0.224637,2.561818,5.592903,0.891283,1.0,2.886571,0.399646,…,1.626003,0.554418,2.327543,0.708659,1.253904,0.747667,1.21419,1.394378,0.3072,1.54719,0.853705,0.422481,0.317257,2.731734,1.396359,1.205544,0.483574,0.733506,0.479358,0.468497,1.266185,1.540311,0.340326,0.938826,0.744195,0.902789,0.293271,0.734621,2.713237,0.723214,1.469847,8.251797,12.227851,3.864073,1.986833,0.582545,14.041889
"""Os12g0641300""",1.0,1.0,1.0,1.0,0.316684,1.0,1.0,1.0,1.814044,1.959658,1.651451,0.363303,0.489477,0.513967,2.183846,2.08417,1.371521,1.332527,1.70133,1.59343,2.331243,2.217381,1.709113,1.78951,2.844805,2.677968,0.253169,0.211079,0.145295,1.157339,1.6192,1.448443,1.300356,2.497598,1.559386,1.36379,…,1.137144,0.462542,1.408695,0.915513,0.854722,0.741904,1.555971,1.282565,1.182925,1.995457,1.473107,1.037217,0.94501,2.389791,1.674603,1.253617,1.145706,0.964097,1.156783,1.536818,0.871979,1.831006,0.543172,2.212088,0.656573,1.132524,0.721943,0.54759,1.798458,1.377382,1.720096,1.417665,1.672338,1.197103,0.377292,1.064871,1.447654
"""Os12g0641400""",0.656297,0.944844,0.682396,0.827123,0.600326,1.508617,0.951318,0.385374,1.11855,1.06925,1.000182,0.951502,1.288889,1.349027,0.777332,0.783799,0.715868,0.711444,0.690354,0.699451,0.817806,0.806047,0.577829,0.580564,0.900966,0.90491,2.492621,3.345792,2.583717,1.302779,1.227777,0.929066,1.208109,1.311805,0.985016,1.73784,…,1.14999,0.401833,1.203462,0.904723,0.752594,0.545916,0.987183,0.839881,0.904542,1.304546,1.016111,0.715164,0.688706,1.305155,0.979802,0.888161,0.72413,0.595877,0.785112,0.914574,0.664386,1.343614,0.443142,1.710404,0.427035,0.573803,0.430716,0.43136,0.943933,0.783158,0.879249,0.73665,1.022858,0.340235,0.377274,0.546013,1.023622


In [6]:
rename_dict = dict(
    zip( # 発現比を記載したファイルのカラム名に該当する値をメタデータのカラムから選択し､それに対応するサンプル名を辞書型として格納する
        metadata['Stress'], metadata['Tissue'] # 例: メタデータの｢Stress｣カラムに｢SRR22741460｣があったら､対応するサンプルは｢Tissue｣カラムの｢"seed-endosperm-R6-caryopsis"｣
    )
)

print(rename_dict)

# HNratioデータフレームのカラム名を置き換える
new_columns = []
seen = set()
for col in HNratio.columns:
    new_col = rename_dict.get(col, col)
    if new_col in seen:
        suffix = 1
        while f"{new_col}_{suffix}" in seen:
            suffix += 1
        new_col = f"{new_col}_{suffix}"
    seen.add(new_col)
    new_columns.append(new_col)

HNratio = HNratio.rename(dict(zip(HNratio.columns, new_columns)))

display(HNratio)

{'SRR22741460': 'seed-endosperm-R6-caryopsis', 'SRR22741461': 'seed-endosperm-R6-caryopsis', 'SRR22741462': 'seed-endosperm-R6-caryopsis', 'SRR22741463': 'seed-endosperm-R6-caryopsis', 'SRR22741468': 'seed-endosperm-R6-caryopsis', 'SRR22741469': 'seed-endosperm-R6-caryopsis', 'SRR22741470': 'seed-endosperm-R6-caryopsis', 'SRR22741471': 'seed-endosperm-R6-caryopsis', 'SRR10991576': '3-week-old-seedling', 'SRR10991577': '3-week-old-seedling', 'SRR10991578': '3-week-old-seedling', 'SRR10991582': '3-week-old-seedling', 'SRR10991583': '3-week-old-seedling', 'SRR10991584': '3-week-old-seedling', 'SRR8140273': 'seedling-8-day-old', 'SRR8140274': 'seedling-8-day-old', 'SRR8140275': 'seedling-8-day-old', 'SRR8140276': 'seedling-8-day-old', 'SRR8140277': 'seedling-8-day-old', 'SRR8140278': 'seedling-8-day-old', 'SRR8140285': 'seedling-8-day-old', 'SRR8140286': 'seedling-8-day-old', 'SRR8140287': 'seedling-8-day-old', 'SRR8140288': 'seedling-8-day-old', 'SRR8140289': 'seedling-8-day-old', 'SRR814

GENEID,seed-endosperm-R6-caryopsis,seed-endosperm-R6-caryopsis_1,seed-endosperm-R6-caryopsis_2,seed-endosperm-R6-caryopsis_3,seed-endosperm-R6-caryopsis_4,seed-endosperm-R6-caryopsis_5,seed-endosperm-R6-caryopsis_6,seed-endosperm-R6-caryopsis_7,3-week-old-seedling,3-week-old-seedling_1,3-week-old-seedling_2,3-week-old-seedling_3,3-week-old-seedling_4,3-week-old-seedling_5,seedling-8-day-old,seedling-8-day-old_1,seedling-8-day-old_2,seedling-8-day-old_3,seedling-8-day-old_4,seedling-8-day-old_5,seedling-8-day-old_6,seedling-8-day-old_7,seedling-8-day-old_8,seedling-8-day-old_9,seedling-8-day-old_10,seedling-8-day-old_11,caryopsis-7-days-after-pollination,caryopsis-7-days-after-pollination_1,caryopsis-7-days-after-pollination_2,caryopsis-7-days-after-pollination_3,caryopsis-7-days-after-pollination_4,caryopsis-7-days-after-pollination_5,caryopsis-7-days-after-pollination_6,caryopsis-7-days-after-pollination_7,caryopsis-7-days-after-pollination_8,caryopsis-7-days-after-pollination_9,…,Base_23,Base_24,Base_25,Base_26,Sheath,Sheath_1,Sheath_2,Sheath_3,Sheath_4,Sheath_5,Sheath_6,Sheath_7,Sheath_8,Sheath_9,Sheath_10,Sheath_11,Sheath_12,Sheath_13,Sheath_14,Sheath_15,Sheath_16,Sheath_17,Sheath_18,Sheath_19,Sheath_20,Whole-shoot,Whole-shoot_1,Whole-shoot_2,Whole-shoot_3,Whole-shoot_4,Whole-shoot_5,Whole-plants-7-day-old-seedlings,Whole-plants-7-day-old-seedlings_1,Whole-plants-7-day-old-seedlings_2,Whole-plants-7-day-old-seedlings_3,Whole-plants-7-day-old-seedlings_4,Whole-plants-7-day-old-seedlings_5
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""EPlORYSAT000373610""",1.0,1.0,64.003051,64.326859,0.015755,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,188.995833,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,200.462188,1.0,1.0,1.0,0.017064,1.0,70.185139,1.284229,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373621""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373643""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373795""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
"""EPlORYSAT000373851""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Os12g0641100""",0.330482,0.881209,0.498074,0.510708,0.808446,0.986463,1.23701,0.690745,1.967604,2.090501,1.667619,0.663144,0.915042,1.175697,0.290975,0.287567,0.243554,0.240388,0.361402,0.367039,0.361091,0.361422,0.356863,0.362518,0.45673,0.463282,5.218504,3.569581,3.710993,2.168091,1.36272,1.019475,2.594985,1.684937,1.135803,10.381417,…,1.254025,0.520597,1.459464,0.983362,0.992884,1.093461,1.780748,1.34706,1.17272,2.118824,1.495923,1.029656,0.80882,1.615215,1.139736,0.948756,0.962316,0.721113,1.061525,1.037891,1.287411,2.775175,0.525017,2.270865,0.577094,0.639495,0.560287,0.520431,0.792163,0.735383,0.738452,1.377318,0.974089,0.60175,0.558296,0.972543,1.624633
"""Os12g0641200""",1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.34581,1.337851,0.722855,0.202596,0.310684,0.482447,2.147339,2.66812,2.470606,2.420584,1.210811,1.68822,1.782365,2.02234,0.864469,0.57654,2.820844,3.900592,1.0,1.0,1.363146,0.224637,2.561818,5.592903,0.891283,1.0,2.886571,0.399646,…,1.626003,0.554418,2.327543,0.708659,1.253904,0.747667,1.21419,1.394378,0.3072,1.54719,0.853705,0.422481,0.317257,2.731734,1.396359,1.205544,0.483574,0.733506,0.479358,0.468497,1.266185,1.540311,0.340326,0.938826,0.744195,0.902789,0.293271,0.734621,2.713237,0.723214,1.469847,8.251797,12.227851,3.864073,1.986833,0.582545,14.041889
"""Os12g0641300""",1.0,1.0,1.0,1.0,0.316684,1.0,1.0,1.0,1.814044,1.959658,1.651451,0.363303,0.489477,0.513967,2.183846,2.08417,1.371521,1.332527,1.70133,1.59343,2.331243,2.217381,1.709113,1.78951,2.844805,2.677968,0.253169,0.211079,0.145295,1.157339,1.6192,1.448443,1.300356,2.497598,1.559386,1.36379,…,1.137144,0.462542,1.408695,0.915513,0.854722,0.741904,1.555971,1.282565,1.182925,1.995457,1.473107,1.037217,0.94501,2.389791,1.674603,1.253617,1.145706,0.964097,1.156783,1.536818,0.871979,1.831006,0.543172,2.212088,0.656573,1.132524,0.721943,0.54759,1.798458,1.377382,1.720096,1.417665,1.672338,1.197103,0.377292,1.064871,1.447654
"""Os12g0641400""",0.656297,0.944844,0.682396,0.827123,0.600326,1.508617,0.951318,0.385374,1.11855,1.06925,1.000182,0.951502,1.288889,1.349027,0.777332,0.783799,0.715868,0.711444,0.690354,0.699451,0.817806,0.806047,0.577829,0.580564,0.900966,0.90491,2.492621,3.345792,2.583717,1.302779,1.227777,0.929066,1.208109,1.311805,0.985016,1.73784,…,1.14999,0.401833,1.203462,0.904723,0.752594,0.545916,0.987183,0.839881,0.904542,1.304546,1.016111,0.715164,0.688706,1.305155,0.979802,0.888161,0.72413,0.595877,0.785112,0.914574,0.664386,1.343614,0.443142,1.710404,0.427035,0.573803,0.430716,0.43136,0.943933,0.783158,0.879249,0.73665,1.022858,0.340235,0.377274,0.546013,1.023622


In [7]:
# define a function to categorize the HNratio
def categorize(x):
    if x >= threshold:
        return 'UP'
    elif x <= 1 / threshold:
        return 'DOWN'
    else:
        return 'UNCHANGED'

# apply the categorize function to all columns except GENEID
result = HNratio.with_columns([
    pl.col(col).map_elements(categorize, return_dtype=pl.String)
    for col in HNratio.columns if col != 'GENEID'
])

# display the result
display(result)

GENEID,seed-endosperm-R6-caryopsis,seed-endosperm-R6-caryopsis_1,seed-endosperm-R6-caryopsis_2,seed-endosperm-R6-caryopsis_3,seed-endosperm-R6-caryopsis_4,seed-endosperm-R6-caryopsis_5,seed-endosperm-R6-caryopsis_6,seed-endosperm-R6-caryopsis_7,3-week-old-seedling,3-week-old-seedling_1,3-week-old-seedling_2,3-week-old-seedling_3,3-week-old-seedling_4,3-week-old-seedling_5,seedling-8-day-old,seedling-8-day-old_1,seedling-8-day-old_2,seedling-8-day-old_3,seedling-8-day-old_4,seedling-8-day-old_5,seedling-8-day-old_6,seedling-8-day-old_7,seedling-8-day-old_8,seedling-8-day-old_9,seedling-8-day-old_10,seedling-8-day-old_11,caryopsis-7-days-after-pollination,caryopsis-7-days-after-pollination_1,caryopsis-7-days-after-pollination_2,caryopsis-7-days-after-pollination_3,caryopsis-7-days-after-pollination_4,caryopsis-7-days-after-pollination_5,caryopsis-7-days-after-pollination_6,caryopsis-7-days-after-pollination_7,caryopsis-7-days-after-pollination_8,caryopsis-7-days-after-pollination_9,…,Base_23,Base_24,Base_25,Base_26,Sheath,Sheath_1,Sheath_2,Sheath_3,Sheath_4,Sheath_5,Sheath_6,Sheath_7,Sheath_8,Sheath_9,Sheath_10,Sheath_11,Sheath_12,Sheath_13,Sheath_14,Sheath_15,Sheath_16,Sheath_17,Sheath_18,Sheath_19,Sheath_20,Whole-shoot,Whole-shoot_1,Whole-shoot_2,Whole-shoot_3,Whole-shoot_4,Whole-shoot_5,Whole-plants-7-day-old-seedlings,Whole-plants-7-day-old-seedlings_1,Whole-plants-7-day-old-seedlings_2,Whole-plants-7-day-old-seedlings_3,Whole-plants-7-day-old-seedlings_4,Whole-plants-7-day-old-seedlings_5
str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,…,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""EPlORYSAT000373610""","""UNCHANGED""","""UNCHANGED""","""UP""","""UP""","""DOWN""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""DOWN""","""UNCHANGED""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""EPlORYSAT000373621""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""EPlORYSAT000373643""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""EPlORYSAT000373795""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""EPlORYSAT000373851""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Os12g0641100""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""Os12g0641200""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP""","""UP""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UP"""
"""Os12g0641300""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""DOWN""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""
"""Os12g0641400""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""",…,"""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED""","""UNCHANGED"""


In [8]:
# remove the suffix from the column name
def remove_suffix(col_name):
    return col_name.rsplit('_', 1)[0]


# get the sample column list
sample_columns = set(remove_suffix(col) for col in result.columns if col != 'GENEID')

print(sample_columns)

counts = []
for sample in sample_columns: # sample_columnsはサンプル名のリスト
    # get the dataframe for each sample
    sample_df = result.select(
        ['GENEID'] + [col for col in result.columns if remove_suffix(col) == sample]
    )
    # count the number of UP, DOWN, UNCHANGED
    count = sample_df.with_columns([
        (pl.sum_horizontal([
            (pl.col(col) == "UP").cast(pl.Int32) for col in sample_df.columns if col != "GENEID"
        ])).alias(f"UP({sample})"),
        (pl.sum_horizontal([
            (pl.col(col) == "DOWN").cast(pl.Int32) for col in sample_df.columns if col != "GENEID"
        ])).alias(f"DOWN({sample})"),
        pl.sum_horizontal([
            (pl.col(col) == "UNCHANGED").cast(pl.Int32) for col in sample_df.columns if col != "GENEID"
        ]).alias(f"UNCHANGED({sample})")
    ])
    # append the count to the counts list
    counts.append(count)

# combine all samples
final_count = counts[0] # initialize the final_count with the first count
for count in counts[1:]: # [1:] is the list of counts from the second sample to the last sample
    final_count = final_count.join(
        count, 
        on=['GENEID'], 
        how='left',
        coalesce=True
    )

final_count = final_count.select(
    ['GENEID'] + [col for col in final_count.columns if col.startswith('UP(') or col.startswith('DOWN(') or col.startswith('UNCHANGED(')]
)

# (UP,DOWN, UNCHANGEDの3カラム × sample数 (15個) ) + GENEIDカラムのデータフレームを作成
display(final_count)

{'seed-endosperm-R6-caryopsis', 'leaf', 'Tip', 'Base', 'endosperm-grain-filling-stage', 'Spikeletsof-the-middle-third-of-the-panicle', 'Whole-plants-7-day-old-seedlings', 'Whole-shoot', 'Middle', 'seedling-8-day-old', 'Leaf-blades', 'shoot2-week-old', 'Sheath', '3-week-old-seedling', 'caryopsis-7-days-after-pollination'}


GENEID,UP(seed-endosperm-R6-caryopsis),DOWN(seed-endosperm-R6-caryopsis),UNCHANGED(seed-endosperm-R6-caryopsis),UP(leaf),DOWN(leaf),UNCHANGED(leaf),UP(Tip),DOWN(Tip),UNCHANGED(Tip),UP(Base),DOWN(Base),UNCHANGED(Base),UP(endosperm-grain-filling-stage),DOWN(endosperm-grain-filling-stage),UNCHANGED(endosperm-grain-filling-stage),UP(Spikeletsof-the-middle-third-of-the-panicle),DOWN(Spikeletsof-the-middle-third-of-the-panicle),UNCHANGED(Spikeletsof-the-middle-third-of-the-panicle),UP(Whole-plants-7-day-old-seedlings),DOWN(Whole-plants-7-day-old-seedlings),UNCHANGED(Whole-plants-7-day-old-seedlings),UP(Whole-shoot),DOWN(Whole-shoot),UNCHANGED(Whole-shoot),UP(Middle),DOWN(Middle),UNCHANGED(Middle),UP(seedling-8-day-old),DOWN(seedling-8-day-old),UNCHANGED(seedling-8-day-old),UP(Leaf-blades),DOWN(Leaf-blades),UNCHANGED(Leaf-blades),UP(shoot2-week-old),DOWN(shoot2-week-old),UNCHANGED(shoot2-week-old),UP(Sheath),DOWN(Sheath),UNCHANGED(Sheath),UP(3-week-old-seedling),DOWN(3-week-old-seedling),UNCHANGED(3-week-old-seedling),UP(caryopsis-7-days-after-pollination),DOWN(caryopsis-7-days-after-pollination),UNCHANGED(caryopsis-7-days-after-pollination)
str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""EPlORYSAT000373610""",2,1,5,11,11,183,1,0,16,1,2,24,0,1,1,0,0,6,0,0,6,1,1,4,1,2,14,0,0,12,0,0,2,3,0,7,2,0,19,0,0,6,0,0,15
"""EPlORYSAT000373621""",0,0,8,0,0,205,0,0,17,0,0,27,0,0,2,0,0,6,0,0,6,0,0,6,0,0,17,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,0,15
"""EPlORYSAT000373643""",0,0,8,47,31,127,0,0,17,0,0,27,0,0,2,0,0,6,0,0,6,0,0,6,0,0,17,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,0,15
"""EPlORYSAT000373795""",0,0,8,7,15,183,0,0,17,0,0,27,0,0,2,0,0,6,0,0,6,0,0,6,0,0,17,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,0,15
"""EPlORYSAT000373851""",0,0,8,7,17,181,0,0,17,0,0,27,0,0,2,0,0,6,0,0,6,0,0,6,0,0,17,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,0,15
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Os12g0641100""",0,0,8,3,8,194,1,0,16,3,1,23,0,0,2,1,0,5,0,0,6,0,0,6,0,1,16,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,3,0,12
"""Os12g0641200""",0,0,8,4,6,195,0,0,17,2,0,25,0,0,2,2,0,4,3,0,3,0,0,6,0,0,17,0,0,12,0,0,2,2,0,8,0,0,21,0,0,6,1,1,13
"""Os12g0641300""",0,0,8,4,10,191,1,0,16,3,1,23,1,0,1,1,0,5,0,0,6,0,0,6,0,1,16,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,2,13
"""Os12g0641400""",0,0,8,4,12,189,1,0,16,3,1,23,0,0,2,1,0,5,0,0,6,0,0,6,0,1,16,0,0,12,0,0,2,0,0,10,0,0,21,0,0,6,0,0,15


In [9]:
# get the sample name list
sample_names = [col.split('(')[1].split(')')[0] for col in final_count.columns if col.startswith('UP(')]
print(sample_names)

# calculate HNscore for each sample
HNscore_sample_count = final_count.with_columns([
    (
        pl.col(f'UP({sample})') - pl.col(f'DOWN({sample})')
    ).alias(
        f'HN{threshold}({sample})'
    )
    for sample in sample_names # sample_names is the list of sample names
])

# select the columns that start with HN and end with the sample name
HNscore_sample_count = HNscore_sample_count.select(
    ['GENEID'] + 
    [col for col in HNscore_sample_count.columns if col.startswith(f'HN{threshold}(') and col.endswith(')')]
).with_columns( # すべてのサンプルのHNscoreを合計したHNscoreを算出
    pl.sum_horizontal([
        pl.col(col) for col in HNscore_sample_count.columns if col.startswith(f'HN{threshold}(') and col.endswith(')')
    ]).alias(f'HN{threshold}(all)')
).sort(
    by=[f'HN{threshold}(all)'],
    descending=True
)

# save the result to csv
HNscore_sample_count.write_csv(
    f'../Data/Data_HNscore/HNscore_sample_count_HN{threshold}.tsv',
    separator='\t'
)

print
display(HNscore_sample_count)

['seed-endosperm-R6-caryopsis', 'leaf', 'Tip', 'Base', 'endosperm-grain-filling-stage', 'Spikeletsof-the-middle-third-of-the-panicle', 'Whole-plants-7-day-old-seedlings', 'Whole-shoot', 'Middle', 'seedling-8-day-old', 'Leaf-blades', 'shoot2-week-old', 'Sheath', '3-week-old-seedling', 'caryopsis-7-days-after-pollination']


GENEID,HN5(seed-endosperm-R6-caryopsis),HN5(leaf),HN5(Tip),HN5(Base),HN5(endosperm-grain-filling-stage),HN5(Spikeletsof-the-middle-third-of-the-panicle),HN5(Whole-plants-7-day-old-seedlings),HN5(Whole-shoot),HN5(Middle),HN5(seedling-8-day-old),HN5(Leaf-blades),HN5(shoot2-week-old),HN5(Sheath),HN5(3-week-old-seedling),HN5(caryopsis-7-days-after-pollination),HN5(all)
str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""Os04g0107900""",-4,184,7,8,2,5,6,6,7,12,2,10,7,6,-3,255
"""Os01g0136100""",-1,194,3,1,1,5,6,6,0,12,1,9,2,6,0,245
"""Os02g0259900""",-4,194,1,1,1,5,6,6,0,12,0,10,0,6,0,238
"""Os02g0259850""",-4,191,2,-1,1,5,6,6,2,12,0,10,0,6,1,237
"""Os03g0245800""",-4,196,2,0,2,5,6,6,0,12,0,10,0,6,-4,237
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Os01g0952800""",0,-108,-12,-14,0,-1,0,-1,-12,-4,1,-2,-18,-2,0,-173
"""Os07g0142100""",0,-107,-14,-20,0,-2,-3,0,-12,0,2,0,-20,0,0,-176
"""Os03g0307200""",-3,-133,-9,-19,0,0,0,-3,-11,0,0,2,-5,0,1,-180
"""Os07g0142200""",0,-102,-17,-18,0,0,-1,-2,-13,-12,1,-1,-16,-4,-1,-186


## HN-score validation


In [10]:
# select HN5 columns (exclude HN5(all))
hn5_columns = [col for col in HNscore_sample_count.columns if col.startswith("HN5(") and col != "HN5(all)"]
display(hn5_columns)

['HN5(seed-endosperm-R6-caryopsis)',
 'HN5(leaf)',
 'HN5(Tip)',
 'HN5(Base)',
 'HN5(endosperm-grain-filling-stage)',
 'HN5(Spikeletsof-the-middle-third-of-the-panicle)',
 'HN5(Whole-plants-7-day-old-seedlings)',
 'HN5(Whole-shoot)',
 'HN5(Middle)',
 'HN5(seedling-8-day-old)',
 'HN5(Leaf-blades)',
 'HN5(shoot2-week-old)',
 'HN5(Sheath)',
 'HN5(3-week-old-seedling)',
 'HN5(caryopsis-7-days-after-pollination)']

In [11]:
# count positive and negative samples
HNscore_sample_validate = HNscore_sample_count.with_columns([
    pl.fold(0, lambda acc, x: acc + (x > 0).cast(pl.Int32), hn5_columns).alias("positive_samples_count"),
    pl.fold(0, lambda acc, x: acc + (x < 0).cast(pl.Int32), hn5_columns).alias("negative_samples_count")
])

# assign tag to the contribution of samples
def tag_contribution(count, type, hn5_column):
    total_samples = len(hn5_column)
    if count == total_samples:
        return f"all samples {type} contribute to the score"
    else:
        return f"{count} / {total_samples} samples {type} contribute to the score"
    
HNscore_sample_validate = HNscore_sample_validate.with_columns(
    [
        pl.lit(len(hn5_columns)).alias("total_samples"),
        pl.col("positive_samples_count").map_elements(lambda x: tag_contribution(x, "positively", hn5_columns), return_dtype=pl.Utf8).alias("positive_contribution"),
        pl.col("negative_samples_count").map_elements(lambda x: tag_contribution(x, "negatively", hn5_columns), return_dtype=pl.Utf8).alias("negative_contribution")
]
).sort(
    by=["HN5(all)"],
    descending=True
).drop(
    hn5_columns
)

# save the result to csv
HNscore_sample_validate.write_csv(
    f'../Data/Data_HNscore/HNscore_sample_validate_HN{threshold}_rice.tsv',
    separator='\t'
)

display(HNscore_sample_validate.head())

GENEID,HN5(all),positive_samples_count,negative_samples_count,total_samples,positive_contribution,negative_contribution
str,i32,i32,i32,i32,str,str
"""Os04g0107900""",255,13,2,15,"""13 / 15 samples positively con…","""2 / 15 samples negatively cont…"
"""Os01g0136100""",245,12,1,15,"""12 / 15 samples positively con…","""1 / 15 samples negatively cont…"
"""Os02g0259900""",238,10,1,15,"""10 / 15 samples positively con…","""1 / 15 samples negatively cont…"
"""Os02g0259850""",237,11,2,15,"""11 / 15 samples positively con…","""2 / 15 samples negatively cont…"
"""Os03g0245800""",237,9,2,15,"""9 / 15 samples positively cont…","""2 / 15 samples negatively cont…"


&nbsp;

&nbsp;


## Search specific gene related to GO term

In [12]:
# Search specific gene related to GO:0097577	sequestering of iron ion
specific_gene = ['Os09g0396900', 'Os11g0106700', 'Os12g0106000']
specific_gene_df = HNscore_sample_count.filter(
    pl.col('GENEID').is_in(specific_gene)
)
display(specific_gene_df)

GENEID,HN5(seed-endosperm-R6-caryopsis),HN5(leaf),HN5(Tip),HN5(Base),HN5(endosperm-grain-filling-stage),HN5(Spikeletsof-the-middle-third-of-the-panicle),HN5(Whole-plants-7-day-old-seedlings),HN5(Whole-shoot),HN5(Middle),HN5(seedling-8-day-old),HN5(Leaf-blades),HN5(shoot2-week-old),HN5(Sheath),HN5(3-week-old-seedling),HN5(caryopsis-7-days-after-pollination),HN5(all)
str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""Os09g0396900""",0,93,8,7,0,3,2,0,7,0,0,0,-2,-1,0,117
"""Os12g0106000""",0,33,1,2,1,3,5,0,-1,12,0,0,2,0,0,58
"""Os11g0106700""",-1,25,1,3,1,3,5,0,-1,12,0,0,1,0,0,49


In [13]:
# Search specific gene related to GO:0006879	intracellular iron ion homeostasis
specific_gene_down = ['Os01g0689451', 'Os02g0649900', 'Os02g0650300', 'Os03g0667300', 'Os05g0551000', 'Os07g0257200', 'Os07g0258400']
specific_gene_down_df = HNscore_sample_count.filter(
    pl.col('GENEID').is_in(specific_gene_down)
).sort(
    by=["HN5(all)", "GENEID"],
    descending=[False, False]
)
display(specific_gene_down_df)

GENEID,HN5(seed-endosperm-R6-caryopsis),HN5(leaf),HN5(Tip),HN5(Base),HN5(endosperm-grain-filling-stage),HN5(Spikeletsof-the-middle-third-of-the-panicle),HN5(Whole-plants-7-day-old-seedlings),HN5(Whole-shoot),HN5(Middle),HN5(seedling-8-day-old),HN5(Leaf-blades),HN5(shoot2-week-old),HN5(Sheath),HN5(3-week-old-seedling),HN5(caryopsis-7-days-after-pollination),HN5(all)
str,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32
"""Os07g0258400""",-2,-102,-12,-15,1,0,0,0,-12,-12,1,-2,-18,-3,5,-171
"""Os02g0650300""",0,-118,0,0,0,0,0,-3,0,-12,0,0,0,0,0,-133
"""Os02g0649900""",0,-105,-5,-9,1,1,1,-1,-6,-4,0,0,-8,5,3,-127
"""Os05g0551000""",0,-96,-5,-4,0,2,0,0,-6,-2,0,0,0,0,8,-103
"""Os01g0689451""",0,-83,0,0,0,1,0,0,-4,-2,0,0,0,0,8,-80
"""Os03g0667300""",0,-60,0,0,0,0,0,0,0,-12,0,-2,0,0,-1,-75
"""Os07g0257200""",0,-16,0,1,0,0,-3,-5,0,-12,0,0,0,-4,-4,-43
