In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
import re

## Read phenotype data

In [2]:
# Load the raw phenotype table; the index is left unnamed.
phenotype_data = pd.read_csv('phenotype_data.csv', sep=',')
phenotype_data.index.name = None

## drop PrdGrYld(t/ha): per the original note it duplicates the yield already given in kg/ha
phenotype_data = phenotype_data.drop(columns=["PrdGrYld(t/ha)"])
phenotype_data.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49 (no days),HrvPlHt,ZS91 (no days),PPD (no days),RN,GrYld(kg/ha),PrdGrYld(kg/ha),SEGrYld,TlrNo,PlntNo
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,
3,4,Jubilant,RES,2014,NtL,SoP,45.0,95.0,54.0,136.0,0.0,,,,,,
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,


In [3]:
## remove all characters within brackets in column names,
## e.g. 'ZS49 (no days)' -> 'ZS49' and 'GrYld(kg/ha)' -> 'GrYld'
cols = phenotype_data.columns.to_list()
# Raw string: in a normal string literal '\(' and '\)' are invalid escape
# sequences, which newer Python versions warn about.
regex = r' ?\(.*?\) ?'
new_cols = [re.sub(regex, '', col) for col in cols]
phenotype_data.columns = new_cols
phenotype_data.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,
3,4,Jubilant,RES,2014,NtL,SoP,45.0,95.0,54.0,136.0,0.0,,,,,,
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,


In [4]:
## experiment label = "<Year>_<Study>_<Location>"
## (a missing Study or Location is rendered as the text 'nan' by astype(str))
year_txt = phenotype_data['Year'].astype(str)
study_txt = phenotype_data['Study'].astype(str)
location_txt = phenotype_data['Location'].astype(str)
phenotype_data['experiment'] = year_txt + '_' + study_txt + '_' + location_txt
phenotype_data.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo,experiment
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,,2014_NtL_SoP
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,,2014_NtL_SoP
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,,2014_NtL_SoP
3,4,Jubilant,RES,2014,NtL,SoP,45.0,95.0,54.0,136.0,0.0,,,,,,,2014_NtL_SoP
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,,2014_NtL_SoP


## remove phenotype outliers for each experiment

### loop through experiment and traits, classify outliers, if the value is outlier, then assign np.nan

In [6]:
experiments = list(phenotype_data['experiment'].unique())

In [13]:
# Select the trait columns by position: the 11 columns immediately before the
# last column (with the column layout displayed above: 'ZS49PlHt' ... 'PlntNo',
# followed by 'experiment').
# NOTE(review): positional slice - it silently selects different columns if the
# table gains, loses or reorders columns.
traits = phenotype_data.columns[-12:-1]

In [15]:
phenotype_data.isna().sum()

Sort              0
Variety           0
Reason            0
Year              0
Study          4968
Location       2475
ZS49PlHt       5199
ZS49           3497
HrvPlHt       10992
ZS91           9798
PPD           11199
RN            11703
GrYld          7913
PrdGrYld      10152
SEGrYld       10152
TlrNo          7348
PlntNo        10317
experiment        0
dtype: int64

In [16]:
# Work on an independent copy: plain assignment would only create a second name
# for the same DataFrame, so the outlier removal would also overwrite the raw
# values held in `phenotype_data`.
phenotype_data_clean = phenotype_data.copy()

In [17]:
# Define a function to replace outliers with NaN
def replace_outliers_with_nan(column):
    """Return a copy of ``column`` with IQR outliers replaced by NaN.

    A value is treated as an outlier when it lies more than 1.5 * IQR below the
    first quartile or above the third quartile of ``column``. Existing NaN
    values stay NaN (comparisons with NaN are False, so they are never flagged).

    Parameters
    ----------
    column : pandas.Series
        Numeric values to screen. The input is not modified.

    Returns
    -------
    pandas.Series
        New Series with the same index as ``column``.
    """
    q1 = column.quantile(0.25)
    q3 = column.quantile(0.75)
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    is_outlier = (column < lower_bound) | (column > upper_bound)
    # mask() builds a new Series instead of assigning into the caller's object,
    # which avoids side effects and chained-assignment warnings on .loc slices.
    return column.mask(is_outlier)

# For every experiment, blank out the outliers of each trait, judging each value
# only against the other rows of the same experiment.
for experiment in phenotype_data_clean['experiment'].unique():
    in_experiment = phenotype_data_clean['experiment'] == experiment
    for trait in traits:
        trait_values = phenotype_data_clean.loc[in_experiment, trait]
        phenotype_data_clean.loc[in_experiment, trait] = replace_outliers_with_nan(trait_values)

In [18]:
phenotype_data_clean.isna().sum()

Sort              0
Variety           0
Reason            0
Year              0
Study          4968
Location       2475
ZS49PlHt       5422
ZS49           3826
HrvPlHt       11005
ZS91           9866
PPD           11201
RN            11738
GrYld          7940
PrdGrYld      10178
SEGrYld       10171
TlrNo          7393
PlntNo        10349
experiment        0
dtype: int64

## Read sequencing accession IDs

In [19]:
# The SDRF sample sheet is a tab-separated text file.
sdrf_path = 'E-MTAB-7362.sdrf.txt'
accessions_data = pd.read_csv(sdrf_path, sep='\t')
accessions_data.head()

Unnamed: 0,Source Name,Comment[ENA_SAMPLE],Comment[BioSD_SAMPLE],Characteristics[organism],Characteristics[cultivar],Characteristics[age],Unit[time unit],Characteristics[developmental stage],Characteristics[genotype],Characteristics[organism part],...,Comment[technical replicate group],Technology Type,Comment[ENA_EXPERIMENT],Scan Name,Comment[SUBMITTED_FILE_NAME],Comment[ENA_RUN],Comment[FASTQ_URI],Comment[SPOT_LENGTH],Comment[READ_INDEX_1_BASE_COORD],Factor Value[cultivar]
0,P1,ERS2903440,SAMEA5092098,Hordeum vulgare subsp. vulgare,P0001 (020055-57),2,week,seedling development stage,wild type genotype,leaf,...,group 1,sequencing assay,ERX2907039,HGLJKBBXX_P1_S383_L002_R1_001.fastq.gz,HGLJKBBXX_P1_S383_L002_R1_001.fastq.gz,ERR2902346,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR290/006/...,302,152,P0001 (020055-57)
1,P1,ERS2903440,SAMEA5092098,Hordeum vulgare subsp. vulgare,P0001 (020055-57),2,week,seedling development stage,wild type genotype,leaf,...,group 1,sequencing assay,ERX2907039,HGLJKBBXX_P1_S383_L002_R2_001.fastq.gz,HGLJKBBXX_P1_S383_L002_R2_001.fastq.gz,ERR2902346,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR290/006/...,302,152,P0001 (020055-57)
2,P1,ERS2903440,SAMEA5092098,Hordeum vulgare subsp. vulgare,P0001 (020055-57),2,week,seedling development stage,wild type genotype,leaf,...,group 1,sequencing assay,ERX2907039,HH2WCBBXX_P1_S536_L006_R1_001.fastq.gz,HH2WCBBXX_P1_S536_L006_R1_001.fastq.gz,ERR2902347,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR290/007/...,302,152,P0001 (020055-57)
3,P1,ERS2903440,SAMEA5092098,Hordeum vulgare subsp. vulgare,P0001 (020055-57),2,week,seedling development stage,wild type genotype,leaf,...,group 1,sequencing assay,ERX2907039,HH2WCBBXX_P1_S536_L006_R2_001.fastq.gz,HH2WCBBXX_P1_S536_L006_R2_001.fastq.gz,ERR2902347,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR290/007/...,302,152,P0001 (020055-57)
4,P2,ERS2903441,SAMEA5092099,Hordeum vulgare subsp. vulgare,P0002 (02043-20),2,week,seedling development stage,wild type genotype,leaf,...,group 2,sequencing assay,ERX2907040,HGLJKBBXX_P2_S391_L002_R1_001.fastq.gz,HGLJKBBXX_P2_S391_L002_R1_001.fastq.gz,ERR2902348,ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR290/008/...,302,152,P0002 (02043-20)


## Read gene coverage data

In [20]:
# Load the per-sample gene coverage matrix; the first, unnamed CSV column is
# renamed to ENA_name.
genes_sample_cov = (
    pd.read_csv("processed_coverage_matrix.csv", sep=',', header=0)
    .rename(columns={'Unnamed: 0': 'ENA_name'})
)
genes_sample_cov.head()

Unnamed: 0,ENA_name,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,...,HORVU.MOREX.r3.7HG0699010,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260
0,ERS2903440,0.935491,0.901748,0.001374,0.107184,0.011146,0.115887,0.487823,1.309566,0.782961,...,0.520803,0.0,1.024048,0.64692,1.033514,1.076723,0.692114,0.030689,0.218337,0.966486
1,ERS2903441,0.952896,0.904623,0.00117,0.123464,0.014043,0.13663,0.532475,1.212697,0.604154,...,0.527794,0.0,1.009655,0.619661,1.073435,1.082212,0.574312,0.011703,0.176419,0.929491
2,ERS2903442,0.922124,0.926726,0.003186,0.115221,0.010088,0.114513,0.468142,1.238938,0.759823,...,0.478584,0.0,0.986903,0.60177,1.084602,1.013097,0.609381,0.009381,0.206018,0.913982
3,ERS2903444,0.972762,0.95413,0.001597,0.120309,0.011357,0.143732,0.530565,1.197054,0.637565,...,0.500577,0.0,0.96939,0.683879,1.161388,1.014817,0.620885,0.014196,0.136279,0.942064
4,ERS2903445,0.812351,0.799304,0.001957,0.131333,0.010872,0.0985,0.509459,1.332681,0.957599,...,0.476408,0.0,1.081322,0.731898,1.084584,0.976952,0.662535,0.03479,0.251794,0.976517


In [61]:
genes_sample_cov.shape

(502, 161)

In [62]:
accessions_data.shape

(3980, 37)

In [65]:
len(accessions_data['Comment[ENA_SAMPLE]'].unique())

895

## Match gene coverage to phenotype data

In [21]:
## one row per unique (ENA sample, cultivar) pair from the accession sheet
sample_cultivar = accessions_data[['Comment[ENA_SAMPLE]', 'Characteristics[cultivar]']]
acc = (
    sample_cultivar
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'Comment[ENA_SAMPLE]': 'ENA_name',
                     'Characteristics[cultivar]': 'Accession_ID'})
)
acc.head()

Unnamed: 0,ENA_name,Accession_ID
0,ERS2903440,P0001 (020055-57)
1,ERS2903441,P0002 (02043-20)
2,ERS2903442,P0003 (02055-94)
3,ERS2903443,P0004 (02S169-51-45)
4,ERS2903444,P0005 (04053-034)


In [22]:
acc.shape

(895, 2)

In [23]:
# Attach each sample's Accession_ID; how='left' keeps every row of the coverage matrix.
gene_matrix_match_ID = pd.merge(genes_sample_cov, acc, how='left', on='ENA_name')
gene_matrix_match_ID.head()

Unnamed: 0,ENA_name,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,...,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260,Accession_ID
0,ERS2903440,0.935491,0.901748,0.001374,0.107184,0.011146,0.115887,0.487823,1.309566,0.782961,...,0.0,1.024048,0.64692,1.033514,1.076723,0.692114,0.030689,0.218337,0.966486,P0001 (020055-57)
1,ERS2903441,0.952896,0.904623,0.00117,0.123464,0.014043,0.13663,0.532475,1.212697,0.604154,...,0.0,1.009655,0.619661,1.073435,1.082212,0.574312,0.011703,0.176419,0.929491,P0002 (02043-20)
2,ERS2903442,0.922124,0.926726,0.003186,0.115221,0.010088,0.114513,0.468142,1.238938,0.759823,...,0.0,0.986903,0.60177,1.084602,1.013097,0.609381,0.009381,0.206018,0.913982,P0003 (02055-94)
3,ERS2903444,0.972762,0.95413,0.001597,0.120309,0.011357,0.143732,0.530565,1.197054,0.637565,...,0.0,0.96939,0.683879,1.161388,1.014817,0.620885,0.014196,0.136279,0.942064,P0005 (04053-034)
4,ERS2903445,0.812351,0.799304,0.001957,0.131333,0.010872,0.0985,0.509459,1.332681,0.957599,...,0.0,1.081322,0.731898,1.084584,0.976952,0.662535,0.03479,0.251794,0.976517,P0006 (04053-099)


In [24]:
gene_matrix_match_ID.drop(columns=['Accession_ID']).to_csv("gene_matrix.csv")

In [25]:
## modify accession_ids names: 'P0001 (020055-57)' -> '020055-57'
def _strip_accession_prefix(raw_id):
    """Return the cultivar name from an ID shaped like ``'P0001 (name)'``.

    The text between the first '(' after the ``P<digits>`` prefix and the final
    ')' is returned, so names that themselves contain brackets are kept whole.
    Non-string values (e.g. NaN for a sample with no match in ``acc``) are
    returned unchanged, and strings without the ``P<digits> (...)`` shape are
    only whitespace-stripped, so applying the function twice is harmless.

    NOTE(review): assumes every raw ID uses the ``P<digits> (name)`` layout seen
    in the accession sheet preview - confirm against the full file.
    """
    if not isinstance(raw_id, str):
        return raw_id
    match = re.fullmatch(r'\s*P\d+\s*\((.*)\)\s*', raw_id)
    return match.group(1).strip() if match else raw_id.strip()

accession_ids = list(gene_matrix_match_ID['Accession_ID'].values)
gene_matrix_match_ID['Accession_ID'] = [_strip_accession_prefix(item) for item in accession_ids]
# Guarded so that re-running the cell does not fail once ENA_name is already the index.
if 'ENA_name' in gene_matrix_match_ID.columns:
    gene_matrix_match_ID = gene_matrix_match_ID.set_index('ENA_name')
gene_matrix_match_ID.head()

Unnamed: 0_level_0,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,HORVU.MOREX.r3.1HG0065060,...,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260,Accession_ID
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERS2903440,0.935491,0.901748,0.001374,0.107184,0.011146,0.115887,0.487823,1.309566,0.782961,0.543706,...,0.0,1.024048,0.64692,1.033514,1.076723,0.692114,0.030689,0.218337,0.966486,020055-57
ERS2903441,0.952896,0.904623,0.00117,0.123464,0.014043,0.13663,0.532475,1.212697,0.604154,0.554125,...,0.0,1.009655,0.619661,1.073435,1.082212,0.574312,0.011703,0.176419,0.929491,02043-20
ERS2903442,0.922124,0.926726,0.003186,0.115221,0.010088,0.114513,0.468142,1.238938,0.759823,0.484602,...,0.0,0.986903,0.60177,1.084602,1.013097,0.609381,0.009381,0.206018,0.913982,02055-94
ERS2903444,0.972762,0.95413,0.001597,0.120309,0.011357,0.143732,0.530565,1.197054,0.637565,0.483719,...,0.0,0.96939,0.683879,1.161388,1.014817,0.620885,0.014196,0.136279,0.942064,04053-034
ERS2903445,0.812351,0.799304,0.001957,0.131333,0.010872,0.0985,0.509459,1.332681,0.957599,0.574908,...,0.0,1.081322,0.731898,1.084584,0.976952,0.662535,0.03479,0.251794,0.976517,04053-099


In [26]:
gene_matrix_match_ID[gene_matrix_match_ID['HORVU.MOREX.r3.2HG0117260'] > 1.25][['HORVU.MOREX.r3.2HG0117260','Accession_ID']]

Unnamed: 0_level_0,HORVU.MOREX.r3.2HG0117260,Accession_ID
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1
ERS2903595,1.310773,BR2**
ERS2903612,1.25568,C01P-53
ERS2903675,1.261148,Chevallier-Ottawa
ERS2903716,1.292744,Dash
ERS2903752,1.256847,Fathom
ERS2903761,1.319839,Flagship
ERS2904110,1.264306,SANALTA
ERS2904350,1.271398,WI4859


In [27]:
gene_matrix_match_ID[gene_matrix_match_ID['HORVU.MOREX.r3.2HG0117260'] < 0.9][['HORVU.MOREX.r3.2HG0117260','Accession_ID']]

Unnamed: 0_level_0,HORVU.MOREX.r3.2HG0117260,Accession_ID
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1
ERS2903542,0.843413,ASPLUND SVALOF
ERS2903553,0.744551,B559
ERS2903905,0.742597,I93-608
ERS2903913,0.882113,ICB78-0058-7AP-2AP-1AP-4AP-0AP
ERS2903937,0.855397,INDIAN DWARF (CIho 13994)
ERS2904133,0.889279,Shinonome


### Filter the phenotype data to varieties whose Variety ID is present among the gene coverage matrix accession IDs

In [30]:
phenotype_data_clean.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo,experiment
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,,2014_NtL_SoP
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,,2014_NtL_SoP
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,,2014_NtL_SoP
3,4,Jubilant,RES,2014,NtL,SoP,45.0,95.0,54.0,136.0,0.0,,,,,,,2014_NtL_SoP
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,,2014_NtL_SoP


In [31]:
gene_matrix_match_ID.head()

Unnamed: 0_level_0,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,HORVU.MOREX.r3.1HG0065060,...,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260,Accession_ID
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERS2903440,0.935491,0.901748,0.001374,0.107184,0.011146,0.115887,0.487823,1.309566,0.782961,0.543706,...,0.0,1.024048,0.64692,1.033514,1.076723,0.692114,0.030689,0.218337,0.966486,020055-57
ERS2903441,0.952896,0.904623,0.00117,0.123464,0.014043,0.13663,0.532475,1.212697,0.604154,0.554125,...,0.0,1.009655,0.619661,1.073435,1.082212,0.574312,0.011703,0.176419,0.929491,02043-20
ERS2903442,0.922124,0.926726,0.003186,0.115221,0.010088,0.114513,0.468142,1.238938,0.759823,0.484602,...,0.0,0.986903,0.60177,1.084602,1.013097,0.609381,0.009381,0.206018,0.913982,02055-94
ERS2903444,0.972762,0.95413,0.001597,0.120309,0.011357,0.143732,0.530565,1.197054,0.637565,0.483719,...,0.0,0.96939,0.683879,1.161388,1.014817,0.620885,0.014196,0.136279,0.942064,04053-034
ERS2903445,0.812351,0.799304,0.001957,0.131333,0.010872,0.0985,0.509459,1.332681,0.957599,0.574908,...,0.0,1.081322,0.731898,1.084584,0.976952,0.662535,0.03479,0.251794,0.976517,04053-099


In [32]:
# Keep only phenotype rows whose Variety matches an Accession_ID of the coverage matrix.
has_coverage = phenotype_data_clean['Variety'].isin(gene_matrix_match_ID['Accession_ID'])
phenotype_data_filt = phenotype_data_clean.loc[has_coverage]
phenotype_data_filt.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo,experiment
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,,2014_NtL_SoP
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,,2014_NtL_SoP
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,,2014_NtL_SoP
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,,2014_NtL_SoP
7,8,I01-176-1,RES,2014,NtL,SoP,65.0,84.0,66.0,119.0,28.0,,,,,,,2014_NtL_SoP


In [33]:
phenotype_data_filt.shape

(5000, 18)

In [34]:
## add the gene coverage columns (and ENA_name) to every phenotype row,
## matching the phenotype Variety to the coverage matrix Accession_ID
coverage_by_accession = gene_matrix_match_ID.reset_index()
all_data = pd.merge(
    phenotype_data_filt,
    coverage_by_accession,
    how='left',
    left_on='Variety',
    right_on='Accession_ID',
)
all_data = all_data.drop(columns=['Sort', 'Accession_ID']).reset_index(drop=True)
all_data.head()

Unnamed: 0,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,...,HORVU.MOREX.r3.7HG0699010,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260
0,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,...,0.51455,0.0,0.965461,0.598937,1.095395,0.974823,0.454833,0.020369,0.183325,0.900051
1,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,...,0.476762,0.0,0.925849,0.573107,1.095561,1.062924,0.508094,0.010444,0.16658,0.894517
2,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,...,0.488105,0.0,0.95085,0.613333,1.030065,1.045229,0.620392,0.005229,0.209935,0.814641
3,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,...,0.467646,0.0,1.07925,0.735495,1.058892,1.067617,0.840483,0.022393,0.195143,0.869565
4,I01-176-1,RES,2014,NtL,SoP,65.0,84.0,66.0,119.0,28.0,...,0.512037,0.0,1.077239,0.670165,1.016771,1.012892,0.494923,0.004564,0.157216,0.912037


## Before phenotype filtering, cluster the samples based on the coverage values of each gene

In [36]:
from sklearn.cluster import KMeans

In [37]:
GENES = [col for col in gene_matrix_match_ID.columns if col.startswith("HORVU")]

In [38]:
# Empty table with the columns used to record each gene's cluster relabelling.
label_change_columns = ['old_cluster', 'mean', 'new_cluster']
df_label_changes = pd.DataFrame(columns=label_change_columns)
df_label_changes

Unnamed: 0,old_cluster,mean,new_cluster


In [39]:
def replace_with_dictionary(array, dictionary):
    """Return a list with each element of ``array`` mapped through ``dictionary``.

    Elements that are not keys of ``dictionary`` are passed through unchanged.
    """
    return [dictionary.get(value, value) for value in array]


# Sample identifiers: ENA_name (taken from the index) plus Accession_ID.
# reset_index() returns a new frame, so nothing is modified in place on a
# column slice of gene_matrix_match_ID.
df_gene_cluster = gene_matrix_match_ID[['Accession_ID']].reset_index()

num_clusters = 3
# Per-gene results are collected here and concatenated once after the loop,
# instead of growing two DataFrames with pd.concat on every iteration.
label_change_frames = []
cluster_label_columns = {}

for gene in GENES:
    coverage_values = gene_matrix_match_ID[gene]

    # KMeans expects 2-D input: one single-feature row per sample
    X = [[x] for x in coverage_values]
    kmeans = KMeans(n_clusters=num_clusters,n_init=10,random_state=42)
    kmeans.fit(X)
    labels = kmeans.labels_

    # Mean coverage of the samples assigned to each KMeans cluster
    cluster_means = [[cluster, coverage_values[labels == cluster].mean()]
                     for cluster in range(num_clusters)]

    # Rank the clusters by mean coverage and name them low / mid / high
    df_tem = (
        pd.DataFrame(cluster_means,columns=['old_cluster','mean'])
        .sort_values(by='mean')
        .reset_index(drop=True)
    )
    df_tem['new_cluster'] = ['low','mid','high']
    df_tem['gene'] = gene
    my_dict = df_tem.set_index('old_cluster')['new_cluster'].to_dict()
    label_change_frames.append(df_tem)

    # replace each sample's numeric cluster id with its low/mid/high name
    new_column = replace_with_dictionary(labels,my_dict)
    cluster_label_columns[gene] = new_column

## table of the cluster label changes of every gene (row index restarts at 0 for
## each gene, as before)
if label_change_frames:
    df_label_changes = pd.concat(label_change_frames)
else:
    df_label_changes = pd.DataFrame(columns=['old_cluster',
                                            'mean',
                                            'new_cluster'])

## add one cluster-label column per gene, in GENES order
df_gene_cluster = pd.concat(
    [df_gene_cluster,
     pd.DataFrame(cluster_label_columns, index=df_gene_cluster.index)],
    axis=1,
)

In [40]:
df_tem

Unnamed: 0,old_cluster,mean,new_cluster,gene
0,0,0.874945,low,HORVU.MOREX.r3.7HG0751260
1,1,0.950664,mid,HORVU.MOREX.r3.7HG0751260
2,2,1.24883,high,HORVU.MOREX.r3.7HG0751260


In [41]:
df_label_changes.to_csv("df_label_changes.csv",index=None)

In [42]:
df_gene_cluster.head()

Unnamed: 0,ENA_name,Accession_ID,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,...,HORVU.MOREX.r3.7HG0699010,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260
0,ERS2903440,020055-57,mid,mid,low,low,mid,mid,low,high,...,high,low,mid,mid,low,high,high,mid,mid,mid
1,ERS2903441,02043-20,high,mid,low,mid,high,high,mid,mid,...,high,low,mid,mid,mid,high,mid,low,mid,mid
2,ERS2903442,02055-94,mid,mid,mid,mid,mid,mid,low,mid,...,mid,low,mid,low,mid,mid,mid,low,mid,mid
3,ERS2903444,04053-034,high,mid,low,mid,mid,high,mid,mid,...,mid,low,mid,mid,mid,mid,mid,low,low,mid
4,ERS2903445,04053-099,low,low,mid,high,mid,low,mid,high,...,low,low,high,high,mid,low,mid,mid,high,mid


In [43]:
df_gene_cluster.to_csv("df_gene_cluster.csv")

## Strategy:
### extract list of experiment and loop through experiments
### Subset the phenotype data for each experiment, and merge with gene coverage data
### For each experiment, loop through trait
### For each trait, drop missing values, and check number of data point left

In [44]:
all_data.head()

Unnamed: 0,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,...,HORVU.MOREX.r3.7HG0699010,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260
0,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,...,0.51455,0.0,0.965461,0.598937,1.095395,0.974823,0.454833,0.020369,0.183325,0.900051
1,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,...,0.476762,0.0,0.925849,0.573107,1.095561,1.062924,0.508094,0.010444,0.16658,0.894517
2,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,...,0.488105,0.0,0.95085,0.613333,1.030065,1.045229,0.620392,0.005229,0.209935,0.814641
3,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,...,0.467646,0.0,1.07925,0.735495,1.058892,1.067617,0.840483,0.022393,0.195143,0.869565
4,I01-176-1,RES,2014,NtL,SoP,65.0,84.0,66.0,119.0,28.0,...,0.512037,0.0,1.077239,0.670165,1.016771,1.012892,0.494923,0.004564,0.157216,0.912037


In [45]:
phenotype_data_clean.head()

Unnamed: 0,Sort,Variety,Reason,Year,Study,Location,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo,experiment
0,1,Commander,AUS-CTL,2014,NtL,SoP,55.0,92.0,72.0,131.0,29.0,,,,,,,2014_NtL_SoP
1,2,I91-454,RES,2014,NtL,SoP,48.0,92.0,48.0,129.0,26.0,,,,,,,2014_NtL_SoP
2,3,IGB1234,RES,2014,NtL,SoP,42.0,93.0,55.0,137.0,30.0,,,,,,,2014_NtL_SoP
3,4,Jubilant,RES,2014,NtL,SoP,45.0,95.0,54.0,136.0,0.0,,,,,,,2014_NtL_SoP
4,5,HARRINGTON,RES,2014,NtL,SoP,68.0,90.0,82.0,129.0,25.0,,,,,,,2014_NtL_SoP


In [46]:
phenotype_data_clean.columns

Index(['Sort', 'Variety', 'Reason', 'Year', 'Study', 'Location', 'ZS49PlHt',
       'ZS49', 'HrvPlHt', 'ZS91', 'PPD', 'RN', 'GrYld', 'PrdGrYld', 'SEGrYld',
       'TlrNo', 'PlntNo', 'experiment'],
      dtype='object')

In [47]:
# Identifier columns first, then the trait measurements.
id_cols = ['experiment', 'ENA_name', 'Variety']
trait_cols = ['ZS49PlHt', 'ZS49', 'HrvPlHt', 'ZS91', 'PPD', 'RN',
              'GrYld', 'PrdGrYld', 'SEGrYld', 'TlrNo', 'PlntNo']
target_cols = id_cols + trait_cols
phenotype_data_with_ID = all_data.loc[:, target_cols]
phenotype_data_with_ID.head()

Unnamed: 0,experiment,ENA_name,Variety,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo
0,2014_NtL_SoP,ERS2903701,Commander,55.0,92.0,72.0,131.0,29.0,,,,,,
1,2014_NtL_SoP,ERS2903896,I91-454,48.0,92.0,48.0,129.0,26.0,,,,,,
2,2014_NtL_SoP,ERS2903928,IGB1234,42.0,93.0,55.0,137.0,30.0,,,,,,
3,2014_NtL_SoP,ERS2903855,HARRINGTON,68.0,90.0,82.0,129.0,25.0,,,,,,
4,2014_NtL_SoP,ERS2903886,I01-176-1,65.0,84.0,66.0,119.0,28.0,,,,,,


In [48]:
## check if any duplicates in phenotype_data_with_ID
phenotype_data_with_ID.duplicated().any()

True

In [49]:
phenotype_data_with_ID.shape

(5000, 14)

In [50]:
## remove duplicates rows
phenotype_data_with_ID = phenotype_data_with_ID.drop_duplicates()
phenotype_data_with_ID.shape

(4990, 14)

In [51]:
phenotype_data_with_ID.dtypes

experiment     object
ENA_name       object
Variety        object
ZS49PlHt      float64
ZS49          float64
HrvPlHt       float64
ZS91          float64
PPD           float64
RN            float64
GrYld         float64
PrdGrYld      float64
SEGrYld       float64
TlrNo         float64
PlntNo        float64
dtype: object

In [52]:
## some (experiment, ENA_name, Variety) combinations still occur on several rows
## with differing phenotype values; collapse each combination to a single row
## holding the mean of every trait
sample_keys = ['experiment', 'ENA_name', 'Variety']
phenotype_data_with_ID_new = (
    phenotype_data_with_ID
    .groupby(sample_keys)
    .mean()
    .reset_index()
)
phenotype_data_with_ID_new.head()

Unnamed: 0,experiment,ENA_name,Variety,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo
0,2014_18Hrs_SoP,ERS2903440,020055-57,,,,,,,,,,,
1,2014_18Hrs_SoP,ERS2903441,02043-20,,,,,,,,,,,
2,2014_18Hrs_SoP,ERS2903442,02055-94,,,,,,,,,,,
3,2014_18Hrs_SoP,ERS2903444,04053-034,,,,,,,,,,,
4,2014_18Hrs_SoP,ERS2903446,04055-119,,,,,,,,,,,


In [53]:
phenotype_data_with_ID_new.tail()

Unnamed: 0,experiment,ENA_name,Variety,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo
4964,2016_nan_KAT,ERS2904370,XVD21,,,,,,,,,,125.0,
4965,2016_nan_KAT,ERS2904371,XVE7,110.0,105.0,,,,,1929.7,1202.9,0.74,146.0,23.0
4966,2016_nan_KAT,ERS2904372,XVH11,115.0,112.0,,,,,2722.0,2401.2,0.65,167.0,19.0
4967,2016_nan_KAT,ERS2904375,Z019Q008R,95.0,110.0,,,,,3791.3,2895.3,0.74,165.0,24.0
4968,2016_nan_KAT,ERS2904378,ZBC9322,110.0,97.0,,,,,2799.7,2394.8,0.71,116.0,18.0


In [54]:
experiment_list = phenotype_data_with_ID_new['experiment'].unique()
experiment_list

array(['2014_18Hrs_SoP', '2014_NtL_SoP', '2015_TOP1_nan', '2015_TOP2_nan',
       '2015_TOP3_nan', '2015_nan_ESP', '2015_nan_GER', '2015_nan_KAT',
       '2016_18Hrs_SoP', '2016_1NI_MER', '2016_2Ir_MER', '2016_NatL_SoP',
       '2016_nan_ESP', '2016_nan_GER', '2016_nan_KAT'], dtype=object)

In [55]:
# Every column other than the three identifier columns is a trait. Dropping the
# identifiers by name replaces the positional slice [3:15], whose end lay beyond
# the 14 available columns and which depended on the column order.
trait_list = phenotype_data_with_ID_new.columns.drop(['experiment', 'ENA_name', 'Variety'])
trait_list

Index(['ZS49PlHt', 'ZS49', 'HrvPlHt', 'ZS91', 'PPD', 'RN', 'GrYld', 'PrdGrYld',
       'SEGrYld', 'TlrNo', 'PlntNo'],
      dtype='object')

In [56]:
phenotype_data_with_ID_new['experiment'].value_counts()

experiment
2014_NtL_SoP      332
2015_nan_ESP      332
2016_nan_ESP      332
2016_nan_KAT      332
2014_18Hrs_SoP    331
2015_TOP1_nan     331
2015_TOP2_nan     331
2015_TOP3_nan     331
2015_nan_GER      331
2015_nan_KAT      331
2016_18Hrs_SoP    331
2016_1NI_MER      331
2016_2Ir_MER      331
2016_NatL_SoP     331
2016_nan_GER      331
Name: count, dtype: int64

In [57]:
# Index the cluster table by ENA_name. Guarded and not in place, so running this
# cell a second time no longer raises a KeyError for the already-moved column.
if 'ENA_name' in df_gene_cluster.columns:
    df_gene_cluster = df_gene_cluster.set_index('ENA_name')
df_gene_cluster.head()

Unnamed: 0_level_0,Accession_ID,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,...,HORVU.MOREX.r3.7HG0699010,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERS2903440,020055-57,mid,mid,low,low,mid,mid,low,high,mid,...,high,low,mid,mid,low,high,high,mid,mid,mid
ERS2903441,02043-20,high,mid,low,mid,high,high,mid,mid,low,...,high,low,mid,mid,mid,high,mid,low,mid,mid
ERS2903442,02055-94,mid,mid,mid,mid,mid,mid,low,mid,mid,...,mid,low,mid,low,mid,mid,mid,low,mid,mid
ERS2903444,04053-034,high,mid,low,mid,mid,high,mid,mid,low,...,mid,low,mid,mid,mid,mid,mid,low,low,mid
ERS2903445,04053-099,low,low,mid,high,mid,low,mid,high,high,...,low,low,high,high,mid,low,mid,mid,high,mid


In [58]:
phenotype_data_with_ID_new.head()

Unnamed: 0,experiment,ENA_name,Variety,ZS49PlHt,ZS49,HrvPlHt,ZS91,PPD,RN,GrYld,PrdGrYld,SEGrYld,TlrNo,PlntNo
0,2014_18Hrs_SoP,ERS2903440,020055-57,,,,,,,,,,,
1,2014_18Hrs_SoP,ERS2903441,02043-20,,,,,,,,,,,
2,2014_18Hrs_SoP,ERS2903442,02055-94,,,,,,,,,,,
3,2014_18Hrs_SoP,ERS2903444,04053-034,,,,,,,,,,,
4,2014_18Hrs_SoP,ERS2903446,04055-119,,,,,,,,,,,


In [59]:
gene_matrix_match_ID.head()

Unnamed: 0_level_0,HORVU.MOREX.r3.1HG0024860,HORVU.MOREX.r3.1HG0031260,HORVU.MOREX.r3.1HG0031480,HORVU.MOREX.r3.1HG0036390,HORVU.MOREX.r3.1HG0054220,HORVU.MOREX.r3.1HG0057440,HORVU.MOREX.r3.1HG0058180,HORVU.MOREX.r3.1HG0062390,HORVU.MOREX.r3.1HG0062680,HORVU.MOREX.r3.1HG0065060,...,HORVU.MOREX.r3.7HG0701130,HORVU.MOREX.r3.7HG0705340,HORVU.MOREX.r3.7HG0721170,HORVU.MOREX.r3.7HG0729460,HORVU.MOREX.r3.7HG0729670,HORVU.MOREX.r3.7HG0740230,HORVU.MOREX.r3.7HG0740600,HORVU.MOREX.r3.7HG0742750,HORVU.MOREX.r3.7HG0751260,Accession_ID
ENA_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ERS2903440,0.935491,0.901748,0.001374,0.107184,0.011146,0.115887,0.487823,1.309566,0.782961,0.543706,...,0.0,1.024048,0.64692,1.033514,1.076723,0.692114,0.030689,0.218337,0.966486,020055-57
ERS2903441,0.952896,0.904623,0.00117,0.123464,0.014043,0.13663,0.532475,1.212697,0.604154,0.554125,...,0.0,1.009655,0.619661,1.073435,1.082212,0.574312,0.011703,0.176419,0.929491,02043-20
ERS2903442,0.922124,0.926726,0.003186,0.115221,0.010088,0.114513,0.468142,1.238938,0.759823,0.484602,...,0.0,0.986903,0.60177,1.084602,1.013097,0.609381,0.009381,0.206018,0.913982,02055-94
ERS2903444,0.972762,0.95413,0.001597,0.120309,0.011357,0.143732,0.530565,1.197054,0.637565,0.483719,...,0.0,0.96939,0.683879,1.161388,1.014817,0.620885,0.014196,0.136279,0.942064,04053-034
ERS2903445,0.812351,0.799304,0.001957,0.131333,0.010872,0.0985,0.509459,1.332681,0.957599,0.574908,...,0.0,1.081322,0.731898,1.084584,0.976952,0.662535,0.03479,0.251794,0.976517,04053-099


## All experiments to one file

In [60]:
## Build one long table with a row per experiment / trait / gene / sample and
## write it to all_experiments.csv.
## Requires df_gene_cluster and gene_matrix_match_ID to be indexed by ENA_name.
## Column layout of the results table:
stats_list=['experiment',
            'trait',
            'pheno_count_with_nan',
            'pheno_count_missing',
            'ENA_name',
            'phenotype_value',
            'gene',
            'genotype_cluster',
            'genotype_value',
            'without_nan_count_high',
           'without_nan_count_low',
            'without_nan_count_mid'
           ]
array_2D = []
for experiment in experiment_list:
    print(experiment)

    ## From all data, extract data for an experiment.
    ## set_index() returns a new frame, so the filtered slice is not modified in place.
    in_experiment = phenotype_data_with_ID_new['experiment'] == experiment
    experiment_pheno = phenotype_data_with_ID_new[in_experiment].set_index('ENA_name')

    ## merge with gene cluster data
    experiment_pheno_gene = experiment_pheno.merge(df_gene_cluster,left_index=True, right_index=True,how='left')

    ## loop through traits
    for trait in trait_list:
        print(trait)

        ##get the number of total lines in experiment
        pheno_count_with_nan = len(experiment_pheno)
        pheno_count_missing = experiment_pheno[trait].isna().sum()

        ## remove missing phenotype lines, both from the phenotype table and from
        ## the merged table used for the cluster counts, so that the
        ## without_nan_count_* columns really exclude samples lacking this trait
        experiment_pheno_trait = experiment_pheno[experiment_pheno[trait].notna()]
        experiment_pheno_gene_trait = experiment_pheno_gene[experiment_pheno_gene[trait].notna()]

        ## look up the remaining samples once per trait instead of once per value
        varieties = experiment_pheno_trait.index
        phenotype_values = experiment_pheno_trait[trait]
        cluster_rows = df_gene_cluster.loc[varieties]
        coverage_rows = gene_matrix_match_ID.loc[varieties]
        if len(cluster_rows) != len(varieties) or len(coverage_rows) != len(varieties):
            # Duplicate ENA_name labels would misalign the per-sample values below.
            raise ValueError(
                f"duplicate ENA_name labels in cluster/coverage tables for {experiment} / {trait}"
            )

        ## loop through gene, and count low, mid and high samples
        for gene in GENES:
            cluster_counts = experiment_pheno_gene_trait[gene].value_counts()
            num_low = cluster_counts.get('low', 0)
            num_high = cluster_counts.get('high', 0)
            num_mid = cluster_counts.get('mid', 0)

            ## one output row per sample: phenotype value, cluster category and gene cov value
            per_sample = zip(varieties,
                             phenotype_values,
                             cluster_rows[gene],
                             coverage_rows[gene])
            for ena_name, phenotype_value, genotype_cluster, genotype_value in per_sample:
                ## values are listed in exactly the order of stats_list
                ## (high, low, mid for the three count columns)
                array_2D.append([experiment,
                                 trait,
                                 pheno_count_with_nan,
                                 pheno_count_missing,
                                 ena_name,
                                 phenotype_value,
                                 gene,
                                 genotype_cluster,
                                 genotype_value,
                                 num_high,
                                 num_low,
                                 num_mid
                                ])
df_stats = pd.DataFrame(array_2D, columns=stats_list)
df_stats.to_csv("all_experiments.csv")

2014_18Hrs_SoP
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2014_NtL_SoP
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_TOP1_nan
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_TOP2_nan
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_TOP3_nan
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_nan_ESP
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_nan_GER
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2015_nan_KAT
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2016_18Hrs_SoP
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2016_1NI_MER
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2016_2Ir_MER
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
PlntNo
2016_NatL_SoP
ZS49PlHt
ZS49
HrvPlHt
ZS91
PPD
RN
GrYld
PrdGrYld
SEGrYld
TlrNo
Pln