In [2]:
# !pip install pandas
# !pip install statsmodels

In [3]:
import pandas as pd
import os 
from scipy.stats import spearmanr, pearsonr
from statsmodels.stats.multitest import multipletests
from scipy import stats

path = "/home/user/Desktop/윤인수/data/"
tax_data = pd.read_csv(path+"/PC_normalized_PC92_HC384_130 genus_ML.csv")
path_data = pd.read_csv(path+"/PC Pathway.csv")
ortho_data = pd.read_csv(path+'/PC Orthology.csv')
mapping_df = pd.read_excel(path+ "/pathway_Orthology_map.xlsx")
tax_data
print(path_data.shape)
print(tax_data.shape)
print(ortho_data.shape)
tax_data.set_index(tax_data['study_no'] ,inplace=True)  # Sets the first column as the index by name
path_data.set_index(path_data['study_no'], inplace=True)
up_genus = ['Desulfovibrio', 'Fretibacterium', 'Lactobacillus', 'Leuconostoc', 
                      'Olsenella', 'Parvimonas', 'Ralstonia']
down_genus = ['Pseudomonas', 'Simonsiella']
# Extract the specified columns
taxonomy_up = tax_data[up_genus]
taxonomy_down = tax_data[down_genus]
# Based on statistical analysis from pathway bring out upregulated pathway in Cancer group and downregulated pathway in Cancer group
# Correcting the formatting of down_path and up_path lists
down_path = [
    'ko00472', 'ko05231', 'ko00430', 'ko00984', 'ko04150',
    'ko03320', 'ko00333', 'ko00471', 'ko04750', 'ko00830',
    'ko00944', 'ko00404', 'ko04330', 'ko00565', 'ko04975'
]

up_path = [
    'ko04024', 'ko04977', 'ko00601', 'ko04530', 'ko04151',
    'ko04141', 'ko01053', 'ko00220', 'ko04120'
]

# Filtering the path_data DataFrame for the upregulated and downregulated pathways
up_path_data = path_data[up_path]
down_path_data = path_data[down_path]
print(up_path_data.shape, down_path_data.shape)

# Function to apply correlation and adjust p-values
def calculate_correlation(taxonomy_data, pathway_data, selected_genus, up_path, down_path, alpha=0.05):
    # Filter the taxonomy_data for the specified genus columns
    taxonomy_selected = taxonomy_data[selected_genus]

    # Filter the pathway data for upregulated and downregulated pathways
    up_path_data = pathway_data[up_path]
    down_path_data = pathway_data[down_path]

    # Initialize lists to store results for upregulated and downregulated pathways
    up_results = []
    down_results = []

    # Function to calculate correlation for a given pathway dataset (up or downregulated)
    def compute_correlation(taxonomy_selected, path_data, results_list, regulation_type):
        for tax_col in taxonomy_selected.columns:
            for path_col in path_data.columns:
                # Check normality for both taxonomy and pathway columns using Shapiro-Wilk test
                tax_normality = stats.shapiro(taxonomy_selected[tax_col]).pvalue > alpha
                path_normality = stats.shapiro(path_data[path_col]).pvalue > alpha

                # Apply Pearson if both columns are normally distributed, otherwise apply Spearman
                if tax_normality and path_normality:
                    corr, p_value = stats.pearsonr(taxonomy_selected[tax_col], path_data[path_col])
                    method = "Pearson"
                else:
                    corr, p_value = stats.spearmanr(taxonomy_selected[tax_col], path_data[path_col])
                    method = "Spearman"

                # Adjust the p-value using Benjamini-Hochberg (FDR)
                _, p_adj, _, _ = multipletests([p_value], method='fdr_bh')

                # Append the result to the corresponding list (taxonomy column, pathway column, correlation, p-value, adjusted p-value, method, regulation type)
                results_list.append((tax_col, path_col, corr, p_value, p_adj[0], method, regulation_type))

    # Compute correlations for upregulated pathways
    compute_correlation(taxonomy_selected, up_path_data, up_results, 'Upregulated')

    # Compute correlations for downregulated pathways
    compute_correlation(taxonomy_selected, down_path_data, down_results, 'Downregulated')

    # Combine both results into a single DataFrame
    all_results = up_results + down_results
    correlation_df = pd.DataFrame(all_results, columns=["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"])

    # Filter for significant correlations after p-value adjustment
    significant_corr_df = correlation_df[(correlation_df['Pathway_p_value_adjusted'] < alpha) & (abs(correlation_df['Pathway_Correlation']) > 0.2)]

    return significant_corr_df
# Genus lists for upregulated and downregulated groups
up_genus = ['Desulfovibrio', 'Fretibacterium', 'Lactobacillus', 'Leuconostoc', 
                      'Olsenella', 'Parvimonas', 'Ralstonia']
down_genus = ['Pseudomonas', 'Simonsiella']

# Pathways that are downregulated and upregulated based on statistical analysis
down_path = [
    'ko00472', 'ko05231', 'ko00430', 'ko00984', 'ko04150',
    'ko03320', 'ko00333', 'ko00471', 'ko04750', 'ko00830',
    'ko00944', 'ko00404', 'ko04330', 'ko00565', 'ko04975'
]

up_path = [
    'ko04024', 'ko04977', 'ko00601', 'ko04530', 'ko04151',
    'ko04141', 'ko01053', 'ko00220', 'ko04120'
]



# Assuming path_data and taxonomy_data are already loaded as DataFrames
# Call the function to calculate correlations for both upregulated and downregulated pathways
up_regulated_genus = calculate_correlation(tax_data, path_data,up_genus, up_path, down_path)
down_regulated_genus = calculate_correlation(tax_data, path_data, down_genus, up_path, down_path)
# Display the significant correlations
print("Significant Correlations:")
up_regulated_genus
pathway_label = pd.read_csv(path+"pathway_label_list.csv")
pathway_label
import pandas as pd

# Assuming you have two DataFrames:
# 1. `correlation_df`: The correlation results DataFrame (your provided correlation results)
# 2. `kegg_df`: The KEGG pathway dataset with `KEGG_no` and `pathway_kegg_no`

# Example DataFrames (replace with your actual data)
# correlation_df = pd.read_csv('correlation_results.csv')  # Load your correlation results if from CSV

# Perform the merge operation to add pathway labels to your correlation results
up_merged_df = pd.merge(up_regulated_genus, pathway_label, left_on='Pathway', right_on='KEGG_no', how='left')

# Drop the now redundant 'KEGG_no' column
up_merged_df.drop(columns=['KEGG_no'], inplace=True)

# Optionally rearrange columns for readability
up_merged_df = up_merged_df[["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"]]

down_merged_df = pd.merge(down_regulated_genus, pathway_label, left_on='Pathway', right_on='KEGG_no', how='left')

# Drop the now redundant 'KEGG_no' column
down_merged_df.drop(columns=['KEGG_no'], inplace=True)

# Optionally rearrange columns for readability
down_merged_df = down_merged_df[["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"]]

# Genus lists for upregulated and downregulated groups
up_genus = ['Desulfovibrio', 'Fretibacterium', 'Lactobacillus', 'Leuconostoc', 
                      'Olsenella', 'Parvimonas', 'Ralstonia']
down_genus = ['Pseudomonas', 'Simonsiella']

# Pathways that are downregulated and upregulated based on statistical analysis
down_path = [
    'ko00472', 'ko05231', 'ko00430', 'ko00984', 'ko04150',
    'ko03320', 'ko00333', 'ko00471', 'ko04750', 'ko00830',
    'ko00944', 'ko00404', 'ko04330', 'ko00565', 'ko04975'
]

up_path = [
    'ko04024', 'ko04977', 'ko00601', 'ko04530', 'ko04151',
    'ko04141', 'ko01053', 'ko00220', 'ko04120'
]



# Assuming path_data and taxonomy_data are already loaded as DataFrames
# Call the function to calculate correlations for both upregulated and downregulated pathways
up_regulated_genus = calculate_correlation(tax_data, path_data,up_genus, up_path, down_path)
down_regulated_genus = calculate_correlation(tax_data, path_data, down_genus, up_path, down_path)
# Display the significant correlations
print("Significant Correlations:")
up_regulated_genus
dir_cur = os.getcwd()
file_path_output = os.path.join(dir_cur, "analysis_output")
file_path_input = os.path.join(dir_cur,'data')

df = pd.read_excel(file_path_output+ "/statistical_analysis_results_with_labels_Orthology.xlsx", index_col=0)


(476, 449)
(476, 133)
(476, 14875)


In [4]:
tax_data.set_index(tax_data['study_no'] ,inplace=True)  # Sets the first column as the index by name
path_data.set_index(path_data['study_no'], inplace=True)
up_genus = ['Desulfovibrio', 'Fretibacterium', 'Lactobacillus', 'Leuconostoc', 
                      'Olsenella', 'Parvimonas', 'Ralstonia']
down_genus = ['Pseudomonas', 'Simonsiella']
# Extract the specified columns
taxonomy_up = tax_data[up_genus]
taxonomy_down = tax_data[down_genus]

In [5]:
path_data

Unnamed: 0_level_0,study_no,group_1,group_2,ko04612,ko04926,ko00623,ko04320,ko04730,ko05169,ko05321,...,ko04666,ko03018,ko01051,ko01062,ko00404,ko00680,ko04970,ko04016,ko00515,ko01058
study_no,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
H1700001,H1700001,Cancer,1,0.000365,0.000501,0.000299,0.000326,0.000107,0.001087,0.000077,...,0.000469,0.005032,0.000591,0.000014,0.000008,0.006161,0.000061,0.000654,0.000105,0
H1700002,H1700002,Cancer,1,0.000235,0.000381,0.000185,0.000262,0.000058,0.000622,0.000068,...,0.000526,0.005626,0.000609,0.000052,0.000030,0.005737,0.000040,0.000762,0.000046,0
H1700003,H1700003,Cancer,1,0.000170,0.000484,0.000236,0.000365,0.000091,0.001277,0.000089,...,0.000781,0.004941,0.000554,0.000212,0.000105,0.005663,0.000114,0.000902,0.000054,0
H1700004,H1700004,Cancer,1,0.000256,0.000312,0.000145,0.000167,0.000048,0.000480,0.000039,...,0.000418,0.005629,0.000728,0.000002,0.000002,0.005907,0.000045,0.000726,0.000102,0
H1700005,H1700005,Cancer,1,0.000347,0.000312,0.000196,0.000165,0.000048,0.000466,0.000051,...,0.000492,0.005909,0.000633,0.000019,0.000011,0.005975,0.000040,0.000538,0.000055,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
H1900931,H1900931,Control,0,0.000363,0.000098,0.001070,0.000108,0.000384,0.000513,0.000385,...,0.000101,0.000050,0.006157,0.000075,0.000717,0.000045,0.000000,0.000000,0.000000,0
H1900932,H1900932,Control,0,0.000461,0.000122,0.001634,0.000095,0.000511,0.000681,0.000550,...,0.000192,0.000097,0.005875,0.000092,0.000796,0.000051,0.000000,0.000000,0.000000,0
H1900933,H1900933,Control,0,0.000426,0.000110,0.001294,0.000107,0.000419,0.000545,0.000416,...,0.000102,0.000052,0.005999,0.000067,0.000740,0.000052,0.000000,0.000000,0.000000,0
H1900936,H1900936,Control,0,0.000206,0.000047,0.000507,0.000032,0.000201,0.000218,0.000182,...,0.000061,0.000034,0.005564,0.000052,0.000865,0.000053,0.000000,0.000000,0.000000,0


## Check what correlation analysis is correct for this dataset

In [6]:
# Based on statistical analysis from pathway bring out upregulated pathway in Cancer group and downregulated pathway in Cancer group
# Correcting the formatting of down_path and up_path lists
down_path = [
    'ko00472', 'ko05231', 'ko00430', 'ko00984', 'ko04150',
    'ko03320', 'ko00333', 'ko00471', 'ko04750', 'ko00830',
    'ko00944', 'ko00404', 'ko04330', 'ko00565', 'ko04975'
]

up_path = [
    'ko04024', 'ko04977', 'ko00601', 'ko04530', 'ko04151',
    'ko04141', 'ko01053', 'ko00220', 'ko04120'
]

# Filtering the path_data DataFrame for the upregulated and downregulated pathways
up_path_data = path_data[up_path]
down_path_data = path_data[down_path]
print(up_path_data.shape, down_path_data.shape)

(476, 9) (476, 15)


In [7]:

# Function to apply correlation and adjust p-values
def calculate_correlation(taxonomy_data, pathway_data, selected_genus, up_path, down_path, alpha=0.05):
    # Filter the taxonomy_data for the specified genus columns
    taxonomy_selected = taxonomy_data[selected_genus]

    # Filter the pathway data for upregulated and downregulated pathways
    up_path_data = pathway_data[up_path]
    down_path_data = pathway_data[down_path]

    # Initialize lists to store results for upregulated and downregulated pathways
    up_results = []
    down_results = []

    # Function to calculate correlation for a given pathway dataset (up or downregulated)
    def compute_correlation(taxonomy_selected, path_data, results_list, regulation_type):
        for tax_col in taxonomy_selected.columns:
            for path_col in path_data.columns:
                # Check normality for both taxonomy and pathway columns using Shapiro-Wilk test
                tax_normality = stats.shapiro(taxonomy_selected[tax_col]).pvalue > alpha
                path_normality = stats.shapiro(path_data[path_col]).pvalue > alpha

                # Apply Pearson if both columns are normally distributed, otherwise apply Spearman
                if tax_normality and path_normality:
                    corr, p_value = stats.pearsonr(taxonomy_selected[tax_col], path_data[path_col])
                    method = "Pearson"
                else:
                    corr, p_value = stats.spearmanr(taxonomy_selected[tax_col], path_data[path_col])
                    method = "Spearman"

                # Adjust the p-value using Benjamini-Hochberg (FDR)
                _, p_adj, _, _ = multipletests([p_value], method='fdr_bh')

                # Append the result to the corresponding list (taxonomy column, pathway column, correlation, p-value, adjusted p-value, method, regulation type)
                results_list.append((tax_col, path_col, corr, p_value, p_adj[0], method, regulation_type))

    # Compute correlations for upregulated pathways
    compute_correlation(taxonomy_selected, up_path_data, up_results, 'Upregulated')

    # Compute correlations for downregulated pathways
    compute_correlation(taxonomy_selected, down_path_data, down_results, 'Downregulated')

    # Combine both results into a single DataFrame
    all_results = up_results + down_results
    correlation_df = pd.DataFrame(all_results, columns=["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"])

    # Filter for significant correlations after p-value adjustment
    significant_corr_df = correlation_df[(correlation_df['Pathway_p_value_adjusted'] < alpha) & (abs(correlation_df['Pathway_Correlation']) > 0.2)]

    return significant_corr_df

In [8]:
mapping_df

Unnamed: 0,Pathway_ID,Orthology_kegg_no
0,ko04024,K05264
1,ko04024,K05256
2,ko04024,K08522
3,ko04024,K08521
4,ko04024,K25483
...,...,...
1547,ko00830,K07423
1548,ko00830,K07425
1549,ko00830,K00699
1550,ko00830,K09516


In [9]:
# Genus lists for upregulated and downregulated groups
up_genus = ['Desulfovibrio', 'Fretibacterium', 'Lactobacillus', 'Leuconostoc', 
                      'Olsenella', 'Parvimonas', 'Ralstonia']
down_genus = ['Pseudomonas', 'Simonsiella']

# Pathways that are downregulated and upregulated based on statistical analysis
down_path = [
    'ko00472', 'ko05231', 'ko00430', 'ko00984', 'ko04150',
    'ko03320', 'ko00333', 'ko00471', 'ko04750', 'ko00830',
    'ko00944', 'ko00404', 'ko04330', 'ko00565', 'ko04975'
]

up_path = [
    'ko04024', 'ko04977', 'ko00601', 'ko04530', 'ko04151',
    'ko04141', 'ko01053', 'ko00220', 'ko04120'
]



# Assuming path_data and taxonomy_data are already loaded as DataFrames
# Call the function to calculate correlations for both upregulated and downregulated pathways
up_regulated_genus = calculate_correlation(tax_data, path_data,up_genus, up_path, down_path)
down_regulated_genus = calculate_correlation(tax_data, path_data, down_genus, up_path, down_path)
# Display the significant correlations
print("Significant Correlations:")
up_regulated_genus


Significant Correlations:


Unnamed: 0,Taxonomy,Pathway,Pathway_Correlation,Pathway_p_value,Pathway_p_value_adjusted,Pathway_Correlation_Method,Pathway_Regulation_Type
0,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated
3,Desulfovibrio,ko04530,0.225278,6.833549e-07,6.833549e-07,Spearman,Upregulated
4,Desulfovibrio,ko04151,0.213411,2.626457e-06,2.626457e-06,Spearman,Upregulated
5,Desulfovibrio,ko04141,0.201499,9.416391e-06,9.416391e-06,Spearman,Upregulated
9,Fretibacterium,ko04024,0.393988,3.990191e-19,3.990191e-19,Spearman,Upregulated
...,...,...,...,...,...,...,...
151,Parvimonas,ko00565,-0.205362,6.274448e-06,6.274448e-06,Spearman,Downregulated
154,Ralstonia,ko05231,-0.213802,2.515373e-06,2.515373e-06,Spearman,Downregulated
155,Ralstonia,ko00430,-0.218634,1.465541e-06,1.465541e-06,Spearman,Downregulated
160,Ralstonia,ko00471,-0.209544,4.007899e-06,4.007899e-06,Spearman,Downregulated


In [10]:
pathway_label = pd.read_csv(path+"pathway_label_list.csv")
pathway_label

Unnamed: 0,KEGG_no,pathway_kegg_no
0,ko00010,Glycolysis / Gluconeogenesis
1,ko00020,Citrate cycle (TCA cycle)
2,ko00030,Pentose phosphate pathway
3,ko00040,Pentose and glucuronate interconversions
4,ko00051,Fructose and mannose metabolism
...,...,...
441,ko05410,Hypertrophic cardiomyopathy (HCM)
442,ko05412,Arrhythmogenic right ventricular cardiomyopath...
443,ko05414,Dilated cardiomyopathy (DCM)
444,ko05416,Viral myocarditis


In [11]:
import pandas as pd

# Assuming you have two DataFrames:
# 1. `correlation_df`: The correlation results DataFrame (your provided correlation results)
# 2. `kegg_df`: The KEGG pathway dataset with `KEGG_no` and `pathway_kegg_no`

# Example DataFrames (replace with your actual data)
# correlation_df = pd.read_csv('correlation_results.csv')  # Load your correlation results if from CSV

# Perform the merge operation to add pathway labels to your correlation results
up_merged_df = pd.merge(up_regulated_genus, pathway_label, left_on='Pathway', right_on='KEGG_no', how='left')

# Drop the now redundant 'KEGG_no' column
up_merged_df.drop(columns=['KEGG_no'], inplace=True)

# Optionally rearrange columns for readability
up_merged_df = up_merged_df[["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"]]

down_merged_df = pd.merge(down_regulated_genus, pathway_label, left_on='Pathway', right_on='KEGG_no', how='left')

# Drop the now redundant 'KEGG_no' column
down_merged_df.drop(columns=['KEGG_no'], inplace=True)

# Optionally rearrange columns for readability
down_merged_df = down_merged_df[["Taxonomy", "Pathway", "Pathway_Correlation", "Pathway_p_value", "Pathway_p_value_adjusted", "Pathway_Correlation_Method", "Pathway_Regulation_Type"]]

In [12]:
# Display the merged DataFrame
up_merged_df.reset_index(inplace=True, drop=True)
down_merged_df.reset_index(inplace=True, drop=True)

In [14]:
up_merged_df

Unnamed: 0,Taxonomy,Pathway,Pathway_Correlation,Pathway_p_value,Pathway_p_value_adjusted,Pathway_Correlation_Method,Pathway_Regulation_Type
0,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated
1,Desulfovibrio,ko04530,0.225278,6.833549e-07,6.833549e-07,Spearman,Upregulated
2,Desulfovibrio,ko04151,0.213411,2.626457e-06,2.626457e-06,Spearman,Upregulated
3,Desulfovibrio,ko04141,0.201499,9.416391e-06,9.416391e-06,Spearman,Upregulated
4,Fretibacterium,ko04024,0.393988,3.990191e-19,3.990191e-19,Spearman,Upregulated
...,...,...,...,...,...,...,...
80,Parvimonas,ko00565,-0.205362,6.274448e-06,6.274448e-06,Spearman,Downregulated
81,Ralstonia,ko05231,-0.213802,2.515373e-06,2.515373e-06,Spearman,Downregulated
82,Ralstonia,ko00430,-0.218634,1.465541e-06,1.465541e-06,Spearman,Downregulated
83,Ralstonia,ko00471,-0.209544,4.007899e-06,4.007899e-06,Spearman,Downregulated


In [15]:
up_result = pd.merge(up_merged_df, mapping_df, left_on = "Pathway", right_on = "Pathway_ID",how = 'left').drop(columns=['Pathway_ID'])  # Drop redundant Pathway_ID column
down_result = pd.merge(down_merged_df, mapping_df, left_on = "Pathway", right_on = "Pathway_ID",how = 'left').drop(columns=['Pathway_ID'])  # Drop redundant Pathway_ID column


In [16]:
up_result

Unnamed: 0,Taxonomy,Pathway,Pathway_Correlation,Pathway_p_value,Pathway_p_value_adjusted,Pathway_Correlation_Method,Pathway_Regulation_Type,Orthology_kegg_no
0,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated,K05264
1,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated,K05256
2,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated,K08522
3,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated,K08521
4,Desulfovibrio,ko04024,0.226677,5.801431e-07,5.801431e-07,Spearman,Upregulated,K25483
...,...,...,...,...,...,...,...,...
6228,Ralstonia,ko00830,-0.231702,3.195036e-07,3.195036e-07,Spearman,Downregulated,K07423
6229,Ralstonia,ko00830,-0.231702,3.195036e-07,3.195036e-07,Spearman,Downregulated,K07425
6230,Ralstonia,ko00830,-0.231702,3.195036e-07,3.195036e-07,Spearman,Downregulated,K00699
6231,Ralstonia,ko00830,-0.231702,3.195036e-07,3.195036e-07,Spearman,Downregulated,K09516


In [17]:
down_result

Unnamed: 0,Taxonomy,Pathway,Pathway_Correlation,Pathway_p_value,Pathway_p_value_adjusted,Pathway_Correlation_Method,Pathway_Regulation_Type,Orthology_kegg_no
0,Pseudomonas,ko00601,-0.224977,7.077250e-07,7.077250e-07,Spearman,Upregulated,K03766
1,Pseudomonas,ko00601,-0.224977,7.077250e-07,7.077250e-07,Spearman,Upregulated,K07819
2,Pseudomonas,ko00601,-0.224977,7.077250e-07,7.077250e-07,Spearman,Upregulated,K07820
3,Pseudomonas,ko00601,-0.224977,7.077250e-07,7.077250e-07,Spearman,Upregulated,K03877
4,Pseudomonas,ko00601,-0.224977,7.077250e-07,7.077250e-07,Spearman,Upregulated,K00718
...,...,...,...,...,...,...,...,...
585,Simonsiella,ko04330,0.413342,4.560335e-21,4.560335e-21,Spearman,Downregulated,K06065
586,Simonsiella,ko04330,0.413342,4.560335e-21,4.560335e-21,Spearman,Downregulated,K06066
587,Simonsiella,ko04330,0.413342,4.560335e-21,4.560335e-21,Spearman,Downregulated,K25100
588,Simonsiella,ko04330,0.413342,4.560335e-21,4.560335e-21,Spearman,Downregulated,K06067


In [18]:
dir_cur = os.getcwd()
file_path_output = os.path.join(dir_cur, "analysis_output")
file_path_input = os.path.join(dir_cur,'data')

df = pd.read_excel(file_path_output+ "/statistical_analysis_results_with_labels_Orthology.xlsx", index_col=0)


FileNotFoundError: [Errno 2] No such file or directory: '/home/user/Desktop/윤인수/correlation/analysis_output/statistical_analysis_results_with_labels_Orthology.xlsx'