In [28]:
import pandas as pd

# Load the abundance table. The indexing below relies on this layout: the first
# column is an identifier column, the first data row holds each sample's
# time-point label, and every remaining row holds abundances per sample.
file_path = 'taxa_abund.csv'
taxa_data = pd.read_csv(file_path)

# Time-point label of every sample column (first row, identifier column dropped).
time_points = taxa_data.iloc[0, 1:]
assert time_points.notna().all(), "every sample column needs a time-point label"

# Abundance values only, coerced to numbers; entries that cannot be parsed
# become NaN.
otu_data = taxa_data.iloc[1:, 1:].apply(pd.to_numeric, errors='coerce')

# Relabel each sample column with its time point. Labels repeat whenever
# several samples share a time point.
otu_data.columns = time_points

# Distinct time points present in the dataset.
unique_time_points = time_points.unique()

# Number of OTUs with abundance > 0 in each sample (NaN compares as False, so
# coerced entries are not counted). Counting over the whole frame keeps exactly
# one value per sample column; selecting `otu_data[label]` would instead return
# every column carrying that label, or a single Series when the label occurs
# only once, which makes the downstream averaging depend on the group size.
otus_per_sample = (otu_data > 0).sum(axis=0)
samples_by_time_point = otus_per_sample.groupby(level=0, sort=False)

# Per-sample OTU counts for every time point found in the data.
otu_counts_per_time_point = {
    time_point: counts.tolist() for time_point, counts in samples_by_time_point
}

# Mean number of OTUs per sample for every time point found in the data.
average_otus_final = samples_by_time_point.mean().to_dict()

# Time points reported for this dataset.
mean_p11 = average_otus_final['P11']
mean_p14 = average_otus_final['P14']
mean_p28 = average_otus_final['P28']

print("Mean OTUs for P11:", mean_p11)
print("Mean OTUs for P14:", mean_p14)
print("Mean OTUs for P28:", mean_p28)

Mean OTUs for P11: 29.533333333333335
Mean OTUs for P14: 28.833333333333332
Mean OTUs for P28: 18.0


In [27]:
import pandas as pd

# Load the colon abundance table. The indexing below relies on this layout: the
# first column is an identifier column, the first data row holds each sample's
# time-point label, and every remaining row holds abundances per sample.
file_path = 'taxa_abund_colon.csv'
taxa_data = pd.read_csv(file_path)

# Time-point label of every sample column (first row, identifier column dropped).
time_points = taxa_data.iloc[0, 1:]
assert time_points.notna().all(), "every sample column needs a time-point label"

# Abundance values only, coerced to numbers; entries that cannot be parsed
# become NaN.
otu_data = taxa_data.iloc[1:, 1:].apply(pd.to_numeric, errors='coerce')

# Relabel each sample column with its time point. Labels repeat whenever
# several samples share a time point.
otu_data.columns = time_points

# Distinct time points present in the dataset.
unique_time_points = time_points.unique()

# Number of OTUs with abundance > 0 in each sample (NaN compares as False, so
# coerced entries are not counted). Counting over the whole frame keeps exactly
# one value per sample column; selecting `otu_data[label]` would instead return
# every column carrying that label, or a single Series when the label occurs
# only once, which makes the downstream averaging depend on the group size.
otus_per_sample = (otu_data > 0).sum(axis=0)
samples_by_time_point = otus_per_sample.groupby(level=0, sort=False)

# Per-sample OTU counts for every time point found in the data.
otu_counts_per_time_point = {
    time_point: counts.tolist() for time_point, counts in samples_by_time_point
}

# Mean number of OTUs per sample for every time point found in the data.
average_otus_final = samples_by_time_point.mean().to_dict()

# Time points reported for this dataset.
mean_p8 = average_otus_final['P8']
mean_p11 = average_otus_final['P11']
mean_p14 = average_otus_final['P14']
mean_p28 = average_otus_final['P28']

print("Mean OTUs for P8:", mean_p8)
print("Mean OTUs for P11:", mean_p11)
print("Mean OTUs for P14:", mean_p14)
print("Mean OTUs for P28:", mean_p28)

Mean OTUs for P8: 16.666666666666668
Mean OTUs for P11: 15.533333333333333
Mean OTUs for P14: 26.25
Mean OTUs for P28: 33.666666666666664
