In [1]:
import pandas as pd
import os

In [None]:
# Function to read CSV files for a specific test and bootstrap iteration
def read_csv_for_test(bootstrap_number, test_type, base_path):
    """Load the results CSV of one bootstrap iteration for one test type.

    The file is looked up at
    ``<base_path>/bootstrap_<bootstrap_number>_<test_type>_results.csv``.

    Parameters
    ----------
    bootstrap_number : int or str
        Bootstrap iteration identifier embedded in the file name.
    test_type : str
        Test identifier embedded in the file name (e.g. ``'ttest'``).
    base_path : str or os.PathLike
        Directory containing the file, with or without a trailing separator.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file.

    Raises
    ------
    Whatever ``pd.read_csv`` raises for the resolved path (for example
    ``FileNotFoundError`` when the file does not exist).
    """
    file_name = f'bootstrap_{bootstrap_number}_{test_type}_results.csv'
    # os.path.join only inserts a separator when one is missing, so a
    # base_path without a trailing slash no longer yields a glued-together
    # path such as '/some/dirbootstrap_1_ttest_results.csv'.
    full_path = os.path.join(base_path, file_name)
    return pd.read_csv(full_path)

# Function to get common features from all bootstrap files for a given test
def get_common_features_individual(test_type, base_path, num_bootstraps=3):
    """Keep only the features present in every bootstrap result of a test.

    Bootstrap files ``1 .. num_bootstraps`` are loaded through
    ``read_csv_for_test``. The first column of each file is treated as the
    feature name; the intersection of those names across all files decides
    which rows survive.

    Parameters
    ----------
    test_type : str
        Test identifier forwarded to ``read_csv_for_test``.
    base_path : str
        Directory forwarded to ``read_csv_for_test``.
    num_bootstraps : int, optional
        Number of bootstrap iterations to intersect, numbered from 1.
        Defaults to 3, the value that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Rows of the *first* bootstrap DataFrame whose first-column value
        occurs in every bootstrap file. All columns and the original index
        of that first DataFrame are retained; values from the other
        bootstrap files are not included.

    Raises
    ------
    ValueError
        If ``num_bootstraps`` is smaller than 1.
    """
    if num_bootstraps < 1:
        # Without this guard set.intersection() would be called with no
        # arguments and fail with an unhelpful TypeError.
        raise ValueError('num_bootstraps must be at least 1')

    bootstrap_frames = [
        read_csv_for_test(iteration, test_type, base_path)
        for iteration in range(1, num_bootstraps + 1)
    ]

    # The first column is assumed to hold the feature names.
    feature_sets = [set(frame.iloc[:, 0]) for frame in bootstrap_frames]
    common_features = set.intersection(*feature_sets)

    # Use the first bootstrap file as the source of the returned rows.
    reference = bootstrap_frames[0]
    return reference[reference.iloc[:, 0].isin(common_features)]

# Function to combine common features across multiple test types into one DataFrame
def combine_common_features(base_path, files_to_combine):
    """Intersect the ``'Feature'`` columns of several CSV files.

    Parameters
    ----------
    base_path : str
        Directory that contains every file in ``files_to_combine``.
    files_to_combine : iterable of str
        File names (joined onto ``base_path``) whose ``'Feature'`` columns
        are intersected. Each file must have a column named ``'Feature'``.

    Returns
    -------
    pandas.DataFrame
        A single-column DataFrame (``'Feature'``) listing each feature that
        occurs in every file exactly once. Rows follow the order of first
        appearance in the first file, so the result is reproducible across
        runs instead of depending on set iteration order.

    Raises
    ------
    ValueError
        If ``files_to_combine`` yields no file names.
    KeyError
        If a file has no ``'Feature'`` column.
    """
    # One list of feature names per file, in file order.
    feature_columns = []
    for file_name in files_to_combine:
        full_path = os.path.join(base_path, file_name)
        feature_columns.append(pd.read_csv(full_path)['Feature'].tolist())

    if not feature_columns:
        # set.intersection() with no operands would raise an obscure TypeError.
        raise ValueError('files_to_combine must contain at least one file name')

    common_features = set.intersection(*(set(column) for column in feature_columns))

    # dict.fromkeys de-duplicates while keeping first-seen order, which gives
    # a deterministic row order for the saved output.
    ordered_common = [
        feature for feature in dict.fromkeys(feature_columns[0])
        if feature in common_features
    ]
    return pd.DataFrame(ordered_common, columns=['Feature'])

In [None]:
# Base paths where the CSV files are located
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a single configurable data directory.
base_path_individual = '/home/aghasemi/CompBio481/feat_select/feat_select/feature_select_res_filter_individual/'
base_path_combined = '/home/aghasemi/CompBio481/feat_select/feat_select/feature_select_res_filter_together/'

# Test types to be processed individually
test_types = ['ttest', 'mutual_score', 'pearson_corr', 'relieff', 'variance_thres']

# Create the output directory up front so to_csv does not fail on a fresh checkout
os.makedirs(base_path_combined, exist_ok=True)

# Process each test type to find common features across bootstrap iterations
for test_type in test_types:
    common_features_df = get_common_features_individual(test_type, base_path_individual)
    # Build the output path with os.path.join so it does not depend on
    # base_path_combined ending with a separator.
    output_path = os.path.join(base_path_combined, f'common_features_{test_type}_results.csv')
    # Save the filtered DataFrame; the test type is part of the file name
    common_features_df.to_csv(output_path, index=False)

In [None]:
# List of files to be combined for the final intersection
# NOTE(review): the per-test loop in this notebook writes
# 'common_features_ttest_results.csv', but the list below reads
# 'common_features_ttest_bh_corrected_results.csv'. Confirm that the
# BH-corrected file is produced by a separate step; otherwise this name is stale.
files_to_combine = [
    'common_features_mutual_score_results.csv',
    'common_features_pearson_corr_results.csv',
    'common_features_relieff_results.csv',
    'common_features_variance_thres_results.csv',
    'common_features_ttest_bh_corrected_results.csv'
]

# Combine common features across multiple test types
common_features_df = combine_common_features(base_path_combined, files_to_combine)

# Save the combined DataFrame to a new CSV file.
# output_file is an absolute path, so joining it onto base_path_combined had no
# effect (os.path.join discards earlier components when a later one is
# absolute). Write to output_file directly to make the destination explicit.
output_file = '/home/aghasemi/CompBio481/feat_select/feat_select/final_feat_select_res/combined_common_features.csv'
os.makedirs(os.path.dirname(output_file), exist_ok=True)
common_features_df.to_csv(output_file, index=False)