In [4]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Draw a one-line text progress bar; call it once per loop iteration.

    Each call rewrites the current output line (the text starts with "\\r").
    When iteration == total an extra newline is printed so later output
    starts on a fresh line.

    An empty job (total == 0) is drawn as 100% complete instead of raising
    ZeroDivisionError, and the filled part of the bar is clamped to
    [0, length] so the bar keeps a constant width even if iteration falls
    outside 0..total.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\\r", "\\r\\n") (Str)
    """
    if total:
        fraction = iteration / float(total)
        # Integer arithmetic on purpose: length * fraction can round down
        # by one cell because of floating-point error.
        filledLength = int(length * iteration // total)
    else:
        # Nothing to do counts as already finished.
        fraction = 1.0
        filledLength = length

    # Keep the bar exactly `length` characters wide.
    filledLength = max(0, min(length, filledLength))

    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total:
        print()

In [10]:
# pip install -U tqdm

Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
     ---------------------------------------- 78.5/78.5 kB 2.2 MB/s eta 0:00:00
Installing collected packages: tqdm
  Attempting uninstall: tqdm
    Found existing installation: tqdm 4.50.2
    Uninstalling tqdm-4.50.2:
      Successfully uninstalled tqdm-4.50.2
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import pandas as pd
import numpy as np
import random
import os
from tqdm.auto import tqdm # for progress bar


In [2]:
# Rename Raw Sample Files    
# samplePath = "./GEO Samples/GSE106817/Samples/GSE106817_RAW"
# sampleFiles = os.listdir(samplePath)
# l = len(sampleFiles)
# for i in range(l): 
#     fileName = sampleFiles[i][:10]
#     oldName = samplePath +"/"+ sampleFiles[i]
  
#     sampleFiles[i] = sampleFiles[:10]
#     os.rename(oldName, samplePath+"/"+fileName+".txt")
       
    
#     # print progress bar
#     printProgressBar(i + 1, l, prefix = 'Progress:', suffix = 'Complete', length = 50)   


In [21]:
# Create the empty base Dataframe (header only) that all sample files are merged into

samplePath = "./GEO Samples/GSE106817/Samples/GSE106817_RAW"

# Keep only the per-sample files: their names start with the "GSM" accession
# whose first 10 characters are used as the sample ID. The merged CSV export
# is written into this same directory, so an unfiltered listing would pick it
# up as a "sample" on the next run. os.listdir() returns names in arbitrary
# order, hence the sort.
sampleFiles = sorted(name for name in os.listdir(samplePath) if name.startswith("GSM"))
if not sampleFiles:
    raise FileNotFoundError(f"No GSM sample files found in {samplePath!r}")

# The first sample file supplies the miRNA IDs used as column names.
file_name = samplePath+"/"+sampleFiles[0]
mirna_ids = []
with open(file_name, 'r', encoding='ISO-8859-1') as file:
    for line_no, line in enumerate(file):
        # The first nine lines are skipped, as in the original parsing rule.
        if line_no <= 8:
            continue
        fields = line.strip().split('\t')
        # A row counts only if it has both the ID field (index 5) and the
        # value field (index 6) and the ID starts with "MIMA"; shorter lines
        # are ignored instead of raising IndexError.
        if len(fields) > 6 and fields[5].startswith("MIMA"):
            mirna_ids.append(fields[5])

# Header-only frame: an "ID" column for the sample ID, then one column per miRNA ID.
main_df = pd.DataFrame(columns=["ID", *mirna_ids])

print(main_df)



Empty DataFrame
Columns: [ID, MIMAT0004501, MIMAT0002844, MIMAT0002843, MIMAT0002824, MIMAT0002823, MIMAT0002807, MIMAT0002806, MIMAT0001635, MIMAT0001631, MIMAT0005890, MIMAT0005889, MIMAT0005873, MIMAT0005872, MIMAT0004591, MIMAT0004590, MIMAT0004562, MIMAT0004518, MIMAT0004517, MIMAT0016907, MIMAT0016901, MIMAT0014994, MIMAT0016913, MIMAT0011777, MIMAT0011775, MIMAT0005924, MIMAT0005923, MIMAT0005906, MIMAT0005905, MIMAT0018939, MIMAT0019949, MIMAT0019764, MIMAT0019200, MIMAT0019837, MIMAT0019204, MIMAT0019811, MIMAT0019358, MIMAT0015068, MIMAT0015066, MIMAT0019017, MIMAT0019064, MIMAT0021125, MIMAT0021124, MIMAT0021079, MIMAT0021046, MIMAT0019037, MIMAT0019981, MIMAT0019982, MIMAT0019980, MIMAT0019825, MIMAT0019725, MIMAT0026555, MIMAT0026554, MIMAT0026472, MIMAT0022700, MIMAT0025846, MIMAT0025845, MIMAT0022282, MIMAT0022281, MIMAT0022266, MIMAT0022265, MIMAT0027560, MIMAT0027559, MIMAT0027544, MIMAT0027543, MIMAT0027528, MIMAT0027527, MIMAT0027512, MIMAT0027511, MIMAT0026617, MIMA

In [23]:
# Parse every sample file into a one-row Dataframe and merge all rows into the main Dataframe
def _read_spot_medians(path):
    """
    Collect the miRNA IDs and their values from one raw sample file.

    The first nine lines of the file are skipped. Every remaining line is
    stripped and split on tabs; a line is kept when it has at least seven
    fields and the field at index 5 starts with "MIMA". The field at index 5
    is taken as the ID and the field at index 6 as its value (the column the
    original code labelled 'SpotMedian'). Values stay strings, exactly as
    read from the file.

    @params:
        path    - Required  : path of the sample file (Str)
    @returns:
        (ids, values) : two equally long lists of strings
    """
    ids, values = [], []
    with open(path, 'r', encoding='ISO-8859-1') as file:
        for line_no, line in enumerate(file):
            if line_no <= 8:
                continue
            # NOTE(review): strip() also removes leading/trailing tabs, so the
            # field positions assume the first field is never empty - confirm
            # against the raw file format.
            fields = line.strip().split('\t')
            if len(fields) > 6 and fields[5].startswith("MIMA"):
                ids.append(fields[5])
                values.append(fields[6])
    return ids, values


sample_rows = []
skipped_files = []
for file_name in tqdm(sampleFiles, desc="Merging samples"):
    ids, values = _read_spot_medians(samplePath+"/"+file_name)
    if not ids:
        # Not a sample file (or an empty one): report it below instead of crashing.
        skipped_files.append(file_name)
        continue
    # One row per sample: the sample ID (first 10 characters of the file
    # name) followed by one value per miRNA ID.
    sample_rows.append(pd.DataFrame([[file_name[:10], *values]], columns=["ID", *ids]))

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call; a single concat is linear. Starting from main_df.iloc[:0]
# (header only) makes the cell safe to re-run without duplicating rows.
main_df = pd.concat([main_df.iloc[:0], *sample_rows], ignore_index=True)

if skipped_files:
    print(f"Skipped {len(skipped_files)} file(s) without miRNA rows: {skipped_files}")


In [26]:
# Save the merged table as CSV without the row index.
# The target is the same directory that holds the raw sample files.
filepath = "./GEO Samples/GSE106817/Samples/GSE106817_RAW/GSE106817_Raw_Sample_files.csv"
main_df.to_csv(path_or_buf=filepath, index=False)

In [24]:
# Show the merged table: one row per sample, an ID column plus one column per miRNA ID.
main_df

ID,ID.1,MIMAT0004501,MIMAT0002844,MIMAT0002843,MIMAT0002824,MIMAT0002823,MIMAT0002807,MIMAT0002806,MIMAT0001635,MIMAT0001631,...,MIMAT0022965,MIMAT0022948,MIMAT0027678,MIMAT0027677,MIMAT0027662,MIMAT0027661,MIMAT0027646,MIMAT0027645,MIMAT0027630,MIMAT0027629
0,GSM2850709,73.666741,72.111405,71.341913,75.194389,70.870683,72.029542,70.795544,70.153202,74.788287,...,67.399068,73.617964,75.941798,66.573414,69.513890,72.661265,69.182798,72.057357,72.742872,69.833113
1,GSM2850710,70.683921,74.243225,76.891432,75.592536,73.159304,74.399045,77.070311,74.968704,81.882352,...,67.822738,71.524435,71.444863,70.872130,68.868805,79.387575,70.712644,72.604690,73.222807,71.760760
2,GSM2850711,70.815732,69.506095,71.075206,75.207255,71.427502,73.715987,70.388913,70.428831,105.297610,...,67.395087,73.119943,74.569598,67.270232,72.201108,72.469498,68.327205,71.467505,70.433565,76.053390
3,GSM2850712,77.400533,77.375505,79.929305,84.612952,79.089451,78.010816,76.682754,75.165586,93.018896,...,75.539965,83.441202,81.890179,75.251411,77.848652,77.750365,77.528941,82.008421,81.057614,78.954433
4,GSM2850713,81.754654,81.014768,85.754313,89.885890,78.232733,81.896997,78.948887,81.799711,101.522312,...,77.301863,91.620314,89.056055,77.770712,76.650729,84.824454,76.256971,83.572199,86.654459,85.623393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4041,GSM2854750,68.165653,76.438012,69.763515,83.423606,71.823781,78.551299,71.302485,69.595249,73.379120,...,66.899756,76.107287,100.439793,69.765538,73.999936,86.242170,69.893890,81.487061,83.515956,70.265179
4042,GSM2854751,66.439675,68.438569,66.390121,78.855693,68.676941,73.217761,68.189975,68.411612,71.108768,...,64.828639,71.473479,92.221666,64.662081,68.975581,77.441213,68.818140,79.015128,74.671355,69.446798
4043,GSM2854752,68.734171,71.477456,72.761171,90.299150,69.783158,74.837632,71.256230,68.020635,68.305375,...,68.466628,83.446271,122.070894,67.261573,70.684709,90.028699,68.846277,82.703459,92.025870,72.173795
4044,GSM2854753,71.885898,84.208434,76.653755,92.902488,75.855554,86.572369,73.805025,76.478828,134.689123,...,69.655233,98.322161,140.531747,73.738100,85.418333,114.831968,76.294256,92.729231,111.937078,81.112643
