# Transpose and merge miRNA data
Run this notebook to transpose and merge the miRNA data from the following files:
* `Tf_8_weeks.xlsx`
* `Sham_8_weeks.xlsx`
* `Tf_16_weeks.xlsx`
* `Sham_16_weeks.xlsx`

Writes the merged result to `../data/transposed_Tf_miRNA.xlsx`.

In [1]:
import pandas as pd
import numpy as np

In [2]:
def transpose_df(file_name, cohort=None, max_male_number=5):
    """Load one cohort workbook and pivot it to one row per mouse.

    The workbook is expected to hold one row per miRNA with the text columns
    ``miR`` and ``accession``; every other column is treated as one sample
    whose header ends in ``-<mouse number>`` (for example ``GV-8W-3``).

    Parameters
    ----------
    file_name : str or path-like
        Excel file to read with ``pd.read_excel``.
    cohort : str, optional
        Cohort label. When given (and non-empty), ``cohort`` and
        ``cohort_key`` (``"<cohort>_<mouse number>"``) columns are prepended.
    max_male_number : int, default 5
        Highest mouse number that is male; mice numbered above it are
        flagged as female. The default keeps the original rule
        "mice 1-5 are male, rest are female".

    Returns
    -------
    pandas.DataFrame
        One row per sample with the columns ``[cohort_key, cohort,]
        mouse_number, Name, <one column per miRNA>, female``. miRNA columns
        are labelled ``"<miR> (<accession>)"``.
    """
    df = pd.read_excel(file_name)

    # One label per miRNA, e.g. "mmu-miR-9 (MIMAT0000142)".
    mir_labels = df['miR'].str.cat(' (' + df['accession'] + ')')

    # Transpose only the sample columns: transposing together with a text
    # label column would force every count column to ``object`` dtype.
    tx_df = df.drop(columns=['miR', 'accession']).transpose()
    tx_df.columns = mir_labels.to_list()
    # If the sample columns had mixed dtypes the transpose is still ``object``;
    # let pandas pick the best dtype per miRNA column.
    tx_df = tx_df.infer_objects()

    # The sample headers become a regular "Name" column.
    tx_df = tx_df.rename_axis('Name').reset_index()
    tx_df['Name'] = tx_df['Name'].astype(str)

    # The mouse number is whatever follows the last '-' in the sample name.
    # It is kept as text so it can be joined into the cohort key below.
    mouse_numbers = tx_df['Name'].str.rsplit('-', n=1).str[-1]
    tx_df.insert(0, 'mouse_number', mouse_numbers)

    # Sex flag: 0 = male (numbers up to max_male_number), 1 = female.
    tx_df['female'] = np.where(
        tx_df['mouse_number'].astype(int) > max_male_number, 1, 0
    )

    if cohort:
        tx_df.insert(0, 'cohort', cohort)

        # Key that stays unique once several cohorts are concatenated.
        cohort_keys = [
            f'{label}_{number}'
            for label, number in zip(tx_df['cohort'], tx_df['mouse_number'])
        ]
        tx_df.insert(0, 'cohort_key', cohort_keys)

    return tx_df

In [3]:
# Each input workbook is paired with the cohort label used in the merged
# table; unzipping the pairs yields the two parallel lists used below.
file_names, cohort_names = map(list, zip(
    ('../data/Tf_8_weeks.xlsx', 'tf_8_weeks'),
    ('../data/Sham_8_weeks.xlsx', 'sham_8_weeks'),
    ('../data/Tf_16_weeks.xlsx', 'tf_16_weeks'),
    ('../data/Sham_16_weeks.xlsx', 'sham_16_weeks'),
))

In [4]:
# Sanity check: show which cohort label goes with which workbook.
[*zip(file_names, cohort_names)]

[('../data/Tf_8_weeks.xlsx', 'tf_8_weeks'),
 ('../data/Sham_8_weeks.xlsx', 'sham_8_weeks'),
 ('../data/Tf_16_weeks.xlsx', 'tf_16_weeks'),
 ('../data/Sham_16_weeks.xlsx', 'sham_16_weeks')]

In [5]:
# # Debugging aid (disabled): process the files one at a time to see which one fails
# for (file_name, cohort_name) in zip(file_names, cohort_names):
#     print(file_name, cohort_name)
#     transpose_df(file_name, cohort_name)

In [6]:
# One transposed frame per cohort, in the same order as file_names.
dfs = list(map(transpose_df, file_names, cohort_names))

In [7]:
# Stack the cohorts into one table. Each per-cohort frame is indexed from 0,
# so renumber the rows to avoid duplicate index labels in the merged frame.
final_df = pd.concat(dfs, ignore_index=True)
len(final_df)

40

In [8]:
# Preview the first five merged rows.
final_df.head(5)

Unnamed: 0,cohort_key,cohort,mouse_number,Name,mcmv-miR-M23-1-3p (MIMAT0005543),mcmv-miR-M23-1-5p (MIMAT0005542),mcmv-miR-M23-2 (MIMAT0005545),mcmv-miR-M44-1 (MIMAT0005546),mcmv-miR-M55-1 (MIMAT0005547),mcmv-miR-M87-1 (MIMAT0005550),...,mmu-miR-883b-5p (MIMAT0004850),mmu-miR-9 (MIMAT0000142),mmu-miR-92a (MIMAT0000539),mmu-miR-92b (MIMAT0004899),mmu-miR-93 (MIMAT0000540),mmu-miR-96 (MIMAT0000541),mmu-miR-98 (MIMAT0000545),mmu-miR-99a (MIMAT0000131),mmu-miR-99b (MIMAT0000132),female
0,tf_8_weeks_1,tf_8_weeks,1,GV-8W-1,66,58,19,29,43,27,...,59,119,24,28,223,287,683,4114,279,0
1,tf_8_weeks_2,tf_8_weeks,2,GV-8W-2,82,87,24,36,65,47,...,60,185,27,14,210,221,662,4357,268,0
2,tf_8_weeks_3,tf_8_weeks,3,GV-8W-3,88,73,22,36,48,46,...,73,233,27,22,264,318,1037,4748,310,0
3,tf_8_weeks_4,tf_8_weeks,4,GV-8W-4,26,26,43,18,19,9,...,16,14,11,15,12,16,19,14,20,0
4,tf_8_weeks_5,tf_8_weeks,5,GV-8W-5,120,94,24,24,57,36,...,84,139,35,20,125,186,837,3606,248,0


In [9]:
# Write the merged table next to the input workbooks; the row index is
# not exported.
final_df.to_excel(
    excel_writer='../data/transposed_Tf_miRNA.xlsx',
    engine='openpyxl',
    index=False,
)