In [1]:
import os, glob
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
!pip install gender-guesser
import gender_guesser.detector as gender
from functools import reduce
pd.set_option('display.max_rows', 1000)



# Aggregate all variables

## Finance

In [2]:
## dependent variables and control variables
category = 'fin'
info = pd.read_csv('fin/x900a.csv', sep=',')
info.rename(columns={'Id': 'Video_ID'}, inplace=True)
## duration
dur_df = pd.read_csv('fin/duration_freq5.csv', sep='\t')
## timing
tim_df = pd.read_csv('fin/timing_3_freq5.csv', sep='\t')
## intensity
inten_df = pd.read_csv('fin/intensity_freq5.csv', sep='\t')
## asymmetry
asym_df = pd.read_csv('fin/asymmetry_freq5.csv', sep='\t')
## beauty
beauty_df = pd.read_csv('fin/beauty_freq5.csv', sep='\t')
## video quality
qua_df = pd.read_csv('fin/video_quality.txt', sep='\t', header=None)
qua_df.columns = ['Video_ID', 'quality']

In [3]:
video_folders = ['video_1-100', 'video_101-200', 'video_201-300', 'video_301-350', 'video_351-400', 'video_401-500', \
                 'video_501-580', 'video_581-660', 'video_661-740', 'video_741-820', 'video_821-900']
video_paths = [f'{category}/{x}' for x in video_folders]
video_lookup = dict.fromkeys(video_folders)
for i, k in enumerate(video_lookup.keys()):
    video_lookup[k] = [os.path.splitext(x)[0] for x in os.listdir(f'/home/xyubl/Downloads/{k}')]
video_lookup_inv = {}
for k, v in video_lookup.items():
    for vid in v:
        video_lookup_inv[vid] = f'/home/xyubl/Downloads/{k}/{vid}.mp4'
path_df = pd.DataFrame(columns=['Video_ID', 'path'])
for k, v in tqdm(video_lookup_inv.items()):
    path_df.loc[len(path_df)] = [k, v]
path_df = path_df.astype({'Video_ID': int})

100%|██████████| 900/900 [00:01<00:00, 556.60it/s]


In [4]:
## gender estimation and manually correction: do not run
if False:
    d = gender.Detector()
    gender_df = pd.DataFrame(columns=['Name_of_Tch_1', 'gender'])
    for name in tqdm(set(info.Name_of_Tch_1.values.tolist())):
        est = d.get_gender(name.split(' ')[0])
        gender_df.loc[len(gender_df)] = [name, est]
    gender_df = pd.merge(info[['Video_ID', 'Name_of_Tch_1']], gender_df[['Name_of_Tch_1', 'gender']], on='Name_of_Tch_1')
    df = pd.merge(gender_df, path_df, on='Video_ID', how='left')
    df.to_excel('fin/gender_estimation.xlsx', index=False)
    gender_df = pd.read_excel('fin/gender.xlsx')
    gender_df.to_csv('fin/gender.csv', sep='\t', index=False)

In [5]:
## gender
gender_df = pd.read_csv('fin/gender.csv', sep='\t')

In [6]:
## video length
if not os.path.exists('fin/video_length.csv'):
    video_folders = ['video_1-100', 'video_101-200', 'video_201-300', 'video_301-350', 'video_351-400', 'video_401-500', \
                         'video_501-580', 'video_581-660', 'video_661-740', 'video_741-820', 'video_821-900']
    video_paths = [f'/home/xyubl/Downloads/{x}' for x in video_folders]
    len_df = pd.DataFrame(columns=['Video_ID', 'video_length'])
    for video_path in video_paths:
        video_files = glob.glob(f'{video_path}/*.mp4')
        for video_file in tqdm(video_files):
            video_id = int(video_file.split('/')[-1].split('.')[0])
            video = cv2.VideoCapture(video_file)
            frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
            fps = video.get(cv2.CAP_PROP_FPS) 
            video_length = frames / fps
            len_df.loc[len(len_df)] = [video_id, video_length]
    len_df = len_df.astype({'Video_ID': int})
    len_df.to_csv('fin/video_length.csv', sep='\t', index=False)
len_df = pd.read_csv('fin/video_length.csv', sep='\t')

In [7]:
## filter information
filter_df = pd.read_csv('fin/video_filter.csv', sep='\t')
use_videos = filter_df.loc[(filter_df.showup_proportion>0.2) & (filter_df.frame_land_diff>3) & (filter_df.align_land_diff>3)].Video_ID.values.tolist()

In [8]:
## aggregate all variables
dfs = [info, dur_df[['Video_ID', 'showup', 'smile_proportion']], tim_df[['Video_ID', 'beginning', 'middle', 'end']], 
       inten_df[['Video_ID', 'Intensity']], asym_df[['Video_ID', 'asymmetry']], beauty_df[['Video_ID', 'beauty']],
       qua_df, len_df, gender_df, path_df]
var_df = reduce(lambda left, right: pd.merge(left, right, on='Video_ID', how='outer'), dfs)
var_df['preview'] = (~var_df.path.isna().values).astype(int)
showup_arr = []
for _, row in var_df.iterrows():
    if row.preview == 0:
        showup_arr.append(np.nan)
    else:
        showup_arr.append(int(row.Video_ID in use_videos))
var_df.showup = showup_arr
var_df = var_df.sort_values('Video_ID').reset_index(drop=True)
var_df['male'] = [1 if x in ['male', 'unknown'] else 0 for x in var_df.gender]
var_df['description_length'] = var_df['Blurb'].apply(lambda x: len(x.split(' ')) if type(x) is str else 0)
var_df['log_description_length'] = [np.log(x) if x != 0 else 0 for x in var_df['description_length']]
var_df.to_csv('fin/variables.csv', sep='\t', index=False)

In [9]:
## preview video
print(f'Presence of preview video: {len(var_df)}')
print(f'Video with teacher face: {len(var_df)}, {np.mean(var_df.showup):.3f}, {np.std(var_df.showup):.3f}')
print(f'Video quality: {len(var_df)}, {np.mean(var_df.quality):.3f}, {np.std(var_df.quality):.3f}, {np.min(var_df.quality):.3f}, {np.max(var_df.quality):.3f}')
print(f'Video length: {len(var_df)}, {np.mean(var_df.video_length):.3f}, {np.std(var_df.video_length):.3f}, {np.min(var_df.video_length):.3f}, {np.max(var_df.video_length):.3f}')
print(f'Average smiling duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].smile_proportion):.3f}, {np.std(var_df.loc[var_df.showup==1].smile_proportion):.3f},\
        {np.min(var_df.loc[var_df.showup==1].smile_proportion):.3f}, {np.max(var_df.loc[var_df.showup==1].smile_proportion):.3f}')
print(f'Beginning duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].beginning):.3f}, {np.std(var_df.loc[var_df.showup==1].beginning):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].beginning):.3f}, {np.max(var_df.loc[var_df.showup==1].beginning):.3f}')
print(f'Middle duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].middle):.3f}, {np.std(var_df.loc[var_df.showup==1].middle):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].middle):.3f}, {np.max(var_df.loc[var_df.showup==1].middle):.3f}')
print(f'End duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].end):.3f}, {np.std(var_df.loc[var_df.showup==1].end):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].end):.3f}, {np.max(var_df.loc[var_df.showup==1].end):.3f}')
print(f'Average smiling intensity: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].Intensity):.3f}, {np.std(var_df.loc[var_df.showup==1].Intensity):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].Intensity):.3f}, {np.max(var_df.loc[var_df.showup==1].Intensity):.3f}')
print(f'Facial symmetry: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].asymmetry):.3f}, {np.std(var_df.loc[var_df.showup==1].asymmetry):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].asymmetry):.3f}, {np.max(var_df.loc[var_df.showup==1].asymmetry):.3f}')


Presence of preview video: 900
Video with teacher face: 900, 0.401, 0.490
Video quality: 900, 0.725, 0.081, 0.379, 0.930
Video length: 900, 196.325, 204.122, 10.067, 2132.708
Average smiling duration: 361, 0.103, 0.143,        0.000, 0.840
Beginning duration: 361, 0.112, 0.164,         0.000, 1.000
Middle duration: 361, 0.088, 0.138,         0.000, 0.889
End duration: 361, 0.108, 0.157,         0.000, 0.928
Average smiling intensity: 361, 0.401, 0.126,         0.113, 0.771
Facial symmetry: 361, 0.629, 0.235,         0.065, 1.326


In [10]:
## teacher characteristics
print(f'Male: {len(var_df)}, {np.nanmean(var_df.male):.3f}, {np.nanstd(var_df.male):.3f}, {np.nanmin(var_df.male):.3f}, {np.nanmax(var_df.male):.3f}')
print(f'Teacher rating: {len(var_df)}, {np.mean(var_df.Rating_of_Tch_1):.3f}, {np.std(var_df.Rating_of_Tch_1):.3f}, {np.min(var_df.Rating_of_Tch_1):.3f}, {np.max(var_df.Rating_of_Tch_1):.3f}')
print(f'Log number of students: {len(var_df)}, {np.nanmean(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanstd(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanmin(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanmax(np.log(var_df.Students_of_Tch_1)):.3f}')
print(f'Number of courses: {len(var_df)}, {np.mean(var_df.Courses_of_Tch_1):.3f}, {np.std(var_df.Courses_of_Tch_1):.3f}, {np.min(var_df.Courses_of_Tch_1):.3f}, {np.max(var_df.Courses_of_Tch_1):.3f}')


Male: 900, 0.913, 0.281, 0.000, 1.000
Teacher rating: 900, 4.318, 0.280, 2.567, 4.924
Log number of students: 900, 10.020, 1.998, 5.204, 14.745
Number of courses: 900, 23.764, 31.472, 1.000, 220.000


In [11]:
## course characteristics
print(f'Log length of course description: {len(var_df)}, {np.nanmean(var_df.log_description_length):.3f}, {np.nanstd(var_df.log_description_length):.3f}, {np.nanmin(var_df.log_description_length):.3f}, {np.nanmax(var_df.log_description_length):.3f}')
print(f'Selling price: {len(var_df)}, {np.mean(var_df.Selling_Price):.3f}, {np.std(var_df.Selling_Price):.3f}, {np.min(var_df.Selling_Price):.3f}, {np.max(var_df.Selling_Price):.3f}')
print(f'Log number of student enrollment: {len(var_df)}, {np.nanmean(np.log(var_df.Students_Num)):.3f}, {np.nanstd(np.log(var_df.Students_Num)):.3f}, {np.nanmin(np.log(var_df.Students_Num)):.3f}, {np.nanmax(np.log(var_df.Students_Num)):.3f}')
print(f'Course rating: {len(var_df)}, {np.nanmean(var_df.Rating):.3f}, {np.nanstd(var_df.Rating):.3f}, {np.nanmin(var_df.Rating):.3f}, {np.nanmax(var_df.Rating):.3f}')


Log length of course description: 900, 2.664, 0.366, 0.693, 3.258
Selling price: 900, 17.940, 15.790, 0.000, 199.990
Log number of student enrollment: 900, 8.033, 1.381, 5.204, 12.647
Course rating: 900, 4.291, 0.390, 1.862, 5.000


## Personal development

In [12]:
## dependent variables and control variables
category = 'pd'
info = pd.read_excel('pd/x897.xlsx')
info.rename(columns={'Id': 'Video_ID'}, inplace=True)
## duration
dur_df = pd.read_csv('pd/duration_freq5.csv', sep='\t')
## timing
tim_df = pd.read_csv('pd/timing_3_freq5.csv', sep='\t')
## intensity
inten_df = pd.read_csv('pd/intensity_freq5.csv', sep='\t')
## asymmetry
asym_df = pd.read_csv('pd/asymmetry_freq5.csv', sep='\t')
## beauty
beauty_df = pd.read_csv('pd/beauty_freq5.csv', sep='\t')
## video quality
qua_df = pd.read_csv('pd/video_quality.txt', sep='\t', header=None)
qua_df.columns = ['Video_ID', 'quality']

In [13]:
video_folders = ['pd_1-100', 'pd_101-200', 'pd_201-300', 'pd_301-400', 'pd_401-500',
                 'pd_501-600', 'pd_601-700', 'pd_701-800', 'pd_801-897']
video_paths = [f'{category}/{x}' for x in video_folders]
video_lookup = dict.fromkeys(video_folders)
for i, k in enumerate(video_lookup.keys()):
    video_lookup[k] = [os.path.splitext(x)[0] for x in os.listdir(f'/home/xyubl/Downloads/{k}')]
video_lookup_inv = {}
for k, v in video_lookup.items():
    for vid in v:
        video_lookup_inv[vid] = f'/home/xyubl/Downloads/{k}/{vid}.mp4'
path_df = pd.DataFrame(columns=['Video_ID', 'path'])
for k, v in tqdm(video_lookup_inv.items()):
    path_df.loc[len(path_df)] = [k, v]
path_df = path_df.astype({'Video_ID': int})

100%|██████████| 897/897 [00:01<00:00, 572.04it/s]


In [14]:
## gender estimation and manually correction: do not run
if False:
    d = gender.Detector()
    gender_df = pd.DataFrame(columns=['Name_of_Tch_1', 'gender'])
    for name in tqdm(set(info.Name_of_Tch_1.values.tolist())):
        est = d.get_gender(name.split(' ')[0])
        gender_df.loc[len(gender_df)] = [name, est]
    gender_df = pd.merge(info[['Video_ID', 'Name_of_Tch_1']], gender_df[['Name_of_Tch_1', 'gender']], on='Name_of_Tch_1')
    df = pd.merge(gender_df, path_df, on='Video_ID', how='left')
    df.to_excel('pd/gender_estimation.xlsx', index=False)
    gender_df = pd.read_excel('pd/gender.xlsx')
    gender_df.to_csv('pd/gender.csv', sep='\t', index=False)

In [15]:
## gender
gender_df = pd.read_csv('pd/gender.csv', sep='\t')

In [16]:
## video length
if not os.path.exists('fin/video_length.csv'):
    video_folders = ['pd_1-100', 'pd_101-200', 'pd_201-300', 'pd_301-400', 'pd_401-500',
                     'pd_501-600', 'pd_601-700', 'pd_701-800', 'pd_801-897']
    video_paths = [f'/home/xyubl/Downloads/{x}' for x in video_folders]
    len_df = pd.DataFrame(columns=['Video_ID', 'video_length'])
    for video_path in video_paths:
        video_files = glob.glob(f'{video_path}/*.mp4')
        for video_file in tqdm(video_files):
            video_id = int(video_file.split('/')[-1].split('.')[0])
            video = cv2.VideoCapture(video_file)
            frames = video.get(cv2.CAP_PROP_FRAME_COUNT)
            fps = video.get(cv2.CAP_PROP_FPS) 
            video_length = frames / fps
            len_df.loc[len(len_df)] = [video_id, video_length]
    len_df = len_df.astype({'Video_ID': int})
    len_df.to_csv('pd/video_length.csv', sep='\t', index=False)
len_df = pd.read_csv('pd/video_length.csv', sep='\t')

In [17]:
## filter information
filter_df = pd.read_csv('pd/video_filter.csv', sep='\t')
use_videos = filter_df.loc[(filter_df.showup_proportion>0.2) & (filter_df.frame_land_diff>3) & (filter_df.align_land_diff>3)].Video_ID.values.tolist()


In [18]:
## aggregate all variables
dfs = [info, dur_df[['Video_ID', 'showup', 'smile_proportion']], tim_df[['Video_ID', 'beginning', 'middle', 'end']], 
       inten_df[['Video_ID', 'Intensity']], asym_df[['Video_ID', 'asymmetry']], beauty_df[['Video_ID', 'beauty']],
       qua_df, len_df, gender_df, path_df]
var_df = reduce(lambda left, right: pd.merge(left, right, on='Video_ID', how='outer'), dfs)
var_df['preview'] = (~var_df.path.isna().values).astype(int)
showup_arr = []
for _, row in var_df.iterrows():
    if row.preview == 0:
        showup_arr.append(np.nan)
    else:
        showup_arr.append(int(row.Video_ID in use_videos))
var_df.showup = showup_arr
var_df = var_df.sort_values('Video_ID').reset_index(drop=True)
var_df['male'] = [1 if x in ['male', 'unknown'] else 0 for x in var_df.gender]
var_df['description_length'] = var_df['Blurb'].apply(lambda x: len(x.split(' ')) if type(x) is str else 0)
var_df['log_description_length'] = [np.log(x) if x != 0 else 0 for x in var_df['description_length']]
var_df.to_csv('pd/variables.csv', sep='\t', index=False)

In [19]:
## preview video
print(f'Presence of preview video: {sum(var_df.preview)}')
print(f'Video with teacher face: {sum(var_df.preview)}, {np.mean(var_df.showup):.3f}, {np.std(var_df.showup):.3f}')
print(f'Video quality: {sum(var_df.preview)}, {np.nanmean(var_df.quality):.3f}, {np.nanstd(var_df.quality):.3f}, {np.nanmin(var_df.quality):.3f}, {np.nanmax(var_df.quality):.3f}')
print(f'Video length: {sum(var_df.preview)}, {np.nanmean(var_df.video_length):.3f}, {np.nanstd(var_df.video_length):.3f}, {np.nanmin(var_df.video_length):.3f}, {np.nanmax(var_df.video_length):.3f}')
print(f'Average smiling duration: {len(var_df.loc[var_df.showup==1]):.3f}, {np.mean(var_df.loc[var_df.showup==1].smile_proportion):.3f}, {np.std(var_df.loc[var_df.showup==1].smile_proportion):.3f},\
        {np.min(var_df.loc[var_df.showup==1].smile_proportion):.3f}, {np.max(var_df.loc[var_df.showup==1].smile_proportion):.3f}')
print(f'Beginning duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].beginning):.3f}, {np.std(var_df.loc[var_df.showup==1].beginning):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].beginning):.3f}, {np.max(var_df.loc[var_df.showup==1].beginning):.3f}')
print(f'Middle duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].middle):.3f}, {np.std(var_df.loc[var_df.showup==1].middle):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].middle):.3f}, {np.max(var_df.loc[var_df.showup==1].middle):.3f}')
print(f'End duration: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].end):.3f}, {np.std(var_df.loc[var_df.showup==1].end):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].end):.3f}, {np.max(var_df.loc[var_df.showup==1].end):.3f}')
print(f'Average smiling intensity: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].Intensity):.3f}, {np.std(var_df.loc[var_df.showup==1].Intensity):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].Intensity):.3f}, {np.max(var_df.loc[var_df.showup==1].Intensity):.3f}')
print(f'Facial symmetry: {len(var_df.loc[var_df.showup==1])}, {np.mean(var_df.loc[var_df.showup==1].asymmetry):.3f}, {np.std(var_df.loc[var_df.showup==1].asymmetry):.3f}, \
        {np.min(var_df.loc[var_df.showup==1].asymmetry):.3f}, {np.max(var_df.loc[var_df.showup==1].asymmetry):.3f}')


Presence of preview video: 897
Video with teacher face: 897, 0.692, 0.462
Video quality: 897, 0.744, 0.075, 0.484, 0.958
Video length: 897, 172.217, 184.258, 15.633, 3542.200
Average smiling duration: 621.000, 0.123, 0.173,        0.000, 0.886
Beginning duration: 621, 0.130, 0.192,         0.000, 1.000
Middle duration: 621, 0.110, 0.171,         0.000, 1.000
End duration: 621, 0.130, 0.189,         0.000, 1.000
Average smiling intensity: 621, 0.425, 0.121,         0.128, 0.894
Facial symmetry: 621, 0.608, 0.231,         0.001, 1.234


In [20]:
## teacher characteristics
print(f'Male: {len(var_df.loc[var_df.male==1])}, {np.nanmean(var_df.male):.3f}, {np.nanstd(var_df.male):.3f}, {np.nanmin(var_df.male):.3f}, {np.nanmax(var_df.male):.3f}')
print(f'Teacher rating: {len(var_df)}, {np.mean(var_df.Rating_of_Tch_1):.3f}, {np.std(var_df.Rating_of_Tch_1):.3f}, {np.min(var_df.Rating_of_Tch_1):.3f}, {np.max(var_df.Rating_of_Tch_1):.3f}')
print(f'Log number of students: {len(var_df)}, {np.nanmean(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanstd(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanmin(np.log(var_df.Students_of_Tch_1)):.3f}, {np.nanmax(np.log(var_df.Students_of_Tch_1)):.3f}')
print(f'Number of courses: {len(var_df)}, {np.mean(var_df.Courses_of_Tch_1):.3f}, {np.std(var_df.Courses_of_Tch_1):.3f}, {np.min(var_df.Courses_of_Tch_1):.3f}, {np.max(var_df.Courses_of_Tch_1):.3f}')


Male: 687, 0.723, 0.447, 0.000, 1.000
Teacher rating: 950, 4.405, 0.249, 3.116, 5.000
Log number of students: 950, 9.948, 1.959, 5.357, 14.311
Number of courses: 950, 24.348, 46.907, 1.000, 454.000


In [21]:
## course characteristics
print(f'Log length of course description: {len(var_df)}, {np.nanmean(var_df.log_description_length):.3f}, {np.nanstd(var_df.log_description_length):.3f}, {np.nanmin(var_df.log_description_length):.3f}, {np.nanmax(var_df.log_description_length):.3f}')
print(f'Selling price: {len(var_df)}, {np.mean(var_df.Selling_Price):.3f}, {np.std(var_df.Selling_Price):.3f}, {np.min(var_df.Selling_Price):.3f}, {np.max(var_df.Selling_Price):.3f}')
print(f'Log number of student enrollment: {len(var_df)}, {np.nanmean(np.log(var_df.Students_Num)):.3f}, {np.nanstd(np.log(var_df.Students_Num)):.3f}, {np.nanmin(np.log(var_df.Students_Num)):.3f}, {np.nanmax(np.log(var_df.Students_Num)):.3f}')
print(f'Course rating: {len(var_df)}, {np.nanmean(var_df.Rating):.3f}, {np.nanstd(var_df.Rating):.3f}, {np.nanmin(var_df.Rating):.3f}, {np.nanmax(var_df.Rating):.3f}')


Log length of course description: 950, 2.640, 0.377, 1.099, 3.219
Selling price: 950, 13.738, 21.785, 9.990, 199.990
Log number of student enrollment: 950, 8.241, 1.295, 5.357, 11.659
Course rating: 950, 4.396, 0.328, 2.591, 5.000


# Concatenate two categories into one dataframe

In [22]:
fin_df = pd.read_csv('fin/variables.csv', sep='\t')
pd_df = pd.read_csv('pd/variables.csv', sep='\t')
fin_df['category'] = 'finance'
pd_df['category'] = 'personal development'
fin_df.shape, pd_df.shape

((900, 37), (950, 37))

In [23]:
all_df = pd.concat([fin_df, pd_df], axis=0)
all_df.to_csv('all_variables.csv', sep='\t', index=False)