# Data analysis

In [1]:
import pandas as pd
import numpy as np

In [2]:
import matplotlib.pyplot as plt
%matplotlib notebook

In [53]:
from scipy import stats

In [5]:
# Input CSV for the analysis. The path is relative, so it is resolved against
# the kernel's current working directory when the file is read.
csvfile = "meetings_complete_clean.csv"

In [13]:
# Per-column parsers for read_csv: each raw value in Time_dt is passed through
# pd.to_datetime and each raw value in Time_del through pd.to_timedelta.
converters = {
    "Time_dt": pd.to_datetime,
    "Time_del": pd.to_timedelta,
}

In [14]:
# Load the meetings table. The converters parse Time_dt / Time_del while the
# file is read, so those columns arrive as datetime / timedelta values rather
# than plain strings.
df = pd.read_csv(csvfile, converters=converters)

In [15]:
# Preview the first five rows to check that the load and the time converters
# produced sensible values.
df.head()

Unnamed: 0,Name,Day,Time,Video,Desc,Categories,Email,Phone,Time_dt,Time_del,...,Label_Game,Label_Blind / Visually Impaired,Label_Deaf / Hard of Hearing,Label_Email,Label_Professionals,Label_Meditation,Label_Seniors,Label_Newcomer,Label_24/7,Zoom
0,1 Hole in the Sky AA Meetings: AA Secular Dail...,Tuesday,5:00 pm,https://zoom.us/j/414948190,Tuesday: AA Secular Daily Reflections 3:00pm P...,"Audio,Open,Video",,,2021-11-16 17:00:00,0 days 17:00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,AA Friends,Tuesday,5:00 pm,https://aafriends.webs.com/,AA Friends currently holds three online meetin...,"Forum,Open",aafriends@usa.com,,2021-11-16 17:00:00,0 days 17:00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
2,AA New Beginnings,Tuesday,5:00 pm,https://us02web.zoom.us/j/5662326551,"A daily meeting that focuses on experience, st...","Audio,Big Book,Discussion,Open,Video",AANewBeginnings230@gmail.com,,2021-11-16 17:00:00,0 days 17:00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
3,Agnes Water,Tuesday,5:00 pm,https://us02web.zoom.us/j/8983968244,PW billw,"Big Book,Discussion,Speaker,Video",agneswaterxa@gmail.com,,2021-11-16 17:00:00,0 days 17:00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
4,Beginners AA meeting,Tuesday,5:00 pm,https://us02web.zoom.us/j/88198562544?pwd=wfzm...,"Zoom Passcode, if any: serenity","Big Book,Discussion,Open,Speaker,Video",liaazoom2020@gmail.com,,2021-11-16 17:00:00,0 days 17:00:00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True


In [8]:
# (number of rows, number of columns) of the loaded table.
df.shape

(4155, 37)

In [9]:
# List all column names. The Label_* columns presumably hold one indicator per
# entry of Categories -- TODO confirm against the cleaning step.
df.columns

Index(['Name', 'Day', 'Time', 'Video', 'Desc', 'Categories', 'Email', 'Phone',
       'Time_dt', 'Time_del', 'Label_Audio', 'Label_Open', 'Label_Video',
       'Label_Forum', 'Label_Big Book', 'Label_Discussion', 'Label_Speaker',
       'Label_Closed', 'Label_Telephone', 'Label_Women',
       'Label_Loners / Isolationists', 'Label_Chat', 'Label_LGBTQIAA+',
       'Label_Young People', 'Label_Men', 'Label_Secular',
       'Label_Steps / Traditions', 'Label_Game',
       'Label_Blind / Visually Impaired', 'Label_Deaf / Hard of Hearing',
       'Label_Email', 'Label_Professionals', 'Label_Meditation',
       'Label_Seniors', 'Label_Newcomer', 'Label_24/7', 'Zoom'],
      dtype='object')

In [30]:
# Add column with time in total seconds for easier calculations:
# Time_del is parsed with pd.to_timedelta on load, and .dt.total_seconds()
# converts each value to a plain float, which is what the t-tests need.
df['Time_sec'] = df['Time_del'].dt.total_seconds()

For evaluating day and time patterns, best to remove ongoing meetings:

In [41]:
# df "minus ongoing": keep only the rows whose Day does not contain 'Ongoing'.
# A negated boolean mask replaces the filter -> .index -> drop round trip, so
# rows are selected directly rather than dropped by index label.
#   na=False    -> a missing Day counts as "not ongoing" instead of putting
#                  NaN into the mask and breaking the selection
#   regex=False -> 'Ongoing' is matched as literal text
#   .copy()     -> df_mo is an independent frame, safe to modify later
df_mo = df[~df['Day'].str.contains('Ongoing', na=False, regex=False)].copy()

In [43]:
# Size after removing ongoing meetings; compare with df.shape to see how many
# rows were dropped.
df_mo.shape

(4078, 38)

##### Helpful functions

In [57]:
def print_results(pval, conf):
    """Report a p-value and the hypothesis-test decision it implies.

    Args:
        pval: p-value produced by a statistical test.
        conf: threshold the p-value is compared against (the test's alpha).
            The null hypothesis is rejected only when ``pval < conf``.

    Returns:
        None. Two lines are written to stdout: the p-value, then the decision.
    """
    print(f"P-value = {pval}")
    verdict = "Reject null hypothesis." if pval < conf else "Fail to reject null hypothesis."
    print(verdict)

### One sample t-test

Average time of all non-ongoing meetings:

In [45]:
# Mean meeting time over the non-ongoing meetings, displayed as a Timedelta so
# it reads as a clock time.
df_mo['Time_del'].mean()

Timedelta('0 days 14:10:38.357037763')

In [51]:
# The same mean expressed in seconds (Time_sec = Time_del.dt.total_seconds()).
# This is the reference value the one-sample t-test compares the men's
# meetings against.
avg_time = df_mo['Time_sec'].mean()

**Null hypothesis:** The average time of men's meetings = 14:10:38

**Alternative hypothesis:** The average time of men's meetings ≠ 14:10:38

**Significance level:** alpha = 0.05

In [47]:
# Boolean row selector over df_mo: True where Label_Men equals 1.
male_mask = df_mo['Label_Men'].eq(1)

In [49]:
# Mean time of the men's meetings as a Timedelta (rows and column selected in
# a single .loc step).
df_mo.loc[male_mask, 'Time_del'].mean()

Timedelta('0 days 16:00:58.301886792')

In [48]:
# Mean time of the men's meetings in seconds, directly comparable to avg_time.
df_mo.loc[male_mask, 'Time_sec'].mean()

57658.301886792455

In [59]:
# One-sample t-test: is the mean time of men's meetings (in seconds) different
# from the overall mean avg_time? Rows and column are selected in one .loc step.
sample1 = df_mo.loc[male_mask, 'Time_sec']
tset1, pval1 = stats.ttest_1samp(a=sample1, popmean=avg_time)

# Compare the p-value against alpha = 0.05 and print the decision.
print_results(pval=pval1, conf=0.05)

P-value = 0.0002425719732924799
Reject null hypothesis.


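In other words, at the 5% significance level the average time of men's meetings (about 16:01) differs significantly from the overall average (14:10:38): men's meetings tend to be held later in the day.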

### Two sample t-test

What about meetings occurring on Friday and Saturday as opposed to the rest of the week? People can often stay up later on Friday and Saturday.