# Is Preview Improving Conversion Rate between Acceptance and Payment?

* We sent preview emails to all students who had been accepted into the following cohorts:
    * 495: nd209; cohort notify at 2017-12-22, start at 2018-01-10; preview course: ud208
    * 326: nd889; cohort notify at 2018-01-04, start at 2018-01-25; preview course: ud889-preview
    * 293: nd013; cohort notify at 2017-12-05, start at 2017-12-14; preview course: ud013-preview

In [75]:
import psycopg2
import pandas as pd
import numpy as np
from scipy import stats
from math import sqrt

In [92]:
# The contents of each text file are passed to psycopg2.connect as the
# connection string. Read each file inside a `with` block so the handle is
# closed immediately (even if connecting fails) instead of staying open for
# the lifetime of the kernel.
with open('conn_laurel.txt','r') as laurel:
    conn_laurel = psycopg2.connect(laurel.read())
with open('conn_hardy.txt','r') as hardy:
    conn_hardy = psycopg2.connect(hardy.read())

## Cohort 495

In [8]:
# Application funnel for cohort 495: one labelled count of distinct
# application ids per phase (created, submitted, accepted, paid), stacked
# into a single result with UNION.
sql_495_table = '''select 'Apps Created' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '495'
                   UNION
                   select 'Apps Submitted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '495' and submitted_at is not null
                   UNION
                   select 'Apps Accepted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '495' and accepted_at is not null
                   UNION
                   select 'Apps Paid' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '495' and first_charge_created_at is not null
                '''
# Run the query on the laurel connection and show the phases from the
# largest count to the smallest.
df_495_table = pd.read_sql(sql=sql_495_table, con=conn_laurel)
df_495_table.sort_values('Values', ascending=False)

Unnamed: 0,Phases,Values
0,Apps Created,2928
3,Apps Submitted,554
1,Apps Accepted,437
2,Apps Paid,41


In [50]:
# Accepted cohort-495 applicants, left-joined to their ud208 preview
# enrollment rows and daily progress rows. The Y/N flags record whether the
# corresponding timestamp (accepted_at, first_charge_created_at, join_time,
# leave_time, day_of) is present.
sql_495 = '''select distinct a.applicant_id,a.cohort_notify_at,a.cohort_start_at
                   ,a.application_created_at,a.submitted_at,a.accepted_at,a.first_charge_created_at
                   ,case when a.accepted_at is not null then 'Y' else 'N' end as accepted
                   ,case when a.first_charge_created_at is not null then 'Y' else 'N' end as paid
                   ,b.join_time,b.leave_time
                   ,case when b.join_time is not null then 'Y' else 'N' end as preview_clicked
                   ,case when b.leave_time is not null then 'Y' else 'N' end as preview_finished
                   ,c.day_of,c.completed_concepts
                   ,case when c.day_of is not null then 'Y' else 'N' end as preview_watched
             from applications a
             left join enrollments.course_enrollments b
               on a.applicant_id = b.user_id and b.course_key = 'ud208'
             left join daily_course_progress c
               on a.applicant_id = c.user_id and c.course_key = 'ud208'
             where a.cohort_id = '495' and a.accepted_at is not null
          '''
df_495 = pd.read_sql(sql_495,conn_laurel)
# The select list includes per-join columns (join_time, day_of, ...), so one
# applicant can come back as several distinct rows, e.g. if they have progress
# rows for more than one day_of. Count distinct applicants rather than rows so
# every cell of the table is a number of students.
df_495.groupby(['accepted','paid','preview_clicked','preview_watched','preview_finished'])['applicant_id'].nunique().unstack().fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,preview_finished,N
accepted,paid,preview_clicked,preview_watched,Unnamed: 4_level_1
Y,N,N,N,400
Y,Y,N,N,37


In [62]:
# Inner query: per accepted cohort-495 applicant, a convert/not_convert flag
# plus counts of distinct ud208 concepts completed overall, before the
# application was created, before acceptance, and on/after acceptance.
# Outer query: turns those counts into a viewed/not_viewed flag and a timing
# label. The CASE branches are checked in order, so any completion before the
# application wins over later activity.
sql_ud208 = '''
select *
      ,case when concept_completed > 0 then 'viewed' else 'not_viewed' end as course_enrolled
      ,case when concept_completed = 0 then 'not enrolled'
            when concept_completed_before_application > 0 then 'enroll before apply'
            when concept_completed_before_application = 0 and concept_completed_before_accepted > 0 then 'enroll bewteen application and accept'
            when concept_completed_before_accepted = 0 and concept_completed_after_accepted > 0 then 'enroll after accept'
       else 'others'
       end as course_enrollment
from (select a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end as converted
            ,count(distinct b.concept_key) AS concept_completed
            ,count(distinct case when b.original_timestamp < a.application_created_at then b.concept_key end) as concept_completed_before_application
            ,count(distinct case when b.original_timestamp < a.accepted_at then b.concept_key end) as concept_completed_before_accepted
            ,count(distinct case when b.original_timestamp >= a.accepted_at then b.concept_key end) as concept_completed_after_accepted
      from analytics_tables.applications a
      left join classroom.course_concept_completed b on a.applicant_id = b.user_id and b.course_key = 'ud208'
      where a.cohort_id = '495' and accepted_at is not null
      group by a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end
     )
     '''
# This query runs on the hardy connection; the breakdown below counts rows per
# (viewed flag, timing label, conversion flag).
df_ud208 = pd.read_sql(sql=sql_ud208, con=conn_hardy)
breakdown_cols = ['course_enrolled','course_enrollment','converted']
df_ud208.groupby(breakdown_cols)['applicant_id'].count()

course_enrolled  course_enrollment                      converted  
not_viewed       not enrolled                           convert         12
                                                        not_convert    231
viewed           enroll after accept                    convert         10
                                                        not_convert    100
                 enroll before apply                    convert          5
                                                        not_convert     32
                 enroll bewteen application and accept  convert         10
                                                        not_convert     37
Name: applicant_id, dtype: int64

In [71]:
# Split the accepted students by whether they viewed the preview course, then
# report each group's share of accepted students and its conversion rate.
viewed_mask = df_ud208['course_enrolled']=='viewed'
not_viewed_mask = df_ud208['course_enrolled']=='not_viewed'
converted_mask = df_ud208['converted']=='convert'

accepted_student = len(df_ud208)
not_accessed_preview = int(not_viewed_mask.sum())
accessed_preview = int(viewed_mask.sum())
not_accessed_preview_converted = int((not_viewed_mask&converted_mask).sum())
accessed_preview_converted = int((viewed_mask&converted_mask).sum())

# (message template, denominator, numerator) in the order they are printed.
summary_lines = [
    ('Out of {0} accepted students, {1} of them didn\'t access preview. ({2:.1%})',accepted_student,not_accessed_preview),
    ('Out of {0} accepted students, {1} of them accessed preview. ({2:.1%})',accepted_student,accessed_preview),
    ('Out of {0} student didn\'t access preview, {1} of them converted. ({2:.1%})',not_accessed_preview,not_accessed_preview_converted),
    ('Out of {0} student accessed preview, {1} of them converted. ({2:.1%})',accessed_preview,accessed_preview_converted),
]
for template,total,part in summary_lines:
    print(template.format(total,part,part/total))

Out of 437 accepted students, 243 of them didn't access preview. (55.6%)
Out of 437 accepted students, 194 of them accessed preview. (44.4%)
Out of 243 student didn't access preview, 12 of them converted. (4.9%)
Out of 194 student accessed preview, 25 of them converted. (12.9%)


### Question 1: Is accessing preview improving conversion?

In [76]:
# Pooled two-sample t-test on conversion rate.
# Group 1 = did not access the preview, group 2 = accessed the preview.
obs_v1,n1 = not_accessed_preview_converted,not_accessed_preview
obs_v2,n2 = accessed_preview_converted,accessed_preview

# Conversion rate of each group.
m1 = obs_v1/n1
m2 = obs_v2/n2
# Despite the "sd" name, p*(1-p)/n is the squared standard error of each rate.
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2

# n*sd recovers each group's variance p*(1-p); pool them over n1+n2-2
# degrees of freedom.
dof = n1+n2-2
s_t = np.sqrt(((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof)
t = (m2-m1)/(s_t*np.sqrt(1/n1+1/n2))
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 2.9960700856380216; 95% t score is 1.6483640618530113


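### Result: Accessing the preview helps improve conversion.

The t statistic (3.00) exceeds the one-sided 95% critical value (1.65), so students who accessed the preview converted at a significantly higher rate (12.9% vs. 4.9%). Note that this is observational data: it shows an association and does not by itself prove that the preview caused the improvement.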

### Question 2: Which timing is influencing conversion: Before application? During application? After accepted?

In [82]:
# Timing groups among preview viewers. The label strings (including the
# "bewteen" spelling) must match the course_enrollment values produced by the
# SQL exactly.
enrollments = ['enroll before apply','enroll bewteen application and accept','enroll after accept']
converted_mask = df_ud208['converted']=='convert'
timing_masks = [df_ud208['course_enrollment']==label for label in enrollments]

# Group sizes and number of converted students per timing group.
n = [int(mask.sum()) for mask in timing_masks]
obs_v = [int((mask&converted_mask).sum()) for mask in timing_masks]
n1,n2,n3 = n
obs_v1,obs_v2,obs_v3 = obs_v

for i,label in enumerate(enrollments):
    print('Out of {0} students who {1}, {2} of them converted. {3:.1%}'.format(n[i],label,obs_v[i],obs_v[i]/n[i]))

# Conversion rate per group, and p*(1-p)/n (the squared standard error,
# despite the "sd" name). The pairwise t-test cells read these names.
m1,m2,m3 = obs_v1/n1,obs_v2/n2,obs_v3/n3
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2
sd3 = (m3*(1-m3))/n3

Out of 37 students who enroll before apply, 5 of them converted. 13.5%
Out of 47 students who enroll bewteen application and accept, 10 of them converted. 21.3%
Out of 110 students who enroll after accept, 10 of them converted. 9.1%


In [83]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll between
# application and accept" (group 2). Positive t means group 2 converts better.
dof = n1+n2-2
pooled_var = ((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n2)
t = (m2-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 0.9267306985795078; 95% t score is 1.6636491839760918


In [84]:
# Pooled t-test: "enroll between application and accept" (group 2) vs.
# "enroll after accept" (group 3). Positive t means group 3 converts better.
# NOTE(review): tscore is the upper one-sided critical value, so a negative t
# has to be judged by its magnitude; a two-sided critical value would be the
# stricter choice when the direction is not fixed in advance.
dof = n2+n3-2
pooled_var = ((n2-1)*n2*sd2+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n2+1/n3)
t = (m3-m2)/std_err
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is -2.1295247272486875; 95% t score is 1.6547437739175987


In [85]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll after accept"
# (group 3). Positive t means group 3 converts better.
dof = n1+n3-2
pooled_var = ((n1-1)*n1*sd1+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n3)
t = (m3-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is -0.7708241265072567; 95% t score is 1.6554302514146266


### Result: It's hard to tell which timing is best for sending out the preview notice. We can only say that students who enrolled in the preview during application have a slightly better conversion rate than students who enrolled after being accepted.

## Cohort 326

In [23]:
# Application funnel for cohort 326: one labelled count of distinct
# application ids per phase (created, submitted, accepted, paid), stacked
# into a single result with UNION.
sql_326_table = '''select 'Apps Created' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '326'
                   UNION
                   select 'Apps Submitted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '326' and submitted_at is not null
                   UNION
                   select 'Apps Accepted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '326' and accepted_at is not null
                   UNION
                   select 'Apps Paid' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '326' and first_charge_created_at is not null
                '''
# Run the query on the laurel connection and show the phases from the
# largest count to the smallest.
df_326_table = pd.read_sql(sql=sql_326_table, con=conn_laurel)
df_326_table.sort_values('Values', ascending=False)

Unnamed: 0,Phases,Values
0,Apps Created,4825
1,Apps Submitted,937
2,Apps Accepted,747
3,Apps Paid,161


In [31]:
# Accepted cohort-326 applicants, left-joined to their ud889-preview
# enrollment rows and daily progress rows. The Y/N flags record whether the
# corresponding timestamp (accepted_at, first_charge_created_at, join_time,
# leave_time, day_of) is present.
sql_326 = '''select distinct a.applicant_id,a.cohort_notify_at,a.cohort_start_at
                   ,a.application_created_at,a.submitted_at,a.accepted_at,a.first_charge_created_at
                   ,case when a.accepted_at is not null then 'Y' else 'N' end as accepted
                   ,case when a.first_charge_created_at is not null then 'Y' else 'N' end as paid
                   ,b.join_time,b.leave_time
                   ,case when b.join_time is not null then 'Y' else 'N' end as preview_clicked
                   ,case when b.leave_time is not null then 'Y' else 'N' end as preview_finished
                   ,c.day_of,c.completed_concepts
                   ,case when c.day_of is not null then 'Y' else 'N' end as preview_watched
             from applications a
             left join enrollments.course_enrollments b
               on a.applicant_id = b.user_id and b.course_key = 'ud889-preview'
             left join daily_course_progress c
               on a.applicant_id = c.user_id and c.course_key = 'ud889-preview'
             where a.cohort_id = '326' and a.accepted_at is not null
          '''
df_326 = pd.read_sql(sql_326,conn_laurel)
# The select list includes per-join columns (join_time, day_of, ...), so one
# applicant can come back as several distinct rows, e.g. if they have progress
# rows for more than one day_of. Count distinct applicants rather than rows so
# every cell of the table is a number of students.
df_326.groupby(['accepted','paid','preview_clicked','preview_watched','preview_finished'])['applicant_id'].nunique().unstack().fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,preview_finished,N
accepted,paid,preview_clicked,preview_watched,Unnamed: 4_level_1
Y,N,N,N,593
Y,Y,N,N,154


In [63]:
# Inner query: per accepted cohort-326 applicant, a convert/not_convert flag
# plus counts of distinct ud889-preview concepts completed overall, before the
# application was created, before acceptance, and on/after acceptance.
# Outer query: turns those counts into a viewed/not_viewed flag and a timing
# label. The CASE branches are checked in order, so any completion before the
# application wins over later activity.
sql_ud889 = '''
select *
      ,case when concept_completed > 0 then 'viewed' else 'not_viewed' end as course_enrolled
      ,case when concept_completed = 0 then 'not enrolled'
            when concept_completed_before_application > 0 then 'enroll before apply'
            when concept_completed_before_application = 0 and concept_completed_before_accepted > 0 then 'enroll bewteen application and accept'
            when concept_completed_before_accepted = 0 and concept_completed_after_accepted > 0 then 'enroll after accept'
       else 'others'
       end as course_enrollment
from (select a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end as converted
            ,count(distinct b.concept_key) AS concept_completed
            ,count(distinct case when b.original_timestamp < a.application_created_at then b.concept_key end) as concept_completed_before_application
            ,count(distinct case when b.original_timestamp < a.accepted_at then b.concept_key end) as concept_completed_before_accepted
            ,count(distinct case when b.original_timestamp >= a.accepted_at then b.concept_key end) as concept_completed_after_accepted
      from analytics_tables.applications a
      left join classroom.course_concept_completed b on a.applicant_id = b.user_id and b.course_key = 'ud889-preview'
      where a.cohort_id = '326' and accepted_at is not null
      group by a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end
     )
     '''
# This query runs on the hardy connection; the breakdown below counts rows per
# (viewed flag, timing label, conversion flag).
df_ud889 = pd.read_sql(sql=sql_ud889, con=conn_hardy)
breakdown_cols = ['course_enrolled','course_enrollment','converted']
df_ud889.groupby(breakdown_cols)['applicant_id'].count()

course_enrolled  course_enrollment                      converted  
not_viewed       not enrolled                           convert        139
                                                        not_convert    561
viewed           enroll after accept                    convert          6
                                                        not_convert      4
                 enroll before apply                    convert          7
                                                        not_convert     22
                 enroll bewteen application and accept  convert          2
                                                        not_convert      6
Name: applicant_id, dtype: int64

In [86]:
# Split the accepted students by whether they viewed the preview course, then
# report each group's share of accepted students and its conversion rate.
viewed_mask = df_ud889['course_enrolled']=='viewed'
not_viewed_mask = df_ud889['course_enrolled']=='not_viewed'
converted_mask = df_ud889['converted']=='convert'

accepted_student = len(df_ud889)
not_accessed_preview = int(not_viewed_mask.sum())
accessed_preview = int(viewed_mask.sum())
not_accessed_preview_converted = int((not_viewed_mask&converted_mask).sum())
accessed_preview_converted = int((viewed_mask&converted_mask).sum())

# (message template, denominator, numerator) in the order they are printed.
summary_lines = [
    ('Out of {0} accepted students, {1} of them didn\'t access preview. ({2:.1%})',accepted_student,not_accessed_preview),
    ('Out of {0} accepted students, {1} of them accessed preview. ({2:.1%})',accepted_student,accessed_preview),
    ('Out of {0} student didn\'t access preview, {1} of them converted. ({2:.1%})',not_accessed_preview,not_accessed_preview_converted),
    ('Out of {0} student accessed preview, {1} of them converted. ({2:.1%})',accessed_preview,accessed_preview_converted),
]
for template,total,part in summary_lines:
    print(template.format(total,part,part/total))

Out of 747 accepted students, 700 of them didn't access preview. (93.7%)
Out of 747 accepted students, 47 of them accessed preview. (6.3%)
Out of 700 student didn't access preview, 139 of them converted. (19.9%)
Out of 47 student accessed preview, 15 of them converted. (31.9%)


### Question 1: Is accessing preview improving conversion?

In [87]:
# Pooled two-sample t-test on conversion rate.
# Group 1 = did not access the preview, group 2 = accessed the preview.
obs_v1,n1 = not_accessed_preview_converted,not_accessed_preview
obs_v2,n2 = accessed_preview_converted,accessed_preview

# Conversion rate of each group.
m1 = obs_v1/n1
m2 = obs_v2/n2
# Despite the "sd" name, p*(1-p)/n is the squared standard error of each rate.
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2

# n*sd recovers each group's variance p*(1-p); pool them over n1+n2-2
# degrees of freedom.
dof = n1+n2-2
s_t = np.sqrt(((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof)
t = (m2-m1)/(s_t*np.sqrt(1/n1+1/n2))
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 1.983664636606534; 95% t score is 1.6469015158540836


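### Result: Accessing the preview helps improve conversion.

The t statistic (1.98) exceeds the one-sided 95% critical value (1.65), so students who accessed the preview converted at a significantly higher rate (31.9% vs. 19.9%). Note that only 47 students accessed the preview, and that this is observational data: it shows an association and does not by itself prove that the preview caused the improvement.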

### Question 2: Which timing is influencing conversion: Before application? During application? After accepted?

In [88]:
# Timing groups among preview viewers. The label strings (including the
# "bewteen" spelling) must match the course_enrollment values produced by the
# SQL exactly.
enrollments = ['enroll before apply','enroll bewteen application and accept','enroll after accept']
converted_mask = df_ud889['converted']=='convert'
timing_masks = [df_ud889['course_enrollment']==label for label in enrollments]

# Group sizes and number of converted students per timing group.
n = [int(mask.sum()) for mask in timing_masks]
obs_v = [int((mask&converted_mask).sum()) for mask in timing_masks]
n1,n2,n3 = n
obs_v1,obs_v2,obs_v3 = obs_v

for i,label in enumerate(enrollments):
    print('Out of {0} students who {1}, {2} of them converted. {3:.1%}'.format(n[i],label,obs_v[i],obs_v[i]/n[i]))

# Conversion rate per group, and p*(1-p)/n (the squared standard error,
# despite the "sd" name). The pairwise t-test cells read these names.
m1,m2,m3 = obs_v1/n1,obs_v2/n2,obs_v3/n3
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2
sd3 = (m3*(1-m3))/n3

Out of 29 students who enroll before apply, 7 of them converted. 24.1%
Out of 8 students who enroll bewteen application and accept, 2 of them converted. 25.0%
Out of 10 students who enroll after accept, 6 of them converted. 60.0%


In [89]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll between
# application and accept" (group 2). Positive t means group 2 converts better.
dof = n1+n2-2
pooled_var = ((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n2)
t = (m2-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 0.05032518164431224; 95% t score is 1.6895724539637709


In [90]:
# Pooled t-test: "enroll between application and accept" (group 2) vs.
# "enroll after accept" (group 3). Positive t means group 3 converts better.
dof = n2+n3-2
pooled_var = ((n2-1)*n2*sd2+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n2+1/n3)
t = (m3-m2)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 1.5838558368914246; 95% t score is 1.74588367627624


In [91]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll after accept"
# (group 3). Positive t means group 3 converts better.
dof = n1+n3-2
pooled_var = ((n1-1)*n1*sd1+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n3)
t = (m3-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 2.203543740055378; 95% t score is 1.6870936167109873


### Result: It's hard to tell which timing is best for sending out the preview notice. We can only say that students who enrolled in the preview after being accepted have a slightly better conversion rate than students who enrolled before application.

## Cohort 293

In [32]:
# Application funnel for cohort 293: one labelled count of distinct
# application ids per phase (created, submitted, accepted, paid), stacked
# into a single result with UNION.
sql_293_table = '''select 'Apps Created' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '293'
                   UNION
                   select 'Apps Submitted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '293' and submitted_at is not null
                   UNION
                   select 'Apps Accepted' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '293' and accepted_at is not null
                   UNION
                   select 'Apps Paid' AS "Phases",count(distinct id) AS "Values"
                   from applications
                   where cohort_id = '293' and first_charge_created_at is not null
                '''
# Run the query on the laurel connection and show the phases from the
# largest count to the smallest.
df_293_table = pd.read_sql(sql=sql_293_table, con=conn_laurel)
df_293_table.sort_values('Values', ascending=False)

Unnamed: 0,Phases,Values
0,Apps Created,4291
3,Apps Submitted,1129
1,Apps Accepted,877
2,Apps Paid,324


In [33]:
# Accepted cohort-293 applicants, left-joined to their ud013-preview
# enrollment rows and daily progress rows. The Y/N flags record whether the
# corresponding timestamp (accepted_at, first_charge_created_at, join_time,
# leave_time, day_of) is present.
sql_293 = '''select distinct a.applicant_id,a.cohort_notify_at,a.cohort_start_at
                   ,a.application_created_at,a.submitted_at,a.accepted_at,a.first_charge_created_at
                   ,case when a.accepted_at is not null then 'Y' else 'N' end as accepted
                   ,case when a.first_charge_created_at is not null then 'Y' else 'N' end as paid
                   ,b.join_time,b.leave_time
                   ,case when b.join_time is not null then 'Y' else 'N' end as preview_clicked
                   ,case when b.leave_time is not null then 'Y' else 'N' end as preview_finished
                   ,c.day_of,c.completed_concepts
                   ,case when c.day_of is not null then 'Y' else 'N' end as preview_watched
             from applications a
             left join enrollments.course_enrollments b
               on a.applicant_id = b.user_id and b.course_key = 'ud013-preview'
             left join daily_course_progress c
               on a.applicant_id = c.user_id and c.course_key = 'ud013-preview'
             where a.cohort_id = '293' and a.accepted_at is not null
          '''
df_293 = pd.read_sql(sql_293,conn_laurel)
# The select list includes per-join columns (join_time, day_of, ...), so one
# applicant can come back as several distinct rows, e.g. if they have progress
# rows for more than one day_of. Count distinct applicants rather than rows so
# every cell of the table is a number of students.
df_293.groupby(['accepted','paid','preview_clicked','preview_watched','preview_finished'])['applicant_id'].nunique().unstack().fillna(0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,preview_finished,N
accepted,paid,preview_clicked,preview_watched,Unnamed: 4_level_1
Y,N,N,N,565
Y,Y,N,N,312


In [94]:
# Inner query: per accepted cohort-293 applicant, a convert/not_convert flag
# plus counts of distinct ud013-preview concepts completed overall, before the
# application was created, before acceptance, and on/after acceptance.
# Outer query: turns those counts into a viewed/not_viewed flag and a timing
# label. The CASE branches are checked in order, so any completion before the
# application wins over later activity.
sql_ud013 = '''
select *
      ,case when concept_completed > 0 then 'viewed' else 'not_viewed' end as course_enrolled
      ,case when concept_completed = 0 then 'not enrolled'
            when concept_completed_before_application > 0 then 'enroll before apply'
            when concept_completed_before_application = 0 and concept_completed_before_accepted > 0 then 'enroll bewteen application and accept'
            when concept_completed_before_accepted = 0 and concept_completed_after_accepted > 0 then 'enroll after accept'
       else 'others'
       end as course_enrollment
from (select a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end as converted
            ,count(distinct b.concept_key) AS concept_completed
            ,count(distinct case when b.original_timestamp < a.application_created_at then b.concept_key end) as concept_completed_before_application
            ,count(distinct case when b.original_timestamp < a.accepted_at then b.concept_key end) as concept_completed_before_accepted
            ,count(distinct case when b.original_timestamp >= a.accepted_at then b.concept_key end) as concept_completed_after_accepted
      from analytics_tables.applications a
      left join classroom.course_concept_completed b on a.applicant_id = b.user_id and b.course_key = 'ud013-preview'
      where a.cohort_id = '293' and accepted_at is not null
      group by a.applicant_id,case when a.first_charge_created_at is not null then 'convert' else 'not_convert' end
     )
     '''
# This query runs on the hardy connection; the breakdown below counts rows per
# (viewed flag, timing label, conversion flag).
df_ud013 = pd.read_sql(sql=sql_ud013, con=conn_hardy)
breakdown_cols = ['course_enrolled','course_enrollment','converted']
df_ud013.groupby(breakdown_cols)['applicant_id'].count()

course_enrolled  course_enrollment                      converted  
not_viewed       not enrolled                           convert        140
                                                        not_convert    388
viewed           enroll after accept                    convert        119
                                                        not_convert    100
                 enroll before apply                    convert         14
                                                        not_convert     28
                 enroll bewteen application and accept  convert         39
                                                        not_convert     49
Name: applicant_id, dtype: int64

In [95]:
# Split the accepted students by whether they viewed the preview course, then
# report each group's share of accepted students and its conversion rate.
viewed_mask = df_ud013['course_enrolled']=='viewed'
not_viewed_mask = df_ud013['course_enrolled']=='not_viewed'
converted_mask = df_ud013['converted']=='convert'

accepted_student = len(df_ud013)
not_accessed_preview = int(not_viewed_mask.sum())
accessed_preview = int(viewed_mask.sum())
not_accessed_preview_converted = int((not_viewed_mask&converted_mask).sum())
accessed_preview_converted = int((viewed_mask&converted_mask).sum())

# (message template, denominator, numerator) in the order they are printed.
summary_lines = [
    ('Out of {0} accepted students, {1} of them didn\'t access preview. ({2:.1%})',accepted_student,not_accessed_preview),
    ('Out of {0} accepted students, {1} of them accessed preview. ({2:.1%})',accepted_student,accessed_preview),
    ('Out of {0} student didn\'t access preview, {1} of them converted. ({2:.1%})',not_accessed_preview,not_accessed_preview_converted),
    ('Out of {0} student accessed preview, {1} of them converted. ({2:.1%})',accessed_preview,accessed_preview_converted),
]
for template,total,part in summary_lines:
    print(template.format(total,part,part/total))

Out of 877 accepted students, 528 of them didn't access preview. (60.2%)
Out of 877 accepted students, 349 of them accessed preview. (39.8%)
Out of 528 student didn't access preview, 140 of them converted. (26.5%)
Out of 349 student accessed preview, 172 of them converted. (49.3%)


### Question 1: Is accessing preview improving conversion?

In [96]:
# Pooled two-sample t-test on conversion rate.
# Group 1 = did not access the preview, group 2 = accessed the preview.
obs_v1,n1 = not_accessed_preview_converted,not_accessed_preview
obs_v2,n2 = accessed_preview_converted,accessed_preview

# Conversion rate of each group.
m1 = obs_v1/n1
m2 = obs_v2/n2
# Despite the "sd" name, p*(1-p)/n is the squared standard error of each rate.
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2

# n*sd recovers each group's variance p*(1-p); pool them over n1+n2-2
# degrees of freedom.
dof = n1+n2-2
s_t = np.sqrt(((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof)
t = (m2-m1)/(s_t*np.sqrt(1/n1+1/n2))
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 7.088817444441026; 95% t score is 1.6465969338329176


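### Result: Accessing the preview helps improve conversion.

The t statistic (7.09) exceeds the one-sided 95% critical value (1.65), so students who accessed the preview converted at a significantly higher rate (49.3% vs. 26.5%). Note that this is observational data: it shows an association and does not by itself prove that the preview caused the improvement.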

### Question 2: Which timing is influencing conversion: Before application? During application? After accepted?

In [97]:
# Timing groups among preview viewers. The label strings (including the
# "bewteen" spelling) must match the course_enrollment values produced by the
# SQL exactly.
enrollments = ['enroll before apply','enroll bewteen application and accept','enroll after accept']
converted_mask = df_ud013['converted']=='convert'
timing_masks = [df_ud013['course_enrollment']==label for label in enrollments]

# Group sizes and number of converted students per timing group.
n = [int(mask.sum()) for mask in timing_masks]
obs_v = [int((mask&converted_mask).sum()) for mask in timing_masks]
n1,n2,n3 = n
obs_v1,obs_v2,obs_v3 = obs_v

for i,label in enumerate(enrollments):
    print('Out of {0} students who {1}, {2} of them converted. {3:.1%}'.format(n[i],label,obs_v[i],obs_v[i]/n[i]))

# Conversion rate per group, and p*(1-p)/n (the squared standard error,
# despite the "sd" name). The pairwise t-test cells read these names.
m1,m2,m3 = obs_v1/n1,obs_v2/n2,obs_v3/n3
sd1 = (m1*(1-m1))/n1
sd2 = (m2*(1-m2))/n2
sd3 = (m3*(1-m3))/n3

Out of 42 students who enroll before apply, 14 of them converted. 33.3%
Out of 88 students who enroll bewteen application and accept, 39 of them converted. 44.3%
Out of 219 students who enroll after accept, 119 of them converted. 54.3%


In [98]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll between
# application and accept" (group 2). Positive t means group 2 converts better.
dof = n1+n2-2
pooled_var = ((n1-1)*n1*sd1+(n2-1)*n2*sd2)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n2)
t = (m2-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 1.1983201325341042; 95% t score is 1.6568452260758075


In [99]:
# Pooled t-test: "enroll between application and accept" (group 2) vs.
# "enroll after accept" (group 3). Positive t means group 3 converts better.
dof = n2+n3-2
pooled_var = ((n2-1)*n2*sd2+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n2+1/n3)
t = (m3-m2)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 1.5949855217402589; 95% t score is 1.649864892961819


In [100]:
# Pooled t-test: "enroll before apply" (group 1) vs. "enroll after accept"
# (group 3). Positive t means group 3 converts better.
dof = n1+n3-2
pooled_var = ((n1-1)*n1*sd1+(n3-1)*n3*sd3)/dof
s_t = np.sqrt(pooled_var)
std_err = s_t*np.sqrt(1/n1+1/n3)
t = (m3-m1)/std_err
# Upper one-sided critical value at the 95% level.
tscore = stats.t.ppf(.95,dof)
print("t stats is {0}; 95% t score is {1}".format(t,tscore))

t stats is 2.5242272021394117; 95% t score is 1.6507581336529946


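### Result: Students who enrolled in the preview after being accepted have a significantly better conversion rate than students who enrolled before application (t = 2.52 > 1.65). Their advantage over students who enrolled during application (54.3% vs. 44.3%) is not significant at the 95% level (t = 1.59 < 1.65).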