In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### National Survey on Drug Use and Health 2002 to 2014

Data source (ICPSR study search): <https://www.icpsr.umich.edu/web/RCMD/search/studies?start=0&sort=score%20desc%2CTITLE_SORT%20asc&CLASSIF_FACET=RCMD.V.&ARCHIVE=RCMD&PUBLISH_STATUS=PUBLISHED&TIMEPERIOD_NEW=%5B2012%20TO%202022%5D&rows=50&q=Behavioral%20Risk%20Factor%20Surveillance%20System>

In [4]:
# Load the Stata extract (2012 example file) into a DataFrame.
drug = pd.read_stata(
    '34933-0001-Data.dta',
    # Keep the stored numeric codes instead of converting labelled
    # columns to pandas Categoricals.
    convert_categoricals=False,
)

In [5]:
# Contains race and sex columns.

In [6]:
# Dimensions of the raw frame as (rows, columns).
drug.shape

(55268, 3120)

In [7]:
# Preview the raw frame; pandas elides the middle rows and columns of a frame
# this large when rendering it.
drug

Unnamed: 0,CASEID,QUESTID2,CIGEVER,CIGOFRSM,CIGWILYR,CIGTRY,CIGYFU,CIGMFU,CIGREC,CIG30USE,...,IIEMPSTY,II2EMSTY,EMPSTAT4,IIEMPST4,II2EMST4,PDEN00,COUTYP2,ANALWT_C,VESTR,VEREP
0,1,50886467,2,4,4,991,9991,91,91,91,...,1,1,99,9,9,2,2,1275.60,30054,2
1,2,13766883,2,99,99,991,9991,91,91,91,...,1,1,1,1,1,2,2,5191.07,30031,1
2,3,17772877,2,99,99,991,9991,91,91,91,...,1,1,1,1,1,3,3,419.74,30056,2
3,4,45622817,1,99,99,13,9999,99,2,93,...,1,1,2,1,1,2,2,1449.30,30054,1
4,5,17239390,1,99,99,11,9999,99,4,93,...,1,1,1,1,1,1,1,15344.29,30012,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55263,55264,54947473,2,99,99,991,9991,91,91,91,...,1,1,1,1,1,1,1,1289.36,30032,2
55264,55265,67706608,2,4,4,991,9991,91,91,91,...,9,9,99,9,9,2,1,116.92,30050,2
55265,55266,47284524,2,99,99,991,9991,91,91,91,...,1,1,2,1,1,1,1,1276.95,30018,1
55266,55267,32935928,1,99,99,13,9999,99,1,4,...,1,1,1,1,1,2,2,8072.38,30030,2


In [8]:
# Pull the four religion items out of the raw frame, still under their coded
# column names.
# NOTE(review): `relgion_df` (sic) is misspelled but kept as-is in case other
# cells reference it.
relgion_df = drug[['SNRLGSVC', 'SNRLGIMP', 'SNRLDCSN', 'SNRLFRND']]

# Relabel the coded columns with readable names. rename() returns a new frame,
# so the coded-name frame above is left unchanged.
religion_df = relgion_df.rename(
    columns={
        'SNRLGSVC': 'Past_12_months',
        'SNRLGIMP': 'Important',
        'SNRLDCSN': 'Influence',
        'SNRLFRND': 'Friends_Share',
    }
)
religion_df.shape

(55268, 4)

In [23]:
# Placeholder codes to screen out. Per the original note these cover
# "Don't Know", Refused to answer, blank, or Legitimate Skip.
drop_values = [94, 97, 98, 99]

# Show only the observations in which none of the four religion items holds
# one of the placeholder codes. The result is displayed, not assigned, so
# religion_df itself still contains every row.
religion_df[
    ~religion_df[['Past_12_months', 'Important', 'Influence', 'Friends_Share']]
    .isin(drop_values)
    .any(axis=1)
]

Unnamed: 0,Past_12_months,Important,Influence,Friends_Share
1,2,3,3,2
2,6,4,4,2
3,4,3,3,1
4,2,1,1,1
5,3,3,4,2
...,...,...,...,...
55256,6,4,4,4
55261,3,3,3,2
55262,6,4,4,3
55263,1,3,3,3


In [24]:
# Display the full religion frame; it still includes rows holding placeholder
# codes such as 97 and 99.
religion_df

Unnamed: 0,Past_12_months,Important,Influence,Friends_Share
0,99,99,99,99
1,2,3,3,2
2,6,4,4,2
3,4,3,3,1
4,2,1,1,1
...,...,...,...,...
55263,1,3,3,3
55264,99,99,99,99
55265,1,97,97,97
55266,4,3,4,3


In [25]:
# Select the crime / arrest / jail-related items from the raw frame, still
# under their coded column names.
crime_df = drug[
    [
        "SNYSTOLE",
        "YEYSTOLE",
        "SNYSELL",
        "YEYSELL",
        "DRVDONLY",
        "TXYRJAIL",
        "BOOKED",
        "NOBOOKY2",
        "YUJVDTON",
        "YUJVDTN2",
        "TXYRJLAD",
        "LOCJAIL",
        "ALCJAIL",
        "DRGJAIL",
    ]
]
crime_df

Unnamed: 0,SNYSTOLE,YEYSTOLE,SNYSELL,YEYSELL,DRVDONLY,TXYRJAIL,BOOKED,NOBOOKY2,YUJVDTON,YUJVDTN2,TXYRJLAD,LOCJAIL,ALCJAIL,DRGJAIL
0,99,1,99,1,99,99,2,999,2,999,99,0,0,0
1,1,99,1,99,99,99,2,999,99,999,99,0,0,0
2,1,99,1,99,91,91,2,999,99,999,91,0,0,0
3,1,99,1,99,1,99,2,999,99,999,99,0,0,0
4,1,99,1,99,99,99,2,999,99,999,99,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55263,1,99,1,99,91,91,2,999,99,999,91,0,0,0
55264,99,1,99,1,91,91,1,994,2,999,91,0,0,0
55265,1,99,1,99,99,99,2,999,99,999,99,0,0,0
55266,1,99,1,99,2,99,2,999,99,999,99,0,0,0


In [26]:
# Put the religion and crime columns side by side (axis=1 joins column-wise,
# aligning rows on the index), then relabel the coded crime columns with
# readable names. The religion columns were already renamed and pass through
# unchanged.
religion_crime_df = pd.concat([religion_df, crime_df], axis=1).rename(
    columns={
        "SNYSTOLE": "Ever_Steal_50",
        "YEYSTOLE": "Y_Ever_Steal_50",
        "SNYSELL": "Sell_Illgal_Drugs",
        "YEYSELL": "Y_Sell_Illgal_Drugs",
        "DRVDONLY": "Drunk_Drive",
        "TXYRJAIL": "Jail_Treatment_Past_12",
        "BOOKED": "Ever_Booked_Arrested",
        "NOBOOKY2": "No_times_Arrested_Past_12",
        "YUJVDTON": "Y_Arrested_Past_12",
        "YUJVDTN2": "Y_No_Nights_Jail",
        "TXYRJLAD": "Alc_Drug_Treatment_Ever",
        "LOCJAIL": "Drug_or_Alc_Past_12",
        "ALCJAIL": "Alc_Treatment_Past_12",
        "DRGJAIL": "Drug_Treatment_Past_12",
    }
)
religion_crime_df.shape

(55268, 18)

In [27]:
# Display the combined religion + crime frame with its relabelled columns.
religion_crime_df

Unnamed: 0,Past_12_months,Important,Influence,Friends_Share,Ever_Steal_50,Y_Ever_Steal_50,Sell_Illgal_Drugs,Y_Sell_Illgal_Drugs,Drunk_Drive,Jail_Treatment_Past_12,Ever_Booked_Arrested,No_times_Arrested_Past_12,Y_Arrested_Past_12,Y_No_Nights_Jail,Alc_Drug_Treatment_Ever,Drug_or_Alc_Past_12,Alc_Treatment_Past_12,Drug_Treatment_Past_12
0,99,99,99,99,99,1,99,1,99,99,2,999,2,999,99,0,0,0
1,2,3,3,2,1,99,1,99,99,99,2,999,99,999,99,0,0,0
2,6,4,4,2,1,99,1,99,91,91,2,999,99,999,91,0,0,0
3,4,3,3,1,1,99,1,99,1,99,2,999,99,999,99,0,0,0
4,2,1,1,1,1,99,1,99,99,99,2,999,99,999,99,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55263,1,3,3,3,1,99,1,99,91,91,2,999,99,999,91,0,0,0
55264,99,99,99,99,99,1,99,1,91,91,1,994,2,999,91,0,0,0
55265,1,97,97,97,1,99,1,99,99,99,2,999,99,999,99,0,0,0
55266,4,3,4,3,1,99,1,99,2,99,2,999,99,999,99,0,0,0
