In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Data folders are resolved relative to the directory the notebook runs from.
dir_path = Path.cwd()
raw_data_path, interim_data_path = (
    dir_path / "data" / stage for stage in ("raw", "interim")
)

In [3]:
# Read the raw Stata file into a DataFrame, then display its column names.
df = pd.read_stata(raw_data_path / "level9.dta")
df.columns

Index(['common_id', 'State', 'Sector', 'District', 'Schedule_ID',
       'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
       'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
       'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'Serial_no',
       'Owned_No', 'Owned_Value_Rs', 'equipment_owned', 'Blank', 'NSC',
       'Multiplier', 'w'],
      dtype='object')

In [4]:
# Columns that are not carried forward into the rest of the notebook.
unused_columns = [
    'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region',
    'Stratum', 'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region',
    'Second_stage_stratum_no', 'Sample_hhld_No', 'Visit_number',
    'Level', 'Filler', 'Blank', 'NSC',
]
# Rebind `df` to the trimmed frame instead of mutating it in place.
df = df.drop(columns=unused_columns)

In [5]:
# Display the frame as it stands after the column drop in the previous cell.
df

Unnamed: 0,common_id,State,Sector,District,Schedule_ID,Serial_no,Owned_No,Owned_Value_Rs,equipment_owned,Multiplier,w
0,53335303,1,1,5,771,2,1,25000,3.0,241800,2418.0
1,53335303,1,1,5,771,8,1,25000,,241800,2418.0
2,53339302,1,1,6,771,3,1,20000,3.0,881600,8816.0
3,53339302,1,1,6,771,8,1,20000,,881600,8816.0
4,53331304,1,1,7,771,3,1,30000,3.0,62292,623.0
...,...,...,...,...,...,...,...,...,...,...,...
186963,76272502,36,2,31,771,8,2,29500,,93500,935.0
186964,76272601,36,2,31,771,3,1,25500,3.0,149600,1496.0
186965,76272601,36,2,31,771,8,1,25500,,149600,1496.0
186966,76272602,36,2,31,771,3,1,29000,3.0,149600,1496.0


In [6]:
# Normalise every column name to lower case.
var_names = list(map(str.lower, df.columns))
df.columns = var_names

In [7]:
# Seed `transport_category` with the numeric `serial_no` codes; a later cell maps them to text labels.
df = df.assign(transport_category=df["serial_no"])

In [8]:
# Text label for each numeric transport code (keys 1..8, in listed order).
transport_labels = dict(
    enumerate(
        [
            'tractors (all types)',
            'motor cars/jeep/van',
            'motorcycles/ scooters/ mopeds/ auto-rickshaws',
            'rickshaw/e-rickshaw/toto rickshaw/van rickshaw',
            'bicycles',
            'carts (hand-driven / animal driven)',
            'other transport equipment incl. boats, trucks,trailers, light commercial vehicles (LCV), passenger buses, etc.',
            'total',
        ],
        start=1,
    )
)

In [9]:
# Text label for each numeric `equipment_owned` code (keys 1..3, in listed order).
use_labels = dict(
    enumerate(
        [
            'for farm business',
            'for non-farm business',
            'for household use',
        ],
        start=1,
    )
)

In [10]:
# Replace the numeric codes in both columns with their text labels.
# Codes without an entry in the lookup dict (including missing values) become NaN.
df = df.assign(
    transport_category=df["transport_category"].map(transport_labels),
    equipment_owned=df["equipment_owned"].map(use_labels),
)

In [11]:
# Display the frame after the numeric codes were mapped to text labels.
df

Unnamed: 0,common_id,state,sector,district,schedule_id,serial_no,owned_no,owned_value_rs,equipment_owned,multiplier,w,transport_category
0,53335303,1,1,5,771,2,1,25000,for household use,241800,2418.0,motor cars/jeep/van
1,53335303,1,1,5,771,8,1,25000,,241800,2418.0,total
2,53339302,1,1,6,771,3,1,20000,for household use,881600,8816.0,motorcycles/ scooters/ mopeds/ auto-rickshaws
3,53339302,1,1,6,771,8,1,20000,,881600,8816.0,total
4,53331304,1,1,7,771,3,1,30000,for household use,62292,623.0,motorcycles/ scooters/ mopeds/ auto-rickshaws
...,...,...,...,...,...,...,...,...,...,...,...,...
186963,76272502,36,2,31,771,8,2,29500,,93500,935.0,total
186964,76272601,36,2,31,771,3,1,25500,for household use,149600,1496.0,motorcycles/ scooters/ mopeds/ auto-rickshaws
186965,76272601,36,2,31,771,8,1,25500,,149600,1496.0,total
186966,76272602,36,2,31,771,3,1,29000,for household use,149600,1496.0,motorcycles/ scooters/ mopeds/ auto-rickshaws


In [12]:
# Export the labelled frame as level9.csv in the interim data folder, without the row index.
csv_path = interim_data_path / "level9.csv"
# `to_csv` does not create missing folders, so make sure the target directory
# exists; otherwise the export fails on a fresh checkout without data/interim.
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)