In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Data folders are resolved relative to the current working directory,
# so the notebook has to be launched from the directory that holds "data".
dir_path = Path.cwd()
data_root = dir_path / "data"
raw_data_path = data_root / "raw"
interim_data_path = data_root / "interim"

In [3]:
# Load the raw Stata file into a DataFrame.
level5_dta = raw_data_path / "level5.dta"
df = pd.read_stata(level5_dta)

In [11]:
# Columns removed from the frame before any further processing.
columns_to_drop = [
    'Centre_code_Round',
    'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
    'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
    'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'blank', 'NSC',
]
# Rebind `df` to the reduced frame; a missing column still raises KeyError.
df = df.drop(columns=columns_to_drop)

In [12]:
# Show the column labels that are currently on the frame.
df.columns

Index(['common_id', 'State', 'Sector', 'District', 'operated_ind_jointly',
       'type_of_holding', 'use_of_the_holding', 'number_of_percels',
       'number_crops_harvested', 'Multiplier', 'w'],
      dtype='object')

In [13]:
# Normalise every column label to lower case and write the labels back.
var_names = list(map(str.lower, df.columns))
df.columns = var_names

In [5]:
# Report whether `common_id` contains any duplicated values.
message = (
    "Common ID is unique for Level 5"
    if df["common_id"].is_unique
    else "Common ID not unique for Level 5"
)
print(message)

Common ID is unique for Level 5


In [24]:
# Text labels for the integer codes stored in the coded columns.
# Each dictionary is later passed to Series.map, so its values end up verbatim
# in the exported data: labels must not carry stray whitespace or misspellings.

# Codes of `operated_ind_jointly`.
ind_joint_labels = {1: 'individually', 2: 'jointly'}

# Codes of `type_of_holding`.
holding_type_labels = {
    1: 'entirely owned',
    2: 'entirely leased',
    3: 'both owned and leased-in',
    4: 'entirely otherwise possessed',
}

# Codes of `use_of_the_holding`.
holding_use_labels = {
    1: 'only for growing of crops: on land used for shifting /jhum cultivation',
    2: 'only for growing of crops:on land other than the land used for shifting /jhum cultivation',
    3: 'only for farming of animals',
    4: 'both for crop growing and animal farming',
    5: 'other agricultural uses',
}

In [18]:
# Show the column labels again; the recorded output lists them in lower case.
df.columns

Index(['common_id', 'state', 'sector', 'district', 'operated_ind_jointly',
       'type_of_holding', 'use_of_the_holding', 'number_of_percels',
       'number_crops_harvested', 'multiplier', 'w'],
      dtype='object')

In [19]:
# Swap each code for its text label; codes absent from the dictionary become NaN.
operated_codes = df['operated_ind_jointly']
df['operated_ind_jointly'] = operated_codes.map(ind_joint_labels)

In [23]:
# Swap each code for its text label; codes absent from the dictionary become NaN.
holding_type_codes = df['type_of_holding']
df['type_of_holding'] = holding_type_codes.map(holding_type_labels)

In [25]:
# Swap each code for its text label; codes absent from the dictionary become NaN.
holding_use_codes = df['use_of_the_holding']
df['use_of_the_holding'] = holding_use_codes.map(holding_use_labels)

In [26]:
# Display the frame; the recorded output shows text labels in the three mapped columns.
df

Unnamed: 0,common_id,state,sector,district,operated_ind_jointly,type_of_holding,use_of_the_holding,number_of_percels,number_crops_harvested,multiplier,w
0,72227101,1,1,5,individually,entirely owned,only for growing of crops:on land other than t...,1.0,1.0,139500,1395.0
1,72227102,1,1,5,individually,entirely owned,only for growing of crops:on land other than t...,1.0,1.0,139500,1395.0
2,72227201,1,1,5,individually,entirely owned,only for growing of crops:on land other than t...,1.0,1.0,170500,1705.0
3,72227202,1,1,5,individually,entirely owned,only for growing of crops:on land other than t...,1.0,1.0,170500,1705.0
4,72227301,1,1,5,individually,entirely owned,only for growing of crops:on land other than t...,2.0,2.0,403000,4030.0
...,...,...,...,...,...,...,...,...,...,...,...
46914,70112302,36,1,31,individually,both owned and leased-in,only for growing of crops:on land other than t...,2.0,2.0,335000,3350.0
46915,70112401,36,1,31,individually,entirely owned,both for crop growing and animal farming,3.0,2.0,83750,838.0
46916,70112402,36,1,31,individually,entirely owned,only for farming of animals,1.0,,83750,838.0
46917,70112501,36,1,31,jointly,both owned and leased-in,both for crop growing and animal farming,6.0,4.0,100500,1005.0


In [4]:
# Export the frame as CSV into the interim data folder (row index omitted).
csv_path = interim_data_path / "level5.csv"
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists; exist_ok=True keeps this safe when the cell is re-run.
csv_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_path, index=False)