In [1]:
from pathlib import Path
import pandas as pd

In [2]:
dir_path = Path.cwd()
raw_data_path = Path.joinpath(dir_path, "data", "raw")
interim_data_path = Path.joinpath(dir_path, "data", "interim")

In [3]:
df = pd.read_stata(Path.joinpath(raw_data_path, "level11.dta"))
df.columns

Index(['common_id', 'State', 'Sector', 'District', 'Centre_code_Round',
       'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
       'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
       'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'Sl_no',
       'oth_quant', 'oth_val', 'tot_prod_quant', 'tot_prod_val', 'blank',
       'NSC', 'Multiplier', 'w'],
      dtype='object')

In [4]:
df.drop(['Centre_code_Round',
       'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
       'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
       'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'blank','NSC'], axis = 1, inplace = True)

In [5]:
var_names = [x.lower() for x in df.columns]
df.columns = var_names

In [6]:
df.columns

Index(['common_id', 'state', 'sector', 'district', 'sl_no', 'oth_quant',
       'oth_val', 'tot_prod_quant', 'tot_prod_val', 'multiplier', 'w'],
      dtype='object')

In [8]:
if df["common_id"].is_unique:
    print("Common ID is unique for Level 11")
else:
    print("Common ID not unique for Level 11")

Common ID not unique for Level 11


In [9]:
df['animal_farming_category'] = df['sl_no']

In [11]:
animal_labels = {1: 'milk (cattle) (litre)',2:'milk ( buffalo) (litre)', 3: 'milk (sheep goat, etc.) (litre)', 4: 'egg (poultry, duck, etc.) (no.)', 5: 'wool (sheep, etc.) (kg)', 6: 'fish (kg)', 7: 'livestock cattle (nos.)', 8: 'livestock buffalo (nos.)', 9:' livestock sheep, goat, etc. (nos.)', 10: 'livestock pig (nos.)', 11: 'livestock poultry, duck, etc. (nos.)', 12: 'other livestock (nos.)', 13: 'skin, hide, bones', 14: 'manure', 15: 'value of other produce (Rs.)', 16: 'total value of produce'}

In [12]:
df['animal_farming_category'] = df['animal_farming_category'].map(animal_labels)

In [13]:
df

Unnamed: 0,common_id,state,sector,district,sl_no,oth_quant,oth_val,tot_prod_quant,tot_prod_val,multiplier,w,animal_farming_category
0,72227201,1,1,5,2,,,50.0,2000,170500,1705.0,milk ( buffalo) (litre)
1,72227201,1,1,5,16,,,,2000,170500,1705.0,total value of produce
2,72227301,1,1,5,2,,,30.0,1200,403000,4030.0,milk ( buffalo) (litre)
3,72227301,1,1,5,16,,,,1200,403000,4030.0,total value of produce
4,72227303,1,1,5,2,,,25.0,1000,403000,4030.0,milk ( buffalo) (litre)
...,...,...,...,...,...,...,...,...,...,...,...,...
75409,70112501,36,1,31,11,,,8.0,1970,100500,1005.0,"livestock poultry, duck, etc. (nos.)"
75410,70112501,36,1,31,14,,,,400,100500,1005.0,manure
75411,70112501,36,1,31,16,,,,4518,100500,1005.0,total value of produce
75412,70112502,36,1,31,14,,,,450,100500,1005.0,manure


In [14]:
csv_path = Path.joinpath(interim_data_path, "level11.csv")
df.to_csv(csv_path, index=False)