In [1]:
from pathlib import Path
import pandas as pd

In [2]:
dir_path = Path.cwd()
raw_data_path = Path.joinpath(dir_path, "data", "raw")
interim_data_path = Path.joinpath(dir_path, "data", "interim")

In [3]:
df = pd.read_stata(Path.joinpath(raw_data_path, "level8.dta"))
df.columns

Index(['common_id', 'State', 'Sector', 'District', 'Schedule_ID',
       'FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
       'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
       'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'Serial_no',
       'Owned_No', 'Owned_Value_Rs', 'Blank', 'NSC', 'Multiplier', 'w'],
      dtype='object')

In [4]:
df.drop(['FSU_Serial_No', 'Round', 'Schedule', 'Sample', 'NSS_Region', 'Stratum',
       'Sub_Stratum', 'Sub_Round', 'FOD_Sub_Region', 'Second_stage_stratum_no',
       'Sample_hhld_No', 'Visit_number', 'Level', 'Filler', 'Blank','NSC'], axis = 1, inplace = True)

In [5]:
df

Unnamed: 0,common_id,State,Sector,District,Schedule_ID,Serial_no,Owned_No,Owned_Value_Rs,Multiplier,w
0,53335201,1,1,5,771,9,2,35000,31000,310.0
1,53335201,1,1,5,771,10,1,15500,31000,310.0
2,53335201,1,1,5,771,13,3,50500,31000,310.0
3,53335201,1,1,5,771,14,4,11500,31000,310.0
4,53335201,1,1,5,771,15,3,850,31000,310.0
...,...,...,...,...,...,...,...,...,...,...
162212,76985402,36,2,20,771,9,1,44000,374400,3744.0
162213,76985402,36,2,20,771,13,3,74000,374400,3744.0
162214,76985402,36,2,20,771,17,3,74000,374400,3744.0
162215,76982602,36,2,23,771,15,5,1200,950400,9504.0


In [6]:
var_names = [x.lower() for x in df.columns]
df.columns = var_names

In [7]:
df['animal_category'] = df['serial_no']

In [8]:
animal_labels = {}
for i in range(1, 18):
    if(i <= 6):
        animal_labels[i] = 'cattle exotic/cross-bred/descript/non-descript'
        if(i <= 2):
            animal_labels[i] += '- young stock'
            if(i == 1):
                animal_labels[1] += ' (male)'

            else:
                animal_labels[2] += ' (female)'
        elif((i>2) and (i<6)):
            animal_labels[i] += '- female'
            if((i==3) or (i==4)):
                animal_labels[i] += ' breeding cow'
                if(i==3):
                    animal_labels[3] += ' (milching)'
                else:
                    animal_labels[4] += ': dry/not calved even once'
            else:
                animal_labels[i] += ' other'
        else:
            animal_labels[6] += '- male cattle for work/breeding/other'
    elif((i>6) and (i<13)):
        animal_labels[i] = 'buffalo exotic/cross-bred/descript/non-descript'
        if(i <= 8):
            animal_labels[i] += '- young stock'
            if(i==7):
                animal_labels[7] += ' (male)'
            else:
                animal_labels[8] += ' (female)'
        elif((i>8) and (i<12)):
            animal_labels[i] += '- female'
            if((i==9) or (i==10)):
                animal_labels[i] += ' breeding buffalo'
                if(i==9):
                    animal_labels[9] += ' in milk'
                else:
                    animal_labels[10] += ': dry/not calved even once'
            else:
                animal_labels[i] += ' other'
        else:
            animal_labels[12] += '- male for work/breeding/other'


In [9]:
animal_labels

{1: 'cattle exotic/cross-bred/descript/non-descript- young stock (male)',
 2: 'cattle exotic/cross-bred/descript/non-descript- young stock (female)',
 3: 'cattle exotic/cross-bred/descript/non-descript- female breeding cow (milching)',
 4: 'cattle exotic/cross-bred/descript/non-descript- female breeding cow: dry/not calved even once',
 5: 'cattle exotic/cross-bred/descript/non-descript- female other',
 6: 'cattle exotic/cross-bred/descript/non-descript- male cattle for work/breeding/other',
 7: 'buffalo exotic/cross-bred/descript/non-descript- young stock (male)',
 8: 'buffalo exotic/cross-bred/descript/non-descript- young stock (female)',
 9: 'buffalo exotic/cross-bred/descript/non-descript- female breeding buffalo in milk',
 10: 'buffalo exotic/cross-bred/descript/non-descript- female breeding buffalo: dry/not calved even once',
 11: 'buffalo exotic/cross-bred/descript/non-descript- female other',
 12: 'buffalo exotic/cross-bred/descript/non-descript- male for work/breeding/other'}

In [10]:
animal_labels[13] = 'sub-total_buffalo_cattle'
animal_labels[14] = 'ovine and other mammals (sheep, goat, pig, rabbits, etc.)'
animal_labels[15] = 'poultry birds (hen, cock, chicken, duck, duckling, other poultry birds, etc.)'
animal_labels[16] = 'other including large heads (elephant, camel, horse, mule, pony, donkey, yak, mithun, etc.)'
animal_labels[17] = 'total'


In [11]:
df['aninmal_category'] = df['aninmal_category'].map(animal_labels)

KeyError: 'aninmal_category'

In [None]:
df

Unnamed: 0,common_id,schedule_id,sector,district,serial_no,owned_no,owned_value_rs,multiplier,w,aninmal_category
0,53335201,771,1,5,9,2,35000,31000,310.0,buffalo exotic/cross-bred/descript/non-descrip...
1,53335201,771,1,5,10,1,15500,31000,310.0,buffalo exotic/cross-bred/descript/non-descrip...
2,53335201,771,1,5,13,3,50500,31000,310.0,sub-total_buffalo_cattle
3,53335201,771,1,5,14,4,11500,31000,310.0,"ovine and other mammals (sheep, goat, pig, rab..."
4,53335201,771,1,5,15,3,850,31000,310.0,"poultry birds (hen, cock, chicken, duck, duckl..."
...,...,...,...,...,...,...,...,...,...,...
162212,76985402,771,2,20,9,1,44000,374400,3744.0,buffalo exotic/cross-bred/descript/non-descrip...
162213,76985402,771,2,20,13,3,74000,374400,3744.0,sub-total_buffalo_cattle
162214,76985402,771,2,20,17,3,74000,374400,3744.0,total
162215,76982602,771,2,23,15,5,1200,950400,9504.0,"poultry birds (hen, cock, chicken, duck, duckl..."


In [None]:
csv_path = Path.joinpath(interim_data_path, "level8.csv")
df.to_csv(csv_path, index=False)