## EBA/PES Nippes-Haiti: Baseline evaluation data cleaning and summary analysis

In [1]:
#Import the necessary modules

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None 

In [2]:
# Help to display very large datasets. That means dataset with a lot of columns.

pd.set_option('display.max_colwidth', 20000)
out_put_path = r'../datasets/endline/output/'

In [3]:
#Use os and sys to setup a relative path to the current working directory in order to make python modules import easier.
import os
import sys

In [4]:
sys.path.append(os.path.join(os.path.dirname('__file__'), '..', 'utils'))

In [5]:
#functions is a module from the utils package. It gathers all the utility functions to perferm aggregation on columns.
from functions import *

In [6]:
#Read the csv file and ingest the data with pandas

farmers_df = pd.read_csv("../datasets/endline/input/farmers_end.csv", encoding="utf-8")

In [7]:
farmers_df.shape

(52, 263)

In [8]:
#Create a copy of the farmers_df to work with.
f_df = farmers_df.copy()

In [9]:
assert len(f_df.columns) == 263

In [10]:
#f_df.head(2)

In [11]:
columns = f_df.columns

In [12]:
#columns

In [13]:
col_name = "Q2 – Sèks"
values = {"Gason":"Male", "Fi":"Female"}

sex_col = rename_col_values(f_df, col_name, values)

In [14]:
f_df["Sex"] = sex_col
n_f_df = f_df[["Sex"]]

In [15]:
sex_def = count_freq_simple_answer(f_df, 'Sex')

In [16]:
sex_def

Unnamed: 0,response,frequency
0,,3
1,Male,30
2,Female,19


In [17]:
sex_def.rename(columns={"response":"Sex", "frequency":"Frequencies"})

Unnamed: 0,Sex,Frequencies
0,,3
1,Male,30
2,Female,19


In [18]:
n_f_df.shape

(52, 1)

In [19]:
col_name = "Q3 - Nan ki entèval laj ou sitiye ?  "
values = {"Mwens ke 25 lane":"Less than 25", 
          "Ant 25 ak 35 lane":"From 25 to 35",
          "Ant 35 ak 55 lane":"From 35 to 55",
          "Plis ke 55 lane":"Greater than 55",
         }

ages_col = rename_col_values(f_df, col_name, values)
col_name ="Age interval"
n_f_df[col_name] = pd.Series(ages_col)

ages_df = count_freq_simple_answer(n_f_df, col_name)
ages_df

Unnamed: 0,response,frequency
0,,3
1,Greater than 55,10
2,From 35 to 55,24
3,From 25 to 35,14
4,Less than 25,1


In [20]:
col_name = "Q4 - Ki nivo edikasyon w"
values = {"Okenn Nivo":"None", 
          "Primè":"Primary school",
          "Segondè":"High school",
          "Inivèsitè":"University",
         }

education_col = rename_col_values(f_df, col_name, values)
col_name ="Education level"
n_f_df[col_name] = pd.Series(education_col)

education_df = count_freq_simple_answer(n_f_df, col_name)
education_df

Unnamed: 0,response,frequency
0,,3
1,Primary school,20
2,High school,23
3,,4
4,University,2


In [21]:
col_name = "Q5 - Ou gen telefòn Digicel ?"
values = {"Wi":"Yes", 
          "Non":"No"
         }
phone_access_col =  rename_col_values(f_df, col_name, values)

col_name ="Access to phone"
n_f_df[col_name] = pd.Series(phone_access_col)

phone_access_df = count_freq_simple_answer(n_f_df, col_name)
phone_access_df

Unnamed: 0,response,frequency
0,,2
1,Yes,48
2,No,2


In [22]:
col_name ="Q6 - Ki prensipal aktivite ou fe pou w viv? "
values = {"Agrikilti":"Agriculture", 
          "Elvaj":"Animal breeding",
          "Chabon ak bwa mang":"Charcoal with mangrove Woods",
          "Chabon ak lòt bwa":"Charcoal with woods of other trees",
          "Lòt (Presize)":"Other",
         }

income_source =  rename_col_with_multiple_values(f_df, col_name, values)
eng_col_name = "Income source"

n_f_df[eng_col_name] = pd.Series(income_source)

other_activities = []

for act in f_df["Presizyon 1 Lòt (Q6)"]:
    if type(act) != str:
        other_activities.append(None)
        continue
    if act == 'Mason' or act == 'Mason ,mèt grefè' or act == 'Bòs mason':
        other_activities.append("Mason")
    elif act == 'Biznis' or act == 'Komès' or act == 'Ti  komès' or act == 'Vann chabon' or act == 'Komès chabon' or act == 'Machann chabon' or act == 'Komėsant' or act =='Komès pwovizyon alimantè':    
        other_activities.append("Business")
    elif act == 'Ansenyan':
        other_activities.append("Teacher")
    elif act == 'Boss soudi' or act == 'Fewoni':
        other_activities.append("Black-smith") 
    elif act == 'Ebenis ' or act == 'Chapant  ak  ebenis':
        other_activities.append("Carpenter") 
    elif act == 'Pechè' or act == 'Peche pwason' or act == 'Pech':
        other_activities.append("Fishing")
    else:
        other_activities.append(None)
        
new_income_col = []

for i in range(len(other_activities)):
    if type(other_activities[i]) != str:
        new_income_col.append(n_f_df[eng_col_name][i])
        continue
    new_income_col.append(n_f_df[eng_col_name][i] + ' ' + other_activities[i]) 

n_f_df[eng_col_name] = pd.Series(new_income_col)
     
patterns = ["Agriculture",
            "Animal breeding", 
            "Charcoal with mangrove Woods",
            "Charcoal with woods of other trees",
            "Mason",
            "Teacher",
            "Business",
            "Black-smith",
            "Carpenter",
            "Fishing"]

income_source_df = count_freq_multiple_answer(n_f_df, eng_col_name, patterns)
income_source_df

Unnamed: 0,response,frequency
0,Agriculture,46
1,Animal breeding,22
2,Business,11
3,Charcoal with woods of other trees,8
4,Mason,2
5,Charcoal with mangrove Woods,1


In [23]:
col_name = "Q7 - Kisa mangwòv Bondeau (oubyen mang) a ye selon oumenm ?"
values = {"Mwen pa konnen – Pase a Q13":"Don't know", 
          "Mwen pa konnen – Pase a Q12":"Don't know",
          "Se yon pye bwa ki pwodwi mango":"A tree that produces mangoes",
          "Se yon rak bwa ki grandi bo lanmè":"A tree that grows in forest by the sea",
         }

environment_knowledge1 = rename_col_values(f_df, col_name, values)

col_name ="What are mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge1)

environment_knowledge1_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge1_df

Unnamed: 0,response,frequency
0,,10
1,A tree that grows in forest by the sea,30
2,Don't know,12


In [24]:
col_name = "Q8 - Eske ou konnen lwa oubyen règleman ki pale de mangwov (mang) ?"
values = {"Wi":"Yes", 
          "Non – Si non, pase a Q9":"No",
         }

environment_knowledge2 = rename_col_values(f_df, col_name, values)
col_name = "Do you know the laws about mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge2)

environment_knowledge2_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge2_df

Unnamed: 0,response,frequency
0,,12
1,No,25
2,Yes,15


In [25]:
col_name = "Q8.1 - Si wi, kiyès ?"
values = {"Kòd riral ":"Don't know", 
          "Dekrè sou anviwonman (2005) ":"Environment Decree (2005)",
          "Dekrè sou mangwòv (2013)":"Mangroves Decree (2013)",
          "Lòt (Presize) ":"Other",
         }

environment_knowledge3 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What text of law about the environment do you know?"
n_f_df[col_name] = pd.Series(environment_knowledge3)

patterns = ["Don't know",
            "Environment Decree (2005)", 
            "Mangroves Decree (2013)", 
            "Other"]

environment_knowledge3_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge3_df

Unnamed: 0,response,frequency
0,Environment Decree (2005),2
1,Other,2
2,Mangroves Decree (2013),8


In [26]:
col_name = "Q8.1.1 - Eskew konn kisa lwa sa yo di sou zafe pwoteksyon mangwov ?"
values = {"Wi":"Yes", 
          "Non – Si non pase a Q9":"No",
         }

environment_knowledge4 = rename_col_values(f_df, col_name, values)
col_name = "Do you know what the laws say about mangroves protection?"
n_f_df[col_name] = pd.Series(environment_knowledge4)

environment_knowledge4_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge4_df

Unnamed: 0,response,frequency
0,,31
1,Yes,12
2,No,9


In [27]:
col_name = "Q8.1.1.1- ki sa lwa yo di ? "
values = {"Pa fè kay nan mangwòv":"Don't know", 
          "Pa koute mangwòv yo":"Don't cut them",
          "Pa jete fatra/dechè plastik/vye materyo konstriksyon nan mangwòv yo ":"Do not throw garbage in the mangroves",
          "Lòt (Presize)":"Other",
         }
environment_knowledge5 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What does the law about the environment and mangrove protection say?"
n_f_df[col_name] = pd.Series(environment_knowledge5)

patterns = ["Don't know",
            "Don't cut them", 
            "Do not throw garbage in the mangroves", 
            "Other"]

environment_knowledge5_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge5_df

Unnamed: 0,response,frequency
0,Don't cut them,9
1,Don't know,4
2,Do not throw garbage in the mangroves,9


In [28]:
col_name = "Q9 - Ki enpotans ou konnen mang lan genyen ?"
values = {"Li kontribiye a bay sekirite alimantè (gras ak pwodwi pèch li ofri yo)":"It contributes to food security (thank to fishing products)", 
          "Pwoteksyon konn inondasyon ":"Prevent flooding",
          "Pwoteksyon kot yo ak resif yo":"Protect the reefs",
          "Se espas kote anpil zwazo viv ak repwodwi":"Birds habitat",
          "Refij pou fwi lame (kòm pwason, krab ak sirik) repwodwi oubyen grandi ":"Growing environment for crustaceans",
          "Lòt (Presize)":"Other",
          "Mwen pa konnen":"Don't know"
         }
environment_knowledge6 =  rename_col_with_multiple_values(f_df, col_name, values)

n_f_df["What is the importance of the mangroves?"] = pd.Series(environment_knowledge6)

patterns = ["It contributes to food security (thank to fishing products)",
            "Prevent flooding", 
            "Protect the reefs", 
            "Birds habitat",
            "Growing environment for crustaceans",
            "Don't know",
            "Other"]

col_name = "What is the importance of the mangroves?"

environment_knowledge6_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge6_df

Unnamed: 0,response,frequency
0,Prevent flooding,19
1,Protect the reefs,10
2,Birds habitat,10
3,Other,3
4,Growing environment for crustaceans,17
5,Don't know,9


In [29]:
col_name = "Q10 - Daprè ou, èske mang lan an danje ?"
values = {"Wi":"Yes", 
          "Non – Si non, pase a Q11":"No",
         }

environment_knowledge7 = rename_col_values(f_df, col_name, values)
n_f_df["Are mangroves in danger?"] = pd.Series(environment_knowledge7)

environment_knowledge7_df = count_freq_simple_answer(n_f_df, "Are mangroves in danger?")
environment_knowledge7_df

Unnamed: 0,response,frequency
0,,13
1,Yes,27
2,No,12


In [30]:
col_name = "Q10.1 Poukisa selon ou mang lan an danje ? "
values = {"Leta a pa prezan/pa gen kontwòl nan jan moun ap eksplwate mangwòv yo":"Absence of government/no control of mangroves exploitation", 
          "Kominote ap mal eksplwate mangwòv yo (Yo koupe yo twòp pou fe chabon ak kay)":"Community over-exploitation of mangroves",
          "Mangwòv yo ap fin konble ak tè ki soti nan tèt mòn yo akoz ewozyon ":"Continuous deposition of sediments in mangroves",
          "Nivo lamè a ap bese lakoz gen plantasyon mangwòv ki ap fin seche":"Decrease in the sea level",
          "Moun yo ap konstwi kay nan espas kote ki ta dwe gen mang selman, ":"Urbanization of mangrove areas",
          "Yo jete anpil fatra (dechè plastik, vye materyo konstriksyon) nan espas mangwòv yo ":"Mangroves become a dumping ground for garbage",
          "Lòt (Presize)":"Other",
         }

environment_knowledge8 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "Why do you believe the mangroves are in danger?"
n_f_df[col_name] = pd.Series(environment_knowledge8)

patterns = ["Absence of government/no control of mangroves exploitation",
            "Community over-exploitation of mangroves", 
            "Continuous deposition of sediments in mangroves", 
            "Decrease in the sea level",
            "Urbanization of mangrove areas",
            "Mangroves become a dumping ground for garbage",
            "Other"]

environment_knowledge8_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge8_df

Unnamed: 0,response,frequency
0,Absence of government/no control of mangroves exploitation,11
1,Community over-exploitation of mangroves,22
2,Continuous deposition of sediments in mangroves,10
3,Decrease in the sea level,8
4,Urbanization of mangrove areas,6
5,Mangroves become a dumping ground for garbage,3
6,Other,1


In [31]:
col_name = "Q11 - Eske ou konnen ki aksyon oubyen mezi ki ka pwoteje mang lan ?"
values = {"Wi":"Yes", 
          "Non – SI non, pase a Q12":"No",
         }

environment_knowledge9 = rename_col_values(f_df, col_name, values)
col_name = "Do you know what can be done to protect the mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge9)

environment_knowledge9_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge9_df

Unnamed: 0,response,frequency
0,,14
1,No,11
2,Yes,27


In [32]:
col_name = "Q11.1 Kisa ki kap fèt pou pwoteje mangwòv yo selon ou ? "
values = {"Elabore yon plan jesyon ak amenajman kominotè ":"Elaborate a plan with community management", 
          "Met sou pye yon komite jesyon":"Organize a management committee",
          "Itilize espas mangwòv yo pou bay moun yo lot resous tankou elvaj myèl":"Use mangroves areas for bee keeping",
          "Plante plis mang ":"Plant more mangroves",
          "Valorize espas ak resous li yo atravè aktivite touris":"Valorize mangroves forest with tourism",
          "Edikasyon ak sansibilizsyon popilasyon an sou jan yo dwe eksplwate l":"Educate and sensitize the population on how to exploit mangroves",
          "Mete an aplikasyon lwa ak reglemantasyon sou mangwòv yo ":"Enforce law to protect the mangroves",
          "Konsèvasyon sòl nan mòn yo ak rebwazman nan basen vesan an ":"Soil conservation in the mountains",
          "Itilize materyèl pèch ki pi dirab":"use of fishing tools that are more durable",
          "Lòt (Presize)":"Other",
         }

environment_knowledge10 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What can be done to protect the mangroves according to you?"
n_f_df[col_name] = pd.Series(environment_knowledge10)

patterns = ["Elaborate a plan with community management",
            "Organize a management committee", 
            "Use mangroves areas for bee keeping", 
            "Plant more mangroves",
            "Valorize mangroves forest with tourism",
            "Educate and sensitize the population on how to exploit mangroves",
            "Enforce law to protect the mangroves",
            "Soil conservation in the mountains",
            "use of fishing tools that are more durable",
            "Other"]



environment_knowledge10_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge10_df

Unnamed: 0,response,frequency
0,Elaborate a plan with community management,20
1,Organize a management committee,17
2,Use mangroves areas for bee keeping,11
3,Plant more mangroves,17
4,Enforce law to protect the mangroves,11
5,Valorize mangroves forest with tourism,10
6,Other,7
7,Educate and sensitize the population on how to exploit mangroves,11
8,Soil conservation in the mountains,7


In [33]:
col_name = "Q13- Ki enpòtans ou konnen yon pye bwa genyen ?"
values = {"Kenbe tè yo kont ewozyon ":"Protect soil agains erosion", 
          "Pemèt dlo rantre byen nan tè a":"Facilitate water filtration to the ground",
          "Bay manje pou moun ak bet":"Provide food for human and animals",
          "Medikaman natirèl":"Natural medicines",
          "Kontwole tanperati a":"Controle the earth temperature",
          "Bay oksijèn pou moun respire byen ":"Provide oxygen",
          "Mwen pa konnen":"Don't know",
          "Pwoteksyon kont inondasyon":"Protection against flooding",
          "Pwoteksyon kont gwo van (briz van) ":"Wind breaker",
          "Lòt (Presize)":"Other",
         }

environment_knowledge11 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What is the importance of trees according to you?"
n_f_df[col_name] = pd.Series(environment_knowledge11)

patterns = ["Protect soil agains erosion",
            "Facilitate water filtration to the ground", 
            "Provide food for human and animals", 
            "Natural medicines",
            "Controle the earth temperature",
            "Provide oxygen",
            "Don't know",
            "Protection against flooding",
            "Wind breaker",
            "Other"]

environment_knowledge11_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge11_df

Unnamed: 0,response,frequency
0,Protect soil agains erosion,29
1,Provide oxygen,25
2,Facilitate water filtration to the ground,10
3,Provide food for human and animals,23
4,Natural medicines,19
5,Controle the earth temperature,11
6,Protection against flooding,18
7,Wind breaker,14
8,Other,5


In [34]:
col_name = "Q15- Eske ou konn koman pou w fe pepinye pou pwodwi pye bwa ?"

values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge12 = rename_col_values(f_df, col_name, values)
col_name = "Do you have skills in tree nursery?"

n_f_df[col_name] = pd.Series(environment_knowledge12)

environment_knowledge12_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge12_df

Unnamed: 0,response,frequency
0,,2
1,Yes,31
2,No,19


In [35]:
col_name = "Q16 - Eske ou prè pou w patisipe nan fòmasyon pou w konn kijan pou w pwoteje chan mangwòv yo ?"

values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge13 = rename_col_values(f_df, col_name, values)
col_name = "Are you willing to be trained to protect the mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge13)

environment_knowledge13_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge13_df

Unnamed: 0,response,frequency
0,,10
1,Yes,41
2,No,1


In [36]:
col_name = "Q17 - Eske w ou ta renmen pran fòmasyon pou konnen kijan pou w fe pepinyè pou pwodwi pye bwa ?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge14 = rename_col_values(f_df, col_name, values)
col_name = "Are you willing to be trained in tree nursery?"
n_f_df[col_name] = pd.Series(environment_knowledge14)

environment_knowledge14_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge14_df

Unnamed: 0,response,frequency
0,,2
1,Yes,50


In [37]:
col_name = "Q18 – Eske ou pre pou w chanje fason w’ap fè jaden pou w pwoteje mangwòv yo ?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge15 = rename_col_values(f_df, col_name, values)
col_name = "Are you willing to chnage your agricultural practices to protect the mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge15)

environment_knowledge15_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge15_df

Unnamed: 0,response,frequency
0,,10
1,Yes,42


In [38]:
col_name = "Q19- Eske w prè pou w adopte lot plant (tankou morenga) pou w ka pwoteje tè yo kont ewozyon?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge16 = rename_col_values(f_df, col_name, values)
col_name = "Are you willing to adopt new culture like Moringa to protect the soil against erosion?"
n_f_df[col_name] = pd.Series(environment_knowledge16)

environment_knowledge16_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge16_df

Unnamed: 0,response,frequency
0,,1
1,Yes,51


In [39]:
col_name = "Q20 – Pandan 12 mwa ki sot pase la yo kiles pami aktivite sa yo ou reyalize nan mangwòv la ?"
values = {"Sit pèch pou mete bato":"Parking for the boats", 
          "Koupe bwa vèt, ":"Cut out living trees",
          "Koupe bwa sèch, ":"Cut dead trees",
          "Elvaj myèl, ":"Beekeeping",
          "Peche pwason ak lot bet lanmè ":"Fishing",
          "Lòt (Presize)":"Other",
         }

environment_knowledge17 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "For the pase 12 months, what activities did you do in the mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge17)

patterns = ["Parking for the boats",
            "Cut out living trees",
            'Cut dead trees',
            'Beekeeping',
            'Fishing',
            "Other"]

environment_knowledge17_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge17_df

Unnamed: 0,response,frequency
0,Cut out living trees,3
1,Cut dead trees,6
2,Fishing,6
3,Other,12
4,Beekeeping,1


In [40]:
col_name = "Q21 - Kisa ou konn fe ak pye bwa ki nan jadenw?"
values = {"Rekolte fwi ak fèy pou manje oubyen fè remèd, ":"Harvest their fruits and leaves as food or medicines", 
          "Rekòlte fwi oubyen fèy pou bay bèt manje, ":"Harvest their fruits and leaves to feed animals",
          "Pran bwa sèch pou fè manje":"Chop dead branches for cooking",
          "Koupe bwa pou fè chabon":"Chop woods to make charcoal",
          "Koupe bwa pou konstriksyon":"Chop woods for construction",
          "Lòt (presize)":"Other",
         }

environment_knowledge18 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What do you use to do with trees that are in your garden?"
n_f_df[col_name] = pd.Series(environment_knowledge18)

patterns = ["Harvest their fruits and leaves as food or medicines",
            "Harvest their fruits and leaves to feed animals",
            'Chop dead branches for cooking',
            'Chop woods to make charcoal',
            'Chop woods for construction',
            "Other"]

environment_knowledge18_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge18_df

Unnamed: 0,response,frequency
0,Harvest their fruits and leaves as food or medicines,37
1,Harvest their fruits and leaves to feed animals,22
2,Chop dead branches for cooking,22
3,Chop woods to make charcoal,16
4,Chop woods for construction,11
5,Other,6


In [41]:
col_name = "Q12 – Ki kilti ou pi byen konn pratike ?"
values = {"Mayi":"Corn", 
          "Pitimi":"Sorgho",
          "Pistach":"Peanut",
          "Pwa Kongo":"Pigeon pea",
          "Pwa (pwa nwa ak lot varyete)":"Bean (Haricot)",
          "Vetivè":"Vetiver",
          "Yanm":"Yam",
          "Bannann":"Banana tree",
          "Sitwon":"Lemon",
          "Lòt (Presize)":"Other",
         }

environment_knowledge19 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What culture you practice the best?"
n_f_df[col_name] = pd.Series(environment_knowledge19)


precis_l = [ i.strip().lower() if type(i) == str else None for i in f_df["Presizyon 1 Lòt (Q12)"]]

precis = []

for e in precis_l:
    if type(e) != str:
        precis.append(None)
        continue
    elif e == 'melon':
        precis.append('Melon')
    elif e == 'pèch' or e =='pech':
        precis.append("Peach")
    elif e == 'pech ,melon':
        precis.append("Peach melon")
    elif e == 'manyòk ak melon':
        precis.append("Manioc Melon")
    elif e == 'joumou':
        precis.append("Giraumon")
    elif e == 'manioc' or e == 'manyok' or e == 'melon, manyòk' or e == 'mannyok' or e == 'manyòk ak  papay':
        precis.append("Manioc")
    elif e == 'pwa kongo':
        precis.append("Pigeon pea")
    elif e == 'melon , patat,papay':
        precis.append("Melon Sweet potato Papaya")
    else: 
        precis.append(None)
        
new_precis_col = []

for i in range(len(precis)):
    if type(precis[i]) != str:
        new_precis_col.append(n_f_df[col_name][i])
        continue
    new_precis_col.append(n_f_df[col_name][i] + ' ' + precis[i]) 

n_f_df[col_name] = pd.Series(new_precis_col)
  
patterns = ["Corn",
            "Sorgho",
            'Melon',
            'Peach',
            'Giraumon',
            "Peanut", 
            'Manioc',
            "Pigeon pea",
            "Sweet potato",
            "Bean (Haricot)",
            "Papaya",
            "Vetiver",
            "Yam",
            "Banana tree",
            "Lemon",
            "Other"]

environment_knowledge19_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge19_df

Unnamed: 0,response,frequency
0,Corn,42
1,Sorgho,18
2,Pigeon pea,41
3,Bean (Haricot),28
4,Yam,22
5,Banana tree,25
6,Other,16
7,Vetiver,1
8,Lemon,2
9,Melon,2


In [42]:
col_name = "Q14- Pratik agrikòl – Eske ou konn ?"
values = {"Plante pistach nan flan mòn yo":"Grow peanuts on the mountainsides", 
          "Plante pistach, vetivè, manyòk, yanm ak lot kilti ki ka rache le wap rekolte yo oubyen ou fouye te a le wap rekolte yo nan flan mòn yo":"Cultivate seasonal crops on the mountainsides",
          "Laboure te ki nan flan mòn yo":"Plowing the soil",
          "Fe misek pou pwoteje tè a kont erozyon":"Soil conservation with dry walls",
          "Fe ranp vivan pou pwoteje te yo kont ewozyon":"Soil conservation with living ramp",
          "Fe kanal koutou pou pemet plis dlo rantre nan te a ":"Irrigation canals",
          "Plante pye bwa nan jaden yo":"Plant trees in the gardens",
          "Plante banbou nan flan mon yo, bo rivye pou kenbe te yo":"Plant bamboos on the mountainsides",
          "Koupe pye bwa nan jaden yo":"Cut off trees in the garden",
          "Brile te a ak raje ki ladanl":"Burn the soil with the weeds",
          "Mete fimye nan plant yo":"Put organic fertilizer in the soil",
          "Mare bet nan jaden yo pou anrichi te a":"Breed animals after the crops to fertilize the soil",
          "Fe lot pratik nan jaden ou yo ":"Other",
         }

environment_knowledge20 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What agricultural practices do you know?"
n_f_df[col_name] = pd.Series(environment_knowledge20)

patterns = ["Grow peanuts on the mountainsides",
            "Cultivate seasonal crops on the mountainsides",
            'Plowing the soil',
            'Soil conservation with dry walls',
            'Soil conservation with living ramp',
            "Irrigation canals", 
            'Plant trees in the gardens',
            "Plant bamboos on the mountainsides",
            "Cut off trees in the garden",
            "Burn the soil with the weeds",
            "Put organic fertilizer in the soil",
            "Breed animals after the crops to fertilize the soil",
            "Other"]

environment_knowledge20_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge20_df

Unnamed: 0,response,frequency
0,Soil conservation with dry walls,19
1,Grow peanuts on the mountainsides,3
2,Plowing the soil,9
3,Plant trees in the gardens,21
4,Cut off trees in the garden,12
5,Burn the soil with the weeds,13
6,Put organic fertilizer in the soil,26
7,Breed animals after the crops to fertilize the soil,22
8,Plant bamboos on the mountainsides,1
9,Other,2


In [43]:
col_name = "Q20.3 – Nan ki peryòd ou konn koupe mang yo ?"
values = {"Janvye":"January", 
          "Fevriye":"February",
          "Mas":"March",
          "Avril":"April",
          "Me":"May",
          "Jen":"June",
          "Jiyè":"July",
          "Out":"August",
          "Septanm":"September",
          "Novanm":"November",
          'Evenman (Presize)':'Other'
         }

environment_knowledge20 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "When do you use to cut out the mangroves?"
n_f_df[col_name] = pd.Series(environment_knowledge20)

patterns = ["January",
            "February",
            'March',
            'April',
            'May',
            'June',
            'July',
            'August',
            'September',
            'October',
            'November',
            'December',
            "Other"]

environment_knowledge20_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge20_df

Unnamed: 0,response,frequency
0,April,1
1,June,1
2,Other,3
3,January,2
4,May,3
5,July,3
6,February,1
7,March,1
8,August,3


In [44]:
col_name = "Q22- Eske ou kon plante pye bwa nan jaden w deja ?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge21 = rename_col_values(f_df, col_name, values)
col_name = "Do you use to plant trees in your gargen?"
n_f_df[col_name] = pd.Series(environment_knowledge21)

environment_knowledge21_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge21_df

Unnamed: 0,response,frequency
0,,1
1,Yes,51


In [45]:
col_name = "Q23 - Eske ou kon wè yon jaden moriga/benzoliv deja ? "
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge22 = rename_col_values(f_df, col_name, values)
col_name = "Have you already seen Moringa garden?"
n_f_df[col_name] = pd.Series(environment_knowledge22)

environment_knowledge22_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge22_df

Unnamed: 0,response,frequency
0,,1
1,Yes,50
2,No,1


In [46]:
col_name = "Q24 - Kisa ou konnen yo fè ak fèy, grenn oubyen flè moringa/benzoliv ?  "
values = {"Mwen pa konnen":"Don't know", 
          "Fèy pou moun manje":"Human food (their leaves)",
          "Fèy pou bèt manje  ":"Animal food",
          "Fèy pou fè poud  ":"Transform it to powder",
          "Grenn pou fè lwil":"Transform the fruits to oil",
          "Flè, grenn konn itilize kòm remèd (te) ":"Use them as medicine",
          "Lòt (Presize)":"Other"
         }

environment_knowledge23 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "What can one do with the leaves or the fruit of Moringa?"
n_f_df[col_name] = pd.Series(environment_knowledge23)

patterns = ["Don't know",
            "Human food (their leaves)",
            'Animal food',
            'Transform it to powder',
            'Transform the fruits to oil',
            'Use them as medicine',
            'Other',
]

environment_knowledge23_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge23_df

Unnamed: 0,response,frequency
0,Human food (their leaves),43
1,Transform the fruits to oil,17
2,Transform it to powder,16
3,Other,3
4,Animal food,10
5,Use them as medicine,14
6,Don't know,1


In [47]:
col_name = "Q25 - Ou konn plante moringa/benzoliv ? "
values = {"Wi – Si wi pase a Q25.1 ":"Yes", 
          "Non – Si non, pase a Q25.2 ":"No",
         }

environment_knowledge24 = rename_col_values(f_df, col_name, values)
col_name = "Do you cultivate Moringa?"
n_f_df[col_name] = pd.Series(environment_knowledge24)

environment_knowledge24_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge24_df

Unnamed: 0,response,frequency
0,,1
1,Yes,44
2,No,7


In [48]:
col_name = "Q25.1 - Pou fè kisa ou plante moringa/benzoliv ?"
values = {"Kloti":"Fence", 
          "Manje pou bèt":"Animal food",
          "Aliman kòm legim":"Food/Vegetable",
          "Fèy, grenn ak flè pou vann":"Sale the leaves and fruits",
          "Lòt":"Other"
         }

environment_knowledge25 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "Why do you cultivate Moringa?"
n_f_df[col_name] = pd.Series(environment_knowledge25)

patterns = ["Fence",
            "Animal food",
            'Food/Vegetable',
            'Sale the leaves and fruits',
            'Other',
           ]

environment_knowledge25_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge25_df

Unnamed: 0,response,frequency
0,Food/Vegetable,44
1,Sale the leaves and fruits,11
2,Animal food,10
3,Other,6
4,Fence,4


In [49]:
col_name = "Q25.2 – Eske w pre pou w plante morenga alavni?"
values = {"Wi – Si wi pase a Q25.2.1 ":"Yes", 
          "Non, si non pase a konklizyon":"No",
         }

environment_knowledge26 = rename_col_values(f_df, col_name, values)
col_name = "Are you willing to cultivate Moringa in the future?"
n_f_df[col_name] = pd.Series(environment_knowledge26)

environment_knowledge26_df = count_freq_simple_answer(n_f_df, col_name)
environment_knowledge26_df

Unnamed: 0,response,frequency
0,,1
1,Yes,50
2,No,1


In [50]:
col_name = "Q25.2.1 Ki kantite espas wap dispoze pou plante moringa ?"
values = {"Yon kawo tè":"1 ha", 
          "Plis ke yon kawo tè":"More than 1 ha",
          "Yon ka tè (1/4)":"0.25 ha",
          "Mwens ke yon ka tè":"Less than 0.25 ha",
          "M poko gen tè pou sa":"Don't have farm",
          "M poko ka deside ":"No decision yet",
         }

environment_knowledge27 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "How much space in your farm are you willing to use to cultivate Moringa?"
n_f_df[col_name] = pd.Series(environment_knowledge27)

patterns = ["1 ha",
            "More than 1 ha",
            '0.25 ha',
            'Less than 0.25 ha',
            "Don't have farm",           
            'No decision yet',
           ]

environment_knowledge27_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge27_df

Unnamed: 0,response,frequency
0,1 ha,23
1,More than 1 ha,14
2,0.25 ha,21
3,Less than 0.25 ha,10
4,Don't have farm,2
5,No decision yet,3


In [51]:
col_name = "Q25.2.2 Nan ki bitasyon li ye ?"
values = {"Bondeau (4e PR)":"4th Section", 
          "Bezin 4e PR)":"4th Section",
          "Bwa pati (4e PR) ":"4th Section",
          "Nan jenn (4e PR) ":"4th Section",
          "Coray (4e PR) ":"4th Section",
          "Nan kokoye (4e PR) ":"4th Section",
          "Djaka (4e PR)":"4th Section",
          "Kafoudan (2e P)":"2nd Section",
          "Lòt":"Other"
         }

environment_knowledge28 =  rename_col_with_multiple_values(f_df, col_name, values)

col_name = "Where would you cultivate Moringa?"
n_f_df[col_name] = pd.Series(environment_knowledge28)

patterns = ["4th Section",
            "2nd Section",
            "Other"
           ]

environment_knowledge28_df = count_freq_multiple_answer(n_f_df, col_name, patterns)
environment_knowledge28_df

Unnamed: 0,response,frequency
0,Other,26
1,4th Section,17
2,2nd Section,4


In [52]:
n_f_df.to_csv (out_put_path + 'farmers_clean_end.csv', index = False, header=True)

In [53]:
col_name = "Are you willing to adopt new culture like Moringa to protect the soil against erosion?"
n_total3 = n_f_df[[col_name]].dropna()[col_name].count()
n_total3

51