In [1]:
#Import the necessary modules

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None

In [2]:
# Help to display very large datasets. That means dataset with a lot of columns.

pd.set_option('display.max_colwidth', 20000)
out_put_path = r'../datasets/endline/output/'

In [3]:
#Use os and sys to setup a relative path to the current working directory in order to make python modules import easier.
import os
import sys

In [4]:
sys.path.append(os.path.join(os.path.dirname('__file__'), '..', 'utils'))

In [5]:
#functions is a module from the utils package. It gathers all the utility functions to perferm aggregation on columns.
from functions import *

In [6]:
#Read the csv file and ingest the data with pandas

students_df = pd.read_csv("../datasets/endline/input/students_end.csv", encoding="utf-8")

In [7]:
students_df.shape

(52, 209)

In [8]:
#Create a copy of the farmers_df to work with.
s_df = students_df.copy()

In [9]:
s_df.columns

Index(['start', 'end', 'Anketè', 'Dat', 'Komin (Pa/Pe)',
       'Seksyon Kominal (4PR /2PA)', 'Lokalite / Bitasyon',
       'Q0 - Eske w’ dakò patisipe nan anket sa a ak nou?',
       'Q1 - Non ak Siyati moun k’ap reponn nan', 'Q2 – Sèks',
       ...
       'Presizyon 3 Lòt(Q28)', '_id', '_uuid', '_submission_time',
       '_validation_status', '_notes', '_status', '_submitted_by', '_tags',
       '_index'],
      dtype='object', length=209)

In [10]:
col_name = "Q2 – Sèks"
values = {"Gason":"Male", "Fi":"Female"}

sex_col = rename_col_values(s_df, col_name, values)
s_df["Sex"] = sex_col

In [11]:
n_s_df = s_df[["Sex"]]

In [12]:
sex_def = count_freq_simple_answer(n_s_df, 'Sex')

In [13]:
sex_def

Unnamed: 0,response,frequency
0,Female,37
1,Male,15


In [14]:
col_name = "Q3 - Nan ki entèval laj ou sitiye ?  "
values = {"Mwens ke 12 lane":"Less than 12", 
          "Ant 12 ak 15 lane":"From 12 to 15",
          "Ant 15 ak 20 lane":"From 15 to 20",
          "Plis ke 20 lane":"Greater than 20",
         }
ages_col =  rename_col_values(students_df, col_name, values)

In [15]:
n_s_df["Age interval"] = pd.Series(ages_col)

In [16]:
n_s_df.head()

Unnamed: 0,Sex,Age interval
0,Female,From 15 to 20
1,Female,From 15 to 20
2,Female,From 15 to 20
3,Female,From 15 to 20
4,Female,From 12 to 15


In [17]:
n_s_df.isna().any()

Sex             False
Age interval    False
dtype: bool

In [18]:
ages_def = count_freq_simple_answer(n_s_df, 'Age interval')

In [19]:
ages_def

Unnamed: 0,response,frequency
0,From 15 to 20,16
1,From 12 to 15,23
2,Less than 12,12
3,Greater than 20,1


In [20]:


col_name = "Q4 - Ki nivo edikasyon w"
values = {"Okenn Nivo":"None", 
          "Lekol primè":"Primary school",
          "Segondè":"Secondary school",
         }
education_col =  rename_col_values(students_df, col_name, values)

In [21]:
n_s_df["Education level"] = pd.Series(education_col)

In [22]:
n_s_df.shape

(52, 3)

In [23]:
education_def = count_freq_simple_answer(n_s_df, 'Education level')

In [24]:
education_def 

Unnamed: 0,response,frequency
0,Secondary school,21
1,Primary school,31


In [25]:
col_name = "Q5 - Ou gen telefòn Digicel ?"
values = {"Wi":"Yes", 
          "Non":"No"
         }
phone_access_col =  rename_col_values(students_df, col_name, values)

In [26]:
n_s_df["Access to phone"] = pd.Series(phone_access_col)

In [27]:
n_s_df.head()

Unnamed: 0,Sex,Age interval,Education level,Access to phone
0,Female,From 15 to 20,Secondary school,Yes
1,Female,From 15 to 20,Secondary school,Yes
2,Female,From 15 to 20,Secondary school,Yes
3,Female,From 15 to 20,Primary school,Yes
4,Female,From 12 to 15,Primary school,Yes


In [28]:
access_to_phone_def = count_freq_simple_answer(n_s_df, 'Access to phone')

In [29]:
access_to_phone_def

Unnamed: 0,response,frequency
0,Yes,39
1,No,13


In [30]:
col_name = "Q6 –  Kisa ki anviwonman an? "
values = {"Mwen pa konnen ":"Don't know", 
          "Se espas kote pye bwa yo ye":"Spaces where there are trees",
          "Se rivyè yo, sous yo ":"Rivers, water springs",
          "Se lanme a":"The sea",
          "Se espas kote bet yo ak vi ":"Spaces where live the animals",
          "Se espace kote moun nap viv, ak tout sa ki ladanl":"Spaces where people live",
          "Se espas klas yo, lekol la":"Schools and classooms",
          "Se espas anle a/syel la, ":"The sky",
          "Lòt (Presize)":"Other",
         }
environment_knowledge =  rename_col_with_multiple_values(students_df, col_name, values)

In [31]:
for i in range(students_df.shape[1]):
    if i > 20:
        break
        
    #print(students_df.columns[i])

In [32]:
n_s_df["What is environment?"] = pd.Series(environment_knowledge)

In [33]:
n_s_df.head()

Unnamed: 0,Sex,Age interval,Education level,Access to phone,What is environment?
0,Female,From 15 to 20,Secondary school,Yes,Spaces where people live
1,Female,From 15 to 20,Secondary school,Yes,Spaces where people live
2,Female,From 15 to 20,Secondary school,Yes,Spaces where people live
3,Female,From 15 to 20,Primary school,Yes,Spaces where people live
4,Female,From 12 to 15,Primary school,Yes,Spaces where people live


In [34]:
patterns = ["Don't know", "Spaces where there are trees", "Rivers, water springs", "The sea", "Spaces where live the animals", "Spaces where people live", "Schools and classooms", "The sky", "Other"]
col_name = "What is environment?"

environment_knowledge_df = count_freq_multiple_answer(n_s_df, col_name, patterns)

In [35]:
environment_knowledge_df

Unnamed: 0,response,frequency
0,Spaces where people live,44
1,Spaces where there are trees,20
2,Spaces where live the animals,18
3,Other,9
4,Schools and classooms,4
5,"Rivers, water springs",8
6,The sea,5
7,The sky,1


In [36]:

col_name = "Q7 - Kijan anviwonman an dwe ye poun ka viv byen e rete sante ?  "
values = {"Mwen pa konnen, ":"Don't know", 
          "Anviwonman dwe pwòp":"The environment must be clean",
          "Nou dwe pwoteje pye bwa yo, ":"WE must protect the trees",
          "Nou dwe plante pye bwa, sitou nan tèt mòn yo":"We must plant trees, mostly in the mountains",
          "Nou pa dwe jete fatra tout kalite nan mang lan, nan lari, nan rivyè yo, bo lanmè a, ":"We must not throw garbages in the mangroves, in the streets, in the rivers and at the sea",
          "Lòt (Presize)":"Other",
         }
environment_knowledge1 =  rename_col_with_multiple_values(students_df, col_name, values)

In [37]:
n_s_df["How should the environment be for us to be in good health?"] = pd.Series(environment_knowledge1)

In [38]:
#n_s_df.head()

In [39]:
patterns = ["Don't know", "The environment must be clean", "WE must protect the trees", "We must plant trees, mostly in the mountains", "We must not throw garbages in the mangroves, in the streets, in the rivers and at the sea", "Other"]
col_name = "How should the environment be for us to be in good health?"

environment_knowledge1_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge1_df

Unnamed: 0,response,frequency
0,The environment must be clean,43
1,"We must not throw garbages in the mangroves, in the streets, in the rivers and at the sea",23
2,"We must plant trees, mostly in the mountains",18
3,WE must protect the trees,21
4,Other,4
5,Don't know,1


In [40]:
col_name = "Q8 – Eske w konn tande pale de chanjman klimatik ?"
values = {"Wi, si wi pase ak Q9.1":"Yes", 
          "Non":"No",
         }
environment_knowledge2 =  rename_col_values(students_df, col_name, values)

In [41]:
n_s_df["Have you already heard about climate change?"] = pd.Series(environment_knowledge2)
access_to_phone_def = count_freq_simple_answer(n_s_df, 'Have you already heard about climate change?')
access_to_phone_def

Unnamed: 0,response,frequency
0,Yes,30
1,No,22


In [42]:
col_name = "Q8.1 – Kisa chanjman klimatik la ye ?"
values = {"Mwen pa konnen, si 1) pase ak Q14":"Don't know", 
          "Se tanperati atmosfè a kap ogmante akoz twòp gas rechofman aktivite moun ap degaje":"Increase in the temperature of the atmosphere due to the emission of heating gases from human activities",
          "Se lè jaden pa donnen ankò":"When the crops give nothing to harvest",
          "Se lè solèy la leve nan mitan lanwit":"When the sun raises at midnight",
          "Lòt (Presize)":"Other",
         }
environment_knowledge3 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What is climate change?"] = pd.Series(environment_knowledge3)

patterns = ["Don't know", "Increase in the temperature of the atmosphere due to the emission of heating gases from human activities", "When the crops give nothing to harvest", "When the sun raises at midnight", "Other"]
col_name = "What is climate change?"

environment_knowledge3_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge3_df

Unnamed: 0,response,frequency
0,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities,24
1,Don't know,20
2,Other,2


In [43]:
n_s_df.head()

Unnamed: 0,Sex,Age interval,Education level,Access to phone,What is environment?,How should the environment be for us to be in good health?,Have you already heard about climate change?,What is climate change?
0,Female,From 15 to 20,Secondary school,Yes,Spaces where people live,The environment must be clean,Yes,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities
1,Female,From 15 to 20,Secondary school,Yes,Spaces where people live,The environment must be clean,Yes,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities
2,Female,From 15 to 20,Secondary school,Yes,Spaces where people live,"We must not throw garbages in the mangroves, in the streets, in the rivers and at the sea",Yes,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities
3,Female,From 15 to 20,Primary school,Yes,Spaces where people live,"We must not throw garbages in the mangroves, in the streets, in the rivers and at the sea",Yes,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities
4,Female,From 12 to 15,Primary school,Yes,Spaces where people live,The environment must be clean,Yes,Increase in the temperature of the atmosphere due to the emission of heating gases from human activities


In [44]:
col_name = "Q9 - Ki prensipal gaz ki ka rechofe klima, ou konnen ?"
values = {"Mwen pa konnen, ":"Don't know", 
          "CO2/gaz kabonik":"CO2",
          "Oksijèn":"Oxygen",
          "Lòt (Presize)":"Other",
         }
environment_knowledge4 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["Main heating gaz you know?"] = pd.Series(environment_knowledge4)

patterns = ["Don't know", "CO2", "Oxygen", "Other"]
col_name = "Main heating gaz you know?"

environment_knowledge4_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge4_df

Unnamed: 0,response,frequency
0,CO2,21
1,Other,2
2,Oxygen,1
3,Don't know,16


In [45]:
col_name = "Q10 - Kisa ki pwodwi gaz ki ka rechofe klima yo ?"
values = {"Mwen pa konnen ":"Don't know", 
          "Veyikil ki mache a eneji fosil, tankou, gaz, petwol, chabon":"Vehicles that run with fossil energies",
          "Ogmantasyon elevaj (bef) itilizasyon angre ":"Increase in animal breeding and use of fertilizers",
          "Endistri yo":"The industries",
          "Debwazman":"Deforestation",
          "Lòt (Presize)":"Other",
         }
environment_knowledge5 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What produces heating gaz?"] = pd.Series(environment_knowledge5)

patterns = ["Don't know", "Vehicles that run with fossil energies", "Increase in animal breeding and use of fertilizers", "The industries","Deforestation", "Other"]
col_name = "What produces heating gaz?"

environment_knowledge5_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge5_df

Unnamed: 0,response,frequency
0,The industries,12
1,Deforestation,3
2,Vehicles that run with fossil energies,4
3,Increase in animal breeding and use of fertilizers,3
4,Other,3
5,Don't know,18


In [46]:
col_name = "Q11 - Ki konsekans chanjman klimatik la genyen globalman?"
values = {"Mwen pa konnen":"Don't know", 
          "Ogmante tenperati sou te a ":"Increase in the earth temperature",
          "Ogmante asidite nan lanme ":"Increase in the sea saltiness",
          "Li fe siklòn ak tenpe yo vin pi vyolan ":"There are more hurricane and they become more violent",
          "Li koze sechres":"It causes drought",
          "Li kose inondasyon":"It causes flooding",
          "Li favorize maladi sou moun ak bet":"It occasions diseases for human being and animals",
          "Li ka konplike pwodiksyon manje ak elevaj bet":"It can cause issues in crops and in animal breeding",
          "Lòt (Presize)":"Other",
         }
environment_knowledge6 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What are the consequencies of climate change?"] = pd.Series(environment_knowledge6)

patterns = ["Don't know","It can cause issues in crops and in animal breeding", 
            "It occasions diseases for human being and animals", 
            "Increase in the earth temperature", 
            "Increase in the sea saltiness", 
            "There are more hurricane and they become more violent",
            "It causes drought",
            "It causes flooding", 
            "Other"]

col_name = "What are the consequencies of climate change?"

environment_knowledge6_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge6_df

Unnamed: 0,response,frequency
0,It can cause issues in crops and in animal breeding,7
1,It causes drought,5
2,Increase in the earth temperature,10
3,It occasions diseases for human being and animals,4
4,There are more hurricane and they become more violent,5
5,Increase in the sea saltiness,3
6,It causes flooding,5
7,Don't know,16
8,Other,1


In [47]:
col_name = "Q12 - Ki sa nou ka fè pou n adapte n ak chanjman klimatik la ?"
values = {"Mwen pa konnen ":"Don't know", 
          "Plante pye bwa pou kapte ak estoke CO2 yo ":"Plant trees to stock the excess of CO2",
          "Jere, resikle ak trete fatra yo":"Manage garbage, recycle them",
          "Diminye kantite CO2 nap degaje nan lè a":"Reduce the amount of CO2 we are emitting into the atmosphere",
          "Manje ak itilize pwodwi ki fèt lokal ":"Consume local products",
          "Lòt (Presize)":"Other",
         }
environment_knowledge7 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What can we do to adapt to climate change?"] = pd.Series(environment_knowledge7)

patterns = ["Don't know",
            "Plant trees to stock the excess of CO2", 
            "Manage garbage, recycle them", 
            "Reduce the amount of CO2 we are emitting into the atmosphere",
            "Consume local products",  
            "Other"]

col_name = "What can we do to adapt to climate change?"

environment_knowledge7_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge7_df

Unnamed: 0,response,frequency
0,Plant trees to stock the excess of CO2,16
1,"Manage garbage, recycle them",3
2,Don't know,23
3,Reduce the amount of CO2 we are emitting into the atmosphere,2
4,Consume local products,1
5,Other,1


In [48]:
col_name = "Q13 - Eske ou konn tande pale de klèb anviwonman ? "
values = {"Non":"Yes", 
          "Wi":"No",
         }

environment_knowledge8 = rename_col_values(students_df, col_name, values)
n_s_df["Have you heard about environment club?"] = pd.Series(environment_knowledge8)

environment_knowledge8_df = count_freq_simple_answer(n_s_df, "Have you heard about environment club?")
environment_knowledge8_df

Unnamed: 0,response,frequency
0,No,50
1,,1
2,Yes,1


In [49]:
col_name = "Q14 - Si wi, poukisa yo fòme yon klèb anviwonman ?"
values = {"Mwen pa konnen, si 1) pase ak kesyon Q16":"Don't know", 
          "Pou mennen aktivite sansibilizasyon sou anviwonman":"To carry out environmental sensitization activities",
          "Pou Plante pye bwa":"To plant trees",
          "Pou patisipe oubyen fe fomasyon sou anviwonman":"To carry out training about environment",
          "Pou moun al detann yo nan aprè midi":"To relax in the afternoon",
          "Lòt":"Other",
         }
environment_knowledge9 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["Why does one create environment clubs?"] = pd.Series(environment_knowledge9)

patterns = ["Don't know",
            "To carry out environmental sensitization activities", 
            "To plant trees", 
            "To carry out training about environment",
            "To relax in the afternoon",  
            "Other"]

col_name = "Why does one create environment clubs?"

environment_knowledge9_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge9_df

Unnamed: 0,response,frequency
0,To carry out environmental sensitization activities,34
1,To carry out training about environment,26
2,To plant trees,21
3,Other,6
4,To relax in the afternoon,2
5,Don't know,2


In [50]:
col_name = "Q15 - Kisa mang la ye selon oumenm ?"
values = {"Mwen pa konnen – Pase a Q13":"Don't know", 
          "Se yon pye bwa ki pwodwi mango":"A tree that produces mangoes",
          "Se yon rak bwa ki grandi bo lanmè":"A tree that grows in forest by the sea",
         }

environment_knowledge10 = rename_col_values(students_df, col_name, values)
n_s_df["What are mangroves?"] = pd.Series(environment_knowledge10)

environment_knowledge10_df = count_freq_simple_answer(n_s_df, "What are mangroves?")
environment_knowledge10_df

Unnamed: 0,response,frequency
0,A tree that grows in forest by the sea,41
1,Don't know,8
2,,2
3,A tree that produces mangoes,1


In [51]:
col_name = "Q16 - Eske ou konnen lwa oubyen règleman ki pale de mangwov (mang) ?"
values = {"Wi":"Yes", 
          "Non – Si non, pase a Q9":"No",
         }

environment_knowledge11 = rename_col_values(students_df, col_name, values)
n_s_df["Do you know the laws about mangroves?"] = pd.Series(environment_knowledge11)

environment_knowledge11_df = count_freq_simple_answer(n_s_df, "Do you know the laws about mangroves?")
environment_knowledge11_df

Unnamed: 0,response,frequency
0,Yes,14
1,No,35
2,,3


In [52]:
col_name = "Q16.1 - Si wi, kiyès ?"
values = {"Kòd riral ":"Don't know", 
          "Dekrè sou anviwonman (2005) ":"Environment Decree (2005)",
          "Dekrè sou mangwòv (2013)":"Mangroves Decree (2013)",
          "Lòt (Presize) ":"Other",
         }
environment_knowledge12 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What text of law about the environment do you know?"] = pd.Series(environment_knowledge12)

patterns = ["Don't know",
            "Environment Decree (2005)", 
            "Mangroves Decree (2013)", 
            "Other"]

col_name = "What text of law about the environment do you know?"

environment_knowledge12_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge12_df

Unnamed: 0,response,frequency
0,Environment Decree (2005),4
1,Don't know,1
2,Mangroves Decree (2013),7


In [53]:
col_name = "Q16.1.1 - Eskew konn kisa lwa sa yo di sou zafe pwoteksyon mangwov ?"
values = {"Wi":"Yes", 
          "Non – Si non pase a Q9":"No",
         }

environment_knowledge13 = rename_col_values(students_df, col_name, values)
n_s_df["Do you know what the laws say about mangroves protection?"] = pd.Series(environment_knowledge13)

environment_knowledge13_df = count_freq_simple_answer(n_s_df, "Do you know what the laws say about mangroves protection?")
environment_knowledge13_df

Unnamed: 0,response,frequency
0,Yes,16
1,,19
2,No,17


In [54]:
col_name = "Q16.1.1.1- ki sa lwa yo di ? "
values = {"Pa fè kay nan mangwòv":"Don't know", 
          "Pa koute mangwòv yo":"Don't cut them",
          "Pa jete fatra/dechè plastik/vye materyo konstriksyon nan mangwòv yo ":"Do not throw garbage in the mangroves",
          "Lòt (Presize)":"Other",
         }
environment_knowledge14 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What does the law about the environment and mangrove protection say?"] = pd.Series(environment_knowledge14)

patterns = ["Don't know",
            "Don't cut them", 
            "Do not throw garbage in the mangroves", 
            "Other"]

col_name = "What does the law about the environment and mangrove protection say?"

environment_knowledge14_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge14_df

Unnamed: 0,response,frequency
0,Do not throw garbage in the mangroves,11
1,Don't cut them,5
2,Don't know,6
3,Other,1


In [55]:
col_name = "Q17 - Ki enpotans ou konnen mang lan genyen ?"
values = {"Li kontribiye a bay sekirite alimantè (gras ak pwodwi pèch li ofri yo)":"It contributes to food security (thank to fishing products)", 
          "Pwoteksyon konn inondasyon ":"Prevent flooding",
          "Pwoteksyon kot yo ak resif yo":"Protect the reefs",
          "Se espas kote anpil zwazo viv ak repwodwi":"Birds habitat",
          "Refij pou fwi lame (kòm pwason, krab ak sirik) repwodwi oubyen grandi ":"Growing environment for crustaceans",
          "Lòt (Presize)":"Other",
         }
environment_knowledge15 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What is the importance of the mangroves?"] = pd.Series(environment_knowledge15)

patterns = ["It contributes to food security (thank to fishing products)",
            "Prevent flooding", 
            "Protect the reefs", 
            "Birds habitat",
            "Growing environment for crustaceans",
            "Other"]

col_name = "What is the importance of the mangroves?"

environment_knowledge15_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge15_df

Unnamed: 0,response,frequency
0,Growing environment for crustaceans,15
1,Protect the reefs,7
2,It contributes to food security (thank to fishing products),12
3,Prevent flooding,11
4,Birds habitat,6
5,Other,1


In [56]:
col_name = "Q18 - Daprè ou, èske mang lan an danje ?"
values = {"Wi":"Yes", 
          "Non – Si non, pase a Q11":"No",
         }

environment_knowledge16 = rename_col_values(students_df, col_name, values)
n_s_df["Are mangroves in danger?"] = pd.Series(environment_knowledge16)

environment_knowledge16_df = count_freq_simple_answer(n_s_df, "Are mangroves in danger?")
environment_knowledge16_df

Unnamed: 0,response,frequency
0,Yes,31
1,,13
2,No,8


In [57]:
col_name = "Q18.1 Poukisa selon ou mang lan an danje ? "
values = {"Leta a pa prezan/pa gen kontwòl nan jan moun ap eksplwate mangwòv yo":"Absence of government/no control of mangroves exploitation", 
          "Kominote ap mal eksplwate mangwòv yo (Yo koupe yo twòp pou fe chabon ak kay)":"Community over-exploitation of mangroves",
          "Mangwòv yo ap fin konble ak tè ki soti nan tèt mòn yo akoz ewozyon ":"Continuous deposition of sediments in mangroves",
          "Nivo lamè a ap bese lakoz gen plantasyon mangwòv ki ap fin seche":"Decrease in the sea level",
          "Moun yo ap konstwi kay nan espas kote ki ta dwe gen mang selman, ":"Urbanization of mangrove areas",
          "Yo jete anpil fatra (dechè plastik, vye materyo konstriksyon) nan espas mangwòv yo ":"Mangroves become a dumping ground for garbage",
          "Lòt (Presize)":"Other",
         }
environment_knowledge17 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["Why do you believe the mangroves are in danger?"] = pd.Series(environment_knowledge17)

patterns = ["Absence of government/no control of mangroves exploitation",
            "Community over-exploitation of mangroves", 
            "Continuous deposition of sediments in mangroves", 
            "Decrease in the sea level",
            "Urbanization of mangrove areas",
            "Mangroves become a dumping ground for garbage",
            "Other"]

col_name = "Why do you believe the mangroves are in danger?"

environment_knowledge17_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge17_df

Unnamed: 0,response,frequency
0,Absence of government/no control of mangroves exploitation,12
1,Community over-exploitation of mangroves,16
2,Urbanization of mangrove areas,1
3,Mangroves become a dumping ground for garbage,4


In [58]:
col_name = "Q19 - Eske ou konnen ki aksyon oubyen mezi ki ka pwoteje mang lan ?"
values = {"Wi":"Yes", 
          "Non – SI non, pase a Q12":"No",
         }

environment_knowledge18 = rename_col_values(students_df, col_name, values)
n_s_df["Do you know what can be done to protect the mangroves?"] = pd.Series(environment_knowledge18)

environment_knowledge18_df = count_freq_simple_answer(n_s_df, "Do you know what can be done to protect the mangroves?")
environment_knowledge18_df

Unnamed: 0,response,frequency
0,,52


In [59]:
col_name = "Q19.1 Kisa ki kap fèt pou pwoteje mangwòv yo selon ou ? "
values = {"Elabore yon plan jesyon ak amenajman kominotè ":"Elaborate a plan with community management", 
          "Met sou pye yon komite jesyon":"Organize a management committee",
          "Itilize espas mangwòv yo pou bay moun yo lot resous tankou elvaj myèl":"Use mangroves areas for bee keeping",
          "Plante plis mang ":"Plant more mangroves",
          "Valorize espas ak resous li yo atravè aktivite touris":"Valorize mangroves forest with tourism",
          "Edikasyon ak sansibilizsyon popilasyon an sou jan yo dwe eksplwate l":"Educate and sensitize the population on how to exploit mangroves",
          "Mete an aplikasyon lwa ak reglemantasyon sou mangwòv yo ":"Enforce law to protect the mangroves",
          "Konsèvasyon sòl nan mòn yo ak rebwazman nan basen vesan an ":"Soil conservation in the mountains",
          "Itilize materyèl pèch ki pi dirab":"use of fishing tools that are more durable",
          "Lòt (Presize)":"Other",
         }
environment_knowledge19 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What can be done to protect the mangroves according to you?"] = pd.Series(environment_knowledge19)

patterns = ["Elaborate a plan with community management",
            "Organize a management committee", 
            "Use mangroves areas for bee keeping", 
            "Plant more mangroves",
            "Valorize mangroves forest with tourism",
            "Educate and sensitize the population on how to exploit mangroves",
            "Enforce law to protect the mangroves",
            "Soil conservation in the mountains",
            "use of fishing tools that are more durable",
            "Other"]

col_name = "What can be done to protect the mangroves according to you?"

environment_knowledge19_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge19_df

Unnamed: 0,response,frequency
0,Organize a management committee,11
1,Elaborate a plan with community management,15
2,Enforce law to protect the mangroves,3
3,Plant more mangroves,5
4,Soil conservation in the mountains,2
5,Other,3
6,Educate and sensitize the population on how to exploit mangroves,6


In [60]:
col_name = "Q20- Ki enpòtans ou konnen yon pye bwa genyen ?"
values = {"Kenbe tè yo kont ewozyon ":"Protect soil agains erosion", 
          "Pemèt dlo rantre byen nan tè a":"Facilitate water filtration to the ground",
          "Bay manje pou moun ak bet":"Provide food for human and animals",
          "Medikaman natirèl":"Natural medicines",
          "Kontwole tanperati a":"Controle the earth temperature",
          "Bay oksijèn pou moun respire byen ":"Provide oxygen",
          "Mwen pa konnen":"Don't know",
          "Pwoteksyon kont inondasyon":"Protection against flooding",
          "Pwoteksyon kont gwo van (briz van) ":"Wind breaker",
          "Lòt (Presize)":"Other",
         }

environment_knowledge20 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What is the importance of trees according to you?"] = pd.Series(environment_knowledge20)

patterns = ["Protect soil agains erosion",
            "Facilitate water filtration to the ground", 
            "Provide food for human and animals", 
            "Natural medicines",
            "Controle the earth temperature",
            "Provide oxygen",
            "Don't know",
            "Protection against flooding",
            "Wind breaker",
            "Other"]

col_name = "What is the importance of trees according to you?"

environment_knowledge20_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge20_df

Unnamed: 0,response,frequency
0,Facilitate water filtration to the ground,14
1,Protect soil agains erosion,22
2,Provide oxygen,38
3,Natural medicines,19
4,Protection against flooding,14
5,Provide food for human and animals,21
6,Other,3
7,Wind breaker,5
8,Controle the earth temperature,12
9,Don't know,7


In [61]:
col_name = "Q21- Eske ou konn koman pou w fe pepinye pou pwodwi pye bwa ?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge21 = rename_col_values(students_df, col_name, values)
n_s_df["Do you have skills in tree nursery?"] = pd.Series(environment_knowledge21)

environment_knowledge21_df = count_freq_simple_answer(n_s_df, "Do you have skills in tree nursery?")
environment_knowledge21_df

Unnamed: 0,response,frequency
0,Yes,43
1,No,9


In [62]:
col_name = "Q23 - Eske w ou ta renmen pran fòmasyon pou konnen kijan pou w fe pepinyè pou pwodwi pye mang ?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge23 = rename_col_values(students_df, col_name, values)
n_s_df["Are you willing to be trained mangroves nursery?"] = pd.Series(environment_knowledge23)

environment_knowledge23_df = count_freq_simple_answer(n_s_df, "Are you willing to be trained mangroves nursery?")
environment_knowledge23_df

Unnamed: 0,response,frequency
0,Yes,52


In [63]:
col_name = "Q24 – Eske ou prè pou w fè pati yon klèb anviwonman"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge24 = rename_col_values(students_df, col_name, values)
n_s_df["Are you ready to be part of an environment club?"] = pd.Series(environment_knowledge24)

environment_knowledge24_df = count_freq_simple_answer(n_s_df, "Are you ready to be part of an environment club?")
environment_knowledge24_df

Unnamed: 0,response,frequency
0,Yes,52


In [64]:
col_name = "Q25 - Eske w prè pou w pale ak paran w sou meyè fason yo ka pwoteje anvironman an?"
values = {"Wi":"Yes", 
          "Non":"No",
         }

environment_knowledge25 = rename_col_values(students_df, col_name, values)
n_s_df["Are you willing to sensitize your parents on how to protect the environment?"] = pd.Series(environment_knowledge25)

environment_knowledge25_df = count_freq_simple_answer(n_s_df, "Are you willing to sensitize your parents on how to protect the environment?")
environment_knowledge25_df

Unnamed: 0,response,frequency
0,Yes,52


In [65]:
col_name = "Q26 – Si w’ap mache ou achte yon boutèy dlo pastik oubyen yon sachè dlo, lè w fin bwè l kisa w’ap fè ak boutèy la oubyen sachè a si kote w la pa gen yon poubèl?"
values = {"Jete l bò wout la":"Throw it at the roadside", 
          "Chèche kote k deja gen yon pil fatra jete l sou li":"Throw it on a pile of rubbish",
          "Mete l nan poch mwen oubyen valiz mwen jete l":"Put it in my pocket or bag to throw it later in a trash bin",
          "Pote l nan poch oubyen valiz mwen jete l nan":"Put it in my pocket or bag to throw it later",
          "Choute boutèy la kou balon sou tout wout la":"Just kick as a ball with my feet along the way",
          "Lòt (Presize)":"Other",
         }

environment_knowledge26 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What would you do with a plastic water bag or bottle after drinking it if there is no trash bin nearby?"] = pd.Series(environment_knowledge26)

patterns = ["Throw it at the roadside",
            "Throw it on a pile of rubbish", 
            "Put it in my pocket or bag to throw it later in a trash bin", 
            "Put it in my pocket or bag to throw it later",
            "Just kick as a ball with my feet along the way",
            "Other"]

col_name = "What would you do with a plastic water bag or bottle after drinking it if there is no trash bin nearby?"

environment_knowledge26_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge26_df

Unnamed: 0,response,frequency
0,Put it in my pocket or bag to throw it later in a trash bin,36
1,Put it in my pocket or bag to throw it later,49
2,Throw it on a pile of rubbish,4
3,Just kick as a ball with my feet along the way,1


In [66]:
col_name = "Q27 – Si w t’ap bwè yon sachè oubyen yon boutèy dlo nan yon machin, kisa wap fè ak sache a oubyen boutèy la le w fini?"
values = {"Voye l jete atè a?":"Throw it on the ground", 
          "Lage anba ban machin nan":"Throw it bellow the car",
          "Mete l nan valiz mwen ma jete l nan poubel lem rive":"Put it in my bag and put it in a trash bin at home",
          "Pote l nan pòch oubyen valiz mwen jete l nan poubèl si m jwenn wout sou wout mwen":"Put it in my pocket or my bag and throw it in a trash bin on the road",
          "Lòt (Presize)":"Other",
         }

environment_knowledge27 =  rename_col_with_multiple_values(students_df, col_name, values)

n_s_df["What would you do with a plastic water bag or bottle after drinking it in a car?"] = pd.Series(environment_knowledge27)

patterns = ["Throw it on the ground",
            "Throw it bellow the car", 
            "Put it in my bag and put it in a trash bin at home", 
            "Put it in my pocket or my bag and throw it in a trash bin on the road",
            "Other"]

col_name = "What would you do with a plastic water bag or bottle after drinking it in a car?"

environment_knowledge27_df = count_freq_multiple_answer(n_s_df, col_name, patterns)
environment_knowledge27_df

Unnamed: 0,response,frequency
0,Put it in my bag and put it in a trash bin at home,45
1,Put it in my pocket or my bag and throw it in a trash bin on the road,13
2,Throw it bellow the car,2


In [67]:
col_name = "Q28 – Si paran w deside koupe yon pye baw ki bay anpil fwi oubyen ki sitiye nan yon pant, ki konsey ou t’ap anvi bal pou l pa koupe l ?"
values = {"Mwen pa konnen":"Don't know", 
          "Ekplikel enpotans pye bwa a":"Explain him/her the importance of the tree",
          "Lòt (Presize)":"Other",
         }

environment_knowledge28 =  rename_col_with_multiple_values(students_df, col_name, values)
eng_col_name = "What would you do if one of your parents decided to cut down a tree that provides a lot of fruit or that is on a slope?"
n_s_df[eng_col_name] = pd.Series(environment_knowledge28)

patterns = ["Don't know",
            "Explain him/her the importance of the tree", 
            "Other"]

environment_knowledge28_df = count_freq_multiple_answer(n_s_df, eng_col_name, patterns)
environment_knowledge28_df

Unnamed: 0,response,frequency
0,Explain him/her the importance of the tree,49
1,Other,6
2,Don't know,1


In [68]:
n_s_df.shape

(52, 32)

In [69]:
n_s_df.to_csv (out_put_path + 'students_clean_end.csv', index = False, header=True)

In [70]:
#for c in n_s_df["How should the enrironment be for us to be in good health?"]:
#    print(c)


In [71]:
#for c in students_df["Q6 –  Kisa ki anviwonman an? "]:
#    print(c)
col_name = "What does the law about the environment and mangrove protection say?"
n_total3 = n_s_df[[col_name]].dropna()[col_name].count()
n_total3
#n_male3 = s_df_m[[col_name]].dropna()[col_name].count()
#n_female3 = s_df_f[[col_name]].dropna()[col_name].count()

20