### FEATURE ENGINEERING

In [293]:
import pandas as pd
import numpy as np
%matplotlib inline 
import seaborn as sns 
import matplotlib.pyplot as plt 
plt.style.use('seaborn') 
np.set_printoptions(suppress=True)
pd.set_option('display.max_columns', 40)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [294]:
df = pd.read_csv('ida_prepared.csv')

#####  Dropping 'Africa' as a value of the column 'Country'

In [295]:
df.drop(df.loc[df['Country'] == 'Africa'].index, inplace=True)


### Which Countries have borrowed the most from the IDA?

In [296]:
import plotly.express as px
fig = px.sunburst(df, path=['Region', 'Country'], values='Original Principal Amount',
                  color='Original Principal Amount',
                  color_continuous_scale='RdBu',
                  color_continuous_midpoint=np.average(df['Original Principal Amount'], 
                                                       weights=df['Original Principal Amount']))
fig.show()

### Loan Duration 

In [297]:
for col in df[[ "Agreement Signing Date","End of Period","First Repayment Date", "Last Repayment Date", 
               "Board Approval Date", "Effective Date (Most Recent)",
                "Closed Date (Most Recent)", "Last Disbursement Date" ]]:
    df[col] = pd.to_datetime(df[col])

#### Calculating the Duration of Loans 

In [298]:
df['Loan Duration'] = df['Last Repayment Date'] .sub(df['Board Approval Date'], axis=0)

In [299]:
df['Loan Duration']

0      17279 days
1      18165 days
2      18196 days
3      18124 days
4      18241 days
          ...    
5637   12739 days
5638   12770 days
5639   14441 days
5640   12746 days
5641   12756 days
Name: Loan Duration, Length: 5640, dtype: timedelta64[ns]

In [300]:
df['Loan Duration in Years'] = df['Loan Duration'] / np.timedelta64(1, 'Y')

In [301]:
df[['Loan Duration in Years', 'Country']].sort_values(by=['Loan Duration in Years'])

Unnamed: 0,Loan Duration in Years,Country
5530,18.421,India
3596,19.505,Bosnia and Herzegovina
2857,19.505,Sri Lanka
2922,19.508,Albania
3826,19.510,Sri Lanka
...,...,...
7,50.027,Chad
103,50.030,Lesotho
409,50.054,China
253,50.054,Pakistan


In [302]:
df.groupby(['Region','Country'])['Loan Duration in Years'].sum()

Region      Country     
AFRICA      Angola           735.106
            Benin           3680.308
            Burkina Faso    3826.899
            Burundi         1822.228
            Cabo Verde      1857.569
                              ...   
SOUTH ASIA  India          11003.024
            Maldives         679.047
            Nepal           4392.950
            Pakistan        6655.285
            Sri Lanka       3686.066
Name: Loan Duration in Years, Length: 97, dtype: float64

In [303]:
df['Time to Repay'] = df['Last Repayment Date']- df['End of Period']

In [304]:
df['Time to Repay']

0      1279 days
1      2557 days
2      2679 days
3      2663 days
4      2831 days
          ...   
5637   4732 days
5638   4763 days
5639   6528 days
5640   5083 days
5641   5432 days
Name: Time to Repay, Length: 5640, dtype: timedelta64[ns]

In [305]:
df['Time to Repay in Years'] = df['Time to Repay']/ np.timedelta64(1, 'Y')

In [306]:
df['Time to Repay in Years']

0       3.502
1       7.001
2       7.335
3       7.291
4       7.751
        ...  
5637   12.956
5638   13.041
5639   17.873
5640   13.917
5641   14.872
Name: Time to Repay in Years, Length: 5640, dtype: float64

#### Years to Repay Debts per Region and Country

In [307]:
df['Region Total'] = df.groupby('Region')['Time to Repay in Years'].transform('sum')
df['Debt Left in Years'] = df.groupby('Country')['Time to Repay in Years'].transform('sum')

In [308]:
df_repaying = df[df['Credit Status'] == 'Repaying']

In [309]:
fig = px.sunburst(df_repaying, path=['Region', 'Country'], 
                  values='Debt Left in Years',
                  color='Debt Left in Years',
                  color_continuous_scale='RdBu')
fig.show()

### What is the money borrowed for?

### Parsing Text in df['Project Name']

In [310]:
df['Original Project Name'] = df['Project Name'].copy()

In [311]:
df.loc[df['Project Name'].str.contains('SAHELIAN ZONE PROJEC'), 'Project Name'] = 'Irrigation'

In [312]:
df.loc[df['Project Name'].str.contains('IRRIG'), 'Project Name'] = 'Irrigation'

In [313]:
df.loc[df['Project Name'].str.contains('LIVESTOCK'), 'Project Name'] = 'Livestock'

In [314]:
df.loc[df['Project Name'].str.contains('Health'), 'Project Name'] = 'Health'

In [315]:
df.loc[df['Project Name'].str.contains('EDUCATION'), 'Project Name'] = 'Education'

In [316]:
df.loc[df['Project Name'].str.contains('URBAN'), 'Project Name'] = 'Urbanization'

In [317]:
df.loc[df['Project Name'].str.contains('Women'), 'Project Name'] = 'Women and Children'

In [318]:
df.loc[df['Project Name'].str.contains('Child'), 'Project Name'] = 'Women and Children'

In [319]:
df.loc[df['Project Name'].str.contains('Road'), 'Project Name'] = 'Roads and Transport'

In [320]:
df.loc[df['Project Name'].str.contains('RAJASTHAN DPEP'), 'Project Name'] = 'Roads and Transport'

In [321]:
df.loc[df['Project Name'].str.contains('TA for Econ Reform Projec'), 'Project Name'] = 'Others'

In [322]:
df.loc[df['Project Name'].str.contains('Gas'), 'Project Name'] = 'Energy'

In [323]:
df.loc[df['Project Name'].str.contains('Agricultural'), 'Project Name'] = 'Agriculture'

In [324]:
df.loc[df['Project Name'].str.contains('Urban'), 'Project Name'] = 'Urbanization'

In [325]:
df.loc[df['Project Name'].str.contains('Judicial'), 'Project Name'] = 'Justice'

In [326]:
df.loc[df['Project Name'].str.contains('COVID'), 'Project Name'] = 'Health-Covid'

In [327]:
df.loc[df['Project Name'].str.contains('Public'), 'Project Name'] = 'Public Service'

In [328]:
df.loc[df['Project Name'].str.contains('Lake'), 'Project Name'] = 'Water'

In [329]:
df.loc[df['Project Name'].str.contains('Forest'), 'Project Name'] = 'Forests'

In [330]:
df.loc[df['Project Name'].str.contains('Tech'), 'Project Name'] = 'Technical Assistance'

In [331]:
df.loc[df['Project Name'].str.contains('Water'), 'Project Name'] = 'Water'

In [332]:
df.loc[df['Project Name'].str.contains('Agriculture'), 'Project Name'] = 'Agriculture'

In [333]:
df.loc[df['Project Name'].str.contains('Education'), 'Project Name'] = 'Education'

In [334]:
df.loc[df['Project Name'].str.contains('Energy'), 'Project Name'] = 'Energy'

In [335]:
df.loc[df['Project Name'].str.contains('TRANSPORT'), 'Project Name'] = 'Roads and Transport'

In [336]:
df.loc[df['Project Name'].str.contains('Credit'), 'Project Name'] = 'Credit and Finance'

In [337]:
df.loc[df['Project Name'].str.contains('Drought'), 'Project Name'] = 'Agriculture'

In [338]:
df.loc[df['Project Name'].str.contains('FINANCIAL'), 'Project Name'] = 'Credit and Finance'

In [339]:
df.loc[df['Project Name'].str.contains('HIV/AIDS'), 'Project Name'] = 'Health'

In [340]:
df.loc[df['Project Name'].str.contains('KARNATAKA RWSS'), 'Project Name'] = 'Water'

In [341]:
df.loc[df['Project Name'].str.contains('Ethiopia Enhancing Shared Prosperity'), 'Project Name'] = 'Public Service'

In [342]:
df.loc[df['Project Name'].str.contains('FORESTRY'), 'Project Name'] = 'Forests'

In [343]:
df.loc[df['Project Name'].str.contains('FORESTRY'), 'Project Name'] = 'Forests'

In [344]:
df.loc[df['Project Name'].str.contains('Cyclone Risk'), 'Project Name'] = 'Environmental Risks'

In [345]:
df.loc[df['Project Name'].str.contains('RECONSTRUCTION'), 'Project Name'] = 'Infrastructure'

In [346]:
df.loc[df['Project Name'].str.contains('POWER'), 'Project Name'] = 'Energy'

In [347]:
df.loc[df['Project Name'].str.contains('Pov'), 'Project Name'] = 'Poverty Reduction'

In [348]:
df.loc[df['Project Name'].str.contains('Financial'), 'Project Name'] = 'Credit and Finance'

In [349]:
df.loc[df['Project Name'].str.contains('WARCIP APL2- Mauritania-Togo'), 'Project Name'] = 'Infrastructure'

In [350]:
df.loc[df['Project Name'].str.contains('POP & HUMAN RESOURCE'), 'Project Name'] = 'Infrastructure'

In [351]:
df.loc[df['Project Name'].str.contains('SAC'), 'Project Name'] = 'Credit and Finance'

In [352]:
df.loc[df['Project Name'].str.contains('MKTG.'), 'Project Name'] = 'Industrialization'

In [353]:
df.loc[df['Project Name'].str.contains('Flood'), 'Project Name'] = 'Environmental Risks'

In [354]:
df.loc[df['Project Name'].str.contains('AGRI.REHAB.&DEV'), 'Project Name'] = 'Agriculture'

In [355]:
df.loc[df['Project Name'].str.contains('COMMUNITY DEVELOPMENT FUND'), 'Project Name'] = 'Credit and Finance'

In [356]:
df.loc[df['Project Name'].str.contains('PRIV. PUB. SECT. MGT'), 'Project Name'] = 'Public Service'

In [357]:
df.loc[df['Project Name'].str.contains('Wat'), 'Project Name'] = 'Water'

In [358]:
df.loc[df['Project Name'].str.contains('HWYS.'), 'Project Name'] = 'Roads and Transport'

In [359]:
df.loc[df['Project Name'].str.contains('HIGHWAY'), 'Project Name'] = 'Roads and Transport'

In [360]:
df.loc[df['Project Name'].str.contains('SECOND AGRIC.EXTENSI'), 'Project Name'] = 'Agriculture'

In [361]:
df.loc[df['Project Name'].str.contains('AGRIC.'), 'Project Name'] = 'Agriculture'

In [362]:
df.loc[df['Project Name'].str.contains('NVMT MGMT'), 'Project Name'] = 'Environmental Risks'

In [363]:
df.loc[df['Project Name'].str.contains('WURI'), 'Project Name'] = 'Regional Integration'

In [364]:
df.loc[df['Project Name'].str.contains('3A-CEMAC Transp Transit Facil'), 'Project Name'] = 'Roads and Transport'

In [365]:
df.loc[df['Project Name'].str.contains('NVMT MGMT'), 'Project Name'] = 'Environmental Risks'

In [366]:
df.loc[df['Project Name'].str.contains('NVMT MGMT'), 'Project Name'] = 'Environmental Risks'

In [367]:
df.loc[df['Project Name'].str.contains('Fiscal'), 'Project Name'] = 'Public Service'

In [368]:
df.loc[df['Project Name'].str.contains('SME/FINANCE'), 'Project Name'] = 'Credit and Finance'

In [369]:
df.loc[df['Project Name'].str.contains('TEACHER'), 'Project Name'] = 'Education'

In [370]:
df.loc[df['Project Name'].str.contains('Educ.'), 'Project Name'] = 'Education'

In [371]:
df.loc[df['Project Name'].str.contains('PSMAC'), 'Project Name'] = 'Public Sector'

In [372]:
df.loc[df['Project Name'].str.contains('PILOT EXTENSION'), 'Project Name'] = 'Agriculture'

In [373]:
df.loc[df['Project Name'].str.contains('JUDICIAL'), 'Project Name'] = 'Justice'

In [374]:
df.loc[df['Project Name'].str.contains('Rural'), 'Project Name'] = 'Rural Development'

In [375]:
df.loc[df['Project Name'].str.contains('Learning'), 'Project Name'] = 'Education'

In [376]:
df.loc[df['Project Name'].str.contains('Telecommunications'), 'Project Name'] = 'Infrastructure'

In [377]:
df.loc[df['Project Name'].str.contains('GOV'), 'Project Name'] = 'Public Service'

In [378]:
df.loc[df['Project Name'].str.contains('SRWSSDP'), 'Project Name'] = 'Rural Development'

In [379]:
df.loc[df['Project Name'].str.contains('TZ-PRSC'), 'Project Name'] = 'Poverty Reduction'

In [380]:
df.loc[df['Project Name'].str.contains('Poorest'), 'Project Name'] = 'Poverty Reduction'

In [381]:
df.loc[df['Project Name'].str.contains('MUN INFRA REH'), 'Project Name'] = 'Roads and Transport'

In [382]:
df.loc[df['Project Name'].str.contains('Hwy'), 'Project Name'] = 'Roads and Transport'

In [383]:
df.loc[df['Project Name'].str.contains('Skills'), 'Project Name'] = 'Education'

In [384]:
df.loc[df['Project Name'].str.contains('AGRI.RESEARCH'), 'Project Name'] = 'Agriculture'

In [385]:
df.loc[df['Project Name'].str.contains('AG. SERVICES'), 'Project Name'] = 'Agriculture'

In [386]:
df.loc[df['Project Name'].str.contains('Fin Mgmt'), 'Project Name'] = 'Public Service'

In [387]:
df.loc[df['Project Name'].str.contains('Basin'), 'Project Name'] = 'Water'

In [388]:
df.loc[df['Project Name'].str.contains('AG.'), 'Project Name'] = 'Agriculture'

In [389]:
df.loc[df['Project Name'].str.contains('EDUC.'), 'Project Name'] = 'Education'

In [390]:
df.loc[df['Project Name'].str.contains('WATER'), 'Project Name'] = 'Water'

In [391]:
df.loc[df['Project Name'].str.contains('GH-Transport'), 'Project Name'] = 'Roads and Transport'

In [392]:
df.loc[df['Project Name'].str.contains('FISHERY'), 'Project Name'] = 'Fishery'

In [393]:
df.loc[df['Project Name'].str.contains('RAIL'), 'Project Name'] = 'Roads and Transport'

In [394]:
df.loc[df['Project Name'].str.contains('HUMAN RESOURCES ADJ'), 'Project Name'] = 'Education'

In [395]:
df.loc[df['Project Name'].str.contains('Dam'), 'Project Name'] = 'Water'

In [396]:
df.loc[df['Project Name'].str.contains('FINANCE SECTOR'), 'Project Name'] = 'Public Service'

In [397]:
df.loc[df['Project Name'].str.contains('IND/TRADE ADJUSTMENT'), 'Project Name'] = 'Industrialization'

In [398]:
df.loc[df['Project Name'].str.contains('NI AG TECHN & RURAL EDU'), 'Project Name'] = 'Rural Development'

In [399]:
df.loc[df['Project Name'].str.contains('Reform'), 'Project Name'] = 'Public Service'

In [400]:
df.loc[df['Project Name'].str.contains('RURAL'), 'Project Name'] = 'Rural Development'

In [401]:
df.loc[df['Project Name'].str.contains('Post Conflict'), 'Project Name'] = 'Post Conflict Reconstruction'

In [402]:
df.loc[df['Project Name'].str.contains('IN: MPDPIP-II'), 'Project Name'] = 'Poverty Reduction'

In [403]:
df.loc[df['Project Name'].str.contains('IMN-PRIV.SEC. DEVT CR'), 'Project Name'] = 'Credit and Finance'

In [404]:
df.loc[df['Project Name'].str.contains('Mineral'), 'Project Name'] = 'Mining'

In [405]:
df.loc[df['Project Name'].str.contains('Infrastructure'), 'Project Name'] = 'Infrastructure'

In [406]:
df.loc[df['Project Name'].str.contains('IND.& TRADE POLICY'), 'Project Name'] = 'Industrialization'

In [407]:
df.loc[df['Project Name'].str.contains('TZ- NEAS STATCAP'), 'Project Name'] = 'Public Service'

In [408]:
df.loc[df['Project Name'].str.contains('BALOCHISTAN NRM PROJ'), 'Project Name'] = 'Poverty Reduction'

In [409]:
df.loc[df['Project Name'].str.contains('SN-Nutr Enhanc. Prog'), 'Project Name'] = 'Health'

In [410]:
df.loc[df['Project Name'].str.contains('MINERAL'), 'Project Name'] = 'Mining'

In [411]:
df.loc[df['Project Name'].str.contains('REFORM'), 'Project Name'] = 'Public Service'

In [412]:
df.loc[df['Project Name'].str.contains('GH-Statistics Development Program'), 'Project Name'] = 'Public Service'

In [413]:
df.loc[df['Project Name'].str.contains('EDUCATION'), 'Project Name'] = 'Education'

In [414]:
df.loc[df['Project Name'].str.contains('PORTS'), 'Project Name'] = 'Roads and Transport'

In [415]:
df.loc[df['Project Name'].str.contains('3A-TG/BN Engineering TAL'), 'Project Name'] = 'Roads and Transport'

In [416]:
df.loc[df['Project Name'].str.contains('HYDROC.SEC.REF&CAPI'), 'Project Name'] = 'Roads and Transport'

In [417]:
df.loc[df['Project Name'].str.contains('UG-PRSC'), 'Project Name'] = 'Public Service'

In [418]:
df.loc[df['Project Name'].str.contains('Irrigation'), 'Project Name'] = 'Irrigation'

In [419]:
df.loc[df['Project Name'].str.contains('RY-Fisheries'), 'Project Name'] = 'Fishery'

In [420]:
df.loc[df['Project Name'].str.contains('SOC PROT'), 'Project Name'] = 'Others'

In [421]:
df.loc[df['Project Name'].str.contains('FM/accountability'), 'Project Name'] = 'Public Service'

In [422]:
df.loc[df['Project Name'].str.contains('Mining'), 'Project Name'] = 'Mining'

In [423]:
df.loc[df['Project Name'].str.contains('FISC SUST.CR.I'), 'Project Name'] = 'Public Service'

In [424]:
df.loc[df['Project Name'].str.contains('TIMBER'), 'Project Name'] = 'Forests'


In [425]:
df.loc[df['Project Name'].str.contains('OIL'), 'Project Name'] = 'Energy'

In [426]:
df.loc[df['Project Name'].str.contains('CM-Livestock'), 'Project Name'] = 'Livestock'

In [427]:
df.loc[df['Project Name'].str.contains('Electrification'), 'Project Name'] = 'Energy'

In [428]:
df.loc[df['Project Name'].str.contains('BT: DPC 2'), 'Project Name'] = 'Others'

In [429]:
df.loc[df['Project Name'].str.contains('REHABILITATION'), 'Project Name'] = 'Post Conflict Reconstruction'

In [430]:
df.loc[df['Project Name'].str.contains('VOCATIONAL/TECHNICAL'), 'Project Name'] = 'Education'

In [431]:
df.loc[df['Project Name'].str.contains('PK PRSC II'), 'Project Name'] = 'Others'

In [432]:
df.loc[df['Project Name'].str.contains('GH-Tourism Development Project'), 'Project Name'] = 'Tourism'

In [433]:
df.loc[df['Project Name'].str.contains('PUBLIC SERVICE'), 'Project Name'] = 'Public Service'

In [434]:
df.loc[df['Project Name'].str.contains('Aviation'), 'Project Name'] = 'Roads and Transport'

In [435]:
df.loc[df['Project Name'].str.contains('WS/Sanitation'), 'Project Name'] = 'Others'

In [436]:
df.loc[df['Project Name'].str.contains('VOCATIONAL/TECHNICAL'), 'Project Name'] = 'Education'

In [437]:
df.loc[df['Project Name'].str.contains('Tourism'), 'Project Name'] = 'Tourism'

In [438]:
df.loc[df['Project Name'].str.contains('SINDH'), 'Project Name'] = 'Others'

In [439]:
df.loc[df['Project Name'].str.contains('CI-Electricity'), 'Project Name'] = 'Energy'

In [440]:
df.loc[df['Project Name'].str.contains('MN-UB SERVICES IMPROVMT 2'), 'Project Name'] = 'Others'

In [441]:
df.loc[df['Project Name'].str.contains('Strengthening Systems for SP and CR'), 'Project Name'] = 'Others'

In [442]:
df.loc[df['Project Name'].str.contains('MULTIPROJ-ADB'), 'Project Name'] = 'Others'

In [443]:
df.loc[df['Project Name'].str.contains('CG:Transparency & Governance repeat Proj'), 'Project Name'] = 'Public Service'

In [444]:
df.loc[df['Project Name'].str.contains('MICRO & SMALL ENTERP'), 'Project Name'] = 'Credit and Finance'

In [445]:
df.loc[df['Project Name'].str.contains('Agricultural'), 'Project Name'] = 'Agriculture'

In [446]:
df.loc[df['Project Name'].str.contains('PNG - Emergency TB Project'), 'Project Name'] = 'Environmental Risks'

In [447]:
df.loc[df['Project Name'].str.contains('EDUCATION SECAL'), 'Project Name'] = 'Education'

In [448]:
df.loc[df['Project Name'].str.contains('EMPLYMT PROMO LIL'), 'Project Name'] = 'Credit and Finance'

In [449]:
df.loc[df['Project Name'].str.contains('MZ-PRSC'), 'Project Name'] = 'Others'

In [450]:
df.loc[df['Project Name'].str.contains('KARNATAKA WS & ENV/S'), 'Project Name'] = 'Water'

In [451]:
df.loc[df['Project Name'].str.contains('SN - Electricity Sector Support'), 'Project Name'] = 'Energy'

In [452]:
df.loc[df['Project Name'].str.contains('HN Social Protection'), 'Project Name'] = 'Health'

In [453]:
df.loc[df['Project Name'].str.contains('Vietnam PRSC III'), 'Project Name'] = ''

In [454]:
df.loc[df['Project Name'].str.contains('Edu'), 'Project Name'] = 'Education'

In [455]:
df.loc[df['Project Name'].str.contains('HEALTH/POPULATION/RU'), 'Project Name'] = 'Health'

In [456]:
df.loc[df['Project Name'].str.contains('Transport'), 'Project Name'] = 'Roads and Transport'

In [457]:
df.loc[df['Project Name'].str.contains('FEEDER ROADS'), 'Project Name'] = 'Roads and Transport'

In [458]:
df.loc[df['Project Name'].str.contains('Administration'), 'Project Name'] = 'Public Service'

In [459]:
df.loc[df['Project Name'].str.contains('Georgia Competitiveness and Growth DPO3'), 'Project Name'] = 'Others'

In [460]:
df.loc[df['Project Name'].str.contains('THIRD T.A.'), 'Project Name'] = 'Others'

In [461]:
df.loc[df['Project Name'].str.contains('ENERGY'), 'Project Name'] = 'Energy'

In [462]:
df.loc[df['Project Name'].str.contains('NRDP'), 'Project Name'] = 'Rural Development'

In [463]:
df.loc[df['Project Name'].str.contains('OFFICE DU NIGER'), 'Project Name'] = 'Irrigation'

In [464]:
df.loc[df['Project Name'].str.contains('LS-Social Assistance Project'), 'Project Name'] = 'Others'

In [465]:
df.loc[df['Project Name'].str.contains('PCP'), 'Project Name'] = 'Others'

In [466]:
df.loc[df['Project Name'].str.contains('CV-PRSC 4 -DPL'), 'Project Name'] = 'Others'

In [467]:
df.loc[df['Project Name'].str.contains('EXPORT REHAB T.A.'), 'Project Name'] = 'Others'

In [468]:
df.loc[df['Project Name'].str.contains('MZ-Spatial Development Planning TA'), 'Project Name'] = 'Infrastructure'

In [469]:
df.loc[df['Project Name'].str.contains('GD Economic and Social DPL'), 'Project Name'] = 'Others'

In [470]:
df.loc[df['Project Name'].str.contains('IN: Low-Income Housing Finance'), 'Project Name'] = 'Credit and Finance'

In [471]:
df.loc[df['Project Name'].str.contains('REG & MUNI INFRA DEV'), 'Project Name'] = 'Infrastructure'

In [472]:
df.loc[df['Project Name'].str.contains('PRIV SECT DEV & C.B.'), 'Project Name'] = 'Credit and Finance'

In [473]:
df.loc[df['Project Name'].str.contains('HN FIFTH SOCIAL INVESTMENT FUND PROJECT'), 'Project Name'] = 'Others'

In [474]:
df.loc[df['Project Name'].str.contains('VCRTP'), 'Project Name'] = 'Others'

In [475]:
df.loc[df['Project Name'].str.contains('HEALTH/POP.II'), 'Project Name'] = 'Health'

In [476]:
df.loc[df['Project Name'].str.contains('Guinea Second MFM DPO'), 'Project Name'] = 'Others'

In [477]:
df.loc[df['Project Name'].str.contains('ROAD'), 'Project Name'] = 'Roads and Transport'

In [478]:
df.loc[df['Project Name'].str.contains('IMMUNIZATION STRENGTHENING PROJECT'), 'Project Name'] = 'Health'

In [479]:
df.loc[df['Project Name'].str.contains('EG-HEALTH SECTOR'), 'Project Name'] = 'Health'

In [480]:
df.loc[df['Project Name'].str.contains('ECON. REC. CRD II'), 'Project Name'] = 'Infrastructure'

In [481]:
df.loc[df['Project Name'].str.contains('EMG HOUSING REPAIR'), 'Project Name'] = 'Infrastructure'

In [482]:
df.loc[df['Project Name'].str.contains('HEALTH I'), 'Project Name'] = 'Health'

In [483]:
df.loc[df['Project Name'].str.contains('LK: Puttalam Housing Project'), 'Project Name'] = 'Urbanization'

In [484]:
df.loc[df['Project Name'].str.contains('ED SECT DEV'), 'Project Name'] = 'Urbanization'

In [485]:
df.loc[df['Project Name'].str.contains('ER-AIDS'), 'Project Name'] = 'Health'

In [486]:
df.loc[df['Project Name'].str.contains('BI-Social Action'), 'Project Name'] = 'Others'

In [487]:
df.loc[df['Project Name'].str.contains('REAL ESTATE CADASTRE'), 'Project Name'] = 'Urbanization'

In [488]:
df.loc[df['Project Name'].str.contains('PETROLEUM EXLORATION'), 'Project Name'] = 'Energy'

In [489]:
df.loc[df['Project Name'].str.contains('PforR for PSTA 4'), 'Project Name'] = 'Energy'

In [490]:
df.loc[df['Project Name'].str.contains('AIDS'), 'Project Name'] = 'Health'

In [491]:
df.loc[df['Project Name'].str.contains('TELECOM'), 'Project Name'] = 'Telecommunication'

In [492]:
df.loc[df['Project Name'].str.contains('Infrast'), 'Project Name'] = 'Infrastructure'

In [493]:
df.loc[df['Project Name'].str.contains('SAL'), 'Project Name'] = 'Credit and Finance'

In [494]:
df.loc[df['Project Name'].str.contains('Action Fund'), 'Project Name'] = 'Poverty Reduction'

In [495]:
df.loc[df['Project Name'].str.contains('SEED'), 'Project Name'] = 'Agriculture'

In [496]:
df.loc[df['Project Name'].str.contains('WTR'), 'Project Name'] = 'Water'

In [497]:
df.loc[df['Project Name'].str.contains('MICROCREDIT'), 'Project Name'] = 'Credit and Finance'

In [498]:
df.loc[df['Project Name'].str.contains('Governance'), 'Project Name'] = 'Public Service'

In [499]:
df.loc[df['Project Name'].str.contains('OMVG Interconnection Project'), 'Project Name'] = 'Energy'

In [500]:
df.loc[df['Project Name'].str.contains('MUNICIPAL'), 'Project Name'] = 'Public Service'

In [501]:
df.loc[df['Project Name'].str.contains('Emergency Assistance'), 'Project Name'] = 'Environmental Risks'

In [502]:
df.loc[df['Project Name'].str.contains('BRRP'), 'Project Name'] = 'Public Service'

In [503]:
df.loc[df['Project Name'].str.contains('SCHISTOSOMIASIS CONTROL'), 'Project Name'] = 'Health'

In [504]:
df.loc[df['Project Name'].str.contains('ET-Agr Research'), 'Project Name'] = 'Agriculture'

In [505]:
df.loc[df['Project Name'].str.contains('Strategic Cities'), 'Project Name'] = 'Urbanization'

In [506]:
df.loc[df['Project Name'].str.contains('POOR'), 'Project Name'] = 'Poverty Reduction'

In [507]:
df.loc[df['Project Name'].str.contains('ECONOMIC RECOVERY'), 'Project Name'] = 'Credit and Finance'

In [508]:
df.loc[df['Project Name'].str.contains('ENVIRONMENT'), 'Project Name'] = 'Environmental Risks'

In [509]:
df.loc[df['Project Name'].str.contains('FLOOD'), 'Project Name'] = 'Environmental Risks'

In [510]:
df.loc[df['Project Name'].str.contains('SS-Local Governance'), 'Project Name'] = 'Public Service'

In [511]:
df.loc[df['Project Name'].str.contains('IRR/DRAIN'), 'Project Name'] = 'Irrigation'

In [512]:
df.loc[df['Project Name'].str.contains('Municipalities'), 'Project Name'] = 'Public Service'

In [513]:
df.loc[df['Project Name'].str.contains('Electricity'), 'Project Name'] = 'Energy'

In [514]:
df.loc[df['Project Name'].str.contains('Village'), 'Project Name'] = 'Rural Development'

In [515]:
df.loc[df['Project Name'].str.contains('REHAB.III'), 'Project Name'] = 'Roads and Transport'

In [516]:
df.loc[df['Project Name'].str.contains('ADBP V'), 'Project Name'] = 'Others'

In [517]:
df.loc[df['Project Name'].str.contains('Pakistan Housing Finance'), 'Project Name'] = 'Credit and Finance'

In [518]:
df.loc[df['Project Name'].str.contains('SOC INS TA'), 'Project Name'] = 'Others'

In [519]:
df.loc[df['Project Name'].str.contains('STOCKROUTE'), 'Project Name'] = 'Livestock'

In [520]:
df.loc[df['Project Name'].str.contains('Nutrition'), 'Project Name'] = 'Health'

In [521]:
df.loc[df['Project Name'].str.contains('BJ-Youth Employment'), 'Project Name'] = 'Education'

In [522]:
df.loc[df['Project Name'].str.contains('PUBLIC SEC.ADJ'), 'Project Name'] = 'Public Service'

In [523]:
df.loc[df['Project Name'].str.contains('Towns'), 'Project Name'] = 'Urbanization'

In [524]:
df.loc[df['Project Name'].str.contains('Wtrshed'), 'Project Name'] = 'Water'

In [525]:
df.loc[df['Project Name'].str.contains('ZM-ERIPTA'), 'Project Name'] = 'Credit and Finance'

In [526]:
df.loc[df['Project Name'].str.contains('DRC-Emerg Econ'), 'Project Name'] = 'Post Conflict Reconstruction'

In [527]:
df.loc[df['Project Name'].str.contains('SUGAR'), 'Project Name'] = 'Agriculture'

In [528]:
df.loc[df['Project Name'].str.contains('CHINA INVEST. BANK'), 'Project Name'] = 'Credit and Finance'

In [529]:
df.loc[df['Project Name'].str.contains('Clean Air'), 'Project Name'] = 'Credit and Finance'

In [530]:
df.loc[df['Project Name'].str.contains('REHAB.IMPORT'), 'Project Name'] = 'Roads and Transport'

In [531]:
df.loc[df['Project Name'].str.contains('Vocational Training'), 'Project Name'] = 'Education'

In [532]:
df.loc[df['Project Name'].str.contains('CYCLONE'), 'Project Name'] = 'Environmental Risks'

In [533]:
df.loc[df['Project Name'].str.contains('Cyclone'), 'Project Name'] = 'Environmental Risks'

In [534]:
df.loc[df['Project Name'].str.contains('Transp'), 'Project Name'] = 'Roads and Transport'

In [535]:
df.loc[df['Project Name'].str.contains('Transp.Trade'), 'Project Name'] = 'Credit and Finance'

In [536]:
df.loc[df['Project Name'].str.contains('EMG IND RESTART'), 'Project Name'] = 'Others'

In [537]:
df.loc[df['Project Name'].str.contains('PRIV.SCTR.CAP.BLDG'), 'Project Name'] = 'Others'

In [538]:
df.loc[df['Project Name'].str.contains('Agric'), 'Project Name'] = 'Others'

In [539]:
df.loc[df['Project Name'].str.contains('WAPP'), 'Project Name'] = 'Others'

In [540]:
df.loc[df['Project Name'].str.contains('FOOD'), 'Project Name'] = 'Others'

In [541]:
df.loc[df['Project Name'].str.contains('DRC Priv Sec Dev Competitiveness'), 'Project Name'] = 'Others'

In [542]:
df.loc[df['Project Name'].str.contains('KH-SOCIAL FUND'), 'Project Name'] = 'Others'

In [543]:
df.loc[df['Project Name'].str.contains('TRNSPRT SCTR RHB'), 'Project Name'] = 'Roads and Transport'

In [544]:
df.loc[df['Project Name'].str.contains('GH-PRSC 5 DPL'), 'Project Name'] = 'Roads and Transport'

In [545]:
df.loc[df['Project Name'].str.contains('VN-Highway Rehab'), 'Project Name'] = 'Others'

In [546]:
df.loc[df['Project Name'].str.contains('ML-PRSC 5 - DPL'), 'Project Name'] = 'Roa'

In [547]:
df.loc[df['Project Name'].str.contains('PET.& GEO-THERMAL EXP.N'), 'Project Name'] = 'Energy'

In [548]:
df.loc[df['Project Name'].str.contains('T.A.'), 'Project Name'] = 'Energy'

In [549]:
df.loc[df['Project Name'].str.contains('A.P. EXTENSION'), 'Project Name'] = 'Agriculture'

### Limiting Data for Plotting to 150 

In [550]:
f = df.head(150)

In [551]:
df.loc[df['Project Name'].str.contains('CASHEW NUTS II'), 'Project Name'] = 'Agriculture'

In [552]:
df.loc[df['Project Name'].str.contains('LOWER BURMA PADDY DE'), 'Project Name'] = 'Agriculture'

In [553]:
df.loc[df['Project Name'].str.contains('SCARP MARDAN'), 'Project Name'] = 'Irrigation'

In [554]:
df.loc[df['Project Name'].str.contains('MOROGORO TEXTILE'), 'Project Name'] = 'Industrialization'

In [555]:
df.loc[df['Project Name'].str.contains('TIHAMA III'), 'Project Name'] = 'Agriculture'

In [556]:
df.loc[df['Project Name'].str.contains('TOURISM'), 'Project Name'] = 'Tourism'

In [557]:
df.loc[df['Project Name'].str.contains('OMASUYOS-LOS ANDES R'), 'Project Name'] = 'Rural Development'

In [558]:
df.loc[df['Project Name'].str.contains('MINING'), 'Project Name'] = 'Mining'

In [559]:
df.loc[df['Project Name'].str.contains('COCOA/COFFEE II'), 'Project Name'] = 'Agriculture'

In [560]:
df.loc[df['Project Name'].str.contains('SANTA CRUZ'), 'Project Name'] = 'Water'

In [561]:
df.loc[df['Project Name'].str.contains('CARIBBEAN DEVT BANK'), 'Project Name'] = 'Others'

In [562]:
df.loc[df['Project Name'].str.contains('SMALL SCALE INDUSTRI'), 'Project Name'] = 'Industrialization'

In [563]:
df.loc[df['Project Name'].str.contains('POPULATION II'), 'Project Name'] = 'Others'

In [564]:
df.loc[df['Project Name'].str.contains('FISHERIES'), 'Project Name'] = 'Fishery'

In [565]:
df.loc[df['Project Name'].str.contains('BARINGO SEMI-ARID AR'), 'Project Name'] = 'Irrigation'

In [566]:
df.loc[df['Project Name'].str.contains('NUTRITION'), 'Project Name'] = 'Health'

In [567]:
df.loc[df['Project Name'].str.contains('RUBBER'), 'Project Name'] = 'Industrialization'

In [568]:
df.loc[df['Project Name'].str.contains('DFC/SME'), 'Project Name'] = 'Credit and Finance'

In [569]:
df.loc[df['Project Name'].str.contains('POPULAT. II'), 'Project Name'] = 'Credit and Finance'

In [570]:
df_head = df.head(50)

In [571]:
df_head['Total Principal Amount'] = df_head['Original Principal Amount'].sum()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### A Faceted Barplot of Borrowed Fund against Project Type(Here as 'Project Name')

In [572]:
import plotly.express as px
fig = px.bar(df_head, x="Region", y="Total Principal Amount", 
             color="Project Name", barmode="group",
             facet_row="Currency of Commitment", 
             facet_col="Credit Status",
             )
fig.show()

#### Percentage of total Amount Borrowed in Africa per Country independent of credit status

In [573]:
df['Total Principal Amount'] = df['Original Principal Amount'].sum()

In [574]:
import plotly.express as px

fig = px.pie(df.loc[df['Region']== 'AFRICA'], values='Total Principal Amount', 
             names='Country',
             title='Country Percentage')
fig.show()

### Histogram

In [575]:
fig = px.histogram(df_head, x="Original Principal Amount", 
                   color="Region", marginal="violin", 
                   hover_data=df_head.columns,
                   histnorm='probability density',
                   opacity=0.8,
                   log_y=False
                         )
fig.show()

#### A Treemap as an Alternative Representation

In [576]:
fig = px.treemap(df, path=[px.Constant('IDA Regions'), 'Region', 'Country'], 
                 values='Total Principal Amount',
                 color='Original Principal Amount'
                  )
fig.show()

In [577]:
df.to_csv('ida_fe.csv', index = False)