In [1]:
import pandas as pd
import json
import requests
import os

## Deliverable 1: Extract
----
### Option 1: Use Python Dictionary Methods

1. Import the `backer_info.csv` file into a DataFrame.
2. Iterate through the DataFrame and convert each row to a dictionary. 
3. Iterate through each dictionary and do the following:
    * Extract the dictionary values from the keys using Python list comprehension.
    * Add the values for each row to a new list. 
4. Create a new DataFrame with the retrieved data. 
5. Export the DataFrame as `backers_data.csv`.

In [2]:
# Load the raw backer records from backer_info.csv.
# Widen the column display so each JSON string is shown in full. The full option
# name is used because abbreviated keys are resolved by pattern matching and can
# become ambiguous if pandas adds similarly named options.
pd.set_option("display.max_colwidth", 400)
backer_info = pd.read_csv("backer_info.csv")
# read_csv already returns a DataFrame; the wrap is kept so that both
# `backer_info` and `backer_df` stay defined for the cells that follow.
backer_df = pd.DataFrame(backer_info)
backer_df.head()

Unnamed: 0,backer_info
0,"{""backer_id"": ""av166"", ""cf_id"": 968, ""name"": ""Angelo Vincent"", ""email"": ""avincent@live.com""}"
1,"{""backer_id"": ""ha127"", ""cf_id"": 563, ""name"": ""Hubert Arnold"", ""email"": ""harnold@yandex.com""}"
2,"{""backer_id"": ""lg794"", ""cf_id"": 65, ""name"": ""Loris Goulet"", ""email"": ""lgoulet@yandex.com""}"
3,"{""backer_id"": ""tb566"", ""cf_id"": 563, ""name"": ""Teodora Brunelli"", ""email"": ""tbrunelli@outlook.com""}"
4,"{""backer_id"": ""lh506"", ""cf_id"": 563, ""name"": ""Lexie Hunt"", ""email"": ""lhunt@live.com""}"


In [3]:
# Step 1: Load JSON data into DataFrame
backers_df = pd.json_normalize(backer_df['backer_info'].apply(pd.io.json.loads))
backers_df

Unnamed: 0,backer_id,cf_id,name,email
0,av166,968,Angelo Vincent,avincent@live.com
1,ha127,563,Hubert Arnold,harnold@yandex.com
2,lg794,65,Loris Goulet,lgoulet@yandex.com
3,tb566,563,Teodora Brunelli,tbrunelli@outlook.com
4,lh506,563,Lexie Hunt,lhunt@live.com
...,...,...,...,...
8170,st581,65,Serita Thebault,sthebault@yandex.com
8171,gf637,563,Glenn Foerstner,gfoerstner@yahoo.com
8172,rc983,1114,Robt Collin,rcollin@outlook.com
8173,cz381,65,Corina Zappa,czappa@outlook.com


## Deliverable 2: Transform and Clean Data
----
1. Check the data types of the columns and convert the "cf_id" column to an integer, if necessary.
2. Split the name in the "name" column into first and last names, and add them to "first_name" and "last_name" columns in the DataFrame. 
3. Drop the "name" column in the DataFrame.
4. Place the columns in the following order: "backer_id", "cf_id", "first_name", "last_name", and "email".

In [4]:
# Inspect the dtype of every column to see whether "cf_id" still needs to be
# converted to an integer.
backers_df.dtypes

backer_id    object
cf_id         int64
name         object
email        object
dtype: object

In [5]:
# Convert cf_id to an integer if necessary.
# pd.to_numeric parses numeric strings but may return a float column on its own;
# the explicit astype("int64") guarantees an integer dtype and raises on missing
# values instead of silently leaving the column as float.
backers_df["cf_id"] = pd.to_numeric(backers_df["cf_id"]).astype("int64")
backers_df["cf_id"].dtype

dtype('int64')

In [6]:
# Break "name" apart at the first space only (n=1), so everything after the
# first word lands in the second column, then store the two pieces as
# "first_name" and "last_name".
name_parts = backers_df["name"].str.split(" ", n=1, expand=True)
backers_df[["first_name", "last_name"]] = name_parts
backers_df

Unnamed: 0,backer_id,cf_id,name,email,first_name,last_name
0,av166,968,Angelo Vincent,avincent@live.com,Angelo,Vincent
1,ha127,563,Hubert Arnold,harnold@yandex.com,Hubert,Arnold
2,lg794,65,Loris Goulet,lgoulet@yandex.com,Loris,Goulet
3,tb566,563,Teodora Brunelli,tbrunelli@outlook.com,Teodora,Brunelli
4,lh506,563,Lexie Hunt,lhunt@live.com,Lexie,Hunt
...,...,...,...,...,...,...
8170,st581,65,Serita Thebault,sthebault@yandex.com,Serita,Thebault
8171,gf637,563,Glenn Foerstner,gfoerstner@yahoo.com,Glenn,Foerstner
8172,rc983,1114,Robt Collin,rcollin@outlook.com,Robt,Collin
8173,cz381,65,Corina Zappa,czappa@outlook.com,Corina,Zappa


In [7]:
# Drop the "name" column and put the remaining columns in the required order.
# Selecting exactly the wanted columns does both in one step and, unlike an
# explicit drop(columns="name"), does not raise a KeyError when this cell is
# run a second time.
backers_df = backers_df[["backer_id", "cf_id", "first_name", "last_name", "email"]]
backers_df.head(10)

Unnamed: 0,backer_id,cf_id,first_name,last_name,email
0,av166,968,Angelo,Vincent,avincent@live.com
1,ha127,563,Hubert,Arnold,harnold@yandex.com
2,lg794,65,Loris,Goulet,lgoulet@yandex.com
3,tb566,563,Teodora,Brunelli,tbrunelli@outlook.com
4,lh506,563,Lexie,Hunt,lhunt@live.com
5,lh382,563,Lambert,Huber,lhuber@live.com
6,em444,563,Emmy,Morin,emorin@live.com
7,at582,1572,Armonda,Trani,atrani@live.com
8,id772,1911,Ilana,Duke,iduke@outlook.com
9,lw275,65,Leigha,Wright,lwright@live.com


In [8]:
# Write the cleaned backers table to "backers.csv" as UTF-8 text, leaving out
# the DataFrame index so the file contains only the five data columns.
backers_df.to_csv(
    "backers.csv",
    index=False,
    encoding="utf8",
)
