# [ETL Mini-Project] Create the DataFrames
---
## Step #0 - Import Dependencies and Setup

In [1]:
# Import the Pandas and NumPy libraries (for data structuring, creating NumPy arrays, and converting list comprehensions into NumPy arrays)
# Import the JSON library (to convert JSON strings into Python dictionaries; used for the 'Contacts' DataFrame)
# Import the DateTime library (used primarily for returning values in 'Date' format for the 'Campaign' DataFrame)
import pandas as pd
import numpy as np
import json
from datetime import datetime as dt

# Expand the column width for all Pandas DataFrames (up to 400 characters per cell).
# The fully-qualified option name is used: pandas resolves shorthand names by pattern
# matching, which can stop working if a similarly named option is added in a later release.
pd.set_option('display.max_colwidth', 400)

## Step #1 - Category & Subcategory DataFrames (`category_df` / `subcategory_df`)
---
### Read the Crowdfunding XLSX File (`crowdfunding_df`)

In [2]:
# Load the crowdfunding workbook from the Resources folder into a DataFrame
crowdfunding_df = pd.read_excel(io = 'Resources/crowdfunding.xlsx')

# Preview the first five rows of the 'Crowdfunding' DataFrame
crowdfunding_df.head(n = 5)

Unnamed: 0,cf_id,contact_id,company_name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category & sub-category
0,147,4661,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1581573600,1614578400,False,False,food/food trucks
1,1621,3765,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1611554400,1621918800,False,True,music/rock
2,1812,4187,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1608184800,1640844000,False,False,technology/web
3,2156,4941,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1634792400,1642399200,False,False,music/rock
4,1365,2199,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1608530400,1629694800,False,False,theater/plays


### Get Information for `crowdfunding_df`

In [3]:
# Summarise the 'Crowdfunding' DataFrame: column names, non-null counts and data types
crowdfunding_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   cf_id                    1000 non-null   int64 
 1   contact_id               1000 non-null   int64 
 2   company_name             1000 non-null   object
 3   blurb                    1000 non-null   object
 4   goal                     1000 non-null   int64 
 5   pledged                  1000 non-null   int64 
 6   outcome                  1000 non-null   object
 7   backers_count            1000 non-null   int64 
 8   country                  1000 non-null   object
 9   currency                 1000 non-null   object
 10  launched_at              1000 non-null   int64 
 11  deadline                 1000 non-null   int64 
 12  staff_pick               1000 non-null   bool  
 13  spotlight                1000 non-null   bool  
 14  category & sub-category  1000 non-null   

### Split `category & sub-category` to Get New Columns (`category` / `subcategory`)

In [4]:
# Split the "category & sub-category" string on the first "/" only and store the two parts in new columns.
# `n = 1` guarantees at most two resulting columns, so a value containing an additional "/"
# keeps the remainder in "subcategory" instead of raising a column-count mismatch on assignment.
crowdfunding_df[["category","subcategory"]] = crowdfunding_df["category & sub-category"].str.split('/', n = 1, expand = True)

# Display the 'Crowdfunding' DataFrame (After Split)
crowdfunding_df.head()

Unnamed: 0,cf_id,contact_id,company_name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category & sub-category,category,subcategory
0,147,4661,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1581573600,1614578400,False,False,food/food trucks,food,food trucks
1,1621,3765,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1611554400,1621918800,False,True,music/rock,music,rock
2,1812,4187,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1608184800,1640844000,False,False,technology/web,technology,web
3,2156,4941,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1634792400,1642399200,False,False,music/rock,music,rock
4,1365,2199,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1608530400,1629694800,False,False,theater/plays,theater,plays


### List of all Unique Categories in `category` Column (`categories`)

In [5]:
# Collect the distinct values found in the "category" column
categories = pd.unique(crowdfunding_df["category"])

# Number of distinct categories
count_cat = len(categories)

# Report the count, a blank line, then the category values themselves
print(f"There are {count_cat} categories.", "", categories, sep = "\n")

There are 9 categories.

['food' 'music' 'technology' 'theater' 'film & video' 'publishing' 'games'
 'photography' 'journalism']


### List of all Unique Sub-Categories in `subcategory` Column (`subcategories`)

In [6]:
# Collect the distinct values found in the "subcategory" column
subcategories = pd.unique(crowdfunding_df["subcategory"])

# Number of distinct sub-categories
count_subcat = len(subcategories)

# Report the count, a blank line, then the sub-category values themselves
print(f"There are {count_subcat} sub-categories.", "", subcategories, sep = "\n")

There are 24 sub-categories.

['food trucks' 'rock' 'web' 'plays' 'documentary' 'electric music' 'drama'
 'indie rock' 'wearables' 'nonfiction' 'animation' 'video games' 'shorts'
 'fiction' 'photography books' 'radio & podcasts' 'metal' 'jazz'
 'translations' 'television' 'mobile games' 'world music'
 'science fiction' 'audio']


### NumPy Arrays - Sequential IDs Matching Each Unique Category & Sub-Category (`cat_ids` / `subcat_ids`)

In [7]:
# Build 1-based sequential IDs, one for each unique category
cat_ids = np.arange(count_cat) + 1

# Build 1-based sequential IDs, one for each unique sub-category
subcat_ids = np.arange(count_subcat) + 1

# Show both ID arrays, separated by a blank line
print(cat_ids, "", subcat_ids, sep = "\n")

[1 2 3 4 5 6 7 8 9]

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24]


### List Comprehension - Update Category / Sub-Category Entry ID Format (`new_cat_ids` / `new_subcat_ids`)

In [8]:
# Prefix each numeric category ID with 'cat' (1 -> 'cat1') and collect the results in a NumPy array
new_cat_ids = np.array(list(map("cat{}".format, cat_ids)))

# Prefix each numeric sub-category ID with 'subcat' (1 -> 'subcat1') and collect the results in a NumPy array
new_subcat_ids = np.array(list(map("subcat{}".format, subcat_ids)))

# Show both formatted ID arrays, separated by a blank line
print(new_cat_ids, "", new_subcat_ids, sep = "\n")

['cat1' 'cat2' 'cat3' 'cat4' 'cat5' 'cat6' 'cat7' 'cat8' 'cat9']

['subcat1' 'subcat2' 'subcat3' 'subcat4' 'subcat5' 'subcat6' 'subcat7'
 'subcat8' 'subcat9' 'subcat10' 'subcat11' 'subcat12' 'subcat13'
 'subcat14' 'subcat15' 'subcat16' 'subcat17' 'subcat18' 'subcat19'
 'subcat20' 'subcat21' 'subcat22' 'subcat23' 'subcat24']


### Create Category DataFrame Using `new_cat_ids` & `categories` Lists (`category_df`)

In [9]:
# Pair each formatted category ID with its category name (one row per unique category)
category_columns = {"category_id": new_cat_ids, "category": categories}
category_df = pd.DataFrame(data = category_columns)

# Show the complete 'Category' DataFrame
category_df

Unnamed: 0,category_id,category
0,cat1,food
1,cat2,music
2,cat3,technology
3,cat4,theater
4,cat5,film & video
5,cat6,publishing
6,cat7,games
7,cat8,photography
8,cat9,journalism


### Create Sub-Category DataFrame Using `new_subcat_ids` & `subcategories` Lists (`subcategory_df`)

In [10]:
# Pair each formatted sub-category ID with its sub-category name (one row per unique sub-category)
subcategory_columns = {"subcategory_id": new_subcat_ids, "subcategory": subcategories}
subcategory_df = pd.DataFrame(data = subcategory_columns)

# Show the complete 'Sub-Category' DataFrame
subcategory_df

Unnamed: 0,subcategory_id,subcategory
0,subcat1,food trucks
1,subcat2,rock
2,subcat3,web
3,subcat4,plays
4,subcat5,documentary
5,subcat6,electric music
6,subcat7,drama
7,subcat8,indie rock
8,subcat9,wearables
9,subcat10,nonfiction


### Export DataFrames To CSV Files (`category.csv` / `subcategory.csv`)

In [11]:
# Write each lookup DataFrame to its own CSV file, leaving out the index column
category_df.to_csv(path_or_buf = "Resources/category.csv", index = False)
subcategory_df.to_csv(path_or_buf = "Resources/subcategory.csv", index = False)

# Confirm both exports, with a blank line between the two messages
print("******Category DataFrame to CSV File (Resources/category.csv)******",
      "",
      "******Sub-Category DataFrame to CSV File (Resources/subcategory.csv)******",
      sep = "\n")

******Category DataFrame to CSV File (Resources/category.csv)******

******Sub-Category DataFrame to CSV File (Resources/subcategory.csv)******


## Step #2 - Campaign DataFrame  (`final_campaign_df`)
---
### Copy `crowdfunding_df` to a New Pandas DataFrame (`raw_campaign_df`)

In [12]:
# Take an independent (deep) copy of the 'Crowdfunding' DataFrame to work on
raw_campaign_df = crowdfunding_df.copy(deep = True)

# Preview the first five rows of the copy
raw_campaign_df.head(n = 5)

Unnamed: 0,cf_id,contact_id,company_name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category & sub-category,category,subcategory
0,147,4661,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1581573600,1614578400,False,False,food/food trucks,food,food trucks
1,1621,3765,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1611554400,1621918800,False,True,music/rock,music,rock
2,1812,4187,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1608184800,1640844000,False,False,technology/web,technology,web
3,2156,4941,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1634792400,1642399200,False,False,music/rock,music,rock
4,1365,2199,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1608530400,1629694800,False,False,theater/plays,theater,plays


### Rename `blurb` / `launched_at` / `deadline` Column Headers

In [13]:
# Rename the column headers 'blurb', 'launched_at' and 'deadline'.
# The result is reassigned instead of using `inplace = True`, so the step reads the same
# way as the other reassigning transformations on this DataFrame.
raw_campaign_df = raw_campaign_df.rename(columns = {"blurb":"description",
                                                    "launched_at":"launch_date",
                                                    "deadline":"end_date"})

# Log the list of all column header names (After Renaming)
print(raw_campaign_df.columns.values)

['cf_id' 'contact_id' 'company_name' 'description' 'goal' 'pledged'
 'outcome' 'backers_count' 'country' 'currency' 'launch_date' 'end_date'
 'staff_pick' 'spotlight' 'category & sub-category' 'category'
 'subcategory']


### Convert `goal` / `pledged` Columns to `float` Data Type

In [14]:
# Cast the 'goal' and 'pledged' columns to 64-bit floats; every other column keeps its data type
float_columns = dict.fromkeys(["goal", "pledged"], "float64")
raw_campaign_df = raw_campaign_df.astype(float_columns)

# Summarise the DataFrame to confirm the new data types
raw_campaign_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   cf_id                    1000 non-null   int64  
 1   contact_id               1000 non-null   int64  
 2   company_name             1000 non-null   object 
 3   description              1000 non-null   object 
 4   goal                     1000 non-null   float64
 5   pledged                  1000 non-null   float64
 6   outcome                  1000 non-null   object 
 7   backers_count            1000 non-null   int64  
 8   country                  1000 non-null   object 
 9   currency                 1000 non-null   object 
 10  launch_date              1000 non-null   int64  
 11  end_date                 1000 non-null   int64  
 12  staff_pick               1000 non-null   bool   
 13  spotlight                1000 non-null   bool   
 14  category & sub-category  

### Convert `launch_date` / `end_date` Datapoints to `datetime` Format (YYYY-MM-DD)

In [15]:
# Both date columns hold unix timestamps in seconds: parse each one as a datetime,
# then keep only the calendar date (YYYY-MM-DD)
for date_col in ("launch_date", "end_date"):
    parsed_dates = pd.to_datetime(raw_campaign_df[date_col], unit = "s")
    raw_campaign_df[date_col] = parsed_dates.dt.date

# Preview the DataFrame with the reformatted date columns
raw_campaign_df.head()

Unnamed: 0,cf_id,contact_id,company_name,description,goal,pledged,outcome,backers_count,country,currency,launch_date,end_date,staff_pick,spotlight,category & sub-category,category,subcategory
0,147,4661,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100.0,0.0,failed,0,CA,CAD,2020-02-13,2021-03-01,False,False,food/food trucks,food,food trucks
1,1621,3765,Odom Inc,Managed bottom-line architecture,1400.0,14560.0,successful,158,US,USD,2021-01-25,2021-05-25,False,True,music/rock,music,rock
2,1812,4187,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400.0,142523.0,successful,1425,AU,AUD,2020-12-17,2021-12-30,False,False,technology/web,technology,web
3,2156,4941,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200.0,2477.0,failed,24,US,USD,2021-10-21,2022-01-17,False,False,music/rock,music,rock
4,1365,2199,Larson-Little,Proactive foreground core,7600.0,5265.0,failed,53,US,USD,2020-12-21,2021-08-23,False,False,theater/plays,theater,plays


### Add `category_id` / `subcategory_id` Columns; Drop Irrelevant Columns (`final_campaign_df`)

In [16]:
# Left merge on 'category'
# Every row of the left DataFrame (raw_campaign_df) is kept and gains the 'category_id' of its matching
# 'category' in category_df. `validate = "many_to_one"` raises if a category appears more than once in
# category_df, which would otherwise silently duplicate campaign rows.
final_campaign_df = pd.merge(raw_campaign_df, category_df, how = "left", on = ["category"], sort = False,
                             validate = "many_to_one")

# Left merge on 'subcategory'
# Every row from the previous merge is kept and gains the 'subcategory_id' of its matching 'subcategory'
# in subcategory_df, with the same uniqueness check on the lookup side.
final_campaign_df = pd.merge(final_campaign_df, subcategory_df, how = "left", on = ["subcategory"], sort = False,
                             validate = "many_to_one")

# Using Pandas 'Drop' Function, remove columns as per the project's requirements
# (the text category columns are replaced by their ID columns)
final_campaign_df = final_campaign_df.drop(columns = ["staff_pick",
                                                      "spotlight",
                                                      "category & sub-category",
                                                      "category",
                                                      "subcategory"])

# Display the Final 'Campaign' DataFrame
final_campaign_df.head()

Unnamed: 0,cf_id,contact_id,company_name,description,goal,pledged,outcome,backers_count,country,currency,launch_date,end_date,category_id,subcategory_id
0,147,4661,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100.0,0.0,failed,0,CA,CAD,2020-02-13,2021-03-01,cat1,subcat1
1,1621,3765,Odom Inc,Managed bottom-line architecture,1400.0,14560.0,successful,158,US,USD,2021-01-25,2021-05-25,cat2,subcat2
2,1812,4187,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400.0,142523.0,successful,1425,AU,AUD,2020-12-17,2021-12-30,cat3,subcat3
3,2156,4941,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200.0,2477.0,failed,24,US,USD,2021-10-21,2022-01-17,cat2,subcat2
4,1365,2199,Larson-Little,Proactive foreground core,7600.0,5265.0,failed,53,US,USD,2020-12-21,2021-08-23,cat4,subcat4


### Review Information for `final_campaign_df`

In [17]:
# Summarise the final 'Campaign' DataFrame: column names, non-null counts and data types
final_campaign_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   cf_id           1000 non-null   int64  
 1   contact_id      1000 non-null   int64  
 2   company_name    1000 non-null   object 
 3   description     1000 non-null   object 
 4   goal            1000 non-null   float64
 5   pledged         1000 non-null   float64
 6   outcome         1000 non-null   object 
 7   backers_count   1000 non-null   int64  
 8   country         1000 non-null   object 
 9   currency        1000 non-null   object 
 10  launch_date     1000 non-null   object 
 11  end_date        1000 non-null   object 
 12  category_id     1000 non-null   object 
 13  subcategory_id  1000 non-null   object 
dtypes: float64(2), int64(3), object(9)
memory usage: 109.5+ KB


### Export DataFrame To CSV File (`campaign.csv`)

In [18]:
# Write the final 'Campaign' DataFrame to a CSV file, leaving out the index column
final_campaign_df.to_csv(path_or_buf = "Resources/campaign.csv", index = False)

# Confirm the export
print("******Campaign DataFrame to CSV File (Resources/campaign.csv)******")

******Campaign DataFrame to CSV File (Resources/campaign.csv)******


## Step #3 - Contacts DataFrame  (`final_contacts_df`)
---
### Read the Contacts XLSX File (`raw_contacts_df`)

In [19]:
# Load the contacts workbook from the Resources folder into a DataFrame
# `header = 2` makes the row at 0-based position 2 (the third row of the sheet) the column header.
# NOTE: in the displayed result the 'contact_info' label appears as the first data row rather than
# as the column header, so the parsing step that follows has to skip that first row.
raw_contacts_df = pd.read_excel(io = 'Resources/contacts.xlsx', header = 2)

# Preview the first five rows of the raw 'Contacts' DataFrame
raw_contacts_df.head(n = 5)

Unnamed: 0,Unnamed: 1
0,contact_info
1,"{""contact_id"": 4661, ""name"": ""Cecilia Velasco"", ""email"": ""cecilia.velasco@rodrigues.fr""}"
2,"{""contact_id"": 3765, ""name"": ""Mariana Ellis"", ""email"": ""mariana.ellis@rossi.org""}"
3,"{""contact_id"": 4187, ""name"": ""Sofie Woods"", ""email"": ""sofie.woods@riviere.com""}"
4,"{""contact_id"": 4941, ""name"": ""Jeanette Iannotti"", ""email"": ""jeanette.iannotti@yahoo.com""}"


### Convert Each Row from DataFrame to `dict` Using JSON (`contact_info_dict`) 

In [20]:
# Parse every JSON string in the first column into a Python dictionary and collect them in a list.
# - `.iloc[1:, 0]` skips the first row, which holds the 'contact_info' label rather than JSON data,
#   and selects the first column by position.
# - Explicit positional selection replaces `row[0]`: integer keys on a label-indexed Series are
#   treated as positions only through a deprecated fallback in pandas.
contact_info_dict = [json.loads(raw_json) for raw_json in raw_contacts_df.iloc[1:, 0]]


# Display list of all dictionaries
contact_info_dict

[{'contact_id': 4661,
  'name': 'Cecilia Velasco',
  'email': 'cecilia.velasco@rodrigues.fr'},
 {'contact_id': 3765,
  'name': 'Mariana Ellis',
  'email': 'mariana.ellis@rossi.org'},
 {'contact_id': 4187,
  'name': 'Sofie Woods',
  'email': 'sofie.woods@riviere.com'},
 {'contact_id': 4941,
  'name': 'Jeanette Iannotti',
  'email': 'jeanette.iannotti@yahoo.com'},
 {'contact_id': 2199,
  'name': 'Samuel Sorgatz',
  'email': 'samuel.sorgatz@gmail.com'},
 {'contact_id': 5650,
  'name': 'Socorro Luna',
  'email': 'socorro.luna@hotmail.com'},
 {'contact_id': 5889,
  'name': 'Carolina Murray',
  'email': 'carolina.murray@knight.com'},
 {'contact_id': 4842, 'name': 'Kayla Moon', 'email': 'kayla.moon@yahoo.de'},
 {'contact_id': 3280,
  'name': 'Ariadna Geisel',
  'email': 'ariadna.geisel@rangel.com'},
 {'contact_id': 5468,
  'name': 'Danielle Ladeck',
  'email': 'danielle.ladeck@scalfaro.net'},
 {'contact_id': 3064,
  'name': 'Tatiana Thompson',
  'email': 'tatiana.thompson@hunt.net'},
 {'conta

### Python List Comprehension - Extract Values from Each Dictionary and Store in a Nested List (`dict_values`)

In [21]:
# For every dictionary in the list, pull the values out by key in a fixed order
# (contact_id, name, email) and store them as one inner list of the nested list.
# Looking the keys up explicitly keeps the columns aligned even if a record lists its keys in a
# different order, and a record that lacks one of the keys raises a KeyError instead of yielding a short row.
dict_values = [[d["contact_id"], d["name"], d["email"]] for d in contact_info_dict]

# Display the contents from the Nested List
dict_values

[[4661, 'Cecilia Velasco', 'cecilia.velasco@rodrigues.fr'],
 [3765, 'Mariana Ellis', 'mariana.ellis@rossi.org'],
 [4187, 'Sofie Woods', 'sofie.woods@riviere.com'],
 [4941, 'Jeanette Iannotti', 'jeanette.iannotti@yahoo.com'],
 [2199, 'Samuel Sorgatz', 'samuel.sorgatz@gmail.com'],
 [5650, 'Socorro Luna', 'socorro.luna@hotmail.com'],
 [5889, 'Carolina Murray', 'carolina.murray@knight.com'],
 [4842, 'Kayla Moon', 'kayla.moon@yahoo.de'],
 [3280, 'Ariadna Geisel', 'ariadna.geisel@rangel.com'],
 [5468, 'Danielle Ladeck', 'danielle.ladeck@scalfaro.net'],
 [3064, 'Tatiana Thompson', 'tatiana.thompson@hunt.net'],
 [4904, 'Caleb Benavides', 'caleb.benavides@rubio.com'],
 [1299, 'Sandra Hardy', 'sandra.hardy@web.de'],
 [5602, 'Lotti Morris', 'lotti.morris@yahoo.co.uk'],
 [5753, 'Reinhilde White', 'reinhilde.white@voila.fr'],
 [4495, 'Kerry Patel', 'kerry.patel@hutchinson.com'],
 [4269, 'Sophie Antoine', 'sophie.antoine@andersen.com'],
 [2226, 'Martha Girard', 'martha.girard@web.de'],
 [1558, 'Step

### Store Extracted Data in a New DataFrame (`final_contacts_df`)

In [22]:
# Column headers for the final 'Contacts' DataFrame, in the same order as the values of each inner list
column_headers = [
    "contact_id",
    "name",
    "email",
]

# Build the DataFrame from the nested list, one row per contact
final_contacts_df = pd.DataFrame(data = dict_values, columns = column_headers)

# Show the Final 'Contacts' DataFrame
final_contacts_df

Unnamed: 0,contact_id,name,email
0,4661,Cecilia Velasco,cecilia.velasco@rodrigues.fr
1,3765,Mariana Ellis,mariana.ellis@rossi.org
2,4187,Sofie Woods,sofie.woods@riviere.com
3,4941,Jeanette Iannotti,jeanette.iannotti@yahoo.com
4,2199,Samuel Sorgatz,samuel.sorgatz@gmail.com
...,...,...,...
995,3684,Whitney Noack,whitney.noack@laboratorios.org
996,5784,Gelsomina Migliaccio,gelsomina.migliaccio@junk.com
997,1498,Evangelista Pereira,evangelista.pereira@thompson-peterson.biz
998,6073,Gareth Comolli,gareth.comolli@tiscali.fr


### Split `name` Column to First Name & Last Name Columns (`first_name` / `last_name`)

In [23]:
# Split the "name" string on the first space only and store the two parts in new columns.
# `n = 1` guarantees at most two resulting columns, so a name with more than two words keeps
# everything after the first word in "last_name" instead of raising a column-count mismatch on assignment.
final_contacts_df[["first_name","last_name"]] = final_contacts_df["name"].str.split(" ", n = 1, expand = True)

# Drop the 'name' column and update the sequence of the other columns
final_contacts_df = final_contacts_df[["contact_id", "first_name", "last_name", "email"]]

# Display the Final 'Contacts' DataFrame (After Split and Column Sequence Update)
final_contacts_df

Unnamed: 0,contact_id,first_name,last_name,email
0,4661,Cecilia,Velasco,cecilia.velasco@rodrigues.fr
1,3765,Mariana,Ellis,mariana.ellis@rossi.org
2,4187,Sofie,Woods,sofie.woods@riviere.com
3,4941,Jeanette,Iannotti,jeanette.iannotti@yahoo.com
4,2199,Samuel,Sorgatz,samuel.sorgatz@gmail.com
...,...,...,...,...
995,3684,Whitney,Noack,whitney.noack@laboratorios.org
996,5784,Gelsomina,Migliaccio,gelsomina.migliaccio@junk.com
997,1498,Evangelista,Pereira,evangelista.pereira@thompson-peterson.biz
998,6073,Gareth,Comolli,gareth.comolli@tiscali.fr


In [24]:
# Review information of the final 'Contacts' DataFrame: column names, non-null counts and data types
final_contacts_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   contact_id  1000 non-null   int64 
 1   first_name  1000 non-null   object
 2   last_name   1000 non-null   object
 3   email       1000 non-null   object
dtypes: int64(1), object(3)
memory usage: 31.4+ KB


### Export DataFrame To CSV File (`contacts.csv`)

In [25]:
# Write the final 'Contacts' DataFrame to a UTF-8 encoded CSV file, leaving out the index column
final_contacts_df.to_csv(path_or_buf = "Resources/contacts.csv", index = False, encoding='utf8')

# Confirm the export
print("******Contacts DataFrame to CSV File (Resources/contacts.csv)******")

******Contacts DataFrame to CSV File (Resources/contacts.csv)******
