In [1]:
import pandas as pd
from pathlib import Path

# Reading a CSV file as a Pandas DataFrame

### Step 1: Create a path to the file

In [2]:
# Build the location of the sales CSV as a Path object.
# The path is relative, so it is resolved against the notebook's working directory.
csvpath = Path("../Resources/sales.csv")

### Step 2: Read the CSV into a DataFrame using Pandas

In [3]:
# Use the Pandas read_csv function, specifying the relative Path
# NOTE(review): with default settings the Zip column is inferred as int64, so any
# leading zeros in the file would be dropped (values such as 555 appear in the
# output). If zips must stay intact, consider dtype={"Zip": str} - confirm against the CSV.
sales_dataframe = pd.read_csv(csvpath)

### Step 3: Review the DataFrame

In [4]:
# Preview the top of the DataFrame; head returns the first n rows (n=5 is the default)
sales_dataframe.head(5)


Unnamed: 0,FullName,Email,Address,Zip,CreditCard,SalePrice
0,Elwanda White,alyre2036@live.com,352 Lakeshore Mall,9236,5327 0855 9720 7055,84.33
1,Lyndon Elliott,arrowy1873@outlook.com,1234 Avery Plaza,1330,3717 498777 19636,879.95
2,Daisey Sellers,toucan2024@outlook.com,469 Elwood Street,7631,3758 579477 35734,907.58
3,Issac Reeves,asarin1958@gmail.com,565 Phelps Field,81168,4400 0380 4162 1622,545.88
4,Bradford Kinney,mibound1801@yandex.com,853 Mission Rock Freeway,41721,3712 263405 60178,517.49


In [5]:
# Review the last five rows of the DataFrame with the tail function
# The number of rows viewed can be altered by passing a row count, e.g. tail(7)
sales_dataframe.tail()


Unnamed: 0,FullName,Email,Address,Zip,CreditCard,SalePrice
95,Basil Rios,holloo1814@yahoo.com,605 Sloat Terrace,8108,5581 7245 8451 9384,937.14
96,Hipolito Rios,calefacient2049@gmail.com,1248 Fountain Boulevard,37077,2333 0264 5343 2022,645.82
97,Clayton Robertson,coner2015@live.com,230 Paper Parkway,57242,2414 3939 6501 2607,128.73
98,Tony Chandler,sexillion2061@live.com,118 Flood Plantation,70135,5581 8059 1883 1841,820.0
99,Cuc Mcgowan,dispatched1820@gmail.com,87 Louisiana Grove,70959,2678 4140 5531 3894,764.69


In [6]:
# A cell only auto-renders its last expression, so pass both previews to
# display(): the first five rows, followed by the last seven rows
display(sales_dataframe.head(), sales_dataframe.tail(7))


Unnamed: 0,FullName,Email,Address,Zip,CreditCard,SalePrice
0,Elwanda White,alyre2036@live.com,352 Lakeshore Mall,9236,5327 0855 9720 7055,84.33
1,Lyndon Elliott,arrowy1873@outlook.com,1234 Avery Plaza,1330,3717 498777 19636,879.95
2,Daisey Sellers,toucan2024@outlook.com,469 Elwood Street,7631,3758 579477 35734,907.58
3,Issac Reeves,asarin1958@gmail.com,565 Phelps Field,81168,4400 0380 4162 1622,545.88
4,Bradford Kinney,mibound1801@yandex.com,853 Mission Rock Freeway,41721,3712 263405 60178,517.49


Unnamed: 0,FullName,Email,Address,Zip,CreditCard,SalePrice
93,Jerrold Luna,cherish1912@yahoo.com,1383 Merrie Crescent,555,4431 5573 4636 2445,126.06
94,Jonas Larsen,going1804@yandex.com,1323 Van Dyke Arcade,7839,2672 8966 5206 2195,323.13
95,Basil Rios,holloo1814@yahoo.com,605 Sloat Terrace,8108,5581 7245 8451 9384,937.14
96,Hipolito Rios,calefacient2049@gmail.com,1248 Fountain Boulevard,37077,2333 0264 5343 2022,645.82
97,Clayton Robertson,coner2015@live.com,230 Paper Parkway,57242,2414 3939 6501 2607,128.73
98,Tony Chandler,sexillion2061@live.com,118 Flood Plantation,70135,5581 8059 1883 1841,820.0
99,Cuc Mcgowan,dispatched1820@gmail.com,87 Louisiana Grove,70959,2678 4140 5531 3894,764.69


In [7]:
# Generate a concise summary of the DataFrame with the info function
# The summary lists the index range, each column's name, its non-null count
# and data type, plus the approximate memory usage
sales_dataframe.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   FullName    100 non-null    object 
 1   Email       100 non-null    object 
 2   Address     100 non-null    object 
 3   Zip         100 non-null    int64  
 4   CreditCard  100 non-null    object 
 5   SalePrice   100 non-null    float64
dtypes: float64(1), int64(1), object(4)
memory usage: 4.8+ KB


In [8]:
# Generate summary statistics of the DataFrame with the describe function
# By default, only the numeric columns are summarized
# The result contains the count, mean, standard deviation, min, quartiles and max
# NOTE(review): Zip is included only because it was parsed as an integer; its
# statistics are not meaningful for an identifier-like column
sales_dataframe.describe()

Unnamed: 0,Zip,SalePrice
count,100.0,100.0
mean,40952.16,533.0072
std,30207.118496,275.531072
min,555.0,29.72
25%,11109.75,328.5075
50%,40033.5,536.11
75%,65834.75,767.885
max,99877.0,998.76


- - -

# Reading a CSV with no Header

In [9]:
# With default settings, read_csv treats the first line of the file as the header.
# This file has no header row, so the first record is consumed as the column names
# (see the output below) instead of being kept as data.
csvpath = Path("../Resources/sales_no_header.csv")
sales_data = pd.read_csv(csvpath)
sales_data.head()

Unnamed: 0,Elwanda White,alyre2036@live.com,352 Lakeshore Mall,9236,5327 0855 9720 7055,84.33
0,Lyndon Elliott,arrowy1873@outlook.com,1234 Avery Plaza,1330,3717 498777 19636,879.95
1,Daisey Sellers,toucan2024@outlook.com,469 Elwood Street,7631,3758 579477 35734,907.58
2,Issac Reeves,asarin1958@gmail.com,565 Phelps Field,81168,4400 0380 4162 1622,545.88
3,Bradford Kinney,mibound1801@yandex.com,853 Mission Rock Freeway,41721,3712 263405 60178,517.49
4,Fermina Cobb,kingfisher2013@live.com,929 Prague Trail,16625,2351 7156 8193 8639,889.95


In [10]:
# Reading in the CSV file accounting for the lack of a header row
# header=None keeps the first record as data and labels the columns with integers
sales_data = pd.read_csv(csvpath, header=None)
sales_data.head()

Unnamed: 0,0,1,2,3,4,5
0,Elwanda White,alyre2036@live.com,352 Lakeshore Mall,9236,5327 0855 9720 7055,84.33
1,Lyndon Elliott,arrowy1873@outlook.com,1234 Avery Plaza,1330,3717 498777 19636,879.95
2,Daisey Sellers,toucan2024@outlook.com,469 Elwood Street,7631,3758 579477 35734,907.58
3,Issac Reeves,asarin1958@gmail.com,565 Phelps Field,81168,4400 0380 4162 1622,545.88
4,Bradford Kinney,mibound1801@yandex.com,853 Mission Rock Freeway,41721,3712 263405 60178,517.49


In [11]:
# Swap the integer column labels for descriptive names.
# The list must supply one name per existing column, in order.
columns = [
    "Full Name",
    "Email",
    "Address",
    "Zip Code",
    "Credit Card Number",
    "Sale Price",
]
sales_data.columns = columns
sales_data.head()

Unnamed: 0,Full Name,Email,Address,Zip Code,Credit Card Number,Sale Price
0,Elwanda White,alyre2036@live.com,352 Lakeshore Mall,9236,5327 0855 9720 7055,84.33
1,Lyndon Elliott,arrowy1873@outlook.com,1234 Avery Plaza,1330,3717 498777 19636,879.95
2,Daisey Sellers,toucan2024@outlook.com,469 Elwood Street,7631,3758 579477 35734,907.58
3,Issac Reeves,asarin1958@gmail.com,565 Phelps Field,81168,4400 0380 4162 1622,545.88
4,Bradford Kinney,mibound1801@yandex.com,853 Mission Rock Freeway,41721,3712 263405 60178,517.49


---

# Reading in a CSV file and setting the DateTime Index

In [12]:
# Load the earnings CSV with default settings: "Date" is read as an ordinary
# column and the rows get the default integer index
earnings_df = pd.read_csv(
    Path("../Resources/earnings.csv")
)

# Show the resulting DataFrame
earnings_df


Unnamed: 0,Date,Net Sales (mill),Net Income (mill),EPS
0,9/30/2017,229234,48351,$9.27
1,9/29/2018,265595,59531,$12.01
2,9/28/2019,260174,55256,$11.97


In [13]:
# Reading in the CSV file and setting the Datetime Index
# index_col="Date" uses the Date column as the row index, and parse_dates=True
# asks pandas to parse that index as dates.
# infer_datetime_format is intentionally not passed: it is deprecated since
# pandas 2.0, where it has no effect and only triggers a warning.
# NOTE(review): EPS values are shown with a "$" prefix, so that column is
# presumably loaded as text rather than as numbers - confirm if arithmetic is needed.
earnings_date_index_df = pd.read_csv(
    Path("../Resources/earnings.csv"),
    index_col="Date",
    parse_dates=True,
)

# Review the DataFrame
earnings_date_index_df

Unnamed: 0_level_0,Net Sales (mill),Net Income (mill),EPS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-09-30,229234,48351,$9.27
2018-09-29,265595,59531,$12.01
2019-09-28,260174,55256,$11.97
