# How to import and write data using Pandas

* Load data from CSV files, Excel files, and URLs
* View the data
* Save and export the data

In [1]:
# Load the libraries

# For reading and writing files
import pandas as pd

# For reading .xlsx files
import openpyxl

# Read CSV (comma-separated values)

In [2]:
# Load a CSV file into a DataFrame.
#
# General pattern:
#     data = pd.read_csv('file_location/filename.csv')

data_csv = pd.read_csv(filepath_or_buffer="Data/data.csv")

In [3]:
# Preview the data: show the first 5 rows of the DataFrame

data_csv.head(n=5)

Unnamed: 0.1,Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,1,N102AA,2007.0,Fixed wing single engine,AVIAT AIRCRAFT INC,A-1B,1,2.0,,Reciprocating
1,2,N103AA,2007.0,Fixed wing single engine,AVIAT AIRCRAFT INC,A-1B,1,2.0,,Reciprocating
2,3,N103SY,2014.0,Fixed wing multi engine,EMBRAER S A,ERJ 170-200 LR,2,88.0,,Turbo-fan
3,4,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55.0,,Turbo-fan
4,5,N105AA,1978.0,Fixed wing single engine,CESSNA,R182,1,4.0,111.0,Reciprocating


In [4]:
# Summarise the DataFrame's structure: the index range, each column's name,
# non-null count and dtype, plus the approximate memory usage.

data_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3521 entries, 0 to 3520
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    3521 non-null   int64  
 1   tailnum       3521 non-null   object 
 2   year          3419 non-null   float64
 3   type          3521 non-null   object 
 4   manufacturer  3521 non-null   object 
 5   model         3521 non-null   object 
 6   engines       3521 non-null   int64  
 7   seats         3511 non-null   float64
 8   speed         62 non-null     float64
 9   engine        3521 non-null   object 
dtypes: float64(3), int64(2), object(5)
memory usage: 275.2+ KB


# Read Excel Files

In [6]:
# Load an Excel workbook (.xlsx) into a DataFrame.
# The openpyxl engine is requested explicitly for the .xlsx format.

data_excel = pd.read_excel(
    "Data/data.xlsx",
    engine="openpyxl",
)

In [7]:
# Preview the data: show the last 5 rows of the DataFrame

data_excel.tail(n=5)

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
3516,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100.0,,Turbo-fan
3517,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142.0,,Turbo-fan
3518,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100.0,,Turbo-fan
3519,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142.0,,Turbo-jet
3520,N999DN,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142.0,,Turbo-jet


In [8]:
# Load a legacy Excel workbook (file name ends in .xls).
# No engine is given here, so pandas chooses one based on the file type.

data_excel2 = pd.read_excel(io="Data/data.xls")

In [9]:
# View the data.
# Ending the cell with the bare DataFrame (instead of print()) lets Jupyter use
# the rich table display; print() falls back to plain text, which wraps wide
# frames into several "\"-continued column blocks. Long frames are still
# truncated to a preview of the first and last rows.

data_excel2

     tailnum    year                      type                   manufacturer  \
0     N102AA  2007.0  Fixed wing single engine             AVIAT AIRCRAFT INC   
1     N103AA  2007.0  Fixed wing single engine             AVIAT AIRCRAFT INC   
2     N103SY  2014.0   Fixed wing multi engine                    EMBRAER S A   
3     N10575  2002.0   Fixed wing multi engine                        EMBRAER   
4     N105AA  1978.0  Fixed wing single engine                         CESSNA   
...      ...     ...                       ...                            ...   
3516  N997AT  2002.0   Fixed wing multi engine                         BOEING   
3517  N997DL  1992.0   Fixed wing multi engine  MCDONNELL DOUGLAS AIRCRAFT CO   
3518  N998AT  2002.0   Fixed wing multi engine                         BOEING   
3519  N998DL  1992.0   Fixed wing multi engine  MCDONNELL DOUGLAS CORPORATION   
3520  N999DN  1992.0   Fixed wing multi engine  MCDONNELL DOUGLAS CORPORATION   

               model  engin

In [10]:
# If the Excel file has multiple sheets, pick the one to load by name
# with the sheet_name argument.

data_excel3 = pd.read_excel(
    "Data/data.xls",
    sheet_name="Sheet2",
)

In [11]:
# Preview the data: show the last 5 rows of the selected sheet

data_excel3.tail(n=5)

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
3516,N997AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100.0,,Turbo-fan
3517,N997DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS AIRCRAFT CO,MD-88,2,142.0,,Turbo-fan
3518,N998AT,2002.0,Fixed wing multi engine,BOEING,717-200,2,100.0,,Turbo-fan
3519,N998DL,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142.0,,Turbo-jet
3520,N999DN,1992.0,Fixed wing multi engine,MCDONNELL DOUGLAS CORPORATION,MD-88,2,142.0,,Turbo-jet


# Import files from URL

In [12]:
# For CSV files hosted at a URL: read_csv accepts the URL in place of a local path

data_url = pd.read_csv(
    "https://raw.githubusercontent.com/jayleetx/nycflights/master/data-raw/planes.csv"
)

In [13]:
# Preview the data: show the first 5 rows of the downloaded DataFrame

data_url.head(n=5)

Unnamed: 0,tailnum,year,type,manufacturer,model,engines,seats,speed,engine
0,N102AA,2007.0,Fixed wing single engine,AVIAT AIRCRAFT INC,A-1B,1,2.0,,Reciprocating
1,N103AA,2007.0,Fixed wing single engine,AVIAT AIRCRAFT INC,A-1B,1,2.0,,Reciprocating
2,N103SY,2014.0,Fixed wing multi engine,EMBRAER S A,ERJ 170-200 LR,2,88.0,,Turbo-fan
3,N10575,2002.0,Fixed wing multi engine,EMBRAER,EMB-145LR,2,55.0,,Turbo-fan
4,N105AA,1978.0,Fixed wing single engine,CESSNA,R182,1,4.0,111.0,Reciprocating


# How to save and export data from Python

In [14]:
# Write/export the DataFrame to a CSV file.
# index=False keeps the row index (0, 1, 2, ...) out of the file. Without it the
# index is written as an extra unnamed column, which shows up as "Unnamed: 0"
# the next time the file is loaded with read_csv.

data_csv.to_csv("Data/new_csv_file.csv", index=False)

In [15]:
# Write/export the DataFrame to an Excel workbook (sheet "Sheet1").
# index=False keeps the row index out of the sheet. Without it the index is
# written as an extra unnamed first column that comes back as "Unnamed: 0"
# when the workbook is read again.

data_excel.to_excel("Data/new_excel_file.xlsx", sheet_name="Sheet1", index=False)

# The END

That's all for today. Let me know in the comments if you have any questions or feedback. Thanks for watching!

More resources: https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html