## Import Modules

In [3]:
import pandas as pd
import numpy as np

In [4]:
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 
        'last_name': ['Miller', 'Jacobson', ".", 'Milner', 'Cooze'], 
        'age': [42, 52, 36, 24, 73], 
        'preTestScore': [4, 24, 31, ".", "."],
        'postTestScore': ["25,000", "94,000", 57, 62, 70]}

In [5]:
df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'])

In [7]:
df

Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,Jason,Miller,42,4,25000
1,Molly,Jacobson,52,24,94000
2,Tina,.,36,31,57
3,Jake,Milner,24,.,62
4,Amy,Cooze,73,.,70


### Save dataframe as csv in the working director


In [8]:
df.to_csv('pandas_created_file.csv')

### Load a csv

In [10]:
df = pd.read_csv('pandas_created_file.csv')
df

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,0,Jason,Miller,42,4,25000
1,1,Molly,Jacobson,52,24,94000
2,2,Tina,.,36,31,57
3,3,Jake,Milner,24,.,62
4,4,Amy,Cooze,73,.,70


### Load a csv with no headers

In [11]:
df = pd.read_csv('pandas_created_file.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5
0,,first_name,last_name,age,preTestScore,postTestScore
1,0.0,Jason,Miller,42,4,25000
2,1.0,Molly,Jacobson,52,24,94000
3,2.0,Tina,.,36,31,57
4,3.0,Jake,Milner,24,.,62
5,4.0,Amy,Cooze,73,.,70


### Load a csv while specifying column names


In [12]:
df = pd.read_csv('pandas_created_file.csv', names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0,UID,First Name,Last Name,Age,Pre-Test Score,Post-Test Score
0,,first_name,last_name,age,preTestScore,postTestScore
1,0.0,Jason,Miller,42,4,25000
2,1.0,Molly,Jacobson,52,24,94000
3,2.0,Tina,.,36,31,57
4,3.0,Jake,Milner,24,.,62
5,4.0,Amy,Cooze,73,.,70


### Load a csv with setting the index column to UID


In [13]:
df = pd.read_csv('pandas_created_file.csv', index_col='UID', names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0_level_0,First Name,Last Name,Age,Pre-Test Score,Post-Test Score
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,first_name,last_name,age,preTestScore,postTestScore
0.0,Jason,Miller,42,4,25000
1.0,Molly,Jacobson,52,24,94000
2.0,Tina,.,36,31,57
3.0,Jake,Milner,24,.,62
4.0,Amy,Cooze,73,.,70


### Load a csv while setting the index columns to First Name and Last Name


In [14]:
df = pd.read_csv('pandas_created_file.csv', index_col=['First Name', 'Last Name'], names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,UID,Age,Pre-Test Score,Post-Test Score
First Name,Last Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
first_name,last_name,,age,preTestScore,postTestScore
Jason,Miller,0.0,42,4,25000
Molly,Jacobson,1.0,52,24,94000
Tina,.,2.0,36,31,57
Jake,Milner,3.0,24,.,62
Amy,Cooze,4.0,73,.,70


### Load a csv while specifying “.” as missing values


In [15]:
df = pd.read_csv('pandas_created_file.csv', na_values=['.'])
pd.isnull(df)

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,True,False,False,False
3,False,False,False,False,True,False
4,False,False,False,False,True,False


### Load a csv while interpreting “,” in strings around numbers as thousands seperators


In [16]:
df = pd.read_csv('pandas_created_file.csv', thousands=',')
df

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,0,Jason,Miller,42,4,25000
1,1,Molly,Jacobson,52,24,94000
2,2,Tina,.,36,31,57
3,3,Jake,Milner,24,.,62
4,4,Amy,Cooze,73,.,70
