## Import Modules

In [1]:
import pandas as pd
import numpy as np

In [2]:
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 
    'last_name': ['Miller', 'Jacobson', ".", 'Milner', 'Cooze'], 
    'age': [42, 52, 36, 24, 73], 
    'preTestScore': [4, 24, 31, ".", "."],
    'postTestScore': ["25,000", "94,000", 57, 62, 70]
}

In [3]:
df = pd.DataFrame(raw_data, columns = raw_data.keys())

In [4]:
df

Unnamed: 0,first_name,last_name,age,postTestScore,preTestScore
0,Jason,Miller,42,25000,4
1,Molly,Jacobson,52,94000,24
2,Tina,.,36,57,31
3,Jake,Milner,24,62,.
4,Amy,Cooze,73,70,.


In [5]:
dir(df)

['T',
 '_AXIS_ALIASES',
 '_AXIS_IALIASES',
 '_AXIS_LEN',
 '_AXIS_NAMES',
 '_AXIS_NUMBERS',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_AXIS_SLICEMAP',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_wrap__',
 '__bool__',
 '__bytes__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',


### Save dataframe as csv in the working director


In [6]:
df.to_csv('pandas_created_file.csv')

### Load a csv

In [10]:
df = pd.read_csv('pandas_created_file.csv')
df

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,0,Jason,Miller,42,4,25000
1,1,Molly,Jacobson,52,24,94000
2,2,Tina,.,36,31,57
3,3,Jake,Milner,24,.,62
4,4,Amy,Cooze,73,.,70


### Load a csv with no headers

In [11]:
df = pd.read_csv('pandas_created_file.csv', header=None)
df

Unnamed: 0,0,1,2,3,4,5
0,,first_name,last_name,age,preTestScore,postTestScore
1,0.0,Jason,Miller,42,4,25000
2,1.0,Molly,Jacobson,52,24,94000
3,2.0,Tina,.,36,31,57
4,3.0,Jake,Milner,24,.,62
5,4.0,Amy,Cooze,73,.,70


### Load a csv while specifying column names


In [12]:
df = pd.read_csv('pandas_created_file.csv', names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0,UID,First Name,Last Name,Age,Pre-Test Score,Post-Test Score
0,,first_name,last_name,age,preTestScore,postTestScore
1,0.0,Jason,Miller,42,4,25000
2,1.0,Molly,Jacobson,52,24,94000
3,2.0,Tina,.,36,31,57
4,3.0,Jake,Milner,24,.,62
5,4.0,Amy,Cooze,73,.,70


### Load a csv with setting the index column to UID


In [13]:
df = pd.read_csv('pandas_created_file.csv', index_col='UID', names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0_level_0,First Name,Last Name,Age,Pre-Test Score,Post-Test Score
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,first_name,last_name,age,preTestScore,postTestScore
0.0,Jason,Miller,42,4,25000
1.0,Molly,Jacobson,52,24,94000
2.0,Tina,.,36,31,57
3.0,Jake,Milner,24,.,62
4.0,Amy,Cooze,73,.,70


### Load a csv while setting the index columns to First Name and Last Name


In [14]:
df = pd.read_csv('pandas_created_file.csv', index_col=['First Name', 'Last Name'], names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,UID,Age,Pre-Test Score,Post-Test Score
First Name,Last Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
first_name,last_name,,age,preTestScore,postTestScore
Jason,Miller,0.0,42,4,25000
Molly,Jacobson,1.0,52,24,94000
Tina,.,2.0,36,31,57
Jake,Milner,3.0,24,.,62
Amy,Cooze,4.0,73,.,70


### Load a csv while specifying “.” as missing values


In [15]:
df = pd.read_csv('pandas_created_file.csv', na_values=['.'])
pd.isnull(df)

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,True,False,False,False
3,False,False,False,False,True,False
4,False,False,False,False,True,False


### Load a csv while interpreting “,” in strings around numbers as thousands seperators


In [16]:
df = pd.read_csv('pandas_created_file.csv', thousands=',')
df

Unnamed: 0.1,Unnamed: 0,first_name,last_name,age,preTestScore,postTestScore
0,0,Jason,Miller,42,4,25000
1,1,Molly,Jacobson,52,24,94000
2,2,Tina,.,36,31,57
3,3,Jake,Milner,24,.,62
4,4,Amy,Cooze,73,.,70


In [7]:
help(pd)

Help on package pandas:

NAME
    pandas

FILE
    c:\python27\lib\site-packages\pandas\__init__.py

DESCRIPTION
    pandas - a powerful data analysis and manipulation library for Python
    
    **pandas** is a Python package providing fast, flexible, and expressive data
    structures designed to make working with "relational" or "labeled" data both
    easy and intuitive. It aims to be the fundamental high-level building block for
    doing practical, **real world** data analysis in Python. Additionally, it has
    the broader goal of becoming **the most powerful and flexible open source data
    analysis / manipulation tool available in any language**. It is already well on
    its way toward this goal.
    
    Main Features
    -------------
    Here are just a few of the things that pandas does well:
    
      - Easy handling of missing data in floating point as well as non-floating
        point data.
      - Size mutability: columns can be inserted and deleted from DataFrame 

In [8]:
help(pd.read_csv)

Help on function read_csv in module pandas.io.parsers:

read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, doublequote=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)
    Read CSV (comma-separated) file into DataFrame
    
    Also supports optionally iterat