# Pandas Crash Course

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the salary data; the relative path is resolved against the current working directory.
df = pd.read_csv('salaries.csv')

In [3]:
# Show the whole frame as plain text (fine here because it has only three rows).
print(df)

     Name  Salary  Age
0    John   50000   34
1   Sally  120000   45
2  Alyssa   80000   27


#### Select columns with a bracket call:

In [4]:
# Bracket indexing with a single column label returns that column as a Series.
print(df['Name'])

0      John
1     Sally
2    Alyssa
Name: Name, dtype: object


In [5]:
# Same bracket selection on a numeric column; the resulting Series has dtype int64.
print(df['Salary'])

0     50000
1    120000
2     80000
Name: Salary, dtype: int64


#### Select multiple columns

In [6]:
# Passing a list of labels (note the double brackets) selects several columns
# and returns a DataFrame rather than a Series.
print(df[['Name','Salary']])

     Name  Salary
0    John   50000
1   Sally  120000
2  Alyssa   80000


#### As with NumPy arrays, you can call min(), max(), mean(), etc. on a pandas DataFrame or on one of its columns.

In [7]:
# Arithmetic mean of the Age column.
print(df['Age'].mean())

35.333333333333336


#### Just like NumPy, we can use conditional filtering to select rows that meet certain criteria.

In [8]:
# Comparing a column to a scalar yields a boolean Series: one True/False per row.
ser_of_bool = df['Age'] > 30
print(ser_of_bool)

0     True
1     True
2    False
Name: Age, dtype: bool


In [9]:
# Index the frame with the boolean Series to keep only the rows where the condition is True.
df[df['Age'] > 30]

Unnamed: 0,Name,Salary,Age
0,John,50000,34
1,Sally,120000,45


#### There are lots of other commands you can do with pandas!

In [10]:
# Distinct values in the Age column, returned as a NumPy array (not a Python list).
df['Age'].unique()

array([34, 45, 27], dtype=int64)

In [11]:
df['Age'].nunique() # number of unique values

3

In [12]:
# Concise summary: index range, per-column non-null counts and dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
Name      3 non-null object
Salary    3 non-null int64
Age       3 non-null int64
dtypes: int64(2), object(1)
memory usage: 152.0+ bytes


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Salary,Age
count,3.0,3.0
mean,83333.333333,35.333333
std,35118.845843,9.073772
min,50000.0,27.0
25%,65000.0,30.5
50%,80000.0,34.0
75%,100000.0,39.5
max,120000.0,45.0


In [14]:
# Column labels, returned as a pandas Index rather than a plain list.
df.columns

Index(['Name', 'Salary', 'Age'], dtype='object')

In [15]:
# Access the existing row index (a RangeIndex here); this reads an attribute and creates nothing.
df.index

RangeIndex(start=0, stop=3, step=1)

#### You can convert a numpy matrix to a dataframe with:

In [16]:
# Build a 5x10 integer matrix holding 0..49, filled row by row.
mat = np.arange(0, 50).reshape((5, 10))
mat

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])