In [1]:
import numpy as np
import pandas as pd

# Read the Jeopardy! clue export (path is relative to the working directory)
# and preview the first five rows.
data_df = pd.read_csv(filepath_or_buffer="JEOPARDY_DATA.csv")
data_df.head(n=5)

Unnamed: 0,Show Number,Air Date,Round,Category,Value,Question,Answer
0,4680,12/31/2004,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,12/31/2004,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,12/31/2004,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,The city of Yuma in this state has a record av...,Arizona
3,4680,12/31/2004,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,12/31/2004,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Co...",John Adams


In [2]:
# Display the column labels exactly as they were read from the CSV header,
# so any stray whitespace in the names is visible before cleaning.
data_df.columns

Index(['Show Number', ' Air Date', ' Round', ' Category', ' Value',
       ' Question', ' Answer'],
      dtype='object')

In [3]:
# Drop every space from each column label (leading and interior alike),
# e.g. ' Air Date' -> 'AirDate', then display the cleaned labels.
data_df.columns = [label.replace(" ", "") for label in data_df.columns]
data_df.columns

Index(['ShowNumber', 'AirDate', 'Round', 'Category', 'Value', 'Question',
       'Answer'],
      dtype='object')

In [4]:
# Show the dtype pandas inferred for each column, before any conversion.
data_df.dtypes

ShowNumber     int64
AirDate       object
Round         object
Category      object
Value         object
Question      object
Answer        object
dtype: object

In [6]:
# AirDate is still text at this point, laid out as month/day/four-digit year
# (e.g. 12/31/2004). Passing that layout explicitly keeps the parse unambiguous.
data_df = data_df.assign(
    AirDate=lambda frame: pd.to_datetime(frame['AirDate'], format='%m/%d/%Y')
)
data_df.head()

Unnamed: 0,ShowNumber,AirDate,Round,Category,Value,Question,Answer
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was ...",Copernicus
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,The city of Yuma in this state has a record av...,Arizona
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Co...",John Adams


In [7]:
# Re-inspect the column dtypes to confirm AirDate is now a datetime column.
data_df.dtypes

ShowNumber             int64
AirDate       datetime64[ns]
Round                 object
Category              object
Value                 object
Question              object
Answer                object
dtype: object

In [8]:
# assign() returns a new frame with the named column added; here the new
# 'month' column is the calendar month taken from each AirDate.
data_df = data_df.assign(month=data_df['AirDate'].dt.month)
data_df.head()

Unnamed: 0,ShowNumber,AirDate,Round,Category,Value,Question,Answer,month
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was ...",Copernicus,12
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe,12
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,The city of Yuma in this state has a record av...,Arizona,12
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's,12
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Co...",John Adams,12


In [11]:
# Same month extraction, done row by row with apply(): axis='columns'
# (equivalent to axis=1) hands each row to the lambda as a Series keyed by
# column name, so row['AirDate'] is that row's timestamp.
data_df['month2'] = data_df.apply(lambda row: row['AirDate'].month, axis='columns')
data_df.head()

Unnamed: 0,ShowNumber,AirDate,Round,Category,Value,Question,Answer,month,month2
0,4680,2004-12-31,Jeopardy!,HISTORY,$200,"For the last 8 years of his life, Galileo was ...",Copernicus,12,12
1,4680,2004-12-31,Jeopardy!,ESPN's TOP 10 ALL-TIME ATHLETES,$200,No. 2: 1912 Olympian; football star at Carlisl...,Jim Thorpe,12,12
2,4680,2004-12-31,Jeopardy!,EVERYBODY TALKS ABOUT IT...,$200,The city of Yuma in this state has a record av...,Arizona,12,12
3,4680,2004-12-31,Jeopardy!,THE COMPANY LINE,$200,"In 1963, live on ""The Art Linkletter Show"", th...",McDonald's,12,12
4,4680,2004-12-31,Jeopardy!,EPITAPHS & TRIBUTES,$200,"Signer of the Dec. of Indep., framer of the Co...",John Adams,12,12


In [12]:
# Filters
# Pull the first 50 raw 'Value' strings out as a plain Python list to
# experiment on.
value_list = data_df['Value'].iloc[:50].tolist()
value_list

['$200 ',
 '$200 ',
 '$200 ',
 '$200 ',
 '$200 ',
 '$200 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$2,000 ',
 '$800 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$400 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$1,200 ',
 '$2,000 ',
 '$1,200 ',
 '$1,200 ',
 '$1,200 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ']

In [13]:
# Keep only the entries worth more than 400. The dollar sign, thousands
# separator and spaces are stripped solely for the numeric comparison, so
# the strings that survive are the untouched originals.
filtered_list = [
    raw_value
    for raw_value in value_list
    if int(raw_value.replace("$", "").replace(",", "").replace(" ", "")) > 400
]

filtered_list

['$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$600 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$2,000 ',
 '$800 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$1,000 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$800 ',
 '$1,200 ',
 '$2,000 ',
 '$1,200 ',
 '$1,200 ',
 '$1,200 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ',
 '$1,600 ']