## Dataset overview

In [2]:
import pandas as pd
data = pd.read_csv("thanksgiving.csv", encoding="Latin-1")
data.head()

Unnamed: 0,RespondentID,Do you celebrate Thanksgiving?,What is typically the main dish at your Thanksgiving dinner?,What is typically the main dish at your Thanksgiving dinner? - Other (please specify),How is the main dish typically cooked?,How is the main dish typically cooked? - Other (please specify),What kind of stuffing/dressing do you typically have?,What kind of stuffing/dressing do you typically have? - Other (please specify),What type of cranberry saucedo you typically have?,What type of cranberry saucedo you typically have? - Other (please specify),...,Have you ever tried to meet up with hometown friends on Thanksgiving night?,"Have you ever attended a ""Friendsgiving?""",Will you shop any Black Friday sales on Thanksgiving Day?,Do you work in retail?,Will you employer make you work on Black Friday?,How would you describe where you live?,Age,What is your gender?,How much total combined money did all members of your HOUSEHOLD earn last year?,US Region
0,4337954960,Yes,Turkey,,Baked,,Bread-based,,,,...,Yes,No,No,No,,Suburban,18 - 29,Male,"$75,000 to $99,999",Middle Atlantic
1,4337951949,Yes,Turkey,,Baked,,Bread-based,,Other (please specify),Homemade cranberry gelatin ring,...,No,No,Yes,No,,Rural,18 - 29,Female,"$50,000 to $74,999",East South Central
2,4337935621,Yes,Turkey,,Roasted,,Rice-based,,Homemade,,...,Yes,Yes,Yes,No,,Suburban,18 - 29,Male,"$0 to $9,999",Mountain
3,4337933040,Yes,Turkey,,Baked,,Bread-based,,Homemade,,...,Yes,No,No,No,,Urban,30 - 44,Male,"$200,000 and up",Pacific
4,4337931983,Yes,Tofurkey,,Baked,,Bread-based,,Canned,,...,Yes,No,No,No,,Urban,30 - 44,Male,"$100,000 to $124,999",Pacific


In [7]:
data.columns

Index(['RespondentID', 'Do you celebrate Thanksgiving?',
       'What is typically the main dish at your Thanksgiving dinner?',
       'What is typically the main dish at your Thanksgiving dinner? - Other (please specify)',
       'How is the main dish typically cooked?',
       'How is the main dish typically cooked? - Other (please specify)',
       'What kind of stuffing/dressing do you typically have?',
       'What kind of stuffing/dressing do you typically have? - Other (please specify)',
       'What type of cranberry saucedo you typically have?',
       'What type of cranberry saucedo you typically have? - Other (please specify)',
       'Do you typically have gravy?',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Brussel sprouts',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Carrots',
       'Which of these side dishes aretypically served

## Finding the number of people that celebrate Thanksgiving

In [8]:
data["Do you celebrate Thanksgiving?"].value_counts()

Yes    980
No      78
Name: Do you celebrate Thanksgiving?, dtype: int64

## Filtering people that celebrate Thanksgiving

In [9]:
celebrated = (data["Do you celebrate Thanksgiving?"] == "Yes")
data_celebrated = data[celebrated]
data_celebrated["Do you celebrate Thanksgiving?"].value_counts()

Yes    980
Name: Do you celebrate Thanksgiving?, dtype: int64

## Exploring the main dish

In [10]:
data["What is typically the main dish at your Thanksgiving dinner?"].value_counts()

Turkey                    859
Other (please specify)     35
Ham/Pork                   29
Tofurkey                   20
Chicken                    12
Roast beef                 11
I don't know                5
Turducken                   3
Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64

In [11]:
data[data["What is typically the main dish at your Thanksgiving dinner?"] == "Tofurkey"]["Do you typically have gravy?"].value_counts()

Yes    12
No      8
Name: Do you typically have gravy?, dtype: int64

## Finding the number of people that ate pies

In [13]:
apple_isnull = data_celebrated["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple"].isnull()
pumpkin_isnull = data_celebrated["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin"].isnull()
pecan_isnull = data_celebrated["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pecan"].isnull()

ate_pies = apple_isnull & pumpkin_isnull & pecan_isnull
ate_pies.value_counts()

False    876
True     104
dtype: int64

## Finding the distribution of age

In [14]:
data["Age"].value_counts()

45 - 59    286
60+        264
30 - 44    259
18 - 29    216
Name: Age, dtype: int64

In [15]:
def str_to_int(string):
    if pd.isnull(string) == True:
        return None
    string_split = string.split(" ")[0]
    string_replace = string_split.replace("+", "")
    string_int = int(string_replace)
    return string_int

data["int_age"] = data["Age"].apply(str_to_int)
data["int_age"].describe()

count    1025.000000
mean       39.383415
std        15.398493
min        18.000000
25%        30.000000
50%        45.000000
75%        60.000000
max        60.000000
Name: int_age, dtype: float64

## Finding the distribution of income

In [16]:
data["How much total combined money did all members of your HOUSEHOLD earn last year?"].value_counts()

$25,000 to $49,999      180
Prefer not to answer    136
$50,000 to $74,999      135
$75,000 to $99,999      133
$100,000 to $124,999    111
$200,000 and up          80
$10,000 to $24,999       68
$0 to $9,999             66
$125,000 to $149,999     49
$150,000 to $174,999     40
$175,000 to $199,999     27
Name: How much total combined money did all members of your HOUSEHOLD earn last year?, dtype: int64

In [17]:
def string_to_income(string):
    if pd.isnull(string) == True:
        return None
    string_split = string.split(" ")[0]
    if string_split == "Prefer":
        return None
    string_replace = string_split.replace(",", "")
    string_replace = string_replace.replace("$", "")
    string_int = int(string_replace)
    return string_int

data["int_income"] = data["How much total combined money did all members of your HOUSEHOLD earn last year?"].apply(string_to_income)
data["int_income"].describe()

count       889.000000
mean      74077.615298
std       59360.742902
min           0.000000
25%       25000.000000
50%       50000.000000
75%      100000.000000
max      200000.000000
Name: int_income, dtype: float64

## Finding the correlation between travel distance and income

In [18]:
data_selected_under150k = data[data["int_income"] < 150000]
travel_under150k = data_selected_under150k["How far will you travel for Thanksgiving?"]
print(travel_under150k.value_counts())

Thanksgiving is happening at my home--I won't travel at all                         281
Thanksgiving is local--it will take place in the town I live in                     203
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    150
Thanksgiving is out of town and far away--I have to drive several hours or fly       55
Name: How far will you travel for Thanksgiving?, dtype: int64


In [19]:
data_selected_above150k = data[data["int_income"] > 150000]
travel_above150k = data_selected_above150k["How far will you travel for Thanksgiving?"]
print(travel_above150k.value_counts())

Thanksgiving is happening at my home--I won't travel at all                         49
Thanksgiving is local--it will take place in the town I live in                     25
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    16
Thanksgiving is out of town and far away--I have to drive several hours or fly      12
Name: How far will you travel for Thanksgiving?, dtype: int64


## Finding links between  friendship, age and income

In [14]:
pivot = data.pivot_table(index = "Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
                         columns = 'Have you ever attended a "Friendsgiving?"', 
                         values = "int_age")
pivot

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,42.283702,37.010526
Yes,41.47541,33.976744


In [15]:
pivot2 = data.pivot_table(index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
                        columns='Have you ever attended a "Friendsgiving?"',
                        values="int_income")
pivot2

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,78914.549654,72894.736842
Yes,78750.0,66019.736842
