# Bonus: Temperature Analysis I

In [1]:
import pandas as pd
from datetime import datetime as dt

In [2]:
# "tobs" is "temperature observations"
df = pd.read_csv('Resources/hawaii_measurements.csv')
df.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [3]:
# Convert the date column format from string to datetime

df["Date"] =  pd.to_datetime(df['date'],format='%Y-%m-%d')
df

Unnamed: 0,station,date,prcp,tobs,Date
0,USC00519397,2010-01-01,0.08,65,2010-01-01
1,USC00519397,2010-01-02,0.00,63,2010-01-02
2,USC00519397,2010-01-03,0.00,74,2010-01-03
3,USC00519397,2010-01-04,0.00,76,2010-01-04
4,USC00519397,2010-01-06,,73,2010-01-06
...,...,...,...,...,...
19545,USC00516128,2017-08-19,0.09,71,2017-08-19
19546,USC00516128,2017-08-20,,78,2017-08-20
19547,USC00516128,2017-08-21,0.56,76,2017-08-21
19548,USC00516128,2017-08-22,0.50,76,2017-08-22


In [4]:
# Set the date column as the DataFrame index
df.set_index('Date',inplace=True)
df

Unnamed: 0_level_0,station,date,prcp,tobs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-01,USC00519397,2010-01-01,0.08,65
2010-01-02,USC00519397,2010-01-02,0.00,63
2010-01-03,USC00519397,2010-01-03,0.00,74
2010-01-04,USC00519397,2010-01-04,0.00,76
2010-01-06,USC00519397,2010-01-06,,73
...,...,...,...,...
2017-08-19,USC00516128,2017-08-19,0.09,71
2017-08-20,USC00516128,2017-08-20,,78
2017-08-21,USC00516128,2017-08-21,0.56,76
2017-08-22,USC00516128,2017-08-22,0.50,76


In [5]:
# Drop the date column

del df["date"]
df

#df=df.drop(colums='date')
#df

Unnamed: 0_level_0,station,prcp,tobs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-01,USC00519397,0.08,65
2010-01-02,USC00519397,0.00,63
2010-01-03,USC00519397,0.00,74
2010-01-04,USC00519397,0.00,76
2010-01-06,USC00519397,,73
...,...,...,...
2017-08-19,USC00516128,0.09,71
2017-08-20,USC00516128,,78
2017-08-21,USC00516128,0.56,76
2017-08-22,USC00516128,0.50,76


### Compare June and December data across all years 

In [15]:
from scipy import stats
from numpy import mean, std

In [16]:
df=df.reset_index()
df

Unnamed: 0,index,Date,station,prcp,tobs
0,0,2010-01-01,USC00519397,0.08,65
1,1,2010-01-02,USC00519397,0.00,63
2,2,2010-01-03,USC00519397,0.00,74
3,3,2010-01-04,USC00519397,0.00,76
4,4,2010-01-06,USC00519397,,73
...,...,...,...,...,...
19545,19545,2017-08-19,USC00516128,0.09,71
19546,19546,2017-08-20,USC00516128,,78
19547,19547,2017-08-21,USC00516128,0.56,76
19548,19548,2017-08-22,USC00516128,0.50,76


In [17]:
# Filter data for desired months

df["Date"].dtype

dtype('<M8[ns]')

In [18]:
df_jun_dec=df.loc[(df["Date"].dt.month==6) | (df["Date"].dt.month==12) ,["Date","tobs"]]
df_jun_dec.head()

Unnamed: 0,Date,tobs
133,2010-06-01,78
134,2010-06-02,76
135,2010-06-03,78
136,2010-06-04,76
137,2010-06-05,77


In [25]:
# Identify the average temperature for June
jun_df=df_jun_dec.loc[df_jun_dec["Date"].dt.month==6]
jun_temp=jun_df["tobs"]
jun_temp_list=jun_temp.tolist()

m_j=mean(jun_temp_list)
std_j=std(jun_temp_list)

print(f"Average Temperature: {m_j} , Standard Deviation: {std_j}")

Average Temperature: 74.94411764705882 , Standard Deviation: 3.2564588468846476


In [24]:
# Identify the average temperature for December
dec_df=df_jun_dec.loc[df_jun_dec["Date"].dt.month==12]
dec_temp=dec_df["tobs"]
dec_temp_list=dec_temp.tolist()

m_d=mean(dec_temp_list)
std_d=std(dec_temp_list)

print(f"Average Temperature: {m_d} , Standard Deviation: {std_d}")

Average Temperature: 71.04152933421226 , Standard Deviation: 3.744684974507512


In [13]:
# Create collections of temperature data

jun_list=[]

for jun in jun_temp_list:
    if type(jun)==int:
        jun_list.append(jun)
        
dec_list=[]

for dec in dec_temp_list:
    if type(dec)==int:
        dec_list.append(dec)

In [14]:
# Run paired t-test
stats.ttest_ind(jun_list, dec_list)

Ttest_indResult(statistic=31.60372399000329, pvalue=3.9025129038616655e-191)

### Analysis

In [None]:
# pvalue is significant that's why june and december months 
# have significantly different weather condition.