In [1]:
import pandas as pd
# Widen pandas' display limits so wide/long frames are shown with less
# truncation: up to 500 rows, up to 500 columns, 1000-character line width.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [3]:
# Directory holding both raw CSV inputs.
# NOTE(review): this is an absolute, machine-specific path — anyone else
# running the notebook has to edit this one line.
DATA_DIR = '/Users/yamini/nyc-data-mining/data'

# 2015 street-tree census and hyperlocal temperature readings.
trees = pd.read_csv(f'{DATA_DIR}/2015StreetTreesCensus_TREES.csv')
temps = pd.read_csv(f'{DATA_DIR}/Hyperlocal_Temperature_Monitoring.csv')

In [4]:
# Pin an explicit dtype on each temperature column so later comparisons and
# joins behave predictably. Python `float` maps to float64, so the numeric
# columns are not downcast here.
dtypes = {
    **dict.fromkeys(['Latitude', 'Longitude', 'AirTemp'], float),
    **dict.fromkeys(['Sensor.ID', 'Install.Type', 'Borough', 'ntacode'], str),
    'Day': 'datetime64[ns]',
    'Year': int,
}

# Apply every cast in one pass; columns not listed above are left as read.
temps = temps.astype(dtypes)

In [5]:
# Target dtype for each tree-census column, grouped by dtype rather than
# listed one by one. Columns not named here keep the dtype read_csv gave them.
dtypes = {
    # Survey timestamp.
    'created_at': 'datetime64[ns]',
    # Identifiers, measurements and administrative codes stored as 32-bit ints.
    **dict.fromkeys(
        [
            'tree_id', 'block_id', 'tree_dbh', 'stump_diam', 'zipcode',
            'cb_num', 'borocode', 'cncldist', 'st_assem', 'st_senate',
            'boro_ct',
        ],
        'int32',
    ),
    # Categorical / free-text fields kept as strings.
    **dict.fromkeys(
        [
            'curb_loc', 'status', 'health', 'spc_latin', 'spc_common',
            'steward', 'guards', 'sidewalk', 'user_type', 'problems',
            'root_stone', 'root_grate', 'root_other',
            'trnk_wire', 'trnk_light', 'trnk_other',
            'brnch_ligh', 'brnch_shoe', 'brnch_othe',
            'address', 'zip_city', 'boroname', 'nta', 'nta_name', 'state',
        ],
        'str',
    ),
    # Coordinates as 32-bit floats. The key casing ('Latitude' capitalised,
    # 'longitude' not) is deliberate: it matches the frame's column names.
    **dict.fromkeys(['Latitude', 'longitude', 'x_sp', 'y_sp'], 'float32'),
}

# Cast all listed columns at once and rebind the converted frame.
trees = trees.astype(dtypes)

In [6]:
# Dropping spatial columns (since we are not mapping using GIS).
# Rebinding the result instead of using inplace=True, together with
# errors='ignore', keeps this cell re-runnable: executing it a second time is
# a no-op rather than a KeyError for the already-removed columns.
trees = trees.drop(columns=['block_id', 'x_sp', 'y_sp'], errors='ignore')

In [7]:
# Thin the temperature readings: keep only the rows whose Hour is
# 0, 6, 12 or 18, then preview the first few.
# NOTE(review): `temps_sampled` is not referenced by any later cell visible
# here — confirm whether downstream steps were meant to use it.
hour_mask = temps['Hour'].isin([0, 6, 12, 18])
temps_sampled = temps.loc[hour_mask]
temps_sampled.head()

Unnamed: 0,Sensor.ID,AirTemp,Day,Hour,Latitude,Longitude,Year,Install.Type,Borough,ntacode
5,Bk-BR_01,65.9655,2018-06-15,6,40.666205,-73.91691,2018,Street Tree,Brooklyn,BK81
11,Bk-BR_01,73.428667,2018-06-15,12,40.666205,-73.91691,2018,Street Tree,Brooklyn,BK81
17,Bk-BR_01,78.066,2018-06-15,18,40.666205,-73.91691,2018,Street Tree,Brooklyn,BK81
23,Bk-BR_01,68.077833,2018-06-16,0,40.666205,-73.91691,2018,Street Tree,Brooklyn,BK81
29,Bk-BR_01,65.9865,2018-06-16,6,40.666205,-73.91691,2018,Street Tree,Brooklyn,BK81


In [10]:
# Expose the temperature frame's NTA code under the name `nta`, the same
# column name the tree census uses, so the two frames share a join key.
# rename() returns a new frame and silently skips labels that are absent, so
# this cell can be re-run safely (copying the column and then dropping the
# original in place fails with KeyError on a second execution).
temps = temps.rename(columns={'ntacode': 'nta'})

In [11]:
# NTA codes that appear in the temperature data.
relevant_nta_codes = temps['nta'].unique()

# Keep only the trees whose NTA code is one of those.
nta_match = trees['nta'].isin(relevant_nta_codes)
trees_nta_filtered = trees.loc[nta_match]

In [13]:
# Bare last expression: renders the NTA-filtered tree frame as the cell output
# (pandas truncates the middle rows when the frame exceeds display.max_rows).
trees_nta_filtered

Unnamed: 0,created_at,tree_id,the_geom,tree_dbh,stump_diam,curb_loc,status,health,spc_latin,spc_common,steward,guards,sidewalk,user_type,problems,root_stone,root_grate,root_other,trnk_wire,trnk_light,trnk_other,brnch_ligh,brnch_shoe,brnch_othe,address,zipcode,zip_city,cb_num,borocode,boroname,cncldist,st_assem,st_senate,nta,nta_name,boro_ct,state,Latitude,longitude
4,2015-08-30,189565,POINT (-73.97597938483258 40.66677775537875),21,0,OnCurb,Alive,Good,Tilia americana,American linden,,,Damage,Volunteer,Stones,Yes,No,No,No,No,No,No,No,No,603 6 STREET,11215,Brooklyn,306,3,Brooklyn,39,44,21,BK37,Park Slope-Gowanus,3016500,New York,40.666779,-73.975983
14,2015-08-31,192998,POINT (-73.92113023015189 40.664317398984245),30,0,OnCurb,Alive,Fair,Platanus x acerifolia,London planetree,1or2,,Damage,Volunteer,"Stones,BranchOther",Yes,No,No,No,No,No,No,No,Yes,2126 UNION STREET,11212,Brooklyn,316,3,Brooklyn,41,55,20,BK81,Brownsville,3090000,New York,40.664318,-73.921127
20,2015-08-31,193093,POINT (-73.98588943758692 40.66123875468547),11,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,,,NoDamage,Volunteer,,No,No,No,No,No,No,No,No,No,367 PROSPECT AVENUE,11215,Brooklyn,307,3,Brooklyn,38,44,21,BK37,Park Slope-Gowanus,3014900,New York,40.661240,-73.985893
24,2015-08-30,189700,POINT (-73.97218377293484 40.67483918746511),2,0,OnCurb,Alive,Good,Quercus phellos,willow oak,3or4,,NoDamage,Volunteer,,No,No,No,No,No,No,No,No,No,26 8 AVENUE,11217,Brooklyn,306,3,Brooklyn,39,52,21,BK37,Park Slope-Gowanus,3015900,New York,40.674839,-73.972183
30,2015-08-30,189164,POINT (-73.98670281663865 40.668826466756094),7,0,OnCurb,Alive,Good,Gleditsia triacanthos var. inermis,honeylocust,,,NoDamage,Volunteer,,No,No,No,No,No,No,No,No,No,456 5 AVENUE,11215,Brooklyn,306,3,Brooklyn,39,52,21,BK37,Park Slope-Gowanus,3013900,New York,40.668827,-73.986702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683755,2015-08-30,189595,POINT (-73.97530940880509 40.67168450833897),20,0,OnCurb,Alive,Good,Ginkgo biloba,ginkgo,1or2,Helpful,Damage,Volunteer,Stones,Yes,No,No,No,No,No,No,No,No,31 FISKE PLACE,11215,Brooklyn,306,3,Brooklyn,39,52,21,BK37,Park Slope-Gowanus,3015700,New York,40.671684,-73.975311
683777,2015-08-28,181723,POINT (-73.93405265616676 40.66877030549942),12,0,OnCurb,Alive,Good,Ulmus americana,American elm,1or2,,NoDamage,TreesCount Staff,,No,No,No,No,No,No,No,No,No,1040 EASTERN PARKWAY,11213,Brooklyn,309,3,Brooklyn,35,43,20,BK61,Crown Heights North,3035300,New York,40.668770,-73.934052
683780,2015-09-01,195173,POINT (-73.91365742077433 40.74612239615302),15,0,OnCurb,Alive,Fair,Platanus x acerifolia,London planetree,,Helpful,NoDamage,TreesCount Staff,,No,No,No,No,No,No,No,No,No,50-017 SKILLMAN AVENUE,11377,Woodside,402,4,Queens,26,30,12,QN31,Hunters Point-Sunnyside-West Maspeth,4016900,New York,40.746124,-73.913658
683782,2015-08-29,184210,POINT (-73.89919195639256 40.85457006199844),3,0,OnCurb,Alive,Good,Quercus palustris,pin oak,1or2,,NoDamage,Volunteer,,No,No,No,No,No,No,No,No,No,2185 VALENTINE AVENUE,10457,Bronx,205,2,Bronx,15,86,33,BX41,Mount Hope,2038100,New York,40.854568,-73.899193


In [14]:
# Left-join every filtered tree to the temperature readings that share its
# NTA code, then write the result to merged_data.csv in the working directory.
# NOTE(review): `nta` repeats on both sides (see the displayed frames above),
# so each tree row is duplicated once per matching reading — confirm the
# output size is intended, and whether `temps_sampled` was meant to be joined
# here instead of the full `temps`.
trees_nta_filtered.merge(temps, how='left', on='nta').to_csv('merged_data.csv')

: 

: 