# Boston Weather

# Setup
First, import what we need.

In [49]:
import math
import altair as alt
from altair import datum
import pandas as pd
import numpy as np
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from vega_datasets import data

Then, check versions of installed packages.

In [50]:
# Report the versions of the main packages this notebook relies on.
f'ipywidgets {widgets.__version__}, altair {alt.__version__}, pandas {pd.__version__}, numpy {np.__version__}'

'ipywidgets 8.0.4, altair 5.1.2, pandas 1.5.3, numpy 1.24.3'

Allow Altair to chart more than 5,000 rows by enabling the VegaFusion data transformer.

In [51]:
# Switch Altair's active data transformer to "vegafusion" so the charts below
# are not limited to 5000 rows.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

## Weather

In [52]:
# Load the monthly Boston weather summary into a DataFrame.
# NOTE(review): the displayed frame has an "Unnamed: 0" column, so the CSV
# presumably contains a saved index column — confirm, and consider
# index_col=0 if that column is not wanted.
bostonWeatherDF = pd.read_csv('boston-weather-mid.csv')
# A bare name on the last line of the cell displays the frame.
bostonWeatherDF

Unnamed: 0,year,month,maxTemp,minTemp,meanTemp,precipitation,CDDBase65
0,1872,Jan,,,,2.0,
1,1872,Feb,,,,2.0,
2,1872,Mar,48.0,-8.0,18.0,4.0,0.0
3,1872,Apr,85.0,28.0,38.0,1.0,1.0
4,1872,May,85.0,40.0,51.0,3.0,16.0
...,...,...,...,...,...,...,...
1816,2023,May,86.0,42.0,51.0,2.0,39.0
1817,2023,Jun,84.0,47.0,59.0,3.0,106.0
1818,2023,Jul,91.0,61.0,68.0,10.0,332.0
1819,2023,Aug,88.0,59.0,65.0,6.0,216.0


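Data from [Weather.gov NOWData](https://www.weather.gov/wrh/Climate?wfo=box). The monthly summary data was extracted for `por`–`2023` (`por` being the period of record), the "T" and "M" entries were replaced with blanks so they load as missing data, and the result was loaded into Power Query to unpivot the month columns and then pivot the measures into columns.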

In [53]:
# Month abbreviations in calendar order; used to sort the month axis and to
# populate the month dropdowns.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# [smallest, largest] value of the 'year' column; used for slider bounds and
# as a colour-scale domain.
yearsInData=[bostonWeatherDF['year'].min(), bostonWeatherDF['year'].max()]
# [smallest, largest] value of the 'meanTemp' column; used as a fixed y-axis
# domain.
tempsInData=[bostonWeatherDF['meanTemp'].min(), bostonWeatherDF['meanTemp'].max()]

In [54]:
@interact(
    years=widgets.IntRangeSlider(
        value=yearsInData,
        min=yearsInData[0],
        max=yearsInData[1],
        step=1,
        description='Years:',
    )
)
def tempMonthChangeOverTimeChart(years):
    """Draw one mean-temperature line per year across the twelve months.

    years: pair (first, last) from the range slider; only rows whose year
    lies inside this inclusive range are drawn.

    Returns a 600x600 Altair line chart with point markers.
    """
    firstYear, lastYear = years[0], years[1]
    withinSelectedYears = (datum.year >= firstYear) & (datum.year <= lastYear)

    heading = alt.Title(
        'How the Boston temperatures for each month have changed over time',
        subtitle=f'A comparison of {firstYear}–{lastYear}',
    )
    # The colour domain is the full year range in the data, not the slider
    # selection.
    yearColor = alt.Color(
        'year',
        scale=alt.Scale(scheme='lightgreyred', domain=yearsInData),
        sort='ascending',
    )

    chart = alt.Chart(bostonWeatherDF, title=heading)
    chart = chart.transform_filter(withinSelectedYears)
    chart = chart.mark_line(point=True).encode(
        color=yearColor,
        x=alt.X('month:O').sort(monthOrder),
        # The y domain is fixed to the temperature range of the whole dataset.
        y=alt.Y('meanTemp:Q', title='mean monthly temp in ⁰F', scale=alt.Scale(domain=tempsInData)),
        tooltip=['year', 'meanTemp'],
        opacity=alt.value(.3),
    )
    return chart.properties(width=600, height=600)

interactive(children=(IntRangeSlider(value=(1872, 2023), description='Years:', max=2023, min=1872), Output()),…

In [13]:
@interact(
    month=widgets.Dropdown(
        options=monthOrder,
        value='Aug',
        description='Month:',
    )
)
def tempYearChangeOverTimeChart(month):
    """Scatter one month's mean temperature against year, with a brushable mean rule.

    month: month abbreviation chosen in the dropdown; only rows for that
    month are plotted.

    An interval selection along the x axis picks a span of years, and a
    firebrick rule marks the mean of meanTemp over the selected points.

    Returns a 600x300 layered Altair chart.
    """
    # Interval selection restricted to the x encoding (year).
    yearBrush = alt.selection_interval(encodings=['x'])

    heading = alt.Title(
        f'How the Boston {month} temperature has changed over time',
        subtitle=(
            f'A comparison of {yearsInData[0]}–{yearsInData[1]}'
            ' with interactive selection mean across years'
        ),
    )

    # Data-less base chart; the data is attached when the layers are combined.
    baseChart = alt.Chart(title=heading).encode(
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred')),
    ).add_params(yearBrush)

    yearPoints = baseChart.mark_point().encode(
        x=alt.X('year:Q', scale=alt.Scale(zero=False)),
        y=alt.Y('meanTemp:Q', title='mean monthly temp in ⁰F', scale=alt.Scale(zero=False)),
        tooltip=['year', 'maxTemp', 'meanTemp', 'minTemp', 'CDDBase65'],
    )

    # The rule aggregates only the rows inside the brush.
    selectionMean = alt.Chart().mark_rule(color='firebrick').encode(
        y=alt.Y('mean(meanTemp):Q', scale=alt.Scale(zero=False)),
        size=alt.SizeValue(3),
    ).transform_filter(yearBrush)

    layered = alt.layer(yearPoints, selectionMean, data=bostonWeatherDF)
    layered = layered.transform_filter(datum.month == month)
    return layered.properties(width=600, height=300)

interactive(children=(Dropdown(description='Month:', index=7, options=('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun…

In [14]:
@interact(month=widgets.Dropdown(
    options=monthOrder,
    value='Aug',
    description='Month:',
))
def tempScatter(month):
    """Scatter the Cooling Degree Days (base 65) value of one month against year.

    month: month abbreviation chosen in the dropdown; only rows for that
    month are plotted.

    Returns a 600x300 Altair point chart coloured by year.
    """
    monthChart = alt.Chart(
        bostonWeatherDF,
        title=alt.Title(
            'Cooling Degree Days Base 65 score for ' + month + ' over the years',
            # Year range is read from the data rather than hard-coded, so the
            # subtitle stays correct when the CSV is refreshed.
            subtitle='A comparison of ' + str(yearsInData[0]) + '–' + str(yearsInData[1])
        )
    ).transform_filter(
        (datum.month == month)
    ).encode(
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred'))
    )

    points = monthChart.mark_point().encode(
        x=alt.X('year:Q', scale=alt.Scale(zero=False)),
        y=alt.Y('CDDBase65:Q', scale=alt.Scale(zero=False)),
        tooltip=['year', 'maxTemp', 'meanTemp', 'minTemp', 'CDDBase65']
    )

    # Returned directly; the previous local was named like the function itself.
    return points.properties(
        width=600,
        height=300
    )

interactive(children=(Dropdown(description='Month:', index=7, options=('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun…

In [19]:
@interact(month=widgets.Dropdown(
    # Calendar order, like the other month dropdowns in this notebook; the
    # previous list(set(...)) produced an arbitrary order.
    options=monthOrder,
    value='Jan',
    description='Month:',
))
def scatterPlot(month):
    """Scatter one month's mean temperature against year, with a brushable mean rule.

    month: month abbreviation chosen in the dropdown.

    Points inside the x-axis brush are drawn larger, and a black rule marks
    the mean of meanTemp over the brushed points.

    Returns a layered Altair chart (points plus rule).
    """
    # filtering to the month that's set by the widget
    month = str(month)
    vis2_df = bostonWeatherDF.loc[bostonWeatherDF['month'] == month]
    brush = alt.selection_interval(encodings=['x'])

    # Both layers share the y axis; giving them the same title keeps the axis
    # labelled with the temperature description only.
    tempAxisTitle = "mean monthly temp. in degrees fahrenheit"

    points = alt.Chart(
            vis2_df,
            title=alt.Title(
                "How the Boston " + month + " temperature has changed over time"
            , subtitle="A comparison of 1872-2023 with interactive selection mean across years"
            )
        ).mark_point().encode(
        x = alt.X('year',
                  scale=alt.Scale(domain=[1860, 2040]),
                 axis=alt.Axis(tickCount=20)),
        y= alt.Y('meanTemp', title = tempAxisTitle).scale(zero=False),
        tooltip=['year', 'minTemp', 'maxTemp', 'meanTemp'],
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred')),
        # Emphasise the brushed points.
        size=alt.condition(brush, alt.value(50), alt.value(1)),
        ).properties(
            width=550,
            height=300
        ).add_params(
            brush
        )

    # The subtitle promises a mean, so aggregate with mean (was median).
    meanLine = alt.Chart().mark_rule(color='black', opacity=.5).encode(
        y=alt.Y('mean(meanTemp):Q', scale=alt.Scale(zero=False), title = tempAxisTitle),
        size=alt.SizeValue(3)
        ).transform_filter(
            brush
        )

    # vis2_df already holds only the chosen month, so no further month filter
    # is applied to the layered chart.
    return alt.layer(points, meanLine, data=vis2_df)

interactive(children=(Dropdown(description='Month:', index=2, options=('Oct', 'May', 'Jan', 'Dec', 'Mar', 'Jul…

In [72]:
# NOTE(review): these three names are also assigned earlier in the notebook
# with the same expressions.
# [smallest, largest] value of the 'year' column.
yearsInData=[bostonWeatherDF['year'].min(), bostonWeatherDF['year'].max()]
# [smallest, largest] value of the 'meanTemp' column.
tempsInData=[bostonWeatherDF['meanTemp'].min(), bostonWeatherDF['meanTemp'].max()]
# Month abbreviations in calendar order.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

In [76]:
@interact(years=widgets.IntRangeSlider(
    value=yearsInData,
    min=yearsInData[0],
    max=yearsInData[1],
    step=1,
    description='Years:',
))
def tempMonthChangeOverTimeChart(years):
    """Plot mean temperature by month, one coloured line per year in the slider range.

    years: pair (first, last) from the range slider, both ends inclusive.

    Returns a 600x600 Altair line chart with point markers and no title.
    """
    lowYear, highYear = years[0], years[1]

    lines = alt.Chart(bostonWeatherDF).mark_line(point=True)
    lines = lines.transform_filter(
        (datum.year >= lowYear) & (datum.year <= highYear)
    )
    lines = lines.encode(
        x=alt.X('month:O').sort(monthOrder),
        y=alt.Y('meanTemp:Q', scale=alt.Scale(domain=tempsInData)),
        # Colour scale spans the full year range in the data.
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred', domain=yearsInData), sort='ascending'),
        tooltip=['year', 'meanTemp'],
    )
    return lines.properties(width=600, height=600)

interactive(children=(IntRangeSlider(value=(1872, 2023), description='Years:', max=2023, min=1872), Output()),…

In [35]:
# loading the csv file into a dataframe
bostonWeatherDF = pd.read_csv("boston-weather-mid.csv")
# month abbreviations in calendar order
months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# name of the column that holds the year
yearCol = 'year'

# initializing the year range: [smallest, largest] value of the year column
yearRangeInData=[bostonWeatherDF[yearCol].min(),
                 bostonWeatherDF[yearCol].max()]

# a bare name on the last line of the cell displays the frame
bostonWeatherDF

Unnamed: 0,year,month,maxTemp,minTemp,meanTemp,precipitation,CDDBase65
0,1872,Jan,,,,2.0,
1,1872,Feb,,,,2.0,
2,1872,Mar,48.0,-8.0,18.0,4.0,0.0
3,1872,Apr,85.0,28.0,38.0,1.0,1.0
4,1872,May,85.0,40.0,51.0,3.0,16.0
...,...,...,...,...,...,...,...
1816,2023,May,86.0,42.0,51.0,2.0,39.0
1817,2023,Jun,84.0,47.0,59.0,3.0,106.0
1818,2023,Jul,91.0,61.0,68.0,10.0,332.0
1819,2023,Aug,88.0,59.0,65.0,6.0,216.0


In [36]:
# source: https://stackoverflow.com/questions/48042915/sort-a-pandas-dataframe-series-by-month-name
# source: https://vega.github.io/vega/docs/schemes/
# source: https://stackoverflow.com/questions/69436980/change-thickness-of-one-line-on-altair-chart
# source: https://altair-viz.github.io/gallery/line_chart_with_points.html
# source: https://stackoverflow.com/questions/57695261/changing-the-size-of-altair-plot-renders-in-jupyter-notebook
# source: https://stackoverflow.com/questions/66158128/change-legend-number-range-in-altair-plot

# The tooltip is configured through the `tooltip` encoding inside lineChart.

# setting up the slider widget
@interact(
    yearRange=widgets.IntRangeSlider(
        value=yearRangeInData,
        min=yearRangeInData[0],
        max=yearRangeInData[1],
        step=1,
        description='Years:'
    )
)
def lineChart(yearRange):
    """
    Creates a line chart of the mean monthly temperature, one line per year, for the selected years.

    yearRange: pair (first, last) supplied by the range slider; both ends are inclusive

    returns an altair line chart with point markers, 550 wide by 600 high
    """
    firstYear, lastYear = yearRange[0], yearRange[1]

    # Filtering the df based on the year range set in the slider. This is the
    # only filter applied: the chart receives just the selected rows, so no
    # additional Vega-side filter is needed.
    inSelectedYears = (bostonWeatherDF[yearCol] >= firstYear) & (bostonWeatherDF[yearCol] <= lastYear)
    year_range_df = bostonWeatherDF[inSelectedYears]

    # Returning the chart
    return alt.Chart(
        year_range_df,
        title=alt.Title(
            "How the Boston temperatures for each month have changed over time",
            # updating subtitle based on year range
            subtitle='A comparison of ' + str(firstYear) + '-' + str(lastYear)
        )
    # Setting line and point marks
    ).mark_line(
        point=True
    # setting axes values
    ).encode(
        x=alt.X('month').sort(months),

        # y-axis staying the same: the domain is fixed so the axis does not
        # rescale when the slider moves
        y=alt.Y('meanTemp', title="mean monthly temp. in degrees Fahrenheit",
                scale=alt.Scale(domain=[5, 73]),
                axis=alt.Axis(tickCount=20)),

        # setting up tooltip so additional info is shown when you hover over a point
        tooltip=['year', 'month', 'minTemp', 'maxTemp', 'meanTemp', 'precipitation'],

        # setting up legend: the colour domain covers every year in the data,
        # independent of the slider selection
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred', domain=yearRangeInData)),
        strokeWidth=alt.value(0.5)

    # setting chart dimensions
    ).properties(
        width=550,
        height=600
    )
   


interactive(children=(IntRangeSlider(value=(1872, 2023), description='Years:', max=2023, min=1872), Output()),…

In [78]:
# [smallest, largest] value of the 'year' column.
YearsRangeInData=[bostonWeatherDF['year'].min(), bostonWeatherDF['year'].max()]
# [smallest, largest] value of the 'meanTemp' column.
TempsRangeInData=[bostonWeatherDF['meanTemp'].min(), bostonWeatherDF['meanTemp'].max()]
# Month abbreviations in calendar order (a list, despite the singular name).
month = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

In [80]:
@interact(yearsRange=widgets.IntRangeSlider(
    value=YearsRangeInData,
    min=YearsRangeInData[0],
    max=YearsRangeInData[1],
    step=1,
    description='Years:'
))
def tempMonthChangeOverTimeChart(yearsRange):
    """Plot mean temperature by month, one coloured line per year in the slider range.

    yearsRange: pair (first, last) from the range slider, both ends inclusive.

    Returns a 600x600 Altair line chart with point markers.
    """
    return alt.Chart(
        bostonWeatherDF
    ).transform_filter(
        # Filter on the function's own parameter; the body previously read an
        # undefined name `years`.
        ((datum.year >= yearsRange[0]) & (datum.year <= yearsRange[1]))
    ).encode(
        color=alt.Color('year', scale=alt.Scale(scheme='lightgreyred', domain=YearsRangeInData), sort='ascending')
    ).mark_line(point=True).encode(
        # `month` here is the module-level list of month abbreviations.
        x=alt.X('month:O').sort(month),
        y=alt.Y('meanTemp:Q', scale=alt.Scale(domain=TempsRangeInData)),
        tooltip=['year', 'meanTemp'],
    ).properties(
        width=600,
        height=600
    )


interactive(children=(IntRangeSlider(value=(1872, 2023), description='Years:', max=2023, min=1872), Output()),…