## Database linking

In [76]:
from sqlalchemy import create_engine, Column, Integer, String, Float, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
import pandas as pd
import json

In [77]:
#Establish connection to the database
#Change dbName to name of the database in pgAdmin/postgres. 
dbName = 'Project_3'
DATABASE_URL = 'postgresql://postgres:postgres@localhost:5432/' + dbName

# Create a database engine
engine = create_engine(DATABASE_URL)

# Declare a base class for declarative table definitions
Base = declarative_base()

# Drop all tables (useful for debugging, be cautious in production)
#Included for debugging purposes, only uncomment for those reasons
Base.metadata.drop_all(engine)

In [78]:
#Declare each table as a class
class State(Base):
    __tablename__ = 'states'

    state_id = Column(String(2), primary_key=True)
    state_name = Column(String(20), nullable=False)
    geo_center_lat = Column(Float, nullable=False)
    geo_center_long = Column(Float, nullable=False)

class StatesCensusData(Base):
    __tablename__ = 'states_census_data'

    state_id = Column(String(2), ForeignKey('states.state_id'), primary_key=True)
    year = Column(Integer, primary_key=True)
    population = Column(Integer, nullable=False)

class QuarterlyProduction(Base):
    __tablename__ = 'quarterly_production'

    state_id = Column(String(2), ForeignKey('states.state_id'), primary_key=True)
    year = Column(Integer, primary_key=True)
    quarter = Column(String(2), primary_key=True)
    total_quarter_prod = Column(Float)
    taxable_bottles_cans_prod = Column(Float)
    taxable_kegs_prod = Column(Float)
    taxable_prem_use_prod = Column(Float)
    tax_free_export_prod = Column(Float)
    tax_free_prem_use_prod = Column(Float)
    stocks_on_hand = Column(Float)

class AnnualCraftProduction(Base):
    __tablename__ = 'annual_craft_production'

    year = Column(Integer, primary_key=True)
    brewery_cat = Column(String(20), primary_key=True)
    annual_craft_production_amount = Column(Integer)

class AnnualCraftBreweryCounts(Base):
    __tablename__ = 'annual_craft_brewery_counts'

    year = Column(Integer, primary_key=True)
    brewery_cat = Column(String(20), primary_key=True)
    brewery_counts = Column(Integer)

class AnnualStateCraftProduction(Base):
    __tablename__ = 'annual_state_craft_production'

    state_id = Column(String(2), ForeignKey('states.state_id'), primary_key=True)
    year = Column(Integer, primary_key=True)
    annual_craft_state_prod_amount = Column(Integer)

class AnnualTTBStatePermitCount(Base):
    __tablename__ = 'annual_ttb_state_permit_count'

    state_id = Column(String(2), ForeignKey('states.state_id'), primary_key=True)
    year = Column(Integer, primary_key=True)
    state_ttb_permit_count = Column(Integer)

# Create tables in the database
Base.metadata.create_all(engine)

In [79]:
#Create session - cell may become unneeded, we shall see
from sqlalchemy.orm import sessionmaker

Session = sessionmaker(bind=engine)
session = Session()

## Heatmap Query

In [80]:
#Generate heatmap data, including count of breweries in the state, breweries per capita, and total barrels produced in the state
# SQL query
query = '''
SELECT
    s."StateName" AS statename,
    attb."Year",
    "asc"."AnnualCraftStateProdAmount",
    attb."StateTTBPermitCount",
    scd."Population"
FROM
    "States" s
LEFT JOIN
    "AnnualStateCraftProduction" "asc" ON s."StateID" = "asc"."StateID"
LEFT JOIN
    "AnnualTTBStatePermitCount" attb ON s."StateID" = attb."StateID" AND "asc"."Year" = attb."Year"
LEFT JOIN
    "StatesCensusData" scd ON s."StateID" = scd."StateID" AND "asc"."Year" = scd."Year";
'''

# Execute the query and fetch the results into a DataFrame
heatDF = pd.read_sql_query(query, engine)

In [81]:
heatDF.head()

Unnamed: 0,statename,Year,AnnualCraftStateProdAmount,StateTTBPermitCount,Population
0,Alabama,2017,62738,52,
1,Alaska,2017,204302,45,
2,Arizona,2017,147728,130,
3,Arkansas,2017,42294,44,
4,California,2017,3285525,1106,


In [82]:
alabama_df = heatDF[heatDF['statename'] == 'Alabama']
alabama_df

Unnamed: 0,statename,Year,AnnualCraftStateProdAmount,StateTTBPermitCount,Population
0,Alabama,2017,62738,52,
51,Alabama,2018,76640,55,
102,Alabama,2019,85480,66,
153,Alabama,2020,78847,68,
204,Alabama,2021,96004,77,
255,Alabama,2022,95185,84,


Below is the code to manipulate the query result into the JSON format. In this process, I make some decisions about the data, namely:

Taking the average over the time period for amount produced

Taking the average Permit Count over the time period

Setting the null values in population to -1, and taking the average of those (currently that table is empty in the database)


The above may change after discussion with the group

In [83]:
# Group by 'State'
grouped_df = heatDF.groupby('statename').agg({
    'AnnualCraftStateProdAmount': 'mean',
    'StateTTBPermitCount': 'mean',
    'Population': 'mean'
}).reset_index()

# Replace NaN values in 'Population' with a placeholder value (e.g., -1)
grouped_df['Population'].fillna(-1, inplace=True)

# Calculate 'Per Capita' by dividing 'StateTTBPermitCount' by 'Population'
grouped_df['StateTTBPerCapita'] = grouped_df['StateTTBPermitCount'] / grouped_df['Population']

# Rename columns to match the JSON structure
grouped_df.rename(columns={
    'statename': 'State',
    'AnnualCraftStateProdAmount': 'Production',
    'StateTTBPermitCount': 'TTBPermitCount',
    'StateTTBPerCapita': 'BreweriesPerCapita'
}, inplace=True)

# Drop unnecessary columns
grouped_df.drop(['Population'], axis=1, inplace=True)

grouped_df = grouped_df.round()
grouped_df.head()

Unnamed: 0,State,Production,TTBPermitCount,BreweriesPerCapita
0,Alabama,82482.0,67.0,-67.0
1,Alaska,184680.0,61.0,-61.0
2,Arizona,187002.0,161.0,-161.0
3,Arkansas,45810.0,64.0,-64.0
4,California,3487648.0,1379.0,-1379.0


In [85]:
json_data = {
    "type": "FeatureCollection",
    "features": []
}

for index, row in grouped_df.iterrows():
    feature = {
        "type": "Feature",
        "properties": {
            "State": row['State'],
            "Production": float(row['Production']),
            "Breweries": int(row['TTBPermitCount']),
            "Breweries Per Capita": float(row['BreweriesPerCapita'])
        }
    }
    json_data["features"].append(feature)

# Convert the dictionary to JSON
import json
json_result = json.dumps(json_data, indent=2)

# Print or use the JSON data as needed
print(json_result)
file_path = "../data/heatmap.json"

with open(file_path, 'w') as file:
    file.write(json_result)

print(f"JSON data exported to: {file_path}")

{
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {
        "State": "Alabama",
        "Production": 82482.0,
        "Breweries": 67,
        "Breweries Per Capita": -67.0
      }
    },
    {
      "type": "Feature",
      "properties": {
        "State": "Alaska",
        "Production": 184680.0,
        "Breweries": 61,
        "Breweries Per Capita": -61.0
      }
    },
    {
      "type": "Feature",
      "properties": {
        "State": "Arizona",
        "Production": 187002.0,
        "Breweries": 161,
        "Breweries Per Capita": -161.0
      }
    },
    {
      "type": "Feature",
      "properties": {
        "State": "Arkansas",
        "Production": 45810.0,
        "Breweries": 64,
        "Breweries Per Capita": -64.0
      }
    },
    {
      "type": "Feature",
      "properties": {
        "State": "California",
        "Production": 3487648.0,
        "Breweries": 1379,
        "Breweries Per Capita": -1379.0
   

## Time Series Query (in progress)

Code below does not work, I do not expect it to work. Using ChatGPT to generate these queries makes a mystery "breweryTable" appear, which does not exist. I will go in and fix this all myself

In [None]:
#Generate time series data, using quarterly reports to get each point
#Each point will be total "product consumed", i.e. all product that has left one way or another (n-1 stored + produced - n stored)
quarterTotalProduction = session.query(QuarterlyProduction.Year, QuarterlyProduction.Quarter, func.sum(QuarterlyProduction.TotalQuarterProd).label('TotalProductConsumed')).group_by(QuarterlyProduction.Year, QuarterlyProduction.Quarter).all()

In [None]:
#Generate simple breakdown number of breweries in each category type for infographic. 
brewByCategory = session.query(Breweries.Category, func.count(Breweries.id).label('BreweriesCount')).group_by(Breweries.Category).all()