In [3]:
from pathlib import Path

import pandas as pd
import requests

In [66]:
# Inclusive range of years to pull data for
start_year = 2015
end_year = 2024

# Every year from start_year through end_year, in ascending order
years_list = list(range(start_year, end_year + 1))

# Fish Count Data

In [68]:
# Generate URL for defined year range.
# The years are passed as one comma-separated query value, newest first.
fish_years = ",".join(map(str, sorted(years_list, reverse=True)))
fish_url = "https://www.adfg.alaska.gov/sf/FishCounts/index.cfm?ADFG=export.JSON&countLocationID=40&year="+fish_years+"&speciesID=420"

# Set User-Agent to mimic browser behavior (without this, the request returns an authentication error)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
}

# Request data. The timeout keeps a stalled connection from hanging the kernel indefinitely.
fish_response = requests.get(fish_url, headers=headers, timeout=30)

# Stop on a failed request instead of only printing: continuing would either raise a
# NameError on fish_json (fresh kernel) or silently reuse a fish_json left over from
# an earlier run of this cell.
if fish_response.status_code != 200:
    raise RuntimeError(f"Failed to fetch data: {fish_response.status_code}, {fish_response.text}")

fish_json = fish_response.json()

# Read json into dataframe: 'DATA' holds the rows, 'COLUMNS' the matching column names
fish_data = pd.DataFrame(fish_json['DATA'], columns=fish_json['COLUMNS'])
fish_data.head()

Unnamed: 0,YEAR,COUNTDATE,FISHCOUNT,SPECIESID,COUNTLOCATIONID,COUNTLOCATION,SPECIES
0,2024,"July, 01 2024 00:00:00",6366,420,40,Kenai River (late-run sockeye),Sockeye
1,2024,"July, 02 2024 00:00:00",5484,420,40,Kenai River (late-run sockeye),Sockeye
2,2024,"July, 03 2024 00:00:00",9630,420,40,Kenai River (late-run sockeye),Sockeye
3,2024,"July, 04 2024 00:00:00",13590,420,40,Kenai River (late-run sockeye),Sockeye
4,2024,"July, 05 2024 00:00:00",18120,420,40,Kenai River (late-run sockeye),Sockeye


In [70]:
# Summary statistics for every column, numeric and non-numeric alike
fish_data.describe(include="all")

Unnamed: 0,YEAR,COUNTDATE,FISHCOUNT,SPECIESID,COUNTLOCATIONID,COUNTLOCATION,SPECIES
count,528.0,528,528.0,528.0,528.0,528,528
unique,,528,,,,1,1
top,,"July, 01 2024 00:00:00",,,,Kenai River (late-run sockeye),Sockeye
freq,,1,,,,528,528
mean,2019.376894,,32741.606061,420.0,40.0,,
std,2.831109,,28706.330094,0.0,0.0,,
min,2015.0,,1436.0,420.0,40.0,,
25%,2017.0,,13990.5,420.0,40.0,,
50%,2019.0,,25421.5,420.0,40.0,,
75%,2022.0,,41381.0,420.0,40.0,,


In [72]:
# Drop the species/location identifier and label columns, keeping YEAR, COUNTDATE and FISHCOUNT.
fish_data = fish_data.drop(
    columns=['SPECIESID', 'COUNTLOCATIONID', 'COUNTLOCATION', 'SPECIES'],
    errors='ignore',  # skip columns that are already gone so re-running this cell does not raise KeyError
)
fish_data.head()

Unnamed: 0,YEAR,COUNTDATE,FISHCOUNT
0,2024,"July, 01 2024 00:00:00",6366
1,2024,"July, 02 2024 00:00:00",5484
2,2024,"July, 03 2024 00:00:00",9630
3,2024,"July, 04 2024 00:00:00",13590
4,2024,"July, 05 2024 00:00:00",18120


# Save Raw Data to .csv Files

In [82]:
# Save data to csv files

folder_path = '../data/raw_data/'

# Create the output folder if it is missing so the write below does not fail
# with an OSError (e.g. on a fresh checkout where the folder does not exist yet).
Path(folder_path).mkdir(parents=True, exist_ok=True)

# fish count data
fish_file_name = f'{start_year}-{end_year}_fish_data_raw.csv'
fish_data.to_csv(Path(folder_path) / fish_file_name, index=False)