Kernel > Restart & Run All

In [20]:
import requests
import pandas as pd
import numpy as np
import tabula as tb
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')



# Water Temperature:

Find Stations: https://tidesandcurrents.noaa.gov/map/index.shtml<br>
Data Retrieval Information: https://api.tidesandcurrents.noaa.gov/api/prod/<br>
What do the data mean: https://api.tidesandcurrents.noaa.gov/api/prod/responseHelp.html#preliminarywaterlevel


            Station 			ID
---------------------------------------
    Crescent City		   9419750 [Eureka]
    North Spit			   9418767 [Eureka]
    Arena Cove			  9416841 [Fort Bragg]
    Point Reyes			   9415020 [Bodega Bay]
    San Fransisco 		   9414290 [San Fransisco]
    Port Chicago 		   9415144 [Inland Waters]
    Richmond 			   9414863 [San Fransisco]
    Alameda			       9414750 [San Fransisco]
    Monterey 			    9413450 [Monterey]
    Port San Luis		    9412110 [Morro Bay]
    Oil Platform Harvest	9411406 [Morro Bay]
    Santa Barbara		    9411340 [Santa Barbara]
    Santa Monica		    9410840 [LA]
    Los Angeles			    9410660 [LA]
    La Jolla			    9410230 [San Diego]
    San Diego Bay		    9410170	[San Diego]
[More Stations to be added]

# Functions to get data from the NOAA API:

In [2]:
def get_water_temp(station_id, station_name, year:int, month:str):
    """
    Input:
    -----------
    - station_id: Id of the NOAA station to get water temperature
    - station_name: Name of the location of the NOAA station
    - year: Year for which the water temperature is to be extracted from NOAA API
    - month: Month from which the water temperature is to be extracted from NOAA API
    
    Output:    
    --------
    - Returns a dataframe with four columns, namely:
            * date_val : date(format: yearmonthday)
            * water_temp: water temperature on the day
            * station: NOAA station from which the data was extracted
            * year: year for which the water temperature data was extracted
    
    """
    stations = []
    append_val = []
    response = requests.get(url = "https://api.tidesandcurrents.noaa.gov/api/prod/datagetter?begin_date="+str(year)+str(month)+"02&end_date="+str(year)+str(month)+"27&station="+str(station_id)+"&product=water_temperature&units=english&time_zone=gmt&application=ports_screen&format=json")
    jobj = response.json()
    append_val.append(pd.DataFrame(jobj['metadata'], jobj['data']))
    ini_station = pd.concat(append_val).reset_index()
    ini_values = pd.DataFrame(list(ini_station['index']))
    check = ini_values.copy()
    check['date_val'] =  pd.to_datetime(check['t']).dt.strftime("%Y%m%d").astype(int)
    check['water_temp'] = pd.to_numeric(check['v'])
    check = check.drop(['t', 'f'], axis = 1)
    check = check.groupby('date_val').mean().reset_index()
    check['station'] = str(station_name)
    check['year'] = year
    return check

    

In [3]:
def all_month(station_id, station_name, year:int):
    """
    """
    month = ['01', '02', '03', '04','05','06', '07', '08', '09', '10', '11', '12']
    month_letter = {'01':'January','02':'February','03':'March','04':'April','05':'May','06':'June','07':'July','08':'August','09':'September','10':'October','11':'November','12':'December'}
    append_list = []
    for i in range(len(month)):
        df = get_water_temp(station_id, station_name, year, str(month[i]))
        new = df.copy()
        new['month'] = str(month[i])
        new['month'] = new['month'].map(month_letter)
        append_list.append(new)
    return append_list
   

In [4]:
def certain_year(station_id, station_name, year:int):
    """
    """
    year_specified = pd.concat(all_month(station_id, station_name, year))
    return year_specified

In [5]:
def mean_temperature_year(station_id, station_name, year:int):
    """
    """
    values =  certain_year(station_id, station_name, year)
    mean = values.groupby(['station']).mean().reset_index().drop(['date_val'], axis = 1)
    return mean
    

In [7]:
def mean_temperature_month(station_id, station_name, year:int):
    """
    """
    values =  certain_year(station_id, station_name, year)
    mean = values.groupby(['month']).mean().reset_index().drop(['date_val'], axis = 1)
    return mean
    

In [6]:
def water_temp_YearRange(station_id, station_name, min_year, max_year):
    """
    """
    combined_years = []
    for year in range(min_year, max_year):
        combined_years.append(certain_year(station_id, station_name, year))
    combined_years_df = pd.concat(combined_years)
    return combined_years_df

# Fishery:

Fishery data is collected from https://wildlife.ca.gov/Fishing/Commercial/Landings#260042366-2018

### Get all urls:

In [21]:
def get_urls(url):
    
    """
    Input: 
        - Takes a url as input.
    Output: 
        - Output is a list of all the scraped urls associated with the input url link.  
    """
    
    reqs = requests.get(url)
    soup = BeautifulSoup(reqs.text, 'html.parser')
 
    urls = []
    for link in soup.find_all('a'):
       print(link.get('href'))
   
#Source: https://www.geeksforgeeks.org/extract-all-the-urls-from-the-webpage-using-python/  

In [22]:
url = 'https://wildlife.ca.gov/Fishing/Commercial/Landings'
get_urls(url)

#main-content
None
/Closures
https://www.ca.gov/
/Systems
/Contact
#searchBar
#languageBar
https://wildlife.ca.gov/
None
/
https://wildlife.ca.gov/
https://wildlife.ca.gov/Fishing
https://wildlife.ca.gov/Hunting
https://wildlife.ca.gov/Licensing
https://wildlife.ca.gov/Conservation
https://wildlife.ca.gov/Learning
/
https://wildlife.ca.gov/Fishing
https://wildlife.ca.gov/Fishing/Commercial
https://wildlife.ca.gov/Fishing/Commercial/Landings
None
None
#la-260042586-2019
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178004&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178005&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178008&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178007&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178009&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178010&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178011&inline
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178012&inline
https:

The urls extracted are not in order. So a folder with text files of urls for each port is created for easier extraction. 



In [4]:
Eureka = 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178011&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=171061&inline\n\
http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=159549&inline\n\
http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=145751&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=129338&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=105677&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=88094&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=71919&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=57131&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=38100&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31783&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31749&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31715&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31681&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31647&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31613&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31579&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31546&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31512&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31478&inline'
eureka = open('eureka.txt', 'w')
eureka.write(Eureka)
eureka.close()

In [12]:
Monterey = 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178013&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=171064&inline\n\
http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=159551&inline\n\
http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=145756&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=129340&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=105679&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=88096&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=71922&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=57107&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=38102&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31759&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31725&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31691&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31657&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31623&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31589&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31555&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31522&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31488&inline\n\
https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31454&inline'
monterey = open('monterey.txt', 'w')
monterey.write(Monterey)
monterey.close()



In [24]:
SanDiego 

NameError: name 'SanDiego' is not defined

In [16]:
open('fishery/SanDiego.txt', 'r').readlines()

['https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=178021&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=171072&inline\n',
 'http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=159559&inline\n',
 'http://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=145759&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=129420&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=105682&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=88099&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=71926&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=57110&inline=\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=38105&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31762&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31728&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31694&inline\n',
 'https://nrm.dfg.ca.gov/FileHandler.ashx?DocumentID=31660&inline\n',
 'https://nrm.d

## Scrape Data: 

In [91]:
def read_data(file, min_page, max_page):
    """
    Input:
        - file: Takes .txt file with links to pdf for scraping the data
        - min_page: Takes integer value. Input is the minimum of the range of pages to be scraped
        - max_page: Takes integer value. Input is the maximum of the range of pages to be scraped
        
    Output:
        - Gives a list of tabular data extracted from a pdf (given as a form of url link)
    """
    file = open('fishery/'+str(file), 'r')
    Lines = file.readlines()
    read = []
    # Strips the newline character
    for line in Lines:
        read.append(tb.read_pdf(line.strip(),pages = list(range(min_page,max_page))))
    return read

In [92]:
def data_final_fishery(port_name, port_file, min_page, max_page):
    
    """
    Input: 
        - port_name: Takes the name of the port for fishery
        - port_file: Takes .txt file with links to pdf for scraping the data
        - min_page: Takes integer value. Input is the minimum of the range of pages to be scraped
        - max_page: Takes integer value. Input is the maximum of the range of pages to be scraped
    Output:
        - Gives a list of dataframes that contain the fishery information from the years 2000-2019 
          for the specified port
    
    """
    
    year_range = list(range(2019,1999,-1))
    read = read_data(port_file, min_page,max_page)
    page_range = list(range(max_page))
    append_list = []
    for i in range(20):  
        df = pd.concat(read[i])
        df = df.iloc[:,[0,-1]]
        df.columns = ['Species', 'Total'] 
        #df = df[['Species', 'Total']]
        df = df.dropna()
        new = df.copy()
        new['port'] = str(port_name)
        new['Year'] = year_range[i]
        append_list.append(new)
    return append_list



## Eureka

In [93]:
eureka_fishery = pd.concat(data_final_fishery('Eureka', 'eureka.txt', min_page = 1, max_page = 4))

Got stderr: Jun 01, 2022 4:22:38 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:38 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:39 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:39 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:22:41 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:42 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:42 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:42 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:22:44 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:44 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:45 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:45 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:22:46 PM org.apache.pdfbox.pdmodel.font.PDType

Got stderr: Jun 01, 2022 4:22:55 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:22:57 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:22:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:23:00 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:23:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:23:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
Jun 01, 2022 4:23:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: Jun 01, 2022 4:23:03 PM org.apache.pdfbox.pdmodel.font.PDType

In [94]:
eureka_fishery = eureka_fishery.reset_index()


In [96]:
eu_fish = eureka_fishery.copy()
eu_fish = eu_fish.drop(['index'], axis = 1)


In [97]:
eu_fish

Unnamed: 0,Species,Total,port,Year
0,"Anchovy, northern",13569,Eureka,2019
1,Butterfish (Pacific,542,Eureka,2019
2,Cabezon,4109,Eureka,2019
3,"Cod, Pacific",4,Eureka,2019
4,"Fish, unspecified",8430,Eureka,2019
...,...,...,...,...
1306,"Crab, Dungeness..............................",4758757,Eureka,2000
1307,"Crab, rock unspecified.......................",8124,Eureka,2000
1308,"Prawn, spot......................................",306,Eureka,2000
1309,"Shrimp, coonstriped..........................",86369,Eureka,2000


In [99]:
eu_fish.to_csv("Eureka_fishery.csv")

In [None]:
#Crescent Mean:
a = [2000,2001,2002,2003,2004,2007,2008,2009,2013,2014,2015]
cres_mean = []
for i in range(len(a)):
    cres_mean.append(mean_temperature_month('9419750', 'Crescent City', a[i]))
cres_watertemp = pd.concat(cres_mean)
cres_watertemp.to_csv('cres_watertemp')

In [None]:
#North Spit mean monthly:
a = [2005, 2006, 2012]
ns_mean= []
for i in range(len(a)):    
    ns_mean.append(mean_temperature_month('9418767', 'North Spit', a[i]))
ns_watertemp = pd.concat(ns_mean)
ns_watertemp.to_csv('ns_watertemp')

# San Francisco

In [None]:
eureka_fishery = pd.concat(data_final_fishery('Eureka', 'eureka.txt', min_page = 1, max_page = 4))

In [None]:
sf_water = water_temp_YearRange('9414750', 'Alameda', 2000, 2015)

# Monterey

In [15]:
monterey_fishery = pd.concat(data_final_fishery('Monterey', 'monterey.txt', min_page = 1, max_page = 4))

Got stderr: May 31, 2022 11:34:59 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:34:59 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:34:59 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:00 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:02 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:03 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:03 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:03 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:04 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:05 PM org.apache.pdfbox.pdmode

Got stderr: May 31, 2022 11:35:13 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:13 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:13 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:13 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:14 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:16 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:35:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:35:19 PM org.apache.pdfbox.pdmode

In [17]:
monterey_fishery = monterey_fishery.reset_index()


In [18]:
mon_fish = monterey_fishery.copy()
mon_fish = mon_fish.drop(['index', 'level_0'], axis = 1)
mon_fish


Unnamed: 0,Species,Total,port,Year
0,"Anchovy, northern",20271521,Monterey,2019
1,"Barracuda, California",5,Monterey,2019
2,"Bass, kelp",7,Monterey,2019
3,"Bonito, Pacific",13,Monterey,2019
4,Butterfish (Pacific,6513,Monterey,2019
...,...,...,...,...
1965,"Tuna, skipjack...................................",9248,Monterey,2000
1966,"Tuna, unspecified..............................",121,Monterey,2000
1967,"Tuna, yellowfin..................................",79,Monterey,2000
1968,Turbot...........................................,44,Monterey,2000


In [19]:
mon_fish.to_csv('monterey_fishery.csv')

In [20]:
#Water temperature for Monterey from year 2000,2015
monterey_water = water_temp_YearRange('9413450', 'Monterey', 2000, 2015)

In [21]:
monterey_water.to_csv('monterey_watertemp.csv')

In [22]:
mean = monterey_water.groupby(['year','month']).mean()


In [23]:
monterey_mean = mean.reset_index()
monterey_mean['Station'] = 'Monterey'
monterey_mean.to_csv('monterey_meanwatertemp.csv')

In [24]:
monterey_mean 

Unnamed: 0,year,month,date_val,water_temp,Station
0,2000,April,20000414.5,56.216667,Monterey
1,2000,August,20000814.5,57.324045,Monterey
2,2000,December,20001214.5,54.702439,Monterey
3,2000,February,20000214.5,55.135424,Monterey
4,2000,January,20000114.5,53.606870,Monterey
...,...,...,...,...,...
175,2014,March,20140314.5,56.633838,Monterey
176,2014,May,20140514.5,56.348033,Monterey
177,2014,November,20141114.5,59.552836,Monterey
178,2014,October,20141014.5,61.914487,Monterey


# Sacramento Delta

In [None]:
sacramento_fishery = pd.concat(data_final_fishery('Sacramento', 'sacramento.txt', min_page = 1, max_page = 2))

# Bodega Bay

In [None]:
bodega_fishery  = pd.concat(data_final('Bodega', 'bodega.txt', min_page = 1, max_page = 3))

# Santa Barbara

In [443]:
sb_fishery = pd.concat(data_final_fishery('Santa Barbara', 'santab.txt', min_page = 1, max_page = 4))

Got stderr: May 31, 2022 9:21:14 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:15 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:21:16 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:17 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:21:19 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:19 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:19 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:21:20 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:21:21 PM org.apache.pdfbox.pdmodel.font.PDType

In [454]:
sb_fishery = sb_fishery.reset_index()


In [455]:
sb_fish = sb_fishery.copy()
sb_fish = sb_fish.drop(['index', 'level_0'], axis = 1)
sb_fish

Unnamed: 0,Species,Total,port,Year
0,"Anchovy, northern",240060,Santa Barbara,2019
1,"Barracuda, California",526,Santa Barbara,2019
2,"Bass, giant sea",1643,Santa Barbara,2019
3,"Bonito, Pacific",2252,Santa Barbara,2019
4,Butterfish (Pacific,559,Santa Barbara,2019
...,...,...,...,...
1878,Swordfish........................................,17898,Santa Barbara,2000
1879,"Thornyhead, longspine......................",34452,Santa Barbara,2000
1880,"Thornyhead, shortspine.....................",36475,Santa Barbara,2000
1881,Thornyheads.....................................,15030,Santa Barbara,2000


# Morro Bay

In [26]:
morro_fishery = pd.concat(data_final_fishery('Morro Bay', 'morrobay.txt', min_page = 1, max_page = 3))

Got stderr: May 31, 2022 11:39:54 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:55 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:55 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:39:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:57 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:39:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:39:59 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:40:00 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:40:00 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:40:01 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:40:02 PM org.apache.

In [27]:
morro_fishery

Unnamed: 0,Species,Total,port,Year
0,"Anchovy, northern",95,Morro Bay,2019
1,"Bonito, Pacific",311,Morro Bay,2019
2,Cabezon,26248,Morro Bay,2019
3,"Fish, unspecified",3,Morro Bay,2019
4,"Flounder, arrowtooth",14,Morro Bay,2019
...,...,...,...,...
32,"Sole, rex........................................",44083,Morro Bay,2000
33,"Sole, rock.......................................",8,Morro Bay,2000
34,"Sole, sand.......................................",1035,Morro Bay,2000
35,"Sole, unspecified...............................",654,Morro Bay,2000


In [28]:
morro_fishery = morro_fishery.reset_index()


In [32]:
morro_fish = morro_fishery.copy()
morro_fish = morro_fish.drop(['index'], axis = 1)
morro_fish.to_csv('morro_fishery.csv')

In [34]:
sanluis_water = water_temp_YearRange('9412110', 'Port San Luis', 2000, 2015)

In [35]:
sanluis_water.to_csv('sanluis_watertemp.csv')

In [36]:
mean = sanluis_water.groupby(['year','month']).mean()
san_luis_mean = mean.reset_index()
san_luis_mean['Station'] = 'Port San Luis'
san_luis_mean.to_csv('sanluis_meanwatertemp.csv')

In [37]:
san_luis_mean

Unnamed: 0,year,month,date_val,water_temp,Station
0,2000,April,20000414.5,53.672615,Port San Luis
1,2000,August,20000814.5,58.128763,Port San Luis
2,2000,December,20001214.5,54.636922,Port San Luis
3,2000,February,20000214.5,55.663685,Port San Luis
4,2000,January,20000114.5,53.915210,Port San Luis
...,...,...,...,...,...
175,2014,March,20140314.5,56.716730,Port San Luis
176,2014,May,20140514.5,54.098441,Port San Luis
177,2014,November,20141114.5,60.297657,Port San Luis
178,2014,October,20141014.5,63.619605,Port San Luis


# Los Angeles

In [470]:
la_fishery = pd.concat(data_final_fishery('LA', 'la.txt', min_page = 1, max_page = 3))

Got stderr: May 31, 2022 9:41:19 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:20 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:20 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:41:21 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:22 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:22 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:41:23 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:23 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:23 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:41:25 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:25 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 9:41:25 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 9:41:29 PM org.apache.pdfbox.pdmode

In [471]:
la_fishery

Unnamed: 0,Species,Total,port,Year
0,"Anchovy, northern",45028,LA,2019
1,"Bass, giant sea",5627,LA,2019
2,"Bonito, Pacific",1343,LA,2019
3,Butterfish (Pacific,20,LA,2019
5,Cabezon,270,LA,2019
...,...,...,...,...
32,"Salmon, Chinook...............................",267,LA,2000
33,Sanddab..........................................,11189,LA,2000
34,"Sardine, Pacific.................................",86264212,LA,2000
35,"Scorpionfish, California.....................",21606,LA,2000


In [None]:
la_water = water_temp_YearRange('9410660', 'LA', 2000, 2015)

# San Diego

In [38]:
sd_fishery = pd.concat(data_final_fishery('San Diego', 'sandiego.txt', min_page = 1, max_page = 3))

Got stderr: May 31, 2022 11:44:53 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:53 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:53 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:44:54 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:55 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:55 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:44:56 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:57 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:57 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:44:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 11:44:58 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 11:44:59 PM org.apache.

In [39]:
sd_fishery = sd_fishery.reset_index()

In [41]:
sd_fish = sd_fishery.copy()
sd_fish = sd_fish.drop(['index'], axis = 1)
sd_fish.to_csv('sd_fishery.csv')

In [42]:
sd_watertemp = water_temp_YearRange('9410170', 'San Diego', 2000, 2015)#does not have until 2020

In [43]:
sd_watertemp.to_csv('sd_watertemp.csv')

In [44]:
mean = sd_watertemp.groupby(['year','month']).mean()
sd_mean = mean.reset_index()
sd_mean['Station'] = 'San Diego'
sd_mean.to_csv('sd_meanwatertemp.csv')


In [45]:
sd_mean

Unnamed: 0,year,month,date_val,water_temp,Station
0,2000,April,20000414.5,63.574469,San Diego
1,2000,August,20000814.5,73.295615,San Diego
2,2000,December,20001214.5,60.236239,San Diego
3,2000,February,20000214.5,60.511526,San Diego
4,2000,January,20000114.5,57.999519,San Diego
...,...,...,...,...,...
175,2014,March,20140314.5,65.535011,San Diego
176,2014,May,20140514.5,68.421619,San Diego
177,2014,November,20141114.5,69.281927,San Diego
178,2014,October,20141014.5,73.921043,San Diego


# Fort Bragg

In [484]:
fb_fishery = pd.concat(data_final_fishery('Fort Bragg', 'fortbragg.txt', min_page = 1, max_page = 3))

Got stderr: May 31, 2022 10:17:29 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:30 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:30 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 10:17:31 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:32 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:32 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 10:17:33 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:34 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:34 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 10:17:35 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:35 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>
May 31, 2022 10:17:35 PM org.apache.pdfbox.pdmodel.font.PDType1Font <init>

Got stderr: May 31, 2022 10:17:37 PM org.apache.

In [None]:
fb_watertemp = water_temp_YearRange('9416841', 'Arena Cove', 2000, 2015)#does not have until 2020

# Inland Waters

In [206]:
inland = pd.read_csv("fishery/inland.txt", header = None)