# NFPD data exploration

> These are Go notebooks. To run them you need the GoNB Jupyter kernel, which can be installed from https://github.com/janpfeifer/gonb

Note also that, for local package development, you can put `!*go mod edit -replace "github.com/umbralcalc/anglersim=/path/to/anglersim"` at the top of any cell to build against your local checkout.

In this notebook we're exploring the data from the NFPD bulk download facility.

In [1]:
!*go mod edit -replace "github.com/umbralcalc/anglersim=/home/robert/Code/anglersim"

In [2]:
import (
	"os"
	"github.com/umbralcalc/anglersim/pkg/nfpd"
	"github.com/umbralcalc/stochadex/pkg/analysis"
	gonb_echarts "github.com/janpfeifer/gonb-echarts"
)

%%

// Build the unique-sites dataframe from the counts CSV, print it, and save
// it next to the other data files as a CSV.
sdf := nfpd.GetUniqueSitesDataFrameFromCountsCSV("../dat/FW_Fish_Counts.csv")
fmt.Println(sdf)

f, err := os.Create("../dat/FW_Fish_Unique_Count_Sites.csv")
if err != nil {
	panic(err)
}
// A failed write must not go unnoticed, otherwise a truncated CSV is left
// on disk and silently consumed later.
if err := sdf.WriteCSV(f); err != nil {
	f.Close()
	panic(err)
}
// Close explicitly (rather than defer) so that a flush-to-disk failure on
// the freshly written file is reported too.
if err := f.Close(); err != nil {
	panic(err)
}

[16204x2] DataFrame

    SITE_ID SITE_NAME
 0: 13704   SAP/BT Quin, Langley Beck, West Farm
 1: 6926    TATA20
 2: 1778    Wroxham Woods
 3: 11873   West Dipton Burn at Newbiggen Bridge
 4: 13251   Bullapit
 5: 2588    D/s Weir Lodge STW
 6: 23886   Lonesome Lane Salford
 7: 64763   Audley End
 8: 32194   D/S Costessey Weir
 9: 6113    Bedford Gravels
    ...     ...
    <int>   <string>



In [None]:
import (
	"gonum.org/v1/gonum/floats"
	"github.com/umbralcalc/anglersim/pkg/nfpd"
	"github.com/umbralcalc/stochadex/pkg/analysis"
	gonb_echarts "github.com/janpfeifer/gonb-echarts"
)

%%

siteName := "Haxted Mill"
fdf := nfpd.GetSiteCountsDataFrameFromCSV("../dat/FW_Fish_Counts.csv", siteName)

scatter := analysis.NewScatterPlotFromDataFrame(
	&fdf,
	"TIMESTAMP",
	"ALL_RUNS",
	"SPECIES_NAME",
)

xAxis := fdf.Col("TIMESTAMP").Float()
yAxis := fdf.Col("ALL_RUNS").Float()
scatter.SetGlobalOptions(
	charts.WithTitleOpts(opts.Title{
		Title: "Site: " + siteName,
		Bottom: "1%",
	}),
	charts.WithYAxisOpts(opts.YAxis{
		Min: floats.Min(yAxis),
		Max: floats.Max(yAxis),
	}),
	charts.WithXAxisOpts(opts.XAxis{
		Min: floats.Min(xAxis),
		Max: floats.Max(xAxis),
	}),
)

gonb_echarts.Display(scatter, "width: 1024px; height:400px; background: white;")

In [9]:
import (
    "github.com/umbralcalc/anglersim/pkg/nfpd"
)

%%

df := nfpd.GetTypesDataFrameFromCSV("../dat/FW_Fish_Data_Types.csv")

fmt.Println(df)

[635125x9] DataFrame

    \ufeffSURVEY_ID SURVEY_SPECIES_ID SPECIES_ID SPECIES_RUN_ID RUN_NUMBER ...
 0: 144220          617874            283        818267         1          ...
 1: 144220          617874            283        818268         2          ...
 2: 144220          617875            286        818269         1          ...
 3: 144220          617875            286        818270         2          ...
 4: 144220          617876            153        818271         1          ...
 5: 144220          617876            153        818272         2          ...
 6: 144220          617877            152        818273         1          ...
 7: 144220          617877            152        818274         2          ...
 8: 144220          617878            193        818275         1          ...
 9: 144220          617878            193        818276         2          ...
    ...             ...               ...        ...            ...        ...
    <int>           <int>     

In [10]:
import (
    "github.com/umbralcalc/anglersim/pkg/nfpd"
)

%%

df := nfpd.GetSitesDataFrameFromCSV("../dat/FW_Fish_Sites.csv")

fmt.Println(df)

[16718x8] DataFrame

    \ufeffSITE_ID NEW_AREA_NAME                                ...
 0: 36579         Yorkshire                                    ...
 1: 29407         Yorkshire                                    ...
 2: 42664         West Thames                                  ...
 3: 73029         Cumbria and Lancashire                       ...
 4: 11537         Cumbria and Lancashire                       ...
 5: 12902         West Thames                                  ...
 6: 21217         Essex Norfolk and Suffolk                    ...
 7: 12723         Staffordshire Warwickshire and West Midlands ...
 8: 71463         Devon and Cornwall                           ...
 9: 31888         Devon and Cornwall                           ...
    ...           ...                                          ...
    <int>         <string>                                     ...

Not Showing: SITE_RANKED_NGR <string>, SITE_RANKED_EASTING <int>,
SITE_RANKED_NORTHING <int>, N_SURVEYS <in

In [11]:
import (
    "bufio"
	"bytes"
	"fmt"
	"os"

	"github.com/go-gota/gota/dataframe"
)

%%

// Preview the individual-lengths CSV by parsing only the header plus the
// first maxRows data rows, instead of loading the whole file.
const maxRows = 100

f, err := os.Open("../dat/FW_Fish_Individual_Lengths.csv")
if err != nil {
	panic(err)
}
// The file is only read, so ignoring the Close error is safe here.
defer f.Close()

scanner := bufio.NewScanner(f)
var buffer bytes.Buffer

// The first line is the header and does not count towards maxRows.
if !scanner.Scan() {
	// Scan also returns false on a read error; surface that rather than
	// misreporting it as an empty file.
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	panic("Empty file")
}
buffer.Write(scanner.Bytes())
buffer.WriteByte('\n')

// Copy at most maxRows data rows; the count check comes first so no extra
// line is scanned once the limit is reached.
for count := 0; count < maxRows && scanner.Scan(); count++ {
	buffer.Write(scanner.Bytes())
	buffer.WriteByte('\n')
}
if err := scanner.Err(); err != nil {
	panic(err)
}

df := dataframe.ReadCSV(bytes.NewReader(buffer.Bytes()))
// gota reports parse failures through the Err field instead of a return
// value, so check it before trusting the printed frame.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

[100x10] DataFrame

    \ufeffSURVEY_ID SURVEY_SPECIES_ID SPECIES_ID SPECIES_RUN_ID RUN_NUMBER ...
 0: 176011          758857            111        1007731        2          ...
 1: 176011          758857            111        1007731        2          ...
 2: 176011          758857            111        1007731        2          ...
 3: 176011          758857            111        1007730        1          ...
 4: 176011          758857            111        1007730        1          ...
 5: 176011          758857            111        1007730        1          ...
 6: 176011          758857            111        1007730        1          ...
 7: 176011          758857            111        1007730        1          ...
 8: 176011          758857            111        1007730        1          ...
 9: 176011          758857            111        1007730        1          ...
    ...             ...               ...        ...            ...        ...
    <int>           <int>       

In [12]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the whole banded-measurements CSV into a gota dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Banded_Measurements.csv")
if err != nil {
	panic(err)
}
// The file is only read, so ignoring the Close error is safe here.
defer f.Close()

df := dataframe.ReadCSV(f)
// gota reports parse failures through the Err field instead of a return
// value, so check it before trusting the printed frame.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

[198485x10] DataFrame

    \ufeffSURVEY_ID SURVEY_SPECIES_ID SPECIES_ID SPECIES_RUN_ID RUN_NUMBER ...
 0: 144220          617874            283        818267         1          ...
 1: 144220          617874            283        818267         1          ...
 2: 144220          617874            283        818267         1          ...
 3: 144220          617875            286        818269         1          ...
 4: 144220          617875            286        818269         1          ...
 5: 144220          617876            153        818271         1          ...
 6: 144220          617876            153        818271         1          ...
 7: 144220          617876            153        818271         1          ...
 8: 144220          617876            153        818271         1          ...
 9: 144220          617876            153        818271         1          ...
    ...             ...               ...        ...            ...        ...
    <int>           <int>    

In [13]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the whole bulk-measurements CSV into a gota dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Bulk_Measurements.csv")
if err != nil {
	panic(err)
}
// The file is only read, so ignoring the Close error is safe here.
defer f.Close()

df := dataframe.ReadCSV(f)
// gota reports parse failures through the Err field instead of a return
// value, so check it before trusting the printed frame.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

[75807x11] DataFrame

    \ufeffSURVEY_ID SURVEY_SPECIES_ID SPECIES_ID SPECIES_RUN_ID RUN_NUMBER ...
 0: 144220          617878            193        818275         1          ...
 1: 144220          617890            242        818292         1          ...
 2: 144220          617890            242        818293         2          ...
 3: 29319           99086             194        94809          1          ...
 4: 29319           99090             242        94816          1          ...
 5: 29319           99090             242        94817          2          ...
 6: 29323           99137             242        94899          1          ...
 7: 29323           99137             242        94900          2          ...
 8: 29326           99151             242        94923          1          ...
 9: 29342           99286             182        95141          1          ...
    ...             ...               ...        ...            ...        ...
    <int>           <int>     