# NFPD data exploration

> These are Go notebooks. To run them you need the GoNB Jupyter kernel, which can be installed from https://github.com/janpfeifer/gonb

Note also that, for local package development, you can put `!*go mod edit -replace "github.com/umbralcalc/anglersim=/path/to/anglersim"` at the top of any cell.

In this notebook we're exploring the data from the NFPD bulk download facility.

In [None]:
!*go mod edit -replace "github.com/umbralcalc/anglersim=/home/robert/Code/anglersim"

In [None]:
import (
    "bufio"
	"encoding/csv"
	"fmt"
	"os"
	"strings"
	"time"

	"github.com/go-gota/gota/dataframe"
	"github.com/go-gota/gota/series"
	"github.com/umbralcalc/anglersim/pkg/nfpd"
	"github.com/umbralcalc/stochadex/pkg/analysis"
	gonb_echarts "github.com/janpfeifer/gonb-echarts"
)

// flatten encodes each record as a single CSV line and returns the lines in
// the same order as records.
//
// Fields are written with encoding/csv, so a field that contains a comma, a
// double quote, a newline or leading whitespace is quoted and escaped. Joining
// the returned lines with "\n" therefore yields CSV that parses back into the
// original records. Rows without such characters are encoded exactly as a
// plain comma join would encode them.
func flatten(records [][]string) []string {
	lines := make([]string, len(records))
	var sb strings.Builder
	w := csv.NewWriter(&sb)
	for i, row := range records {
		sb.Reset()
		// Write can only fail on an invalid delimiter or a failing
		// destination; the default comma and strings.Builder rule out both.
		_ = w.Write(row)
		w.Flush()
		// The writer terminates every record with "\n"; callers add their own
		// separators, so drop it.
		lines[i] = strings.TrimSuffix(sb.String(), "\n")
	}
	return lines
}

%%

// Path of the fish-counts CSV, read twice below: once for the site names and
// once for the row scan.
const countsPath = "../dat/FW_Fish_Counts.csv"

// Collect the site names so that one of them can be selected further down.
// NOTE(review): assumes the second return value is an error — confirm against
// pkg/nfpd. It was previously discarded, which hid load failures until the
// later index into uniqueSites.
uniqueSites, err := nfpd.GetUniqueSiteNames(countsPath)
if err != nil {
	panic(err)
}

// Open the same file for a line-by-line scan; the handle is released when the
// cell's main function returns.
file, err := os.Open(countsPath)
if err != nil {
	panic(err)
}
defer file.Close()

scanner := bufio.NewScanner(file)

var headers []string
var filteredRows [][]string

// Read and parse the header. A failed first Scan means either a read error or
// an empty file; report that directly instead of falling through to the
// "SITE_NAME column not found" panic below.
if !scanner.Scan() {
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	panic("FW_Fish_Counts.csv has no header line")
}
headers, err = csv.NewReader(strings.NewReader(scanner.Text())).Read()
if err != nil {
	panic(err)
}
// The header is kept as row 0 so the CSV handed to the dataframe later starts
// with the column names.
filteredRows = append(filteredRows, headers)

// Find index of SITE_NAME column
siteNameIndex := -1
for i, h := range headers {
	if h == "SITE_NAME" {
		siteNameIndex = i
		break
	}
}
if siteNameIndex == -1 {
	panic("SITE_NAME column not found")
}

// Filter rows using bufio.Scanner
siteName := uniqueSites[8]
for scanner.Scan() {
	line := scanner.Text()
	r := csv.NewReader(strings.NewReader(line))
	record, err := r.Read()
	if err != nil {
		continue // skip malformed line
	}
	if record[siteNameIndex] == siteName {
		filteredRows = append(filteredRows, record)
	}
}

if err := scanner.Err(); err != nil {
	panic(err)
}

// Create dataframe from filtered records (header row first), and stop early
// if the CSV could not be loaded rather than operating on a broken frame.
fdf := dataframe.ReadCSV(strings.NewReader(strings.Join(flatten(filteredRows), "\n")))
if fdf.Err != nil {
	panic(fdf.Err)
}

// Convert EVENT_DATE to timestamp: the layout "02/01/2006" is day/month/year,
// and each date is stored as Unix seconds in a float column.
dateCol := fdf.Col("EVENT_DATE")
if dateCol.Err != nil {
	panic(dateCol.Err)
}
eventDates := dateCol.Records()
timestamps := make([]float64, len(eventDates))
for i, d := range eventDates {
	t, err := time.Parse("02/01/2006", d)
	if err != nil {
		panic(err)
	}
	timestamps[i] = float64(t.Unix())
}
fdf = fdf.Mutate(series.New(timestamps, series.Float, "TIMESTAMP"))
if fdf.Err != nil {
	panic(fdf.Err)
}

fmt.Println(fdf)

// With no rows for the site there is nothing to plot and no axis range to
// derive, so fail with an explicit message.
if fdf.Nrow() == 0 {
	panic("no rows found for site " + siteName)
}

// Scatter plot of ALL_RUNS against TIMESTAMP.
// NOTE(review): the last argument is presumably the grouping column, giving
// one series per species — confirm against the stochadex analysis package.
scatter := analysis.NewScatterPlotFromDataFrame(
	&fdf,
	"TIMESTAMP",
	"ALL_RUNS",
	"SPECIES_NAME",
)

// Axis limits come from the data's true minimum and maximum. The rows are not
// sorted in this cell, so the first and last elements are not reliable bounds.
xCol := fdf.Col("TIMESTAMP")
yCol := fdf.Col("ALL_RUNS")

// NOTE(review): charts and opts are not in this cell's import block;
// presumably the go-echarts packages, brought into scope elsewhere — confirm.
scatter.SetGlobalOptions(
	charts.WithTitleOpts(opts.Title{
		Title:  "Site: " + siteName,
		Bottom: "1%",
	}),
	charts.WithYAxisOpts(opts.YAxis{
		Min: yCol.Min(),
		Max: yCol.Max(),
	}),
	charts.WithXAxisOpts(opts.XAxis{
		Min: xCol.Min(),
		Max: xCol.Max(),
	}),
)

gonb_echarts.Display(scatter, "width: 1024px; height:400px; background: white;")

In [None]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the data-types CSV into a dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Data_Types.csv")
if err != nil {
	panic(err)
}
// Release the file handle when the cell's main function returns.
defer f.Close()

df := dataframe.ReadCSV(f)
// A failed parse is recorded on the frame rather than returned, so check it
// explicitly, as is done for the open error.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

In [None]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the sites CSV into a dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Sites.csv")
if err != nil {
	panic(err)
}
// Release the file handle when the cell's main function returns.
defer f.Close()

df := dataframe.ReadCSV(f)
// A failed parse is recorded on the frame rather than returned, so check it
// explicitly, as is done for the open error.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

In [None]:
import (
    "bufio"
	"bytes"
	"fmt"
	"os"

	"github.com/go-gota/gota/dataframe"
)

%%

// Number of data rows, not counting the header, to load for the preview.
const maxRows = 100

f, err := os.Open("../dat/FW_Fish_Individual_Lengths.csv")
if err != nil {
	panic(err)
}
// Release the file handle when the cell's main function returns.
defer f.Close()

scanner := bufio.NewScanner(f)
var buffer bytes.Buffer

// The header line is always copied. If the first Scan fails, separate a real
// read error from a genuinely empty file.
if !scanner.Scan() {
	if err := scanner.Err(); err != nil {
		panic(err)
	}
	panic("Empty file")
}
buffer.Write(scanner.Bytes())
buffer.WriteByte('\n')

// Copy at most maxRows further lines. The count is tested before Scan so no
// line beyond the limit is read.
for count := 0; count < maxRows && scanner.Scan(); count++ {
	buffer.Write(scanner.Bytes())
	buffer.WriteByte('\n')
}
if err := scanner.Err(); err != nil {
	panic(err)
}

// Parse only the buffered prefix of the file.
df := dataframe.ReadCSV(&buffer)
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

In [None]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the banded-measurements CSV into a dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Banded_Measurements.csv")
if err != nil {
	panic(err)
}
// Release the file handle when the cell's main function returns.
defer f.Close()

df := dataframe.ReadCSV(f)
// A failed parse is recorded on the frame rather than returned, so check it
// explicitly, as is done for the open error.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)

In [None]:
import (
    "os"
	"github.com/go-gota/gota/dataframe"
)

%%

// Load the bulk-measurements CSV into a dataframe and print it.
f, err := os.Open("../dat/FW_Fish_Bulk_Measurements.csv")
if err != nil {
	panic(err)
}
// Release the file handle when the cell's main function returns.
defer f.Close()

df := dataframe.ReadCSV(f)
// A failed parse is recorded on the frame rather than returned, so check it
// explicitly, as is done for the open error.
if df.Err != nil {
	panic(df.Err)
}

fmt.Println(df)