In [2]:
import pandas as pd
import numpy as np

In [3]:

# Read the biomass table and normalise its header in one chain:
#   1. strip surrounding whitespace from every column name;
#   2. rename a capitalised "Year" column to "year" (rename() ignores mapping
#      keys that are not present, so a missing "Year" is simply a no-op).
biomass_df = (
    pd.read_csv("biomass.csv")
    .rename(columns=str.strip)
    .rename(columns={"Year": "year"})
)

biomass_df.head()

Unnamed: 0,country,year,biomass_relative_to
0,Atlantic halibut Gulf of Maine / Georges Bank,1800,1.979.592
1,Atlantic halibut Gulf of Maine / Georges Bank,1801,1.979.592
2,Atlantic halibut Gulf of Maine / Georges Bank,1802,1.979.592
3,Atlantic halibut Gulf of Maine / Georges Bank,1803,1.979.592
4,Atlantic halibut Gulf of Maine / Georges Bank,1804,1.977.551


In [4]:

# Seed NumPy's global RNG so the synthetic values repeat on a fresh
# top-to-bottom run of the notebook.
np.random.seed(42)

# Parameters of the synthetic SST model: a constant baseline, a per-year
# linear increase, and the standard deviation of the Gaussian noise.
base_temp, warming_trend, noise_std = 26, 0.02, 0.5

# Earliest year present in the data; year offsets are measured from it.
min_year = biomass_df["year"].min()

# One fixed random offset per distinct value of the "country" column, drawn
# uniformly between -1.5 and 1.5 in order of first appearance.
country_variation = {}
for label in biomass_df["country"].unique():
    country_variation[label] = np.random.uniform(-1.5, 1.5)

country_variation

{'Atlantic halibut Gulf of Maine / Georges Bank': -0.3763796434579125,
 'Red snapper Gulf of Mexico': 1.3521429192297485,
 'English sole Pacific Coast': 0.6959818254342154,
 'Petrale sole Pacific Coast': 0.2959754525911098,
 'Spanish mackerel Gulf of Mexico': -1.0319440786726903,
 'Pacific halibut North Pacific': -1.0320164389913922,
 'American lobster Southern Gulf of St. Lawrence': -1.3257491634954017,
 'Black rockfish Oregon Coast': 1.0985284373248057,
 'Canary rockfish Pacific Coast': 0.3033450352296265,
 'Chilipepper Southern Pacific Coast': 0.6242177333881367,
 'Sharpchin rockfish Pacific Coast': -1.4382465171125927,
 'Yellowtail rockfish Northern Pacific Coast': 1.409729556485983,
 'American lobster Lobster Fishing Area 34': 0.997327922401265,
 'American lobster Lobster Fishing Areas 35-38': -0.8629826679651715,
 'Rex sole Pacific Coast': -0.9545250983786981,
 'New Zealand snapper New Zealand SNA 1 Bay of Plenty and Hauraki Gulf': -0.9497864704396985,
 'New Zealand snapper New Z

In [5]:

# Build the synthetic SST column with whole-column operations instead of a
# Python-level iterrows() loop:
#   SST = base_temp + (year - min_year) * warming_trend
#         + per-"country" offset + Gaussian noise, rounded to 2 decimals.
year_effect = (biomass_df["year"] - min_year) * warming_trend

# Look up each row's offset; country_variation was keyed from this same column.
country_effect = biomass_df["country"].map(country_variation)

# One noise value per row, drawn from NumPy's global RNG in row order.
noise = np.random.normal(0, noise_std, size=len(biomass_df))

# Terms are added in the same left-to-right order as the scalar formula.
sst_values = (base_temp + year_effect + country_effect + noise).round(2)

# Add SST column
biomass_df["SST"] = sst_values

biomass_df.head()

Unnamed: 0,country,year,biomass_relative_to,SST
0,Atlantic halibut Gulf of Maine / Georges Bank,1800,1.979.592,27.11
1,Atlantic halibut Gulf of Maine / Georges Bank,1801,1.979.592,25.6
2,Atlantic halibut Gulf of Maine / Georges Bank,1802,1.979.592,25.2
3,Atlantic halibut Gulf of Maine / Georges Bank,1803,1.979.592,26.06
4,Atlantic halibut Gulf of Maine / Georges Bank,1804,1.977.551,25.14


In [6]:

# Keep only the identifying columns plus the synthetic SST for export.
sst_columns = ["country", "year", "SST"]
sst_df = biomass_df.loc[:, sst_columns]

sst_df.head()

Unnamed: 0,country,year,SST
0,Atlantic halibut Gulf of Maine / Georges Bank,1800,27.11
1,Atlantic halibut Gulf of Maine / Georges Bank,1801,25.6
2,Atlantic halibut Gulf of Maine / Georges Bank,1802,25.2
3,Atlantic halibut Gulf of Maine / Georges Bank,1803,26.06
4,Atlantic halibut Gulf of Maine / Georges Bank,1804,25.14


In [8]:

# Write the SST table to disk without the DataFrame index, then confirm.
sst_csv_path = "synthetic_sst_with_country_year.csv"
sst_df.to_csv(sst_csv_path, index=False)

print("CSV file generated successfully!")

CSV file generated successfully!
