In [None]:
import sys
# Install prefect into the current Jupyter kernel environment
!{sys.executable} -m pip install prefect

In [None]:
import sys
import os
import pandas as pd

# --- Setup Project Environment ---

# Absolute location of the project checkout. Defaults to the original developer's
# path; set the CA_BIOSITING_ROOT environment variable to run this notebook from a
# different machine without editing the cell.
project_root = os.environ.get('CA_BIOSITING_ROOT', '/Users/pjsmitty301/ca-biositing')
etl_project_dir = os.path.join(project_root, 'etl_merge/my_etl_project')
src_path = os.path.join(etl_project_dir, 'src')

# Add the project's 'src' directory to the Python path for imports
# (guarded so re-running the cell does not add duplicate entries).
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# --- Run Your Code in the Correct Context ---

# Temporarily change the current working directory.
# Per the original author, this lets the extract code find 'credentials.json'.
original_dir = os.getcwd()
os.chdir(etl_project_dir)

try:
    print(f"Temporarily changed CWD to: {os.getcwd()}")
    print("-" * 30)

    # Import and run the extract function.
    # NOTE(review): the original author expected the @task decorator to be a no-op
    # when the function is called directly — confirm against the installed Prefect version.
    from etl.extract.basic_sample_info import extract_basic_sample_info

    print("Attempting to extract data...")
    df = extract_basic_sample_info()
finally:
    # Always restore the original directory, even if the import or the extraction
    # raised; otherwise every later cell would silently run inside etl_project_dir.
    os.chdir(original_dir)
    print(f"\nRestored CWD to: {os.getcwd()}")

# --- Display Results ---
if df is not None:
    print("\nData extracted successfully! Displaying the first 5 rows:")
    display(df.head())
else:
    print("\nExtraction failed. Please double-check the console output.")

In [None]:
# `df` holds whatever extract_basic_sample_info() returned in the setup cell above.
# For example, to see the columns, run: df.columns
# NOTE(review): a bare `df` renders the whole frame (pandas truncates long output);
# df.head() is usually enough for a quick look.
df

In [None]:
# Keep only the material-type / primary-crop columns, then show the distinct pairs.
# The de-duplicated frame is only displayed; biomass_df1 itself keeps all rows.
biomass_columns = ["Material_type", "Primary_crop"]
biomass_df1 = df[biomass_columns]
biomass_df1.drop_duplicates()

In [None]:
import pandas as pd
from sqlmodel import Session, select
from database import engine  # engine object provided by the project's `database` module
from models.biomass import PrimaryProduct  # model class used as the SELECT target

print("Querying the database for primary products...")

# Build the SELECT over the PrimaryProduct model up front; constructing the
# statement does not require an open session.
statement = select(PrimaryProduct)

# Open a session only for the duration of the read and hand its underlying
# connection to pandas, which executes the statement and returns a DataFrame.
with Session(engine) as session:
    products_df = pd.read_sql(statement, session.connection())

print("Successfully loaded data from the 'primary_product' table.")
display(products_df.head())

In [None]:
from etl.extract.basic_sample_info import extract_basic_sample_info

# Second, standalone run of the same extractor, stored under a separate name (df1).
# NOTE(review): unlike the setup cell, this call does not chdir into the ETL project
# directory first; if the extractor locates 'credentials.json' relative to the CWD
# (as the setup cell's comment suggests), this may fail — confirm.
df1 = extract_basic_sample_info()

In [None]:
# Display whatever extract_basic_sample_info() returned in the previous cell.
df1

In [None]:
# Distinct values of the "Material_type" column of df1 (stored, not displayed).
# NOTE(review): despite the `df2` name, Series.unique() returns an array rather than
# a DataFrame (assuming df1 is a pandas DataFrame) — a more descriptive name would help.
df2 = df1["Material_type"].unique()

In [None]:
from etl.extract.experiments import extract_experiments

# NOTE: this rebinds df1 — from here on it holds the extract_experiments() result,
# no longer the basic-sample-info result assigned in the earlier cell.
df1 = extract_experiments()

In [None]:
# Display the extract_experiments() result currently bound to df1.
df1

In [None]:
# Show the distinct (Analysis_type, Analysis_abbrev) combinations present in df1.
analysis_columns = ["Analysis_type", "Analysis_abbrev"]
df1[analysis_columns].drop_duplicates()

In [None]:
# Leftover scratch cell: the bare name `test` is not defined anywhere in this notebook
# and raised NameError on Restart & Run All, so the expression has been removed.