In [None]:
# Environment setup: install the `uv` installer with pip, then use `uv pip`
# for every remaining package.
!pip install uv
# Project dependencies listed in requirements.txt.
!uv pip install -r  requirements.txt
# NOTE(review): streamlit, ipywidgets and shap are installed without a version
# pin, so a fresh run may resolve different versions — consider pinning them.
!uv pip install streamlit
# -U upgrades ipywidgets if an older version is already installed.
!uv pip install -U ipywidgets
# snowflake-ml-python is pinned to 1.19.0; shap is not pinned.
!uv pip install shap snowflake-ml-python==1.19.0

In [None]:
%%sql -r dataframe_1
-- Sanity check: report the role the current session is running under.
select current_role();

In [None]:
# Notebook-wide configuration.
# Bump VERSION_NUM whenever you want to version your features, models, etc.
VERSION_NUM = "0"

# Snowflake context: DB and SCHEMA are applied to the Snowpark session below.
DB = "EY_DATA_CHALLENGE"
SCHEMA = "DATA_SCHEMA"
# NOTE(review): ROLE is not referenced in the visible part of the notebook.
ROLE = "ACCOUNTADMIN"

In [None]:
import pandas as pd
import numpy as np
import sklearn
import math
import pickle
import shap
from datetime import datetime
import streamlit as st
from xgboost import XGBClassifier

# Snowpark ML
from snowflake.ml.registry import Registry
from snowflake.ml.modeling.tune import get_tuner_context
from snowflake.ml.modeling import tune
from entities import search_algorithm

#Snowflake feature store
from snowflake.ml.feature_store import FeatureStore, FeatureView, Entity, CreationMode

# Snowpark session
from snowflake.snowpark import DataFrame
from snowflake.snowpark.functions import col, to_timestamp, min, max, month, dayofweek, dayofyear, avg, date_add, sql_expr,year,quarter,date_trunc
from snowflake.snowpark.types import IntegerType
from snowflake.snowpark import Window

# Set up the Snowpark session: fetch the currently active session and point it
# at the database and schema chosen in the configuration cell (DB, SCHEMA).
from snowflake.snowpark.context import get_active_session
session = get_active_session()

session.use_database(DB)
session.use_schema(SCHEMA)
# NOTE(review): ROLE is defined in the configuration cell but never applied
# here — confirm whether a role switch is intended.
# Bare last expression so the notebook displays the session object.
session

In [None]:
# Reference the LANDSAT_TRAINING table through the Snowpark session and
# preview its first five rows.
print("Reading table data...")
df_waterquality = session.table("LANDSAT_TRAINING")
df_waterquality.show(n=5)

# Earliest and latest SAMPLE_DATE in the table; left as the cell's last
# expression so the notebook displays the result.
df_waterquality.select(
    min(col("SAMPLE_DATE")),
    max(col("SAMPLE_DATE")),
)



### Feature Engineering with Snowpark APIs

In [None]:
# Mapping of feature name -> Snowpark column expression that computes it.
# Insertion order is: MONTH, QUARTER, YEAR, QUARTER_DATE.
feature_eng_dict = {
    # Timestamp features derived from SAMPLE_DATE
    "MONTH": month("SAMPLE_DATE"),
    "QUARTER": quarter("SAMPLE_DATE"),
    "YEAR": year("SAMPLE_DATE"),
    # SAMPLE_DATE truncated to the start of its quarter
    "QUARTER_DATE": date_trunc("quarter", col("SAMPLE_DATE")),
}

##Spectral Indexes

#NDMI (Normalized Difference Moisture Index) - Useful for detecting wetland conditions affecting water quality
#	Formula: (NIR - SWIR) / (NIR + SWIR)
#	Measures water content in vegetation and soil moisture

#MNDWI (Modified Normalized Difference Water Index) - Better for turbid water identification
#	Formula: (Green - SWIR) / (Green + SWIR)
#	Enhances water body detection, suppresses soil/vegetation noise

#1. NDWI: (Green - NIR) / (Green + NIR) - Water body delineation
#2. NDTI: (Red - Green) / (Red + Green) - Turbidity measurement

## EC Indexes 
#3. Salinity Index: (Red - NIR) / (Red + NIR)
#4. Band ratios: Blue/Red, SWIR/NIR combinations

## Useful Indices for DRP Prediction:

#5. Chlorophyll Index (CI): (NIR / Red) - 1, indicates algae from phosphorus
# The previous expression called col() with no column name (a TypeError when
# the cell runs) and omitted the "- 1" term from the formula above.
# NOTE(review): assumes the red band column is named "RED", mirroring "NIR" —
# confirm against the LANDSAT_TRAINING schema.
# NOTE(review): rows where RED is 0 would divide by zero when the query runs —
# confirm whether such rows exist and need guarding.
feature_eng_dict["CI"] = (col("NIR") / col("RED")) - 1


