In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session
# Handle to the notebook's active Snowpark session; the cells below use it for
# table reads, stage uploads and SQL statements.
session = get_active_session()


In [None]:
# Pull the whole 'disaster' table into a local pandas DataFrame.
df = session.table('disaster').to_pandas()

In [None]:
# Show the column names of the loaded frame.
df.columns

In [None]:
# Remove rows whose TEXT is missing (this mutates df in place). The prompt
# built later concatenates a string with TEXT, which fails on a missing value.
df.dropna(subset=['TEXT'], inplace=True)

Data analysis

In [None]:
# Count how many rows carry each LABEL value to see the class balance.
label_counts = df['LABEL'].value_counts()
print(label_counts)

In [None]:
# Instruction prefix shared by classify(), create_prompt() and tuned_model().
# The same text builds both the fine-tuning prompts and the inference prompts,
# so any edit here affects both.
Prompt = "You are a text classifier model, Given a sentence, classify if it is a disaster or not, Answer 'yes' or 'no' only. the sentence is - "

In [None]:
from snowflake.cortex import Complete
def classify(question):
    """Ask the base 'gemma-7b' model whether ``question`` describes a disaster.

    The shared ``Prompt`` prefix is prepended to ``question`` and the combined
    text is passed to ``Complete``; whatever ``Complete`` returns is handed
    back unchanged.
    """
    return Complete('gemma-7b', Prompt + question)

In [None]:
# Try the base model on a sentence about shopping and airlines, i.e. one that
# has nothing to do with a disaster.
classify("Lets go for shopping. I prefer Quatar, Lithuanza and Air Asia. Which one you like")

### Fine-tuning

In [None]:
def create_prompt(row):
    """Return the full classification prompt for one DataFrame row.

    ``row`` must expose a 'TEXT' entry; its value is appended to the shared
    ``Prompt`` prefix.
    """
    sentence = row['TEXT']
    return Prompt + sentence
    

# Build one prompt per row (row-wise apply) and store it in a new PROMPT column
df['PROMPT'] = df.apply(create_prompt, axis='columns')

# Print the source text next to the prompt generated from it
print(df.loc[:, ['TEXT', 'PROMPT']])

In [None]:
# Fine-tuning sample: the first 500 rows, restricted to the three columns that
# are exported.
#
# The columns are selected by name, in a fixed order, rather than by dropping
# the unwanted ones. The CSV written from df2 is later loaded with COPY INTO
# into table_data(text, label, prompt), which maps fields by position, so the
# order here must be exactly TEXT, LABEL, PROMPT whatever the column layout of
# the source table is. `.copy()` makes df2 independent of df before the LABEL
# column is rewritten.
df2 = df.iloc[:500][['TEXT', 'LABEL', 'PROMPT']].copy()

# Turn the numeric labels into the literal answers the model should produce.
df2['LABEL'] = df2['LABEL'].replace({1: 'yes', 0: 'no'}).astype(str)

In [None]:
# Preview the first rows of the fine-tuning sample.
df2.head()

In [None]:
# Write the sample to a local CSV file, leaving out the DataFrame index.
df2.to_csv(path_or_buf='dataframe.csv', index=False)

In [None]:
# Start from a fresh stage, then upload the exported CSV into it.
session.sql("CREATE OR REPLACE STAGE my_stage").collect()
session.file.put('dataframe.csv', '@my_stage')

# Print every entry returned by LIST so the upload can be checked by eye.
print("Files in the stage:")
files = session.sql("LIST @my_stage").collect()
for staged_entry in files:
    print(staged_entry)

In [None]:
# Recreate the target table. COPY INTO below maps CSV fields to these columns
# by position, so the staged CSV must have its columns in the same order
# (text, label, prompt).
create_table_sql = """
CREATE OR REPLACE TABLE table_data (
    text STRING,
    label STRING,
    prompt STRING
);
"""
session.sql(create_table_sql).collect()

# The file is addressed as dataframe.csv.gz because session.file.put compresses
# uploads by default. ON_ERROR = 'CONTINUE' skips bad rows instead of aborting,
# so the statement can "succeed" while loading fewer rows than expected.
copy_into_sql = """
COPY INTO table_data
FROM @my_stage/dataframe.csv.gz
FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY='"' FIELD_DELIMITER=',' SKIP_HEADER=1)
ON_ERROR = 'CONTINUE';
"""

try:
    copy_results = session.sql(copy_into_sql).collect()
except Exception as e:
    print("Error loading data into 'table_data':")
    print(f"Error Message: {e}")
    # Re-raise after reporting: the table was just recreated empty, so letting
    # the notebook continue would start fine-tuning on no data.
    raise
else:
    # Show what COPY actually did (status, rows parsed/loaded, errors seen)
    # rather than claiming success unconditionally.
    print("COPY INTO 'table_data' finished. Load result:")
    for copy_result in copy_results:
        print(copy_result)

    loaded_row_count = session.table('table_data').count()
    print(f"Rows now in 'table_data': {loaded_row_count}")
    if loaded_row_count == 0:
        print("WARNING: no rows were loaded into 'table_data'; "
              "check the staged file name and the load result above.")

In [None]:
-- Peek at up to 10 rows of the loaded training table.
SELECT *
FROM table_data
LIMIT 10;

In [None]:
-- Start a fine-tuning job that trains 'my_model1' from table_data.
SELECT SNOWFLAKE.CORTEX.FINETUNE(
  'CREATE',                       -- The operation type (CREATE, SHOW, DESCRIBE or CANCEL)
  'my_model1',                -- The name of the new fine-tuned model
  'llama3.1-8b',                    -- The base model you want to fine-tune
  'SELECT prompt AS prompt, label AS completion FROM table_data' -- Training data query: prompt column as the prompt, label column as the completion
);

In [None]:
-- Inspect a single fine-tuning job by its ID.
-- NOTE(review): the ID below is hard-coded, presumably copied from the output
-- of an earlier CREATE run; replace it with the ID of the job to inspect.
SELECT SNOWFLAKE.CORTEX.FINETUNE(
  'DESCRIBE',
  'CortexFineTuningWorkflow_323f8532-dc8d-4068-8a85-943511f335ad'
);

In [None]:
-- Run the FINETUNE 'SHOW' operation to list fine-tuning jobs.
SELECT SNOWFLAKE.CORTEX.FINETUNE('SHOW');

In [None]:
def tuned_model(query):
    """Classify ``query`` with the fine-tuned model 'my_model1'.

    The shared ``Prompt`` prefix is prepended to ``query``, the resulting
    prompt is printed, and the value returned by ``Complete`` is handed back.
    """
    full_prompt = Prompt + query
    print(full_prompt)
    return Complete("my_model1", full_prompt)

In [None]:
# Try the fine-tuned model on a sentence that mentions landslides.
tuned_model("there are landslides in hilly regions")