In [None]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import col
import configparser
import pandas as pd

In [None]:
# Reuse the Snowpark session that is already active in this environment
# rather than constructing a new one by hand.
import snowflake
from snowflake.snowpark.context import get_active_session
# Shared session handle; the cells below run all SQL and table operations through it.
session = get_active_session()

In [None]:
# Provision the warehouse, database and schema this notebook relies on.
# Each statement is guarded by IF NOT EXISTS, and they run in dependency
# order (database before schema).
prerequisite_ddl = (
    "CREATE WAREHOUSE IF NOT EXISTS COMPUTE_WH WITH WAREHOUSE_SIZE='X-SMALL'",
    "CREATE DATABASE IF NOT EXISTS SNOWPARK_DEFINITIVE_GUIDE",
    "CREATE SCHEMA IF NOT EXISTS SNOWPARK_DEFINITIVE_GUIDE.MY_SCHEMA",
)
for ddl_statement in prerequisite_ddl:
    session.sql(ddl_statement).collect()

# The stage statement stays as the cell's trailing expression so that its
# result remains the value the notebook displays for this cell.
session.sql("CREATE STAGE IF NOT EXISTS SNOWPARK_DEFINITIVE_GUIDE.MY_SCHEMA.MY_STAGE").collect()

In [None]:
# Load the raw bike-sharing CSV into pandas for a first look at its structure.
raw_csv_path = "bike_sharing_demand.csv"
bike_share_df = pd.read_csv(raw_csv_path)

# info() prints a per-column summary; the trailing expression makes the
# notebook display the column index as the cell's output.
bike_share_df.info()
bike_share_df.columns




In [None]:
# Make the target database and schema the defaults for this session.
session.use_database("SNOWPARK_DEFINITIVE_GUIDE")
session.use_schema("MY_SCHEMA")

# Re-read the CSV and upper-case every column name in the same step, so the
# column names follow Snowflake's upper-case identifier convention.
bike_share_df = pd.read_csv("bike_sharing_demand.csv").rename(columns=str.upper)

# Upload the frame as BSD_TRAINING. The options ask Snowpark to create the
# table automatically and to overwrite any existing contents.
write_options = {
    "database": "SNOWPARK_DEFINITIVE_GUIDE",
    "schema": "MY_SCHEMA",
    "overwrite": True,
    "auto_create_table": True,
}
session.write_pandas(bike_share_df, "BSD_TRAINING", **write_options)




In [None]:
# Open the uploaded table as a Snowpark DataFrame and report its dimensions.
df_table = session.table("BSD_TRAINING")
row_count = df_table.count()  # total rows; reused by the null-count cell below
col_count = len(df_table.columns)
# Fixed the misspelled labels ("cont", "cokumn") in the printed summary.
print(f"Table row count:{row_count} \nTable column count:{col_count} ")

# Show five sampled rows as a quick sanity check of the upload.
df_table.sample(n=5).show()

In [None]:
# Inspect the table's structure. A notebook only echoes the final expression
# of a cell, so the schema fields and column names are printed explicitly;
# as bare mid-cell expressions their values were silently discarded.
print(df_table.schema.fields)
print(df_table.columns)

# Preview the table contents.
df_table.show()

# Spot checks: first five rows with a zero wind speed, then the row(s) for one
# specific timestamp. Both filters use col() so the cell references columns
# in a single, consistent way.
df_table.select("windspeed", "datetime").filter(col("WINDSPEED") == 0).show(5)
df_table.select("windspeed", "datetime").filter(col("DATETIME") == "2011-01-01 04:00:00").show()


In [None]:
from snowflake.snowpark.functions import count, col

# Kept for any later cell that inspects the schema; not used in this cell.
data_types = df_table.schema

# count(<column>) ignores NULLs, so row_count minus that count is the number
# of NULLs in the column. All per-column counts are collected with a single
# aggregate query instead of one round trip per column. Aliases are
# positional so they stay valid whatever the source column names look like.
column_names = df_table.columns
non_null_counts = df_table.agg(
    [
        count(col(name)).alias(f"NON_NULL_{position}")
        for position, name in enumerate(column_names)
    ]
).collect()[0]
for column, non_null in zip(column_names, non_null_counts):
    print(f"Null values in {column} is {row_count - non_null}")

# Check how many rows have a wind speed of exactly 0.
print(f"Zero Values in windspeed column is {df_table.filter(col('WINDSPEED') == 0).count()}")



In [None]:
from snowflake.snowpark.functions import iff, avg

# Average wind speed over the whole table.
# NOTE(review): the zero readings that are replaced below are themselves part
# of this average — confirm that is the intended imputation value.
mean_row = df_table.select(avg("windspeed")).collect()[0]
wind_speed_mean = mean_row[0]

# Replace every 0 in the windspeed column with that mean; other columns are
# left untouched because of the subset restriction.
zero_to_mean = {0: wind_speed_mean}
df_train = df_table.replace(zero_to_mean, subset=["windspeed"])
df_train.show()

# Persist the imputed data as the model_data table, overwriting any earlier copy.
overwrite_writer = df_train.write.mode("overwrite")
overwrite_writer.save_as_table("model_data")

In [None]:
# Verify the imputation on df_train. The filters use col("WINDSPEED"), which
# is resolved against df_train itself; the previous version filtered df_train
# with a Column object taken from a different DataFrame (df_table).
zero_count = df_train.filter(col("WINDSPEED") == 0).count()
print(f"Zero Values in windspeed column is {zero_count}")

# Count the rows whose wind speed now equals the mean used for the replacement.
mean_count = df_train.filter(col("WINDSPEED") == wind_speed_mean).count()
print(f"Zero Values in windspeed column equal to the windspeed mean value of {wind_speed_mean} :  {mean_count}")