In [None]:
! pip install snowflake-ml-python -U

Collecting snowflake-ml-python
[?25l  Downloading https://files.pythonhosted.org/packages/05/1f/738b02586c868aed616def737786b5849fe7b7475cfc6e3ee2f1c6badacc/snowflake_ml_python-1.0.8-py3-none-any.whl (1.7MB)
[K     |████████████████████████████████| 1.7MB 8.1MB/s eta 0:00:01
[?25hCollecting cloudpickle
  Using cached https://files.pythonhosted.org/packages/15/80/44286939ca215e88fa827b2aeb6fa3fd2b4a7af322485c7170d6f9fd96e0/cloudpickle-2.2.1-py3-none-any.whl


In [24]:
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.xgboost import XGBClassifier
from snowflake.ml.modeling.linear_model import SGDClassifier
from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder, OneHotEncoder
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
from snowflake.snowpark import Session
import configparser
def get_session(config_path='/notebooks/notebooks/config.ini'):
    """Create a Snowpark session from the ``[Credentials]`` section of an INI file.

    Args:
        config_path: Path to the credential file. Defaults to the location
            this notebook originally read from, so existing ``get_session()``
            calls keep working unchanged.

    Returns:
        The object returned by ``Session.builder.configs(...).create()``.

    Raises:
        FileNotFoundError: If ``config_path`` could not be read.
        KeyError: If the ``[Credentials]`` section, or one of the required
            keys inside it, is missing.
    """
    parser = configparser.ConfigParser()
    # ConfigParser.read() silently skips unreadable files and returns the list
    # of files it actually parsed; an empty list means nothing was loaded.
    if not parser.read(config_path):
        raise FileNotFoundError(
            f"Could not read Snowflake credential file: {config_path}"
        )

    credentials = parser['Credentials']
    required_keys = (
        'user', 'password', 'account', 'warehouse', 'database', 'schema', 'role',
    )
    connection_params = {key: credentials[key] for key in required_keys}

    return Session.builder.configs(connection_params).create()



In [3]:
# Open the Snowpark session that the following cells use to read the table.
session = get_session()

# Model Building
### Load the final data from the Snowflake table

In [4]:
# Name of the table that is loaded in the next cell.
table_name = 'predictive_maintenance_final'

In [5]:
# Reference the source table and discard its "ROW" column.
sf_df = (
    session
    .table(table_name)
    .drop("ROW")
)

In [6]:
# Preview the first three rows of the loaded table.
sf_df.show(n=3)

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FAILURE"  |"METRIC5"  |"METRIC6"  |"DAY"  |"DAY_WEEK"  |"SECTOR"  |"OP_PERIOD"  |"DEV_RECONNECTED"  |"MNW1"    |"DIF_M6"  |"DIF_M5"  |"LOG_M2"           |"LOG_M3"            |"LOG_M4"            |"LOG_M7"  |"LOG_M9"           |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|0          |6          |407438     |1      |3           |S1F0      |1            |0                  |26953834  |0         |0         |2.079441541679836  |0.0                 |3.9702919135521215  |0.0       |2.079441541679836  |
|0          |6          |403174     |1      |3           |S1F0      |1          

In [8]:
# Convert the Snowpark DataFrame into a local pandas DataFrame.
# NOTE(review): in the cells visible here `a` is only used to list the columns
# and preview a few rows — confirm whether pulling the full table is needed.
a = sf_df.to_pandas()

In [11]:
# Show the column names as a plain Python list.
a.columns.tolist()

['FAILURE',
 'METRIC5',
 'METRIC6',
 'DAY',
 'DAY_WEEK',
 'SECTOR',
 'OP_PERIOD',
 'DEV_RECONNECTED',
 'MNW1',
 'DIF_M6',
 'DIF_M5',
 'LOG_M2',
 'LOG_M3',
 'LOG_M4',
 'LOG_M7',
 'LOG_M9']

In [10]:
# Preview the first five rows of the pandas copy.
a.head(n=5)

Unnamed: 0,FAILURE,METRIC5,METRIC6,DAY,DAY_WEEK,SECTOR,OP_PERIOD,DEV_RECONNECTED,MNW1,DIF_M6,DIF_M5,LOG_M2,LOG_M3,LOG_M4,LOG_M7,LOG_M9
0,0,6,407438,1,3,S1F0,1,0,26953834,0,0,2.079442,0.0,3.970292,0.0,2.079442
1,0,6,403174,1,3,S1F0,1,0,7671335,0,0,0.0,1.386294,0.0,0.0,0.0
2,0,12,237394,1,3,S1F0,1,0,21661996,0,0,0.0,0.0,0.0,0.0,0.0
3,0,6,410186,1,3,S1F0,1,0,9961753,0,0,0.0,0.0,0.0,0.0,0.0
4,0,15,313173,1,3,S1F0,1,0,16996310,0,0,0.0,0.0,0.0,0.0,1.386294


In [13]:
# Column-name groups referenced by the pipeline definition.

# Raw categorical column(s) and the name(s) used for their encoded output.
CATEGORICAL_COLUMNS = ["SECTOR"]
CATEGORICAL_COLUMNS_OE = ["SECTOR_OE"]

# Numeric feature columns.
# NOTE(review): DEV_RECONNECTED is present in the table but not listed here —
# confirm the omission is intentional.
NUMERICAL_COLUMNS = [
    "METRIC5",
    "METRIC6",
    "DAY",
    "DAY_WEEK",
    "OP_PERIOD",
    "MNW1",
    "DIF_M6",
    "DIF_M5",
    "LOG_M2",
    "LOG_M3",
    "LOG_M4",
    "LOG_M7",
    "LOG_M9",
]

# Target column and the column the model writes its predictions to.
LABEL_COLUMNS = ["FAILURE"]
OUTPUT_COLUMNS = ["PREDICTION"]

In [15]:
# 80/20 train/test split. The fixed seed makes the split repeatable, so
# a Restart & Run All produces the same train and test rows every time.
train_df, test_df = sf_df.random_split([0.8, 0.2], seed=42)

In [16]:
# Preview the first three rows of the training split.
train_df.show(n=3)

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FAILURE"  |"METRIC5"  |"METRIC6"  |"DAY"  |"DAY_WEEK"  |"SECTOR"  |"OP_PERIOD"  |"DEV_RECONNECTED"  |"MNW1"    |"DIF_M6"  |"DIF_M5"  |"LOG_M2"           |"LOG_M3"  |"LOG_M4"            |"LOG_M7"  |"LOG_M9"           |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|0          |6          |407438     |1      |3           |S1F0      |1            |0                  |26953834  |0         |0         |2.079441541679836  |0.0       |3.9702919135521215  |0.0       |2.079441541679836  |
|0          |12         |237394     |1      |3           |S1F0      |1            |0                  |21661996  |0     

Create a pipeline with preprocessing steps and model definition

In [25]:
# Two-step pipeline: encode the categorical column, then fit the classifier on
# the encoded column together with the numeric features.
#
# The previous version encoded SECTOR but passed only NUMERICAL_COLUMNS to the
# classifier, so the encoded feature never reached the model.
#
# NOTE(review): ordinal codes impose an artificial order on SECTOR, and the
# numeric features are not scaled (MinMaxScaler is imported but unused) —
# revisit both choices for a linear model such as SGDClassifier.
pipeline = Pipeline(
    steps=[
        (
            "OE",
            # OrdinalEncoder writes one output column per input column, so the
            # encoded feature has the fixed name given in CATEGORICAL_COLUMNS_OE
            # and can be listed in the classifier's input_cols below.
            OrdinalEncoder(
                input_cols=CATEGORICAL_COLUMNS,
                output_cols=CATEGORICAL_COLUMNS_OE,
            ),
        ),
        (
            "classification",
            SGDClassifier(
                input_cols=CATEGORICAL_COLUMNS_OE + NUMERICAL_COLUMNS,
                label_cols=LABEL_COLUMNS,
                output_cols=OUTPUT_COLUMNS,
            ),
        ),
    ]
)

In [26]:
# Fit the pipeline on the training split.
# NOTE(review): the recorded output of this cell is a SnowparkSQLException
# raised inside the fit procedure (`execute() got an unexpected keyword
# argument 'statement_params'`), preceded by warnings that local package
# versions differ from the server's — presumably a version mismatch; confirm
# and re-run before relying on the cells that follow.
pipeline.fit(train_df)

  success, nchunks, nrows, ci_output = write_pandas(
The version of package 'snowflake-snowpark-python' in the local environment is 1.8.0, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'numpy' in the local environment is 1.24.4, which does not fit the criteria for the requirement 'numpy==1.24.3'. Your UDF might not work when the package version is different between the server and your local environment.


SnowparkSQLException: (1304): 01af1b5b-0503-cad2-0072-f3030bf44a4a: 100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/home/udf/32355341555882050/udf_py_2105021860.zip/udf_py_2105021860.py", line 81, in compute
    return func(session,arg1,arg2,arg3,arg4,arg5,arg6)
  File "/tmp/pip_packages/snowflake/ml/modeling/linear_model/sgd_classifier.py", line 539, in fit_wrapper_sproc
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/telemetry.py", line 76, in wrap
    result = func(*args, **kwargs)
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/dataframe.py", line 492, in to_pandas
    result = self._session._conn.execute(self._plan, to_pandas=True, **kwargs)
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 354, in execute
    result_set, result_meta = self.get_result_set(
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py", line 84, in wrap
    return func(*args, **kwargs)
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 390, in get_result_set
    result = self.run_query(
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 102, in wrap
    raise ex
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 95, in wrap
    return func(*args, **kwargs)
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 311, in run_query
    raise ex
  File "/usr/lib/python_udf/47551a7bfd00f4b7db3a8f702d9cb264485e91e604075ad17c0dfcb8e138b05a/lib/python3.8/site-packages/snowflake/snowpark/_internal/server_connection.py", line 304, in run_query
    results_cursor = self._cursor.execute(query, **kwargs)
TypeError: execute() got an unexpected keyword argument 'statement_params'
 in function SNOWML_FIT_55CD6BB6_5A5A_476A_BF20_47C785096CB9 with handler udf_py_2105021860.compute

In [None]:
# Run the pipeline on the held-out split and keep the returned object.
# NOTE(review): `result` is not inspected or scored in the cells visible here.
result = pipeline.predict(test_df)