In [3]:
# !pip install -U -q imblearn
import os, types
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn import metrics

### Authenticating Google Drive and importing neccessary dataset

In [4]:
data = pd.read_csv('heart_2020_oversampled.csv')
data.head()

Unnamed: 0,BMI,PhysicalHealth,MentalHealth,SleepTime,Smoking,AlcoholDrinking,Stroke,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,Asthma,KidneyDisease,SkinCancer,HeartDisease
0,16.6,3.0,30.0,5.0,Yes,No,No,No,Female,55-59,White,Yes,Yes,Very good,Yes,No,Yes,No
1,20.34,0.0,0.0,7.0,No,No,Yes,No,Female,80 or older,White,No,Yes,Very good,No,No,No,No
2,26.58,20.0,30.0,8.0,Yes,No,No,No,Male,65-69,White,Yes,Yes,Fair,Yes,No,No,No
3,24.21,0.0,0.0,6.0,No,No,No,No,Female,75-79,White,No,No,Good,No,No,Yes,No
4,23.71,28.0,0.0,8.0,No,No,No,Yes,Female,40-44,White,No,Yes,Very good,No,No,No,No


In [5]:
X, y = data.iloc[:, :-1],data.iloc[:, -1]
y = y.map({'No':0, 'Yes':1})
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [6]:
numeric_columns = data._get_numeric_data().columns
categorical_columns = data.select_dtypes(include=['object']).columns[:-1]

In [7]:
categories_list = [['No', 'Yes'], ['No', 'Yes'], ['No', 'Yes'], ['No', 'Yes'], ['Male', 'Female'], ['18-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69', '70-74', '75-79', '80 or older'],
['White', 'Black', 'Asian', 'American Indian/Alaskan Native', 'Hispanic', 'Other'], ['No', 'No, borderline diabetes', 'Yes (during pregnancy)', 'Yes'],
['No', 'Yes'], ['Poor', 'Fair', 'Good', 'Very good', 'Excellent'], ['No', 'Yes'], ['No', 'Yes'], ['No', 'Yes']]

In [8]:
preprocessing = ColumnTransformer(
    [('norm', StandardScaler(), numeric_columns),
    ('categor', OrdinalEncoder(categories=categories_list), categorical_columns)])

classifier = RandomForestClassifier()

model = Pipeline([('preprocessing', preprocessing),('classifier', classifier)])

In [None]:
pipe = model.fit(X, y)

In [11]:
!pip install ibm_watson_machine_learning --upgrade
from ibm_watson_machine_learning import APIClient

wml_credentials = {
    "url": "https://us-south.ml.cloud.ibm.com",
    "apikey": ""
}

client = APIClient(wml_credentials)

In [12]:
def guide_from_space_name(client, space_name):
    space = client.spaces.get_details()
    return (next(item for item in space['resources'] if item['entity']['name']== space_name)['metadata']['id'])

space_uid = guide_from_space_name(client, 'heart-disease-risk')
print('Space UID:', space_uid)

Space UID: 5618c926-9b78-4d9a-9b7c-a46bc645cb2d


In [13]:
client.set.default_space(space_uid)

'SUCCESS'

In [17]:
client.software_specifications.list(limit=100)

-------------------------------  ------------------------------------  ----
NAME                             ASSET_ID                              TYPE
default_py3.6                    0062b8c9-8b7d-44a0-a9b9-46c416adcbd9  base
pytorch-onnx_1.3-py3.7-edt       069ea134-3346-5748-b513-49120e15d288  base
scikit-learn_0.20-py3.6          09c5a1d0-9c1e-4473-a344-eb7b665ff687  base
spark-mllib_3.0-scala_2.12       09f4cff0-90a7-5899-b9ed-1ef348aebdee  base
pytorch-onnx_rt22.1-py3.9        0b848dd4-e681-5599-be41-b5f6fccc6471  base
ai-function_0.1-py3.6            0cdb0f1e-5376-4f4d-92dd-da3b69aa9bda  base
shiny-r3.6                       0e6e79df-875e-4f24-8ae9-62dcc2148306  base
tensorflow_2.4-py3.7-horovod     1092590a-307d-563d-9b62-4eb7d64b3f22  base
pytorch_1.1-py3.6                10ac12d6-6b30-4ccd-8392-3e922c096a92  base
tensorflow_1.15-py3.6-ddl        111e41b3-de2d-5422-a4d6-bf776828c4b7  base
runtime-22.1-py3.9               12b83a17-24d8-5082-900f-0ab31fbfd3cb  base
scikit-learn

In [20]:
software_spec_uid = client.software_specifications.get_uid_by_name("runtime-22.1-py3.9")
software_spec_uid

'12b83a17-24d8-5082-900f-0ab31fbfd3cb'

In [None]:
scikit_model_details = client.repository.store_model(model=model, meta_props={
    client.repository.ModelMetaNames.NAME: "decisionTree",
    client.repository.ModelMetaNames.TYPE: "scikit-learn_1.0",
    client.repository.ModelMetaNames.SOFTWARE_SPEC_UID: software_spec_uid
})

model_id_2 = client.repository.get_model_id(scikit_model_details)
model_id_2