# Testing cross-project artifacts

In [1]:
!pip install -U xgboost
!pip install scikit-learn



## 1. Artifact-generating function

In [2]:
#mlrun: start-code

import mlrun
import pandas as pd 
import json
import os
from xgboost import XGBClassifier
import pickle
from mlrun.artifacts.base import DirArtifact
from mlrun import MLClientCtx
from sklearn.datasets import load_iris
from io import BytesIO
from sklearn.model_selection import train_test_split

def get_dataitem(context: MLClientCtx,
                             key: str):
    """Return a data item for the artifact logged under ``key`` in this run.

    Scans ``context.artifacts`` and uses the first entry whose metadata key
    equals ``key``.

    :param context: run context whose ``artifacts`` list is searched.
    :param key:     artifact key to look up.

    :returns: ``mlrun.get_dataitem(...)`` for the artifact's target path. For
              ``model`` artifacts the path points at the model file
              (``target_path`` joined with ``model_file``). Returns ``None``
              (after logging a message) when no artifact matches.
    """
    for artifact in context.artifacts:
        if artifact['metadata'].get('key', None) != key:
            continue

        target_path = artifact['spec']['target_path']
        if artifact.get('kind') == 'model':
            # A model's target path is its directory; make sure exactly one
            # separator sits between the directory and the model file name.
            if not target_path.endswith('/'):
                target_path += '/'
            return mlrun.get_dataitem(target_path + artifact['spec']['model_file'])

        # Datasets and every other artifact kind are addressed directly.
        return mlrun.get_dataitem(target_path)

    context.logger.info('Artifact not found')
    return None
    
def log_transactions(context: MLClientCtx,
                    ):
    """Log a plain artifact, a model, a directory and a dataset, reading each back.

    Artifact keys are suffixed with the first two characters of
    ``context.artifact_path``. Every logged item (except the directory) is
    fetched again through ``get_dataitem`` and a summary is written to the
    run log.

    :param context: run context used for logging artifacts and messages.
    """
    # Key suffix and the two derived artifact keys used throughout.
    path_prefix = context.artifact_path[:2]
    encoded_key = 'encoded_iris-' + path_prefix
    dataset_key = 'iris_dataset-' + path_prefix

    # --- plain artifact: iris features serialized as JSON bytes ---
    iris_features = pd.DataFrame(load_iris()['data'])
    encoded_body = iris_features.to_json().encode()
    context.log_artifact(encoded_key, body=encoded_body, local_path=encoded_key + '.csv')

    # Read the artifact back and report its shape.
    encoded_item = get_dataitem(context, encoded_key)
    trans_df = pd.DataFrame(json.loads(encoded_item.get()))
    context.logger.info(f'dataframe shape : {trans_df.shape}')

    # --- model: a tiny classifier, trained only so there is something to serve ---
    # NOTE(review): objective is 'binary:logistic' while the labels come from
    # load_iris — confirm this is intended.
    features, labels = load_iris(return_X_y=True)
    X_train, _, y_train, _ = train_test_split(features, labels, train_size=0.8, test_size=0.2, random_state=0)
    classifier = XGBClassifier(n_estimators=2, max_depth=2, learning_rate=1, objective='binary:logistic')
    classifier.fit(X_train, y_train)
    context.log_model('bst_model', body=pickle.dumps(classifier), model_file='bst.pkl')

    # Fetch the pickled model back and report its class.
    restored_model = pickle.loads(get_dataitem(context, 'bst_model').get())
    context.logger.info(f'logged model : {restored_model.__class__}')

    # --- directory artifact pointing at the run's artifact path ---
    project_dir = DirArtifact(key='my_project', target_path=context.artifact_path)
    context.log_artifact(project_dir)

    # --- dataset: rebuilt from the stored JSON artifact ---
    dataset_frame = pd.DataFrame(json.loads(get_dataitem(context, encoded_key).get()))
    context.log_dataset(key=dataset_key,
                        df=dataset_frame,
                        local_path=dataset_key + '.csv')

    # Read the dataset back and report its shape.
    dataset_shape = get_dataitem(context, dataset_key).as_df().shape
    context.logger.info(f'logged dataset {dataset_shape}')
            
#mlrun: end-code

## 2. Creating projects, then setting and running the artifact-generating function

In [3]:
import mlrun
import os

# Initialize the MLRun project object.
# With user_project=True the recorded output shows the project being created
# as 'cross-project1-normal-user' (user name appended).
project1 = mlrun.get_or_create_project('cross-project1',user_project=True,context=os.path.join(os.getcwd(), 'test-notebooks1'))

# Register the job function. No `func` is given here; presumably MLRun packages
# the code between the `#mlrun: start-code` / `#mlrun: end-code` markers of this
# notebook — TODO confirm.
project1.set_function(name='log_transactions', kind='job', image='mlrun/ml-models', handler='log_transactions')

# Run remotely (local=False); the recorded output shows the job running in a pod.
project1.get_function('log_transactions').run(local=False)

> 2023-03-06 07:44:29,650 [info] Created and saved project cross-project1-normal-user: {'from_template': None, 'overwrite': False, 'context': '/User/test/test-notebooks/project_transfer/test-notebooks1', 'save': True}
> 2023-03-06 07:44:29,651 [info] created project cross-project1 and saved in MLRun DB
> 2023-03-06 07:45:13,654 [info] starting run log-transactions-log-transactions uid=f9bdf9fb1c1342a1986a66e2bae894ee DB=http://mlrun-api:8080
> 2023-03-06 07:45:13,908 [info] Job is running in the background, pod: log-transactions-log-transactions-p67lv
> 2023-03-06 07:45:21,985 [info] dataframe shape : (150, 4)
> 2023-03-06 07:45:22,062 [info] logged model : <class 'xgboost.sklearn.XGBClassifier'>
> 2023-03-06 07:45:22,185 [info] logged dataset (150, 5)
> 2023-03-06 07:45:22,282 [info] To track results use the CLI: {'info_cmd': 'mlrun get run f9bdf9fb1c1342a1986a66e2bae894ee -p cross-project1-normal-user', 'logs_cmd': 'mlrun logs f9bdf9fb1c1342a1986a66e2bae894ee -p cross-project1-normal

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
cross-project1-normal-user,...bae894ee,0,Mar 06 07:45:20,completed,log-transactions-log-transactions,v3io_user=normal-userkind=jobowner=normal-usermlrun/client_version=1.3.0-rc33mlrun/client_python_version=3.9.16host=log-transactions-log-transactions-p67lv,,,,encoded_iris-v3bst_modelmy_projectiris_dataset-v3





> 2023-03-06 07:45:23,343 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f3d9c406970>

In [4]:
# Initialize the second MLRun project object (same setup as project1, with its
# own context directory).
project2 = mlrun.get_or_create_project('cross-project2',user_project=True,context=os.path.join(os.getcwd(), 'test-notebooks2'))

# Register the same handler as a job function in project2.
project2.set_function(name='log_transactions', kind='job', image='mlrun/ml-models', handler='log_transactions')

# Run remotely so project2 gets its own copies of the artifacts.
project2.get_function('log_transactions').run(local=False)

> 2023-03-06 07:45:38,720 [info] Created and saved project cross-project2-normal-user: {'from_template': None, 'overwrite': False, 'context': '/User/test/test-notebooks/project_transfer/test-notebooks2', 'save': True}
> 2023-03-06 07:45:38,721 [info] created project cross-project2 and saved in MLRun DB
> 2023-03-06 07:46:20,823 [info] starting run log-transactions-log-transactions uid=6e2962bf154b433ab7e7b967b9bfa067 DB=http://mlrun-api:8080
> 2023-03-06 07:46:21,061 [info] Job is running in the background, pod: log-transactions-log-transactions-zb6mg
> 2023-03-06 07:46:27,412 [info] dataframe shape : (150, 4)
> 2023-03-06 07:46:27,498 [info] logged model : <class 'xgboost.sklearn.XGBClassifier'>
> 2023-03-06 07:46:27,722 [info] logged dataset (150, 5)
> 2023-03-06 07:46:27,841 [info] To track results use the CLI: {'info_cmd': 'mlrun get run 6e2962bf154b433ab7e7b967b9bfa067 -p cross-project2-normal-user', 'logs_cmd': 'mlrun logs 6e2962bf154b433ab7e7b967b9bfa067 -p cross-project2-normal

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
cross-project2-normal-user,...b9bfa067,0,Mar 06 07:46:25,completed,log-transactions-log-transactions,v3io_user=normal-userkind=jobowner=normal-usermlrun/client_version=1.3.0-rc33mlrun/client_python_version=3.9.16host=log-transactions-log-transactions-zb6mg,,,,encoded_iris-v3bst_modelmy_projectiris_dataset-v3





> 2023-03-06 07:46:30,452 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f3d5d38d400>

## 3. Importing/Exporting artifacts

### 3.1 base artifact

In [None]:
# Exporting project2 artifact.
# The '-v3' key suffix is the first two characters of the artifact path used by
# log_transactions (hardcoded here; it differs on other stores, e.g. '-s3').
project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').export('proj2_encoded_iris.yaml')

# Importing the project2 artifact into project1.
# NOTE(review): the '../' prefix presumably resolves against the project context
# directory ('test-notebooks1'), i.e. back to the working directory where the
# YAML was exported — confirm.
project1.import_artifact('../proj2_encoded_iris.yaml',
                         new_key = 'imported_proj1_encoded_iris')

# Testing the imported artifact (`pd` and `json` come from the function-code cell above)
pd.DataFrame(json.loads(project1.get_artifact('imported_proj1_encoded_iris').to_dataitem().get())).head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


### 3.2 dataset artifact

In [None]:
# Exporting project2 dataset artifact
project2.get_artifact('log-transactions-log-transactions_iris_dataset-v3').export('proj2_dataset.yaml')

# Importing the project2 dataset artifact into project1
project1.import_artifact('../proj2_dataset.yaml',
                         new_key = 'imported_proj1_dataset')

# Testing the imported dataset artifact
project1.get_artifact('imported_proj1_dataset').to_dataitem().as_df()

Unnamed: 0.1,Unnamed: 0,0,1,2,3
0,0,5.1,3.5,1.4,0.2
1,1,4.9,3.0,1.4,0.2
2,2,4.7,3.2,1.3,0.2
3,3,4.6,3.1,1.5,0.2
4,4,5.0,3.6,1.4,0.2
...,...,...,...,...,...
145,145,6.7,3.0,5.2,2.3
146,146,6.3,2.5,5.0,1.9
147,147,6.5,3.0,5.2,2.0
148,148,6.2,3.4,5.4,2.3


### 3.3 model artifact

In [None]:
# Exporting project1 model artifact
project1.get_artifact('log-transactions-log-transactions_bst_model').export('proj1_model.yaml')

# Importing the project1 model artifact into project2
project2.import_artifact('../proj1_model.yaml',
                         new_key = 'imported_proj2_model')


# Testing the imported model artifact.
# NOTE(review): `_get_file_body()` is an underscore-prefixed (private) method and
# `pickle.loads` must only be used on trusted data, as it is here (the model was
# pickled by this notebook's own job).
project2_model = pickle.loads(project2.get_artifact('imported_proj2_model')._get_file_body())
# Predict on the first five rows of project1's encoded iris artifact.
project2_model.predict(pd.DataFrame(json.loads((project1.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())))[:5])

array([0, 0, 0, 0, 0])

### 3.4 dir artifact

In [None]:
# NOTE(review): any failure in this cell is only printed, so the notebook keeps
# running even if the directory-artifact transfer breaks.
try: 
    # Exporting project1 dir artifact
    project1.get_artifact('log-transactions-log-transactions_my_project').export('proj1_dir.yaml')

    # Importing the project1 dir artifact into project2
    project2.import_artifact('../proj1_dir.yaml',
                             new_key = 'imported_proj2_dir')

    # Testing the imported dir artifact
    print(project2.get_artifact('imported_proj2_dir').to_dataitem().listdir())
except Exception as e:
    print(e)

['log-transactions-log-transactions/']


## 4. Importing/Exporting functions

### 4.1 Serving function

#### 4.1.1 Creating serving function, adding model, predicting and deploying

In [None]:
# Setting project1 serving function
project1.set_function(name='proj1-serving',func = mlrun.new_function(name='proj1-serving', kind='serving',image='mlrun/ml-models', command = []))

# Adding the project2 model to the project1 serving function: the model is
# referenced by the target path of project2's artifact, so project1 serves a
# model it does not own.
project1.get_function('proj1-serving').add_model(key = 'my_model',
                                                 class_name = "mlrun.frameworks.xgboost.XGBoostModelServer",
                                                 model_path = project2.get_artifact('log-transactions-log-transactions_bst_model').target_path)

<mlrun.serving.states.TaskStep at 0x7f3d5d39ce50>

In [None]:
# Testing the cross-project serving function locally through a mock server
server = project1.get_function('proj1-serving').to_mock_server()

# Send the last five rows of project2's encoded iris artifact as the inputs.
server.test(body={'inputs': 
                  pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]})

> 2023-03-06 07:46:46,676 [info] model my_model was loaded
> 2023-03-06 07:46:46,678 [info] Loaded ['my_model']


{'id': 'ec6487f434654f37bf959869677c1b92',
 'model_name': 'my_model',
 'outputs': [2, 2, 2, 2, 2]}

In [None]:
# Deploy the serving function registered in project1 under 'proj1-serving'.
project1.deploy_function('proj1-serving')

> 2023-03-06 07:46:46,761 [info] Starting remote function deploy
2023-03-06 07:46:47  (info) Deploying function
2023-03-06 07:46:47  (info) Building
2023-03-06 07:46:47  (info) Staging files and preparing base images
2023-03-06 07:46:47  (info) Building processor image
2023-03-06 07:49:32  (info) Build complete
2023-03-06 07:50:22  (info) Function deploy complete
> 2023-03-06 07:50:28,887 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-cross-project1-normal-user-proj1-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['cross-project1-normal-user-proj1-serving-cross-project-zi3tekpa.default-tenant.app.vmdev92.lab.iguazeng.com/']}


DeployStatus(state=ready, outputs={'endpoint': 'http://cross-project1-normal-user-proj1-serving-cross-project-zi3tekpa.default-tenant.app.vmdev92.lab.iguazeng.com/', 'name': 'cross-project1-normal-user-proj1-serving'})

In [None]:
import time
# Fixed pause before the first call — presumably to let the freshly deployed
# endpoint become reachable (TODO confirm the delay is needed / sufficient).
time.sleep(5)

# Invoke the deployed model endpoint with the last five iris rows.
project1.get_function('proj1-serving').invoke(
    path='/v2/models/my_model/infer', 
    body={'inputs': pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]}
)

> 2023-03-06 07:50:33,999 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-cross-project1-normal-user-proj1-serving.default-tenant.svc.cluster.local:8080/v2/models/my_model/infer'}


{'id': '78918c14-c285-4068-a201-a635dafa3b32',
 'model_name': 'my_model',
 'outputs': [2, 2, 2, 2, 2]}

#### 4.1.2 Importing & Exporting already deployed serving function

In [None]:
# Exporting project1 deployed serving function
project1.get_function('proj1-serving').export('proj1-serving.yaml')

# Importing the project1 serving function into project2
project2.set_function(name='proj2-imported-serving', func='../proj1-serving.yaml')
# Report whether the imported function object considers itself deployed.
print(project2.get_function('proj2-imported-serving').is_deployed())

> 2023-03-06 07:50:34,314 [info] function spec saved to path: proj1-serving.yaml
True


In [None]:
# NOTE(review): errors are only printed here. In the recorded run this cell
# printed "command file <endpoint URL> not found", i.e. the mock server could
# not be created from the spec exported after deployment.
try:
    # Testing the imported already deployed function
    server = project2.get_function('proj2-imported-serving').to_mock_server()

    server.test(body={'inputs': 
                      pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]})
except Exception as e:
    print(e)

command file http://cross-project1-normal-user-proj1-serving-cross-project-zi3tekpa.default-tenant.app.vmdev92.lab.iguazeng.com/ not found


In [None]:
# Redeploying, under project2, the function that was exported from project1
# after it had already been deployed there
project2.deploy_function('proj2-imported-serving')

> 2023-03-06 07:50:34,353 [info] Starting remote function deploy
2023-03-06 07:50:34  (info) Deploying function
2023-03-06 07:50:34  (info) Building
2023-03-06 07:50:34  (info) Staging files and preparing base images
2023-03-06 07:50:34  (info) Building processor image
2023-03-06 07:53:21  (info) Build complete
2023-03-06 07:54:21  (info) Function deploy complete
> 2023-03-06 07:54:26,194 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-cross-project2-normal-user-proj2-imported-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['cross-project2-normal-user-proj2-imported-serving-cros-51ebminm.default-tenant.app.vmdev92.lab.iguazeng.com/']}


DeployStatus(state=ready, outputs={'endpoint': 'http://cross-project2-normal-user-proj2-imported-serving-cros-51ebminm.default-tenant.app.vmdev92.lab.iguazeng.com/', 'name': 'cross-project2-normal-user-proj2-imported-serving'})

##### Testing the redeployed imported-deployed-function

In [None]:
# Fixed pause before invoking — presumably to let the endpoint come up (TODO confirm).
# `time` was imported in an earlier cell.
time.sleep(5) 
project2.get_function('proj2-imported-serving').invoke(
    path='/v2/models/my_model/infer', 
    body={'inputs': pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]}
)

> 2023-03-06 07:54:31,307 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-cross-project2-normal-user-proj2-imported-serving.default-tenant.svc.cluster.local:8080/v2/models/my_model/infer'}


{'id': '2ab0285e-2ac6-4616-8127-9098447c960d',
 'model_name': 'my_model',
 'outputs': [2, 2, 2, 2, 2]}

#### 4.1.3 Importing & Exporting undeployed serving function

In [None]:
# Setting project2 serving function (registered under the function's own name)
project2.set_function(mlrun.new_function(name='proj2-serving', kind='serving',image='mlrun/ml-models', command = []))

# Adding the project1 model to the project2 serving function, referenced by the
# target path of project1's model artifact
project2.get_function('proj2-serving').add_model(key = 'my_model',
                                                 class_name = "mlrun.frameworks.xgboost.XGBoostModelServer",
                                                 model_path = project1.get_artifact('log-transactions-log-transactions_bst_model').target_path)

<mlrun.serving.states.TaskStep at 0x7f3cef7cc1c0>

In [None]:
# Exporting the project2 serving function, which has not been deployed
project2.get_function('proj2-serving').export('proj2-serving.yaml')

# Importing the project2 serving function into project1
project1.set_function(name='proj1-imported-serving', func='../proj2-serving.yaml')
# NOTE(review): the recorded run printed True here although this function was
# never deployed in the notebook — confirm what is_deployed() reports for
# serving functions.
print(project1.get_function('proj1-imported-serving').is_deployed())

> 2023-03-06 07:54:31,593 [info] function spec saved to path: proj2-serving.yaml
True


In [None]:
# Testing the imported undeployed function through a mock server
server = project1.get_function('proj1-imported-serving').to_mock_server()

# Inputs are the last five rows of project2's encoded iris artifact.
server.test(body={'inputs': 
                  pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]})

> 2023-03-06 07:54:32,886 [info] model my_model was loaded
> 2023-03-06 07:54:32,887 [info] Loaded ['my_model']


{'id': 'c2447f09f9034d458c6e13c2d973283a',
 'model_name': 'my_model',
 'outputs': [2, 2, 2, 2, 2]}

In [None]:
# Deploying, from project1, the function imported from project2's undeployed export
project1.deploy_function('proj1-imported-serving')

> 2023-03-06 07:54:32,956 [info] Starting remote function deploy
2023-03-06 07:54:33  (info) Deploying function
2023-03-06 07:54:33  (info) Building
2023-03-06 07:54:33  (info) Staging files and preparing base images
2023-03-06 07:54:33  (info) Building processor image
2023-03-06 07:57:23  (info) Build complete
2023-03-06 07:58:21  (info) Function deploy complete
> 2023-03-06 07:58:27,531 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-cross-project1-normal-user-proj1-imported-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['cross-project1-normal-user-proj1-imported-serving-cros-heffmh7o.default-tenant.app.vmdev92.lab.iguazeng.com/']}


DeployStatus(state=ready, outputs={'endpoint': 'http://cross-project1-normal-user-proj1-imported-serving-cros-heffmh7o.default-tenant.app.vmdev92.lab.iguazeng.com/', 'name': 'cross-project1-normal-user-proj1-imported-serving'})

In [None]:
# Fixed pause before invoking — presumably to let the endpoint come up (TODO confirm).
time.sleep(5)

# Testing the imported then deployed function
project1.get_function('proj1-imported-serving').invoke(
    path='/v2/models/my_model/infer', 
    body={'inputs': pd.DataFrame(json.loads(project2.get_artifact('log-transactions-log-transactions_encoded_iris-v3').to_dataitem().get())).values.tolist()[-5:]}
)

> 2023-03-06 07:58:32,647 [info] invoking function: {'method': 'POST', 'path': 'http://nuclio-cross-project1-normal-user-proj1-imported-serving.default-tenant.svc.cluster.local:8080/v2/models/my_model/infer'}


{'id': 'df730d5e-2685-4e15-964b-bf6f3762378c',
 'model_name': 'my_model',
 'outputs': [2, 2, 2, 2, 2]}

### 4.2 mlrun function

In [None]:
# Exporting the project2 log_transactions MLRun function.
# The function is fetched as 'log-transactions' although it was registered as
# 'log_transactions'; presumably MLRun normalizes the name — TODO confirm.
project2.get_function('log-transactions').export('proj2-mlrun-func.yaml')

# Importing the project2 log_transactions function into project1 and running it there
project1.set_function(name='proj1-imported-mlrun-func', func='../proj2-mlrun-func.yaml')
project1.run_function('proj1-imported-mlrun-func')

> 2023-03-06 07:58:33,009 [info] function spec saved to path: proj2-mlrun-func.yaml
> 2023-03-06 07:58:33,057 [info] starting run proj1-imported-mlrun-func-log-transactions uid=b04c7861d4764c7c857e373ec1e8d0a5 DB=http://mlrun-api:8080
> 2023-03-06 07:58:33,215 [info] Job is running in the background, pod: proj1-imported-mlrun-func-log-transactions-hwqq8
> 2023-03-06 07:58:39,562 [info] dataframe shape : (150, 4)
> 2023-03-06 07:58:39,649 [info] logged model : <class 'xgboost.sklearn.XGBClassifier'>
> 2023-03-06 07:58:39,795 [info] logged dataset (150, 5)
> 2023-03-06 07:58:39,909 [info] To track results use the CLI: {'info_cmd': 'mlrun get run b04c7861d4764c7c857e373ec1e8d0a5 -p cross-project1-normal-user', 'logs_cmd': 'mlrun logs b04c7861d4764c7c857e373ec1e8d0a5 -p cross-project1-normal-user'}
> 2023-03-06 07:58:39,909 [info] Or click for UI: {'ui_url': 'https://dashboard.default-tenant.app.vmdev92.lab.iguazeng.com/mlprojects/cross-project1-normal-user/jobs/monitor/b04c7861d4764c7c857

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
cross-project1-normal-user,...c1e8d0a5,0,Mar 06 07:58:37,completed,proj1-imported-mlrun-func-log-transactions,v3io_user=normal-userkind=jobowner=normal-usermlrun/client_version=1.3.0-rc33mlrun/client_python_version=3.9.16host=proj1-imported-mlrun-func-log-transactions-hwqq8,,,,encoded_iris-v3bst_modelmy_projectiris_dataset-v3





> 2023-03-06 07:58:42,691 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f3cef806940>

### 4.3 nuclio function

#### 4.3.1 Importing & Exporting deployed remote function

In [None]:
%%writefile script.py
import mlrun
import time

def handler(context: mlrun.MLClientCtx, event):
    """Log a message, pause for five seconds and return a wake-up string.

    :param context: object exposing ``logger.info``.
                    NOTE(review): annotated as ``mlrun.MLClientCtx``; confirm
                    this matches what the remote runtime actually passes.
    :param event:   incoming event (unused).

    :returns: the string ``'Waking up !'``.
    """
    nap_seconds = 5
    context.logger.info('Going to sleep zZz...')
    time.sleep(nap_seconds)
    wake_message = 'Waking up !'
    return wake_message

Writing script.py


In [None]:
# Setting remote function (built from the script.py file written by the previous cell)
project1.set_function(func = mlrun.code_to_function(name='proj1-remote-func', kind='remote',image='mlrun/mlrun', filename='script.py', handler='handler'))

# Deploying
project1.deploy_function('proj1-remote-func')
# Exporting the project1 remote function (already deployed by the line above)
project1.get_function('proj1-remote-func').export('proj1-remote-func.yaml')

# Importing the project1 remote function into project2
project2.set_function(name='proj2-imported-remote-func', func='../proj1-remote-func.yaml')

> 2023-03-06 07:58:42,796 [info] Starting remote function deploy
2023-03-06 07:58:43  (info) Deploying function
2023-03-06 07:58:43  (info) Building
2023-03-06 07:58:43  (info) Staging files and preparing base images
2023-03-06 07:58:43  (info) Building processor image
2023-03-06 07:59:48  (info) Build complete
2023-03-06 08:00:22  (info) Function deploy complete
> 2023-03-06 08:00:23,702 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-cross-project1-normal-user-proj1-remote-func.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['cross-project1-normal-user-proj1-remote-func-cross-pro-rjophmbj.default-tenant.app.vmdev92.lab.iguazeng.com/']}
> 2023-03-06 08:00:23,759 [info] function spec saved to path: proj1-remote-func.yaml


<mlrun.runtimes.function.RemoteRuntime at 0x7f3cef806ac0>

In [None]:
# Deploy the imported remote function under project2.
project2.deploy_function('proj2-imported-remote-func')

# Fixed pause — presumably to let the endpoint come up (TODO confirm) — then
# call the function with an empty path; the recorded run returned b'Waking up !'.
time.sleep(5)
project2.get_function('proj2-imported-remote-func').invoke('')

> 2023-03-06 08:00:23,783 [info] Starting remote function deploy
2023-03-06 08:00:23  (info) Deploying function
2023-03-06 08:00:23  (info) Building
2023-03-06 08:00:23  (info) Staging files and preparing base images
2023-03-06 08:00:23  (info) Building processor image
2023-03-06 08:01:29  (info) Build complete
2023-03-06 08:02:21  (info) Function deploy complete
> 2023-03-06 08:02:24,682 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-cross-project2-normal-user-proj2-imported-remote-func.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['cross-project2-normal-user-proj2-imported-remote-func-d9z1eq77.default-tenant.app.vmdev92.lab.iguazeng.com/']}
> 2023-03-06 08:02:29,735 [info] invoking function: {'method': 'GET', 'path': 'http://nuclio-cross-project2-normal-user-proj2-imported-remote-func.default-tenant.svc.cluster.local:8080'}


b'Waking up !'

## 5. Importing/Exporting project

In [None]:
# Initialize the MLRun project object
new_project = mlrun.get_or_create_project('testing-exported-proj', context=os.path.join(os.getcwd(), 'new-project'))

# Required credentials (read from the environment, never hardcoded):
#   AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, GOOGLE_APPLICATION_CREDENTIALS
# Optional:
#   S3_BUCKET (defaults to 'testbucket-igz')
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
GOOGLE_APPLICATION_CREDENTIALS = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')

secrets = {'AWS_ACCESS_KEY_ID': AWS_ACCESS_KEY_ID,
           'AWS_SECRET_ACCESS_KEY': AWS_SECRET_ACCESS_KEY,
           'GOOGLE_APPLICATION_CREDENTIALS': GOOGLE_APPLICATION_CREDENTIALS}

# Fail early, naming exactly which variables are missing (only names are
# reported, never values).
missing_credentials = [name for name, value in secrets.items() if value is None]
assert not missing_credentials, f'Missing required environment variables: {missing_credentials}'

# Store the credentials as project secrets so remote jobs can use them.
new_project.set_secrets(secrets=secrets, provider='kubernetes')

S3_BUCKET = os.environ.get('S3_BUCKET', 'testbucket-igz')

# Build the artifact URL explicitly; os.path.join is meant for filesystem
# paths, not URLs.
new_project.artifact_path = f's3://{S3_BUCKET}/cross_project/'

> 2023-03-06 08:02:50,447 [info] Created and saved project testing-exported-proj: {'from_template': None, 'overwrite': False, 'context': '/User/test/test-notebooks/project_transfer/new-project', 'save': True}
> 2023-03-06 08:02:50,448 [info] created project testing-exported-proj and saved in MLRun DB


In [None]:
# Setting artifacts from the YAML files exported by the earlier cells
# (absolute paths built from the current working directory)
new_project.set_artifact('encoded_iris', artifact=os.path.join(os.getcwd(),'proj2_encoded_iris.yaml'))
new_project.set_artifact('model', artifact=os.path.join(os.getcwd(),'proj1_model.yaml'))
new_project.set_artifact('dataset', artifact=os.path.join(os.getcwd(),'proj2_dataset.yaml'))
new_project.set_artifact('dir', artifact=os.path.join(os.getcwd(),'proj1_dir.yaml'))

# Setting functions from the exported function specs.
# NOTE(review): these use '../' relative paths, unlike the absolute artifact
# paths above; presumably they resolve against the project context directory —
# confirm.
new_project.set_function(name='serving', func='../proj1-serving.yaml')
new_project.set_function(name='mlrun-func', func='../proj2-mlrun-func.yaml')
new_project.set_function(name='remote-func', func='../proj1-remote-func.yaml')


<mlrun.runtimes.function.RemoteRuntime at 0x7f3cef7e9100>

In [None]:
# Exporting the project as a zip archive to three different targets

# S3 artifact path (e.g. s3://my-bucket/new_project.zip)
new_project.export(filepath=os.path.join(new_project.artifact_path, 'new_project.zip'))

# GCS artifact path (e.g. gs://my-bucket/new_project.zip).
# The URL is derived by swapping the two-character 's3' scheme of the artifact
# path for 'gs', so the same bucket name and prefix are used on GCS.
new_project.export(filepath=os.path.join('gs' + new_project.artifact_path[2:], 'new_project.zip'))

# V3IO local path
new_project.export(filepath='/v3io/bigdata/new_project.zip')

In [None]:
# Importing the projects, one per storage target written by the export cell

# GCS: the export cell wrote the archive to the 'gs://' twin of the artifact
# path (scheme swapped from 's3' to 'gs'), so load it from that same URL.
# Previously this used the s3:// artifact path, so GCS was never exercised.
gs_project = mlrun.load_project(name='testing-gs-project',
                                url=os.path.join('gs' + new_project.artifact_path[2:], 'new_project.zip'),
                                context = os.path.join(os.getcwd(), 'gs-project'))

# S3: the project's artifact path (scheme forced to 's3').
s3_project = mlrun.load_project(name='testing-s3-project',
                                url=os.path.join('s3' + new_project.artifact_path[2:], 'new_project.zip'),
                                context = os.path.join(os.getcwd(), 's3-project'))

# V3IO: local mount path.
v3io_project = mlrun.load_project(name='testing-v3io-project',
                                  url='/v3io/bigdata/new_project.zip',
                                  context=os.path.join(os.getcwd(), 'v3io-project'))

In [None]:
# Testing the imported artifacts
# (`pd`, `json`, `pickle` and `time` come from earlier cells)
# Artifact
gs_encoded_dataset = pd.DataFrame(json.loads(gs_project.get_artifact('encoded_iris').to_dataitem().get()))
print('gs project artifact\n', gs_encoded_dataset.head(), '\n\n')

# Dataset
s3_dataset = s3_project.get_artifact('dataset').to_dataitem().as_df()
print('s3 project artifact\n',s3_dataset.head())

# Model
# NOTE(review): relies on the private `_get_file_body()`; the prediction result
# below is neither displayed nor asserted.
v3io_model = pickle.loads(v3io_project.get_artifact('model')._get_file_body())
v3io_model.predict(gs_encoded_dataset[:5])

# Directory (the listing is stored in gs_dir but not inspected in this cell)
gs_dir = gs_project.get_artifact('dir').to_dataitem().listdir()

# Testing imported functions
# Serving function
gs_project.get_function('serving').deploy()
time.sleep(5)
gs_project.get_function('serving').invoke(
    path='/v2/models/my_model/infer', 
    body={'inputs': gs_encoded_dataset.values[-5:].tolist()}
)

# mlrun function (run locally, unlike the remote runs earlier in the notebook)
s3_project.run_function('mlrun-func',local=True)

## nuclio function
v3io_project.deploy_function('remote-func')

time.sleep(5)
v3io_project.get_function('remote-func').invoke('')

gs project artifact
      0    1    2    3
0  5.1  3.5  1.4  0.2
1  4.9  3.0  1.4  0.2
2  4.7  3.2  1.3  0.2
3  4.6  3.1  1.5  0.2
4  5.0  3.6  1.4  0.2 


s3 project artifact
    Unnamed: 0    0    1    2    3
0           0  5.1  3.5  1.4  0.2
1           1  4.9  3.0  1.4  0.2
2           2  4.7  3.2  1.3  0.2
3           3  4.6  3.1  1.5  0.2
4           4  5.0  3.6  1.4  0.2
> 2023-03-06 08:03:44,068 [info] Starting remote function deploy
2023-03-06 08:03:44  (info) Deploying function
2023-03-06 08:03:44  (info) Building
2023-03-06 08:03:44  (info) Staging files and preparing base images
2023-03-06 08:03:44  (info) Building processor image
2023-03-06 08:06:29  (info) Build complete
2023-03-06 08:07:21  (info) Function deploy complete
> 2023-03-06 08:07:26,303 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-testing-gs-project-serving.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['testing-gs-project-serving-testing-gs-project.defaul

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
testing-s3-project,...f08c6049,0,Mar 06 08:07:31,completed,mlrun-func-log-transactions,v3io_user=normal-userkind=owner=normal-userhost=jupyter-v5afxg4hcf-at3sd-77c89bbcd5-svv9f,,,,encoded_iris-s3bst_modelmy_projectiris_dataset-s3





> 2023-03-06 08:07:44,442 [info] run executed, status=completed
> 2023-03-06 08:07:44,511 [info] Starting remote function deploy
2023-03-06 08:07:44  (info) Deploying function
2023-03-06 08:07:44  (info) Building
2023-03-06 08:07:44  (info) Staging files and preparing base images
2023-03-06 08:07:44  (info) Building processor image
2023-03-06 08:08:50  (info) Build complete
2023-03-06 08:09:22  (info) Function deploy complete
> 2023-03-06 08:09:25,781 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-testing-v3io-project-remote-func.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['testing-v3io-project-remote-func-testing-v3io-project.default-tenant.app.vmdev92.lab.iguazeng.com/']}
> 2023-03-06 08:09:30,838 [info] invoking function: {'method': 'GET', 'path': 'http://nuclio-testing-v3io-project-remote-func.default-tenant.svc.cluster.local:8080'}


b'Waking up !'

## Cleanup

In [None]:
import shutil

# Delete every project created by this notebook, together with its resources.
projects = [project1,project2,gs_project,s3_project,v3io_project, new_project]
run_db = mlrun.get_run_db()
for project in projects:
    run_db.delete_project(name=project.name, deletion_strategy='cascade')

# Remove everything the notebook wrote into the working directory, keeping only
# notebooks and the '.test' entry.
for f in os.listdir():
    if (not f.endswith('ipynb')) and f != '.test':
        if os.path.isfile(f) or os.path.islink(f):
            # Symlinks (including broken ones, or links to directories) are
            # unlinked rather than followed.
            os.remove(f)
        elif os.path.isdir(f):
            shutil.rmtree(f)
        else:
            # Raising a plain string is itself a TypeError in Python 3; raise a
            # real exception that also names the offending entry.
            raise RuntimeError(f"A file that is not a notebook wasn't deleted: {f!r}")

In [None]:
import boto3
from urllib.parse import urlparse

# Delete the objects written under the S3 artifact path of `new_project`.
# The project is named explicitly instead of relying on a loop variable left
# over from another cell, so this cell does not depend on hidden state.
artifact_url = urlparse(new_project.artifact_path)

s3 = boto3.resource('s3')
bucket = s3.Bucket(artifact_url.netloc)
# Strip the leading '/' from the URL path to obtain the object key prefix.
bucket.objects.filter(Prefix=artifact_url.path[1:]).delete()