In [1]:
import numpy as np
import pandas as pd
import mlflow
import joblib

In [2]:
def load_model(run_id: str = None):
    
    if run_id is None:
        model_path = 'out/model.sav'
    else:
        mlflow_client = mlflow.tracking.MlflowClient()
        model_path = mlflow_client.download_artifacts(run_id, 'model.sav')
        
    return joblib.load(model_path)

In [3]:
def predict(ds: np.ndarray, model) -> pd.DataFrame:
    x = ds[:, 1:]
    y_log1p = model.predict(x)
    y = np.expm1(y_log1p)
    df_out = pd.DataFrame({
        'row_id': ds[:, 0],
        'meter_reading': y,
    })
    df_out['row_id'] = df_out['row_id'].asint(int)
    return df_out.loc[:, ['row_id', 'meter_reading']]

In [None]:
m = load_model('7ab01f79f76142b6bd768f7605f9d590')
dataset_test = np.load('dataset_test.npy')

  from numpy.core.umath_tests import inner1d


In [5]:
predict(dataset_test, m).to_csv('submission.csv', index=False)

[Parallel(n_jobs=10)]: Done   3 out of  10 | elapsed:   31.5s remaining:  1.2min
[Parallel(n_jobs=10)]: Done  10 out of  10 | elapsed:   35.8s finished


In [2]:
submission = pd.read_csv('submission.csv')

In [3]:
submission.head()

Unnamed: 0,meter_reading,row_id
0,177.302521,0.0
1,56.075295,1.0
2,5.510656,2.0
3,105.224689,3.0
4,1138.246228,4.0


In [5]:
submission['row_id'] = submission['row_id'].astype(int)

In [None]:
submission.loc[:, ['row_id', 'meter_reading']].to_csv('submission.csv', index=False)