# Imports

In [1]:
# Put the parent directory at the front of the module search path so that
# the `scoring` package imported in the next cell is looked up there first
# (presumably ".." is the repository root -- confirm for your checkout).
import sys
sys.path.insert(0, "..")

In [2]:
import docx

from abc import ABC, abstractmethod
import importlib
import os.path
import pandas as pd
import numpy as np

from tqdm.notebook import tqdm

from scoring import doctools
import scoring.documentation.generators as generators_module


from inspect import getmembers, isclass, isabstract

from scoring.documentation import orchestrator

# Data

In [3]:
# Load the two demo datasets and attach placeholder scores.
#
# The scores below are random stand-ins, so they are drawn from a seeded
# generator: a Restart & Run All then reproduces exactly the same frames.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# data should contain the calculated score
data = pd.read_csv("demo_data/ExampleDocumetationData.csv")
data["SCORE"] = rng.random(size=data.shape[0])

gm_data = pd.read_csv("demo_data/gm_data_scored.csv", low_memory=False)
# Placeholder "old" score: uniform noise moved 10% of the way towards FPD30.
# Rows where FPD30 is missing end up with a missing OLD_GM_SCORE as well.
gm_data["OLD_GM_SCORE"] = rng.random(size=gm_data.shape[0])
gm_data["OLD_GM_SCORE"] = gm_data["OLD_GM_SCORE"] + (gm_data["FPD30"] - gm_data["OLD_GM_SCORE"]) * 0.1
# FPD10 is a plain copy of FPD30 in this demo.
gm_data["FPD10"] = gm_data["FPD30"]

# Kept as the last expression of the cell so the preview is actually rendered.
data.head()

In case the old score is defined with respect to the positive outcome, invert it (`1 - OLD_SCORE`):

In [4]:
# Invert the old score (1 - OLD_SCORE).
# The flip is recorded on the frame itself, so re-running this cell does not
# silently undo it; reloading `data` creates a new frame without the marker.
if not data.attrs.get("old_score_inverted", False):
    data["OLD_SCORE"] = 1 - data["OLD_SCORE"]
    data.attrs["old_score_inverted"] = True

In [5]:
# Display the column labels of `data`; the metadata cells further down
# refer to columns by these names.
data.columns

Index(['ID', 'TIME', 'Numerical_1', 'Numerical_2', 'Numerical_3',
       'Numerical_4', 'Numerical_5', 'Numerical_6', 'Numerical_7',
       'Categorical_1', 'Categorical_2', 'Categorical_3', 'Categorical_4',
       'Categorical_5', 'DateVariable_1', 'DateVariable_2', 'OLD_SCORE',
       'WEIGHT', 'REJECTED', 'DEF', 'FPD', 'BASE', 'DAY', 'MONTH', 'data_type',
       'SCORE'],
      dtype='object')

## Reload generators

In [6]:
# Re-import the documentation modules in place: the generators module
# first, then the orchestrator (same order as before). Nothing is displayed.
for _module_to_refresh in (generators_module, orchestrator):
    importlib.reload(_module_to_refresh)

# Define metadata

In [7]:
### REGRESSION ###

# Row filters shared by several of the sample definitions.
observable_rows = data["BASE"] == 1
old_score_known = pd.notnull(data["OLD_SCORE"])
comparison_rows = data["data_type"].isin(["test", "oot", "hoot"])

# Sample label -> value of the `data_type` column that selects it.
split_codes = {
    "Train": "train",
    "Validation": "valid",
    "Test": "test",
    "Out of Time": "oot",
    "Historical Out of Time": "hoot",
}

# Boolean masks over `data`, one per named sample (insertion order matters
# only insofar as the consumer iterates over it).
sample_masks = {
    label: (data["data_type"] == code) & observable_rows
    for label, code in split_codes.items()
}
sample_masks["Observable"] = observable_rows
sample_masks["All"] = pd.Series(True, index=data.index)
sample_masks["Old comparison"] = comparison_rows & old_score_known & observable_rows
sample_masks["Old comparison with rejected"] = comparison_rows & old_score_known

model_predictors = ['Numerical_1', 'Numerical_2', 'Numerical_4', 'Categorical_1']

# Settings handed to orchestrator.Orchestrator for the regression scorecard.
metadata = {
    "scorecard_name": "POS existing",
    "author_name": "Triss Merigold",
    "country": "Indonesia",
    "PSW_version": "0.8.0",
    "area": "Underwriting",
    "segment": "POS, Existing clients",
    # Names of the columns in `data` that play each role.
    "columns": {
        "target": "DEF",
        "base": "BASE",
        "short_target": "FPD",
        "short_base": "BASE",
        "time": "MONTH",
        "row_id": "ID",
        "score": "SCORE",
        "old_score": "OLD_SCORE",
        "data_type": "data_type",
        "weight": "WEIGHT",
    },
    "samples": sample_masks,
    "predictors": model_predictors,
    # Covariates are the predictors plus one extra column.
    "covariates": model_predictors + ['Numerical_7'],
    "grouping_path": "myGrouping.json",  # regression only
    "model_path": "myModelSW1.model",  # regression model file
    "use_weight": True,
}

In [8]:
# Build the orchestrator for the regression scorecard from `data` and the
# `metadata` dict defined above, then load the regression structure file.
regression_structure = "../scoring/documentation/regressionscorecard.structure"
o = orchestrator.Orchestrator(
    data=data,
    metadata=metadata,
    file="demo_orche",
)
o.load_structure(regression_structure)

In [9]:
### LGBM ###

# Row filters shared by several of the sample definitions.
observable_rows = data["BASE"] == 1
old_score_known = pd.notnull(data["OLD_SCORE"])
comparison_rows = data["data_type"].isin(["test", "oot", "hoot"])

# Sample label -> value of the `data_type` column that selects it.
split_codes = {
    "Train": "train",
    "Validation": "valid",
    "Test": "test",
    "Out of Time": "oot",
    "Historical Out of Time": "hoot",
}

# Boolean masks over `data`, one per named sample.
sample_masks = {
    label: (data["data_type"] == code) & observable_rows
    for label, code in split_codes.items()
}
sample_masks["Observable"] = observable_rows
sample_masks["All"] = pd.Series(True, index=data.index)
sample_masks["Old comparison"] = comparison_rows & old_score_known & observable_rows
sample_masks["Old comparison with rejected"] = comparison_rows & old_score_known

model_predictors = ['Numerical_1', 'Numerical_2', 'Numerical_4', 'Categorical_1']

# Settings handed to orchestrator.Orchestrator for the LGBM scorecard.
# Unlike the regression variant there is no "grouping_path" entry.
metadata = {
    "scorecard_name": "POS existing",
    "author_name": "Triss Merigold",
    "country": "Indonesia",
    "PSW_version": "0.8.0",
    "area": "Underwriting",
    "segment": "POS, Existing clients",
    # Names of the columns in `data` that play each role.
    "columns": {
        "target": "DEF",
        "base": "BASE",
        "short_target": "FPD",
        "short_base": "BASE",
        "time": "MONTH",
        "row_id": "ID",
        "score": "SCORE",
        "old_score": "OLD_SCORE",
        "data_type": "data_type",
        "weight": "WEIGHT",
    },
    "samples": sample_masks,
    "predictors": model_predictors,
    # Covariates are the predictors plus one extra column.
    "covariates": model_predictors + ['Numerical_7'],
    "model_path": "myModelLGBM.model",  # LGBM model file
    "use_weight": True,
}

In [10]:
# Build the orchestrator for the LGBM scorecard from `data` and the
# `metadata` dict defined above, then load the boosting structure file.
# NOTE(review): `file` is "demo_orche", the same value the regression cell
# uses -- confirm the two variants are not meant to have separate outputs.
boosting_structure = "../scoring/documentation/boostingscorecard.structure"
o = orchestrator.Orchestrator(
    data=data,
    metadata=metadata,
    file="demo_orche",
)
o.load_structure(boosting_structure)

In [11]:
### GM ###

# Row filters over `gm_data` shared by several of the sample definitions.
gm_approved_rows = gm_data["APPROVED"] == 1
gm_old_score_known = pd.notnull(gm_data["OLD_GM_SCORE"])
gm_comparison_rows = gm_data["data_type"].isin(["valid"])

# Sample label -> value of the `data_type` column that selects it.
gm_split_codes = {
    "Train": "train",
    "Validation": "valid",
    "Test": "test",
    "Out of Time": "oot",
    "Historical Out of Time": "hoot",
}

# Boolean masks over `gm_data`, one per named sample.
gm_sample_masks = {
    label: (gm_data["data_type"] == code) & gm_approved_rows
    for label, code in gm_split_codes.items()
}
gm_sample_masks["Observable"] = gm_approved_rows
gm_sample_masks["All"] = pd.Series(True, index=gm_data.index)
gm_sample_masks["Old comparison"] = gm_comparison_rows & gm_old_score_known & gm_approved_rows
gm_sample_masks["Old comparison with rejected"] = gm_comparison_rows & gm_old_score_known

# Settings handed to orchestrator.Orchestrator for the GM scorecard.
# This variant has neither "covariates" nor "grouping_path".
metadata = {
    "scorecard_name": "POS existing",
    "author_name": "Triss Merigold",
    "country": "Indonesia",
    "PSW_version": "0.8.0",
    "area": "Underwriting",
    "segment": "GM POS, Existing clients",
    # Names of the columns in `gm_data` that play each role.
    "columns": {
        "target": "FPD30",
        "base": "APPROVED",
        "short_target": "FPD10",
        "short_base": "APPROVED",
        "time": "MONTH",
        "row_id": "ID",
        "score": "GM_SCORE",
        "old_score": "OLD_GM_SCORE",
        "data_type": "data_type",
        "weight": "WEIGHT",
    },
    "samples": gm_sample_masks,
    "predictors": [
        'INTERNAL',
        'TELCO_A',
        'TELCO_B',
        'BUREAU_X_WOE',
        'BUREAU_Y',
        'UTILITY_WOE',
        'DEVICE',
    ],
    "model_path": "myModelGM.model",
    "use_weight": True,
}

In [12]:
# Build the orchestrator for the GM scorecard from `gm_data` and the
# `metadata` dict defined above, then load the GM structure file.
gm_structure = "../scoring/documentation/gmscorecard.structure"
o = orchestrator.Orchestrator(
    data=gm_data,
    metadata=metadata,
    file="demo_orche_gm",
)
o.load_structure(gm_structure)

# Disable output to notebook and export

In [13]:
# Export using whichever orchestrator `o` currently refers to.
# NOTE(review): when the notebook is run top to bottom, `o` is the GM
# orchestrator here; the regression and LGBM orchestrators were replaced
# without being exported -- confirm that is intended.
# NOTE(review): the section heading mentions disabling output to the
# notebook, but no such call is visible in this cell -- confirm.
o.export()

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
