In [1]:
import os.path
import pandas as pd
import numpy as np

In [2]:
from whylogs.v2 import get_or_create_session
from whylogs.v2 import MetricPlugin

In [3]:
# Obtain the whylogs session that the logger below is created from.
# In this run the call prints "WARN: Missing config" (see the cell output).
session = get_or_create_session()

WARN: Missing config


In [4]:
# Show the working directory: the data file paths used below are relative to it.
print("Current working directory:", os.getcwd())

Current working directory: /home/jamie/projects/whylogs/examples


In [5]:
# Relative paths to the two sample text files loaded in the next cells.
data_file = "data/custom_metric_sample.txt"
data_file2 = "data/custom_metric_sample2.txt"

In [6]:
# Load the first sample; '\n' is passed as the field delimiter and the
# resulting frame has a single 'Text' column (see the output below).
# NOTE(review): newer pandas releases may reject '\n' as a delimiter -- confirm
# against the installed pandas version.
data = pd.read_csv(data_file, delimiter='\n')
data

Unnamed: 0,Text
0,Much Ado About Nothing
1,ACT I
2,SCENE I. Before LEONATO'S house.
3,"Enter LEONATO, HERO, and BEATRICE, with a Mess..."
4,LEONATO
...,...
3678,Think not on him till to-morrow:
3679,I'll devise thee brave punishments for him.
3680,"Strike up, pipers."
3681,Dance


In [7]:
# Load the second sample the same way as the first: '\n' as the field
# delimiter, giving a single 'Text' column (see the output below).
# NOTE(review): newer pandas releases may reject '\n' as a delimiter -- confirm
# against the installed pandas version.
data2 = pd.read_csv(data_file2, delimiter='\n')
data2

Unnamed: 0,Text
0,Hamlet
1,ACT I
2,SCENE I. Elsinore. A platform before the castle.
3,FRANCISCO at his post. Enter to him BERNARDO
4,BERNARDO
...,...
5376,Speak loudly for him.
5377,Take up the bodies: such a sight as this
5378,"Becomes the field, but here shows much amiss."
5379,"Go, bid the soldiers shoot."


In [8]:
# Name of the first column of `data`; it evaluates to 'Text' in this run.
target_column_name = data.columns[0]
target_column_name

'Text'

In [9]:
# Name of the first column of `data2`; it is also 'Text' in this run.
target_column_name2 = data2.columns[0]
target_column_name2

'Text'

In [10]:
# A sample custom metric that counts occurrences of a target string in the logged text.
from dataclasses import dataclass
from json import loads

@dataclass
class TargetWordCountMetric(MetricPlugin):
    """Custom metric plugin that counts occurrences of a target substring.

    Each string passed to ``track`` is searched for ``target_string`` and the
    number of non-overlapping occurrences is added to ``word_counts``.
    Two instances with the same ``target_string`` can be combined with
    ``merge``, which sums their counts.
    """

    # Substring searched for in every tracked value.
    target_string: str = 'ACT'
    # Name of this metric instance.
    name: str = 'TargetWordCounter'
    # Column this metric is associated with; stored on the instance but not
    # read by track() or merge() in this class.
    target_column_name: str = ''
    # Running total of occurrences seen so far.
    word_counts: int = 0

    def track(self, data):
        """Add the occurrences of ``target_string`` in ``data`` to the total.

        Args:
            data: The value to inspect. Non-string values (for example a NaN
                produced by a blank row) are ignored instead of raising
                ``TypeError`` from the substring search.
        """
        if not isinstance(data, str):
            return
        # str.count returns 0 when the target is absent, so no separate
        # membership test is needed.
        self.word_counts += data.count(self.target_string)

    def merge(self, other: 'TargetWordCountMetric') -> 'TargetWordCountMetric':
        """Fold another metric's count into this one and return ``self``.

        Args:
            other: Metric to merge in. ``None`` or ``self`` is a no-op.

        Returns:
            This instance, on every path, so callers can rely on the result.

        Raises:
            ValueError: If the two metrics count different target strings.
        """
        if other is None or other is self:
            return self
        # Counts are only comparable when both metrics look for the same string.
        if self.target_string != other.target_string:
            raise ValueError(f"Cannot merge word counts of different target strings: '{self.target_string}' != '{other.target_string}'")
        # Custom metrics define how they merge; here we add the counts of the matching target strings.
        self.word_counts += other.word_counts
        return self


In [11]:
# Create a logger from the session, using 'test.data' as the dataset name.
logger = session.logger(dataset_name='test.data')

In [12]:
# Keep a handle on the logger's DatasetProfile; its session_id is inspected later.
profile = logger.profile
profile

<whylogs.v2.core.datasetprofile.DatasetProfile at 0x7f48d7642a30>

In [13]:
print(target_column_name)

# Two counters attached to the same column: one for 'ACT', one for 'SCENE'.
count_acts = TargetWordCountMetric(
    name='ActCounter',
    target_string='ACT',
    target_column_name=target_column_name,
)
count_scenes = TargetWordCountMetric(
    name='SceneCounter',
    target_string='SCENE',
    target_column_name=target_column_name,
)


Text


In [14]:
# Register both custom counters with the logger.
logger.add_metric_plugin(count_acts)
logger.add_metric_plugin(count_scenes)

In [15]:
# Inspect the registered plugins (keyed by metric name); both counts start at 0.
logger.plugins

{'ActCounter': TargetWordCountMetric(target_string='ACT', name='ActCounter', target_column_name='Text', word_counts=0),
 'SceneCounter': TargetWordCountMetric(target_string='SCENE', name='SceneCounter', target_column_name='Text', word_counts=0)}

In [16]:
# Repeat of the plugin inspection; nothing has been logged yet, so counts are still 0.
logger.plugins


{'ActCounter': TargetWordCountMetric(target_string='ACT', name='ActCounter', target_column_name='Text', word_counts=0),
 'SceneCounter': TargetWordCountMetric(target_string='SCENE', name='SceneCounter', target_column_name='Text', word_counts=0)}

In [17]:
# Log the first sample frame ("Much Ado About Nothing").
logger.log_dataframe(data)

In [18]:
# After the first frame the counters report 5 'ACT' and 17 'SCENE' occurrences.
logger.plugins


{'ActCounter': TargetWordCountMetric(target_string='ACT', name='ActCounter', target_column_name='Text', word_counts=5),
 'SceneCounter': TargetWordCountMetric(target_string='SCENE', name='SceneCounter', target_column_name='Text', word_counts=17)}

In [19]:
# Log the second sample frame ("Hamlet") through the same logger.
logger.log_dataframe(data2)

In [20]:
# Counts accumulate across both frames: now 10 'ACT' and 37 'SCENE' occurrences.
logger.plugins


{'ActCounter': TargetWordCountMetric(target_string='ACT', name='ActCounter', target_column_name='Text', word_counts=10),
 'SceneCounter': TargetWordCountMetric(target_string='SCENE', name='SceneCounter', target_column_name='Text', word_counts=37)}

In [21]:
# Show the session id recorded on the dataset profile.
profile.session_id

'f04e2ac7-114f-4b0f-898d-f72ceac1e9e9'

In [22]:
# Close the logger; the displayed return value is a DatasetProfile at the same
# address as `profile` shown earlier.
logger.close()

<whylogs.v2.core.datasetprofile.DatasetProfile at 0x7f48d7642a30>