In [1]:
import pandas as pd
import river
from river.metrics import Accuracy

In [2]:
# List the names available on the top-level `river` package right after `import river`.
# The listing below contains no submodules, which suggests they only become
# available once they are imported explicitly.
dir(river)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 'annotations']

In [3]:
from river.linear_model import LogisticRegression
from river.naive_bayes import MultinomialNB
from river.feature_extraction import BagOfWords,TFIDF

In [4]:
# Labelled training sentences as (text, category) pairs; every category is
# either "software" or "hardware". The order is the order in which the model
# will see the samples.
data = [
    ("my unit test failed", "software"),
    ("tried the program, but it was buggy", "software"),
    ("i need a new power supply", "hardware"),
    ("the drive has a 2TB capacity", "hardware"),
    ("unit-tests", "software"),
    ("program", "software"),
    ("power supply", "hardware"),
    ("drive", "hardware"),
    ("it needs more memory", "hardware"),
    ("check the API", "software"),
    ("design the API", "software"),
    ("they need more CPU", "hardware"),
    ("code", "software"),
    ("i found some bugs in the code", "software"),
    ("i swapped the memory", "hardware"),
    ("i tested the code", "software"),
]


In [5]:
# Held-out (text, category) pairs used to score the trained pipeline.
test_data = [
    ("he writes code daily", "software"),
    ("the disk is faulty", "hardware"),
    ("refactor the code", "software"),
    ("no empty space on the drive", "hardware"),
]

In [6]:
from river.compose import Pipeline

In [7]:
# Two named steps: a lowercasing bag-of-words vectorizer whose output feeds a
# multinomial naive Bayes classifier.
pipe_nb = Pipeline(
    ("vectorizer", BagOfWords(lowercase=True)),
    ("nb", MultinomialNB()),
)

In [8]:
# Display the pipeline object itself.
pipe_nb

In [9]:
# Inspect the named steps together with the hyperparameters each one was built with.
pipe_nb.steps

OrderedDict([('vectorizer',
              BagOfWords (
                on=None
                strip_accents=True
                lowercase=True
                preprocessor=None
                stop_words=None
                tokenizer_pattern="(?u)\b\w[\w\-]+\b"
                tokenizer=None
                ngram_range=(1, 1)
              )),
             ('nb',
              MultinomialNB (
                alpha=1.
              ))])

In [10]:
# Train incrementally: the pipeline is updated with one labelled sentence at a time.
for sentence, category in data:
    pipe_nb.learn_one(sentence, category)

In [11]:
# "API" occurs only in software-labelled training sentences.
pipe_nb.predict_one("I made an API")

'software'

In [12]:
# "drive" occurs only in hardware-labelled training sentences.
pipe_nb.predict_one("The Hard drive is damaged")

'hardware'

In [13]:
# "CPU" occurs only in a hardware-labelled training sentence.
pipe_nb.predict_one("The CPU is damaged")

'hardware'

In [14]:
# Misclassification, not an exception: a motherboard is hardware, yet the model
# answers 'software'. Neither "made" nor "motherboard" occurs in the training
# data, so the model has no word evidence to go on for this sentence.
pipe_nb.predict_one("I made motherboard")

'software'

In [15]:
# Class probabilities behind the misclassification above: the two classes are
# almost tied, i.e. the model is close to guessing for this sentence.
pipe_nb.predict_proba_one("I made motherboard")

{'software': 0.5136052113727353, 'hardware': 0.48639478862726443}

In [16]:
# Class probabilities behind the 'hardware' prediction for this sentence; the margin is narrow.
pipe_nb.predict_proba_one("The CPU is damaged")

{'software': 0.46444862782340723, 'hardware': 0.5355513721765931}

In [17]:
from river.metrics import ClassificationReport

# Score the trained pipeline on the held-out sentences. The model only predicts
# here; it does not learn from these samples in this cell.
classification_report = ClassificationReport()
for sentence, expected in test_data:
    classification_report.update(expected, pipe_nb.predict_one(sentence))


In [18]:
# Print the per-class precision / recall / F1 / support table, followed by the
# macro, micro and weighted averages and the overall accuracy.
print(classification_report)

           Precision   Recall    F1       Support  
                                                   
hardware     100.00%    50.00%   66.67%         2  
software      66.67%   100.00%   80.00%         2  
                                                   
   Macro      83.33%    75.00%   73.33%            
   Micro      75.00%    75.00%   75.00%            
Weighted      83.33%    75.00%   73.33%            

                  75.00% accuracy                  


**Update the Model**

In [21]:
# Predict-then-learn pass over the held-out sentences: each sample is first
# predicted and scored, and only afterwards used to update the pipeline, so the
# accuracy reflects the model as it was before it saw that sample.
#
# `Accuracy` is imported explicitly in the imports cell rather than reached via
# `river.metrics`, because a bare `import river` does not list `metrics` among
# the package's attributes (see the `dir(river)` output above); the attribute
# only exists once some earlier cell has imported the submodule.
metric = Accuracy()
for text, label in test_data:
    y_pred_before = pipe_nb.predict_one(text)  # prediction made before learning this sample
    metric.update(label, y_pred_before)
    pipe_nb.learn_one(text, label)

In [22]:
# Accuracy of the predictions made before each held-out sample was learned.
metric

Accuracy: 75.00%

In [23]:
# Second predict-then-learn pass over the same `test_data`. The pipeline has
# already learned every one of these samples in the previous pass, so this
# accuracy shows that the update took effect; it is not an estimate of
# performance on unseen text.
#
# `Accuracy` is imported explicitly in the imports cell rather than reached via
# `river.metrics`, because a bare `import river` does not list `metrics` among
# the package's attributes (see the `dir(river)` output above).
metric2 = Accuracy()
for text, label in test_data:
    y_pred_before = pipe_nb.predict_one(text)  # prediction made before re-learning this sample
    metric2.update(label, y_pred_before)
    pipe_nb.learn_one(text, label)

In [25]:
# Accuracy after the update, measured on the same sentences the model has
# already learned from, so a perfect score here is expected rather than a sign
# of generalisation.
metric2

Accuracy: 100.00%