# [River](https://github.com/online-ml/river)

## Vaex

In [2]:
from numbers import Number

import numpy as np
import vaex
from river import compose
from river.linear_model import LogisticRegression
from river.metrics import Accuracy
from river.preprocessing import StandardScaler, OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from vaex.ml.datasets import load_titanic

from goldilox import Pipeline


df = load_titanic()
features = df.get_column_names()
target = "survived"
features.remove(target)

# River pipeline
num = compose.SelectType(Number) | StandardScaler()
cat = compose.SelectType(str) | OneHotEncoder()
model = (num + cat) | LogisticRegression()

metric = Accuracy()
for x in df.to_records():
    y = bool(x.pop(target))
    y_pred = model.predict_one(x)
    metric = metric.update(y, y_pred)
    model = model.learn_one(x, y)


@vaex.register_function(on_expression=False)
def predict(*columns):
    batch = np.array(columns).T
    return np.array(
        [model.predict_one({feature: value for feature, value in zip(values, features)}) for values in batch])

df.add_function('predict', predict)
df['predictions'] = df.func.predict(*tuple([df[col] for col in features]))
pipeline = Pipeline.from_vaex(df)
assert pipeline.validate()
pipeline.inference(pipeline.raw)

#,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home_dest,predictions
0,1,True,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.338,B5,S,2,,"St Louis, MO",False


## Skleran 

In [3]:
from numbers import Number

import numpy as np
import vaex
from river import compose
from river.linear_model import LogisticRegression
from river.metrics import Accuracy
from river.preprocessing import StandardScaler, OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from vaex.ml.datasets import load_titanic
from goldilox import Pipeline


class RiverLogisticRegression(BaseEstimator, TransformerMixin):

    def __init__(self, target, output_column='predictions'):
        num = compose.SelectType(Number) | StandardScaler()
        cat = compose.SelectType(str) | OneHotEncoder()
        model = (num + cat) | LogisticRegression()

        self.model = model
        self.target = target
        self.metric = Accuracy()
        self.output_column = output_column

    def iterate(self, X, y):
        if y is not None:
            X = X.drop(self.target, errors='ignore')
            return zip(X, y)
        for x in df.to_dict(orient='records'):
            y = x.pop(self.target, None)
            yield x, y

    def fit(self, X, y=None, **kwargs):
        for x, y in self.iterate(X, y):
            y_pred = self.model.predict_one(x)
            self.metric.update(y, y_pred)
            self.model.learn_one(x, y)
        return self

    def predict(self, X):
        return np.array([self.model.predict_one(x) for x in X.to_dict(orient='records')])

    def transform(self, X):
        X = X.drop(self.target, errors='ignore')
        X[self.output_column] = self.predict(X)
        return X

    def fit_transform(self, X, y=None, **fit_params):
        self.fit(X, y)
        return self.transform(X)

df = load_titanic().to_pandas_df() 
pipeline = Pipeline.from_sklearn(RiverLogisticRegression('survived')).fit(df)

pipeline.inference(pipeline.raw)

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home_dest,predictions
0,1,True,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,,"St Louis, MO",False
