# [Vowpal Wabbit](https://vowpalwabbit.org/)

## Vaex

In [1]:
from numbers import Number

import numpy as np
import vaex
from vowpalwabbit.DFtoVW import DFtoVW
from vowpalwabbit.pyvw import vw
from vaex.ml.datasets import load_titanic

from goldilox import Pipeline

# Name of the label column; every other column is used as a feature.
target = 'survived'

df = load_titanic()

features = list(df.get_column_names())
features.remove(target)

# Re-store the label column with an integer dtype.
df[target] = df[target].astype('int')



In [6]:
# Keyword options forwarded to the vw constructor.
params = {'P': 1, "enable_logging": True, 'link': 'logistic'}
model = vw(**params)

# Walk the dataframe in 10-row pandas chunks and feed the learner one
# converted example at a time.
for _start, _stop, chunk in df.to_pandas_df(chunk_size=10):
    converter = DFtoVW.from_colnames(df=chunk, y=target, x=features)
    for example in converter.convert_df():
        model.learn(example)

model.finish()

# Print only the last eight entries of the captured log.
log_tail = model.get_log()[-8:]
print(' '.join(log_tail))

finished run
 number of examples = 1309
 weighted example sum = 1309.000000
 weighted label sum = 500.000000
 average loss = 0.095104
 best constant = 0.381971
 best constant's loss = 0.236069
 total feature number = 18672



In [7]:
# Encode: save the model to a temporary file and turn its bytes into
# base64 ASCII text.
import tempfile
import base64

# tempfile.mktemp() only returns a name, so another process could claim it
# before it is used. NamedTemporaryFile creates the file securely;
# delete=False keeps it on disk after the handle closes so the model can be
# saved into it by name.
with tempfile.NamedTemporaryFile(delete=False) as f:
    filename = f.name
model.save(filename)
with open(filename, 'rb') as f:
    model_data = f.read()
encoding = base64.encodebytes(model_data).decode('ascii')

In [8]:
# Decode: turn the base64 text back into bytes, write them to a temporary
# file and load a new vw model from that file.
model_data = base64.decodebytes(encoding.encode('ascii'))
# NamedTemporaryFile replaces the deprecated, race-prone tempfile.mktemp();
# delete=False keeps the file on disk so vw can open it by name.
with tempfile.NamedTemporaryFile(delete=False) as f:
    f.write(model_data)
openfilename = f.name

# 'i' is the option used here to point vw at the saved model file.
params['i'] = openfilename
new_model = vw(**params)
examples = DFtoVW.from_colnames(df=df.head(1).to_pandas_df(), y=target, x=features).convert_df()
new_model.predict(examples[0])



finished run
number of examples = 1
weighted example sum = 1.000000
weighted label sum = 1.000000
average loss = 0.002353
best constant = 1.000000
best constant's loss = 0.000000
total feature number = 16


0.7214158177375793

In [11]:
# Score the same example again with the reloaded model.
new_model.predict(examples[0])

0.7214158177375793

In [None]:
import traitlets
import tempfile
import base64

class VWModell(traitlets.HasTraits):
    """Picklable wrapper around a ``vw`` model.

    The wrapped model is serialized by saving it to a file and base64-encoding
    the file's bytes; it is restored by writing those bytes back to a file and
    passing that file to ``vw`` through the ``i`` option.
    """

    # The signature must accept the argument tuple returned by __reduce__.
    def __init__(self, model=None, params=None):
        """Wrap an existing model, restore an encoded one, or create a new one.

        Args:
            model: A live ``vw`` instance, base64 text produced by ``encode``
                (``str`` or ``bytes``), or ``None`` to build a new model from
                ``params``.
            params: Keyword arguments for the ``vw`` constructor.
        """
        super().__init__()
        self.params = params or {}
        if model is not None:
            model = self.decode(model)
        self.model = model or vw(**self.params)

    # This is how you make a class picklable.
    def __reduce__(self):
        """Tell pickle to rebuild the instance from its encoded model and params."""
        return (self.__class__, (self.encode(), self.params))

    def decode(self, encoding):
        """Return a live model for ``encoding``.

        Args:
            encoding: Base64 text (``str`` or ``bytes``) as produced by
                ``encode``. Any other object is returned unchanged.

        Returns:
            A ``vw`` model loaded from the decoded bytes, or ``encoding``
            itself when it is not base64 text.
        """
        # encode() returns str, so str must be accepted here for the pickle
        # round trip to restore a real model.
        if isinstance(encoding, str):
            encoding = encoding.encode('ascii')
        if not isinstance(encoding, bytes):
            return encoding

        model_data = base64.decodebytes(encoding)
        # The temporary directory and the file inside it are removed when the
        # block exits.
        # NOTE(review): assumes vw reads the file while it is being
        # constructed, so the file is not needed afterwards - confirm.
        with tempfile.TemporaryDirectory() as workdir:
            with tempfile.NamedTemporaryFile(dir=workdir, delete=False) as f:
                f.write(model_data)
            params = self.params.copy()
            params['i'] = f.name
            return vw(**params)

    def encode(self):
        """Serialize the wrapped model to base64 ASCII text.

        Returns:
            The base64 ``str`` of the saved model file, or ``self.model``
            unchanged when it already holds encoded data.
        """
        if isinstance(self.model, (str, bytes)):
            return self.model
        with tempfile.TemporaryDirectory() as workdir:
            # Reserve a securely created file name, then let vw write to it.
            with tempfile.NamedTemporaryFile(dir=workdir, delete=False) as f:
                filename = f.name
            # Save the wrapped model, not a module-level variable.
            self.model.save(filename)
            with open(filename, 'rb') as f:
                model_data = f.read()
        return base64.encodebytes(model_data).decode('ascii')

    def predict(self, example):
        """Return the wrapped model's prediction for ``example``.

        ``example`` is passed straight to the wrapped model's ``predict``.

        TODO: accept raw feature data and convert it (e.g. with DFtoVW)
        before predicting.
        """
        return self.model.predict(example)

# Wrap the trained vw model in VWModell; this rebinds the name `model`
# to the wrapper.
model = VWModell(model=model, params=params)

@vaex.register_function(on_expression=False)
def predict(*columns):
    """Score every row of the given feature columns with the global ``model``.

    Args:
        *columns: One array per entry of ``features``, in the same order.

    Returns:
        A numpy array holding one prediction per row.
    """
    # Transpose so that each entry of ``batch`` is one row across all columns.
    # NOTE(review): stacking columns of different dtypes coerces them to a
    # common dtype - confirm this is acceptable for the model input.
    batch = np.array(columns).T
    # Pair each feature name with its value; the original zipped the two in
    # the opposite order, which keyed the dict by value instead of by name.
    # NOTE(review): assumes model.predict accepts a name -> value mapping -
    # confirm against the wrapper.
    return np.array(
        [model.predict(dict(zip(features, values))) for values in batch])

# Register the prediction function on the dataframe and store its result,
# computed from the feature columns, as the 'predictions' column.
df.add_function('predict', predict)
df['predictions'] = df.func.predict(*[df[col] for col in features])

# Build the pipeline from the dataframe, check it, then run inference on the
# pipeline's own raw example.
pipeline = Pipeline.from_vaex(df)
assert pipeline.validate()
pipeline.inference(pipeline.raw)