# Slackformer: Basic transformer on WBSlack data
Credit to Stacey for the model architecture and training logic <3

In [27]:
import os

import weave
import slack_trainer

In [28]:
# Directory containing the Slack export to train on.
# Set the SLACK_DATA_DIR environment variable to point at your own copy; when it
# is unset, the original author's local path is used so existing behaviour is kept.
SLACK_DATA_DIR = os.environ.get(
    "SLACK_DATA_DIR",
    "/Users/timothysweeney/Documents/jan_1_2022_slack_dump",
)

# raw_data is indexed by "users" and "messages" in the processing cell below.
raw_data = slack_trainer.load_data(SLACK_DATA_DIR)

In [29]:
# Every tunable setting for the run lives here, grouped by the step that reads it.
config = dict(
    # -- data processing --
    min_msg_count=100,
    train_frac=0.8,
    # -- model selection: truthy -> baseline model, falsy -> transformer model --
    use_baseline=True,
    # -- passed to both model builders --
    vocab_size=50000,
    sequence_length=200,
    # -- passed to the transformer builder only --
    dense_dim=40,
    embed_dim=64,
    num_heads=6,
    ff_dim=64,
    # -- model.compile arguments --
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
    # -- fit arguments --
    batch_size=64,
    epochs=1,
)

In [30]:
# Process the data: pass the raw users and messages to slack_trainer.process_data
# together with the min_msg_count and train_frac settings from config.
data = slack_trainer.process_data(
    users=raw_data["users"],
    messages=raw_data["messages"],
    min_msg_count=config["min_msg_count"],
    train_frac=config["train_frac"],
)

In [31]:
# Build & compile the model.
# config["use_baseline"] picks the builder. Both builders receive the processed
# data, vocab_size and sequence_length; the transformer builder also receives
# its four size settings.
if not config["use_baseline"]:
    model = slack_trainer.make_transformer_model(
        data,
        vocab_size=config["vocab_size"],
        sequence_length=config["sequence_length"],
        dense_dim=config["dense_dim"],
        embed_dim=config["embed_dim"],
        num_heads=config["num_heads"],
        ff_dim=config["ff_dim"],
    )
else:
    model = slack_trainer.make_baseline_model(
        data,
        vocab_size=config["vocab_size"],
        sequence_length=config["sequence_length"],
    )

# Compilation is the same for either model.
model.compile(
    optimizer=config["optimizer"],
    loss=config["loss"],
    metrics=config["metrics"],
)

In [32]:
# Train the model on the processed data using the batch size and epoch count
# from config; whatever fit_model returns is kept in `results`.
results = slack_trainer.fit_model(
    model,
    data,
    batch_size=config["batch_size"],
    epochs=config["epochs"],
)



In [33]:
# Package model for inference.
# package_model receives both the trained model and the processed data;
# presumably the data supplies the preprocessing/label information needed at
# inference time -- TODO confirm against slack_trainer.package_model.
packaged_model = slack_trainer.package_model(model, data)

In [34]:
# Save or Publish Model
# Two alternatives are kept here; only the publish call is active.
# NOTE(review): presumably weave.save is the non-published (local) variant --
# confirm against the weave docs before switching between them.
# saved_model = weave.save(packaged_model)
# saved_model is reused below: it is stored in the published bundle and passed
# to weave_keras.call_string in the demo cell.
saved_model = weave.publish(packaged_model)

In [35]:
# Publish one bundle holding the trained model, the already-published packaged
# model, and the users' real names ordered by their model_id.
# "FastModel" is the second positional argument to weave.publish (presumably
# the published object's name -- confirm against the weave docs).
pub = weave.publish(
    {
        "model": model,
        "packaged_model": saved_model,
        "classes": data["users"].sort_values("model_id")["real_name"].tolist(),
    },
    "FastModel",
)

In [36]:
# Demonstrate calling: run the published packaged model on a sample string.
# The import is intentionally kept in this cell, after the publish calls.
from weave.ecosystem import keras as weave_keras

# Last expression in the cell, so the notebook displays the call's result.
weave_keras.call_string(saved_model, "i love weave")

