In [1]:
import os 
import dotenv
from pathlib import Path

In [2]:
# Project root: two directory levels above the notebook's current working directory.
project_dir = Path.cwd().resolve().parents[1]

In [3]:
# Display the resolved project root for a quick visual check.
project_dir

PosixPath('/Users/kinara/Scotland/Talks/AllDataId/ml-in-prod')

In [4]:
# Load the variables declared in the ".env" file located at the project root.
dotenv_path = str(Path(project_dir) / ".env")
dotenv.load_dotenv(dotenv_path=dotenv_path)

True

In [5]:
# check that the environment variables were loaded

In [6]:
# Confirm the MLflow tracking URI is set in the environment (None would mean it is missing).
os.environ.get("MLFLOW_TRACKING_URI")

'http://localhost:5000'

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mlflow

In [8]:
# define experiment name

In [9]:
# Name of the MLflow experiment used for the runs in this notebook.
experiment_name = "sentiment_clf_01"

In [10]:
# set the active MLflow experiment (created if it does not exist yet)

In [11]:
# Make this experiment the active one; per the log output below, MLflow creates it when it is missing.
mlflow.set_experiment(experiment_name)

2023/07/26 21:33:42 INFO mlflow.tracking.fluent: Experiment with name 'sentiment_clf_01' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow/1', creation_time=1690421622297, experiment_id='1', last_update_time=1690421622297, lifecycle_stage='active', name='sentiment_clf_01', tags={}>

In [12]:
# fetch the existing experiment by name

In [10]:
# Look up the experiment record by name; its id is passed to mlflow.start_run in the training cell.
experiment = mlflow.get_experiment_by_name(experiment_name)

In [11]:
# Print the experiment id to confirm the lookup returned an experiment.
print(f"experiment id: {experiment.experiment_id}")

experiment id: 1


In [12]:
# easiest part -> just enable autolog

In [13]:
# Enable MLflow autologging for scikit-learn; this runs before any estimator is fitted below.
mlflow.sklearn.autolog()

In [14]:
import re
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from nltk.stem.porter import PorterStemmer

In [15]:
# English stop-word list from NLTK; used by clean_text and as a vectorizer option in the grid search.
# NOTE(review): presumably requires the NLTK "stopwords" corpus to be downloaded beforehand — confirm.
english_stopwords = stopwords.words("english")

In [16]:
# Shared Porter stemmer instance used by tokenizer_porter.
porter = PorterStemmer()

In [17]:
def tokenize(text):
    """Split ``text`` on runs of whitespace and return the list of tokens."""
    tokens = text.split()
    return tokens

def tokenizer_porter(text):
    """Split ``text`` on whitespace and return the Porter stem of every token."""
    stems = []
    for word in text.split():
        # The module-level `porter` stemmer is looked up per token, as in a comprehension.
        stems.append(porter.stem(word))
    return stems

In [18]:
def clean_text(text: str, lowercase: bool = False, remove_unwanted_chars: bool = False, remove_stopwords: bool = False) -> str:
    """Normalise a raw text string before feature extraction.

    The steps always run in the same order: optional character clean-up,
    sentence tokenisation (always applied), optional stop-word removal and
    optional lower-casing.

    Args:
        text: Input string to clean.
        lowercase: When True, lower-case the final text.
        remove_unwanted_chars: When True, collapse whitespace and strip
            zero-width spaces, repeated dots, HTML tags, parentheses, URLs
            and local-file prefixes.
        remove_stopwords: When True, drop whitespace-separated tokens whose
            lower-cased form appears in ``english_stopwords``.

    Returns:
        The cleaned text.
    """
    if remove_unwanted_chars:
        # Collapse every whitespace run (new lines included) into a single space.
        text = re.sub(r"\s+", " ", text)
        # Remove zero-width spaces: the HTML entity (any letter case) and the
        # literal U+200B character, which `\s` does not match.
        text = re.sub(r"&#x200B;|\u200b", "", text, flags=re.IGNORECASE)

        # Reduce runs of dots / ellipses to a single dot.
        text = re.sub(r"\.\.+", ".", text)
        # Remove HTML tags.
        text = re.sub(r'<[^>]*>', '', text)
        # Remove parentheses (the characters only, not their content).
        text = re.sub(r'[()]', '', text)
        # Remove URLs.
        # NOTE(review): the first alternative is `https?` without `://`, so any
        # token starting with "http" is treated as a URL — confirm this is intended.
        text = re.sub(r'''(?i)\b((?:https?|ftp://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))''', '', text)
        # Remove local file path prefixes.
        text = re.sub(r'file:\\+', '', text)

    # Re-join the detected sentences with single spaces.
    text = " ".join(sent_tokenize(text))

    if remove_stopwords:
        # A set gives constant-time membership tests instead of scanning the list per token.
        stopword_set = set(english_stopwords)
        text = " ".join(
            token for token in text.split() if token.lower() not in stopword_set
        )

    if lowercase:
        text = text.lower()

    return text

In [19]:
# load sample dataset

In [20]:
# Read the processed airline-sentiment CSV from the project's data directory.
dataset_csv = os.path.join(project_dir, "data/processed/airline-sentiment-datasets.csv")
dataset = pd.read_csv(dataset_csv)

In [21]:
# Preview the first rows of the freshly loaded dataset.
dataset.head()

Unnamed: 0,tweet_id,airline_sentiment,negativereason,airline,text
0,570222239603273729,NEGATIVE,Can't Tell,American,do not merge with an airline that ai not read...
1,570213186139525120,NEGATIVE,Flight Attendant Complaints,American,thanks for making the worst fly experience ev...
2,569864610016321536,NEGATIVE,Customer Service Issue,American,Filed a PIR with an agent at Indianapolis air...
3,569850083140882432,NEGATIVE,Lost Luggage,American,I am called Paris office this morning again s...
4,569907965223763970,NEGATIVE,Customer Service Issue,American,thx for showing me that your Twitter apprecia...


In [22]:
# Clean every tweet: strip unwanted characters and stop words, keep the original letter case.
clean_options = dict(lowercase=False, remove_unwanted_chars=True, remove_stopwords=True)
dataset["text"] = dataset["text"].apply(clean_text, **clean_options)

In [23]:
# Lower-case the airline names.
# NOTE(review): `.str.lower()` on a categorical Series presumably returns an object-dtype
# Series, so the column may not stay categorical after this line — confirm if the dtype matters.
dataset["airline"] = dataset["airline"].astype("category").str.lower()

In [24]:
# class labels

In [25]:
# Copy the sentiment column twice: "label" is replaced by integer ids in a later cell,
# while "label_text" keeps the original string value.
sentiment = dataset["airline_sentiment"]
dataset["label"] = sentiment
dataset["label_text"] = sentiment

In [26]:
# Keep only the columns used from here on.
model_columns = ["text", "airline", "label", "label_text"]
dataset = dataset[model_columns]

In [27]:
# convert label from str to id

In [28]:
# Mapping between sentiment names and integer class ids, in both directions.
label2id = dict(NEGATIVE=0, NEUTRAL=1, POSITIVE=2)
id2label = dict(zip(label2id.values(), label2id.keys()))

In [29]:
def _label_to_id(label_name):
    """Return the integer class id for a sentiment name (KeyError if unknown)."""
    return label2id[label_name]


# Replace the string labels with their integer ids.
dataset["label"] = dataset["label_text"].apply(_label_to_id)

In [30]:
# Preview the cleaned text together with the encoded labels.
dataset.head()

Unnamed: 0,text,airline,label,label_text
0,merge airline ai ready prime time book elite f...,american,0,NEGATIVE
1,thanks making worst fly experience ever never ...,american,0,NEGATIVE
2,Filed PIR agent Indianapolis airport Unable pu...,american,0,NEGATIVE
3,called Paris office morning still waiting Miam...,american,0,NEGATIVE
4,thx showing Twitter appreciates employees Sure...,american,0,NEGATIVE


In [48]:
# split train and test

In [36]:
from sklearn.model_selection import train_test_split

In [37]:
# Hold out 20% of the rows for testing, stratifying on the (label, airline) pair
# with a fixed seed so the split is reproducible.
stratify_on = dataset[["label", "airline"]]
df_train, df_test = train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=stratify_on,
)

In [38]:
# Extract the text (features) and integer label (target) arrays for each split.
X_train = df_train["text"].values
y_train = df_train["label"].values
X_test = df_test["text"].values
y_test = df_test["label"].values

In [39]:
# Report the shapes of the training features and labels.
print(f"train data shape: {X_train.shape}, {y_train.shape}")

train data shape: (3504,), (3504,)


In [40]:
# Report the shapes of the test features and labels.
print(f"test data shape: {X_test.shape}, {y_test.shape}")

test data shape: (876,), (876,)


In [41]:
# Persist both splits as parquet files under data/processed for later use.
processed_dir = os.path.join(project_dir, "data", "processed")
df_train.to_parquet(os.path.join(processed_dir, "dataset.train.parquet"))
df_test.to_parquet(os.path.join(processed_dir, "dataset.test.parquet"))

In [42]:
# the modelling part

In [43]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, f1_score

In [44]:
# Standalone TF-IDF vectorizer with default settings.
# NOTE(review): not referenced by the cells visible here (the pipeline builds its own
# TfidfVectorizer) — confirm whether this variable is still needed.
tfidf = TfidfVectorizer()

In [45]:
# Hyper-parameter grid for the search; keys follow the "<pipeline step>__<parameter>" form.
# 2 x 2 x 2 (vectorizer) x 2 x 2 x 4 (classifier) = 128 candidate combinations.
mini_param_grid = [
    dict(
        vect__ngram_range=[(1, 1)],
        vect__stop_words=[None, english_stopwords],
        vect__lowercase=[True, False],
        vect__tokenizer=[tokenizer_porter],
        vect__norm=[None],
        vect__use_idf=[False, True],
        clf__penalty=["l2"],
        clf__loss=["hinge"],
        clf__alpha=[0.001, 0.01],
        clf__shuffle=[True, False],
        clf__random_state=[13],
        clf__max_iter=[4, 8, 10, 16],
        # "clf__C": [1.0, 10.0]
    )
]

In [46]:
# Two-step pipeline: TF-IDF features ("vect") feeding an SGD-trained linear classifier ("clf").
pipeline_steps = [
    ("vect", TfidfVectorizer()),
    ("clf", SGDClassifier()),
]
lr_tfidf = Pipeline(steps=pipeline_steps)

In [47]:
# Grid search over mini_param_grid with 5-fold cross-validation, scored on accuracy.
gs_lr_tfidf = GridSearchCV(
    estimator=lr_tfidf,
    param_grid=mini_param_grid,
    scoring="accuracy",
    cv=5,
    verbose=2,
    n_jobs=-1,
)

In [61]:
# train the model inside an MLflow run so parameters, metrics and the model are tracked

In [48]:
def _loggable_param(value, max_length=250):
    """Return a short, session-independent string form of a hyper-parameter value.

    Callables are logged by name because their repr embeds a memory address
    that changes between sessions; long values (e.g. the full stop-word list)
    are truncated so they stay within MLflow's parameter length limit.
    NOTE(review): the exact limit depends on the MLflow version; 250 is a
    conservative choice — adjust if longer values are needed.
    """
    text = getattr(value, "__name__", None) if callable(value) else None
    if text is None:
        text = str(value)
    if len(text) > max_length:
        text = text[: max_length - 3] + "..."
    return text


# Run the grid search inside a named MLflow run and record the outcome.
with mlflow.start_run(experiment_id=experiment.experiment_id, run_name="fourth_run"):
    gs_lr_tfidf.fit(X_train, y_train)

    print("model best parameters:")
    print(gs_lr_tfidf.best_params_)

    # Log the winning hyper-parameters in a readable, bounded form.
    mlflow.log_params(
        {name: _loggable_param(value) for name, value in gs_lr_tfidf.best_params_.items()}
    )

    # Evaluate the best pipeline on the training data and on the held-out test split.
    best_clf = gs_lr_tfidf.best_estimator_
    y_preds = best_clf.predict(X_test)
    eval_metrics = {
        "train_accuracy": best_clf.score(X_train, y_train),
        "test_accuracy": accuracy_score(y_true=y_test, y_pred=y_preds),
        # The key says "macro", so the averaging mode must be "macro" as well
        # (it was previously computed with average="weighted").
        "macro_f1_score": f1_score(y_true=y_test, y_pred=y_preds, average="macro"),
        # The support-weighted F1 stays available under an accurate name.
        "weighted_f1_score": f1_score(y_true=y_test, y_pred=y_preds, average="weighted"),
    }

    # Log the evaluation metrics.
    mlflow.log_metrics(eval_metrics)

    # Log the trained pipeline as a model artifact.
    mlflow.sklearn.log_model(
        best_clf,
        "sentiment-model"
    )



Fitting 5 folds for each of 128 candidates, totalling 640 fits
















[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11ad5b940>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11ad46430>, vect__use_idf=True; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she





[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11354a940>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x113532b80>, vect__use_idf=False; total time=   1.6s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x113543e50>, vect__use_idf=True; total time=   1.9s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, cl

[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6d6940>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6beb80>, vect__use_idf=True; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6d3e50>, vect__use_idf=True; total time=   1.8s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf







[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'









[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11aa4ff70>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10e717040>, vect__use_idf=True; total time=   1.6s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 











[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'

[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further











[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11a643f70>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x113495f70>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11a661f70>, vect__use_idf=True; total time=   1.8s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty

[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x110834f70>, vect__use_idf=False; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6caf70>, vect__use_idf=True; total time=   1.7s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6af040>, vect__use_idf=True; total time=   1.8s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=10, clf__penalty=









[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11aa42f70>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did'





[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further



[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11981ef70>, vect__use_idf=True; total time=   1.9s
[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did'





[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further







[CV] END clf__alpha=0.001, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'furthe



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11351df70>, vect__use_idf=True; total time=   1.9s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doi







[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 





[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',









[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=4, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6c1040>, vect__use_idf=False; total time=   2.3s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x110837f70>, vect__use_idf=False; total time=   3.3s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11089df70>, vect__use_idf=True; total time=   4.3s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__r



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11bf48f70>, vect__use_idf=True; total time=   2.1s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'd





[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',









[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x119811f70>, vect__use_idf=False; total time=   2.1s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10e41ff70>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x1198bf040>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, c



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x11a6cef70>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'd



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=8, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'









[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'





[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'



[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=10, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'

2023/07/26 22:04:28 INFO mlflow.sklearn.utils: Logging the 5 best runs, 123 runs will be omitted.


model best parameters:
{'clf__alpha': 0.001, 'clf__loss': 'hinge', 'clf__max_iter': 16, 'clf__penalty': 'l2', 'clf__random_state': 13, 'clf__shuffle': True, 'vect__lowercase': True, 'vect__ngram_range': (1, 1), 'vect__norm': None, 'vect__stop_words': None, 'vect__tokenizer': <function tokenizer_porter at 0x132c5ad30>, 'vect__use_idf': False}


In [50]:
# test prediction from logged model

In [49]:
# URI of a model artifact logged by an MLflow run, in the form "runs:/<run_id>/model".
# NOTE(review): the run ID is hardcoded from an earlier session — presumably one of the
# runs logged by the grid search above; update it after re-running the search.
logged_model = "runs:/aa735496385a465e8fb11f515621af17/model"

In [50]:
# Load the logged model through MLflow's generic python-function (pyfunc) flavor.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

In [51]:
test_idx = 33

# Wrap the selected text in a one-element array before handing it to the model.
sample_batch = np.array([X_test[test_idx]])
pred = loaded_model.predict(sample_batch)

# Show the input text next to the predicted and the true label.
print("Text:  ", X_test[test_idx])
print("Predicted: ", id2label[pred[0]])
print("True label: ", id2label[y_test[test_idx]])

Text:   keep getting hung PLEASE respond book ticket able buy one unless speak someone
Predicted:  NEGATIVE
True label:  NEGATIVE


In [52]:
test_idx = 34

# Wrap the selected text in a one-element array before handing it to the model.
sample_batch = np.array([X_test[test_idx]])
pred = loaded_model.predict(sample_batch)

# Show the input text next to the predicted and the true label.
print("Text:  ", X_test[test_idx])
print("Predicted: ", id2label[pred[0]])
print("True label: ", id2label[y_test[test_idx]])

Text:   thank See u next wednesday FLL Save peanuts
Predicted:  POSITIVE
True label:  POSITIVE


In [53]:
# register model 

In [54]:
# Register the logged model in the MLflow Model Registry under a fixed name;
# the returned object carries the registered name and the version that was created.
model_registry_version = mlflow.register_model(
    model_uri=logged_model,
    name="sentiment-model-sgd",
)

Successfully registered model 'sentiment-model-sgd'.
2023/07/26 22:06:45 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: sentiment-model-sgd, version 1
Created version '1' of model 'sentiment-model-sgd'.


In [55]:
# check registered model: show the name and version reported by mlflow.register_model
print(f"model name: {model_registry_version.name}")
print(f"model version: {model_registry_version.version}")

model name: sentiment-model-sgd
model version: 1


In [56]:
# change model stage to Staging/Prod

In [57]:
from mlflow import MlflowClient

In [58]:
# Move the freshly registered version to the "Staging" stage.
# NOTE(review): model stages are deprecated in newer MLflow releases in favour of
# aliases — confirm against the installed MLflow version before reusing this cell.
client = MlflowClient()
# Kept as the last expression so the cell displays the updated ModelVersion.
client.transition_model_version_stage(
    name=model_registry_version.name,
    version=model_registry_version.version,
    stage="Staging",
)

<ModelVersion: aliases=[], creation_timestamp=1690423605932, current_stage='Staging', description='', last_updated_timestamp=1690423640484, name='sentiment-model-sgd', run_id='aa735496385a465e8fb11f515621af17', run_link='', source='s3://mlflow/1/aa735496385a465e8fb11f515621af17/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>

In [59]:
# Select which registered model and which registry stage to load from.
model_name, stage = "sentiment-model-sgd", "Staging"

In [60]:
# Registry URI of the form "models:/<name>/<stage>" instead of a run-specific URI.
model_uri = f"models:/{model_name}/{stage}"
selected_model = mlflow.pyfunc.load_model(model_uri=model_uri)

In [61]:
test_idx = 37

# Wrap the selected text in a one-element array before handing it to the model.
sample_batch = np.array([X_test[test_idx]])
pred = selected_model.predict(sample_batch)

# Show the input text next to the predicted and the true label.
print("Text:  ", X_test[test_idx])
print("Predicted: ", id2label[pred[0]])
print("True label: ", id2label[y_test[test_idx]])

Text:   Cancelled Flights flight NY 3 times make decided amp Gs Cancelled Flight return flight Thanks
Predicted:  NEGATIVE
True label:  NEGATIVE


In [62]:
# Same lookup as before, this time targeting the "Production" stage.
model_name, stage = "sentiment-model-sgd", "Production"

In [63]:
# No version of this model has been moved to "Production" in this notebook, so the
# lookup below is expected to fail. Catch the error and display it so that
# "Restart & Run All" still completes, and clear the name so the Staging model
# loaded earlier cannot be mistaken for a Production one.
try:
    selected_model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{stage}")
except mlflow.exceptions.MlflowException as exc:
    selected_model = None
    print(f"MlflowException: {exc}")

MlflowException: No versions of model with name 'sentiment-model-sgd' and stage 'Production' found

[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x10f6acf70>, vect__use_idf=False; total time=   2.0s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x110861f70>, vect__use_idf=False; total time=   2.4s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=False, vect__lowercase=True, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x110780f70>, vect__use_idf=True; total time=   2.0s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2,

[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'

[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=None, vect__tokenizer=<function tokenizer_porter at 0x1177c7f70>, vect__use_idf=True; total time=   2.2s
[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 

[CV] END clf__alpha=0.01, clf__loss=hinge, clf__max_iter=16, clf__penalty=l2, clf__random_state=13, clf__shuffle=True, vect__lowercase=False, vect__ngram_range=(1, 1), vect__norm=None, vect__stop_words=['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further'

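Note: a logged model can also be served as a local REST endpoint (port 8001 here) with the MLflow CLI. Replace the run ID in the command below with the ID of the run you want to serve: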
```
MLFLOW_S3_ENDPOINT_URL=http://localhost:4566 MLFLOW_S3_IGNORE_TLS=true MLFLOW_TRACKING_URI=http://localhost:5000  mlflow models serve -m runs:/c16032ed5d474403bea358adbc8894b9/model -p 8001
```