In [2]:
import csv
import json
from sklearn.model_selection import train_test_split
from collections import Counter
import numpy as np
import os
import pandas as pd
import random
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import classification_report, accuracy_score, f1_score
import scipy.stats
import utils
import string
from sklearn.feature_extraction.text import CountVectorizer


In [25]:
yelp_data_source = 'project_data/yelp.csv' #done; also have full labels
amazon_instant_source = 'project_data/reviews_Amazon_Instant_Video_5.json' #done; also have full labels
amazon_instruments_source = 'project_data/reviews_Musical_Instruments_5.json' #done; have full labels
amazon_music_source = 'project_data/reviews_Digital_Music_5.json'#done; have full labels
rate_beer_source = 'project_data/ratebeer.txt'# done; have full labels
amazon_beauty_source = 'project_data/reviews_Beauty_5.json' #have full labels
sources = [yelp_data_source, amazon_instruments_source, amazon_music_source, amazon_instant_source, rate_beer_source]

In [4]:
def load_csv(path):
    '''
    Returns a tuple (X, y)
    X is matrix 
    y is vector of ratings
    '''
    X = []
    y = []
    with open(path, "r") as file:
        reader = csv.reader(file)
        i = 0
        for line in reader:
            #hacky way of skipping the header...
            if i == 0:
                i+=1
                continue
            rating = int(line[3])
            #hacky way of removing new lines
            text = " ".join(line[4].split())
            X.append(text)
            y.append(rating)
    print("Loaded: {}".format(path))
    return (X, y)

In [5]:
def write_to_csv(output_filename, X = None, y = None, path=None, force_binary = True):
    '''
    Write to nvidia compliant format
    label,sentence
    y,x
    y,x
    etc.
    
    Provide path if loading from source
    '''
    if not X and not y and not path:
        print("Must provide data source from file or from numpy.")
        return
    
    with open(output_filename, "w") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["label", "sentence"])
        if not (path is None):
            X,y = load_csv(path) if path.split(".")[-1]=="csv" else load_json_from_text(path)
        for label, example in zip(y,X):
            if force_binary:
                new_label = 0 if label < 3 else 1 #force binary labels
            else:
                new_label = label
            writer.writerow([new_label, example])
    print("Conversion for {} finished. Written to {}.".format(path, output_filename))
        

In [6]:
write_to_csv( "yelp_nvidia.csv",path=yelp_data_source)
write_to_csv( "instruments_nvidia.csv",path=amazon_instruments_source)
write_to_csv("music_nvidia.csv",path=amazon_music_source)
write_to_csv("instant_nvidia.csv",path=amazon_instant_source)

Loaded: project_data/yelp.csv
Conversion for project_data/yelp.csv finished. Written to yelp_nvidia.csv.


NameError: name 'load_json_from_text' is not defined

In [7]:
def load_json_from_text(source):
    '''
    Built for amazon Jsons
    Returns a tuple (X, y)
    X is matrix 
    y is vector of ratings
    '''
    X = []
    y = []
    with open(source, "r") as file:
        lines = file.readlines()
        for line in lines:
            data = json.loads(line)
            text = data['reviewText']
            if len(text) > 1:
                X.append(text)
                rating = int(data['overall'])
                y.append(rating)
            
    print("Loaded: {}".format(source))
    return (X, y)
            
            

In [26]:
X,y = load_json_from_text(amazon_beauty_source)
X,y = X[:10000], y[:10000]

Loaded: project_data/reviews_Beauty_5.json


In [8]:
def load_rate_beer_from_text(ratebeer_source):
    X = []
    y = []
    #cp1252 since this source was compiled on a windows machine
    with open(ratebeer_source, "r", encoding = 'cp1252') as file:
        lines = file.readlines()
        for line in lines:
            if line.split(":")[0] == 'review/overall':
                y.append(int(line.split(":")[1].split("/")[0]))
            if line.split(":")[0] == 'review/text':
                if len(line.split(":")) > 2: #to account for UPDATED:
                    X.append(line.split(":")[2])
                else:
                    X.append(line.split(":")[1])
            if len(X) == 10000 and len(y) == 10000: #hard cap at 100
                break
    print("Loaded: {}".format(ratebeer_source))
    return (X, y)

In [165]:
X, y = load_rate_beer_from_text(rate_beer_source)
X,y = X[:10000], y[:10000]

Loaded: project_data/ratebeer.txt


In [42]:
def collect_data(sources):
    X = []
    y = []
    for source in sources:
        if source.split(".")[-1] == 'json':
            a,b = load_json_from_text(source)
            X += a
            y += b
        elif source.split(".")[-1] == 'csv':
            a,b = load_csv(source)
            X += a
            y += b 
        elif source.split(".")[-1] == 'txt':
            a,b = load_rate_beer_from_text(source)
            X += a
            y += b
    return (X[:10000], y[:10000]) #hard cap at 10,000 reviews
            
            

In [140]:
sources = [amazon_instruments_source]
X,y = collect_data(sources)
print("Number of datapoints loaded: {}".format(len(X)))

Loaded: project_data/reviews_Musical_Instruments_5.json
Number of datapoints loaded: 10000


In [10]:
#Train, dev, test split
#random state = 42 for all splits
def split_data(X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size = 0.1, random_state = 42)
    print("train: {}, dev: {}, test: {}".format(len(X_train), len(X_dev), len(X_test)))
    return X_train, X_dev, X_test, y_train, y_dev, y_test

In [11]:
def split_data_and_save_labels(X,y, name = None):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
    X_train, X_dev, y_train, y_dev = train_test_split(X_train, y_train, test_size = 0.1, random_state = 42)
    print("train: {}, dev: {}, test: {}".format(len(X_train), len(X_dev), len(X_test)))
    if name:
        np.save('{}_trY'.format(name), y_train)
        np.save('{}_vaY'.format(name), y_dev)
        np.save('{}_teY'.format(name), y_test)
    return X_train, X_dev, X_test, y_train, y_dev, y_test

In [27]:
X_train, X_dev, X_test, y_train, y_dev, y_test = split_data_and_save_labels(X,y, "beauty")

train: 8100, dev: 900, test: 1000


In [29]:
#write to nvidia compliant format
write_to_csv("amazon_beauty_train.csv", X = X_train, y = y_train)
write_to_csv("amazon_beauty_val.csv", X = X_dev, y = y_dev)
write_to_csv("amazon_beauty_test.csv", X = X_test, y = y_test)

Conversion for None finished. Written to amazon_beauty_train.csv.
Conversion for None finished. Written to amazon_beauty_val.csv.
Conversion for None finished. Written to amazon_beauty_test.csv.


In [12]:
def unigram_phi(x):
    '''
    unigram feature applied to each example
    Removes case
    Removes punctuation
    '''
    translator = str.maketrans('', '', string.punctuation)
    x = x.translate(translator)
    return Counter(x.lower().split())

In [23]:
def ternary_class_func(y):
    """Define a binary SST task. Just like `binary_class_func` except
    input '2' returns 'neutral'."""
    if y in ("0", "1"):
        return "negative"
    elif y in ("3", "4"):
        return "positive"
    else:
        return "neutral"
    
def binary_class_func(y, lower = 1, upper = 5):
    
    span = range(1,upper + 1)
    median = span[int(len(span)/2)]
    
    if y < median:
        return 0
    else:
        return 1
    
    
def identity_class_func(y):
    return y

In [32]:
def build_dataset(X,y, phi = (1,1), max_features = 15000, class_func = identity_class_func, vectorizer=None, vectorize=True, beer_args = {}):
    """Core general function for building experimental datasets.
    Uses CountVectorizer from sklearn to encapsulate n-gram feature generation and vectorization.

    vectorize : bool
       Whether to use a CountVectorizer. Set this to False for
       deep learning models that process their own input.
    phi: (a,b)
        Range for n-grams. 

    Returns
    -------
    dict
        A dict with keys 'X' (the feature matrix), 'y' (the list of
        labels), 'vectorizer' (the `DictVectorizer`), and
        'raw_examples' (the `nltk.Tree` objects, for error analysis).

    """
    raw_examples = X
    if class_func == binary_class_func:
        labels = [class_func(l, **beer_args) for l in y]
    else:
        labels = [class_func(l) for l in y]
    feat_matrix = None
    
    if vectorize:
        
        # In training, we want a new vectorizer:
        if vectorizer == None:
            vectorizer = CountVectorizer(max_features = max_features, ngram_range = phi)
            feat_matrix = vectorizer.fit_transform(X)
            
        # In assessment, we featurize using the existing vectorizer:
        else:
            feat_matrix = vectorizer.transform(X)
            
    else:
        feat_matrix = feat_dicts
        
    return {'X': feat_matrix,
            'y': labels,
            'vectorizer': vectorizer,
            'raw_examples': raw_examples}

In [54]:
train_dataset = build_dataset(X_train, y_train, class_func = binary_class_func)
print("Train dataset with features has {:,} examples and {:,} features".format(
        *train_dataset['X'].shape))

Train dataset with features has 8,100 examples and 15,000 features


In [55]:
dev_dataset = build_dataset(X_dev, y_dev, class_func = binary_class_func, vectorizer = train_dataset['vectorizer'])
print("Dev dataset with features has {:,} examples and {:,} features".format(
        *dev_dataset['X'].shape))

Dev dataset with features has 900 examples and 15,000 features


In [57]:
test_dataset = build_dataset(X_test, y_test, class_func = binary_class_func, vectorizer = train_dataset['vectorizer'])
print("test dataset with features has {:,} examples and {:,} features".format(
        *test_dataset['X'].shape))

test dataset with features has 1,000 examples and 15,000 features


In [15]:
def fit_maxent_classifier(X, y,args_dict = {}):    
    '''
    max ent classifier (multi class log reg)
    '''
    mod = LogisticRegression(fit_intercept=True)
    mod.fit(X, y, **args_dict)
    return mod


from sklearn.linear_model import SGDClassifier
def fit_basic_sgd_classifier(X, y,args_dict = {}):    
    """Wrapper for `BasicSGDClassifier`.    
    """    
    mod = SGDClassifier()
    mod.fit(X, y, **args_dict)
    return mod

from sklearn.ensemble import RandomForestClassifier
def fit_rf_classifier(X,y, args_dict = {}):
    mod = RandomForestClassifier(**args_dict)
    mod.fit(X,y)
    return mod

from sklearn.ensemble import GradientBoostingClassifier
def fit_sgb_classifier(X,y, args_dict = {}):
    mod = GradientBoostingClassifier(**args_dict)
    mod.fit(X,y)
    return mod

from sklearn.naive_bayes import MultinomialNB
def fit_mnb_classifier(X,y, args_dict = {}):
    mod = MultinomialNB(**args_dict)
    mod.fit(X,y)
    return mod

In [36]:
def experiment(
        train_func,
        X,
        y,
        phi = (1,1),
        assess=None,
        max_features = 15000,
        class_func=identity_class_func,
        score_func=utils.safe_macro_f1,
        vectorize=True,
        verbose=True, 
        args_dict = {},
        beer_args = {}):
    """
    
    Convenience function.
    
    If assess is none, performance on training is reported. Otherwise, performance on assess = (X_assess, y_assess) is reported.

    Returns
    -------
    Model
    
    float
        The overall scoring metric as determined by `score_metric`.

    """
    # Train dataset:
    train = build_dataset(X, y, class_func = class_func, phi = phi, max_features = max_features, beer_args = beer_args)
    
    # Manage the assessment set-up:
    X_train = train['X']
    y_train = train['y']
    X_assess = train['X']
    y_assess = train['y']
    if assess != None:
        X_dev, y_dev = assess
        dev = build_dataset(X_dev, y_dev, class_func = class_func, phi = phi, vectorizer = train['vectorizer'], beer_args = beer_args)
        X_assess, y_assess = dev['X'], dev['y']
    # Train:
    mod = train_func(X_train, y_train, args_dict)
    # Predictions:
    predictions = mod.predict(X_assess)
    # Report:
    if verbose:
        print('Accuracy: %0.03f' % accuracy_score(y_assess, predictions))
        print(classification_report(y_assess, predictions, digits=3))
    # Return the overall score:
    return mod, score_func(y_assess, predictions)

In [79]:
sgdmodel, score = experiment(fit_basic_sgd_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = binary_class_func)

Accuracy: 0.885
             precision    recall  f1-score   support

          0      0.830     0.293     0.433       150
          1      0.888     0.989     0.936       850

avg / total      0.879     0.885     0.861      1000





In [74]:
maxentmodel, score = experiment(fit_maxent_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = binary_class_func)

Accuracy: 0.908
             precision    recall  f1-score   support

          0      0.727     0.620     0.669       150
          1      0.935     0.959     0.947       850

avg / total      0.903     0.908     0.905      1000



In [44]:
args_dict = {"n_estimators": 10}
rfmodel, score = experiment(fit_rf_classifier, X_train, y_train, phi = (1,1), assess = (X_dev, y_dev), class_func = binary_class_func, args_dict = args_dict)

Accuracy: 0.853
             precision    recall  f1-score   support

          0      0.596     0.218     0.320       142
          1      0.869     0.972     0.918       758

avg / total      0.826     0.853     0.823       900



In [60]:
args_dict = {}
sgbmodel, score = experiment(fit_sgb_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = binary_class_func, args_dict = args_dict)

Accuracy: 0.876
             precision    recall  f1-score   support

          0      0.842     0.213     0.340       150
          1      0.877     0.993     0.932       850

avg / total      0.872     0.876     0.843      1000



In [59]:
args_dict = {"alpha": 0.08}
mnbmodel, score = experiment(fit_mnb_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = binary_class_func, args_dict = args_dict)

Accuracy: 0.895
             precision    recall  f1-score   support

          0      0.667     0.600     0.632       150
          1      0.931     0.947     0.939       850

avg / total      0.891     0.895     0.893      1000



In [49]:
def linear_battery(source, beer_args = {}, class_func = binary_class_func):
    '''
    specify beer rating limits in beer_args if using binary labels on beer dataset
    fit sgd, log reg, rf, sgb, mnb models on unigram features on source
    '''
    sources = [source]
    X,y = collect_data(sources)
    print("Number of datapoints loaded: {}".format(len(X)))
    
    X_train, X_dev, X_test, y_train, y_dev, y_test = split_data(X,y)
    
    print("Log Reg Model")
    maxentmodel, score = experiment(fit_maxent_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = class_func, beer_args = beer_args)
    
    print("RF Model")
    args_dict = {"n_estimators": 10}
    rfmodel, score = experiment(fit_rf_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = class_func, args_dict = args_dict, beer_args = beer_args)
    
    print("SGB Model")
    args_dict = {}
    sgbmodel, score = experiment(fit_sgb_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = class_func, args_dict = args_dict, beer_args = beer_args)
    
    print("SGD Model")
    args_dict = {}
    sgdmodel, score = experiment(fit_basic_sgd_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = class_func, beer_args = beer_args)
    
    print("MNB Model")
    args_dict = {"alpha": 0.08}
    mnbmodel, score = experiment(fit_mnb_classifier, X_train, y_train, phi = (1,1), assess = (X_test, y_test), class_func = class_func, args_dict = args_dict, beer_args = beer_args)

In [37]:
linear_battery(amazon_instant_source)

Loaded: project_data/reviews_Amazon_Instant_Video_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.940
             precision    recall  f1-score   support

          0      0.714     0.333     0.455        75
          1      0.948     0.989     0.968       925

avg / total      0.931     0.940     0.930      1000

RF Model
Accuracy: 0.927
             precision    recall  f1-score   support

          0      0.750     0.040     0.076        75
          1      0.928     0.999     0.962       925

avg / total      0.914     0.927     0.896      1000

SGB Model
Accuracy: 0.930
             precision    recall  f1-score   support

          0      0.778     0.093     0.167        75
          1      0.931     0.998     0.963       925

avg / total      0.920     0.930     0.904      1000

SGD Model




Accuracy: 0.933
             precision    recall  f1-score   support

          0      0.595     0.333     0.427        75
          1      0.948     0.982     0.964       925

avg / total      0.921     0.933     0.924      1000

MNB Model
Accuracy: 0.931
             precision    recall  f1-score   support

          0      0.543     0.507     0.524        75
          1      0.960     0.965     0.963       925

avg / total      0.929     0.931     0.930      1000



In [38]:
linear_battery(amazon_music_source)

Loaded: project_data/reviews_Digital_Music_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.965
             precision    recall  f1-score   support

          0      0.500     0.171     0.255        35
          1      0.971     0.994     0.982       965

avg / total      0.954     0.965     0.957      1000

RF Model
Accuracy: 0.965
             precision    recall  f1-score   support

          0      0.000     0.000     0.000        35
          1      0.965     1.000     0.982       965

avg / total      0.931     0.965     0.948      1000

SGB Model


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Accuracy: 0.965
             precision    recall  f1-score   support

          0      0.500     0.057     0.103        35
          1      0.967     0.998     0.982       965

avg / total      0.951     0.965     0.951      1000

SGD Model




Accuracy: 0.964
             precision    recall  f1-score   support

          0      0.471     0.229     0.308        35
          1      0.973     0.991     0.982       965

avg / total      0.955     0.964     0.958      1000

MNB Model
Accuracy: 0.960
             precision    recall  f1-score   support

          0      0.407     0.314     0.355        35
          1      0.975     0.983     0.979       965

avg / total      0.955     0.960     0.958      1000



In [39]:
linear_battery(amazon_instruments_source)

Loaded: project_data/reviews_Musical_Instruments_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.941
             precision    recall  f1-score   support

          0      0.400     0.185     0.253        54
          1      0.955     0.984     0.969       946

avg / total      0.925     0.941     0.931      1000

RF Model
Accuracy: 0.946
             precision    recall  f1-score   support

          0      0.500     0.056     0.100        54
          1      0.949     0.997     0.972       946

avg / total      0.924     0.946     0.925      1000

SGB Model
Accuracy: 0.942
             precision    recall  f1-score   support

          0      0.000     0.000     0.000        54
          1      0.946     0.996     0.970       946

avg / total      0.895     0.942     0.918      1000

SGD Model




Accuracy: 0.897
             precision    recall  f1-score   support

          0      0.198     0.296     0.237        54
          1      0.959     0.931     0.945       946

avg / total      0.918     0.897     0.907      1000

MNB Model
Accuracy: 0.939
             precision    recall  f1-score   support

          0      0.360     0.167     0.228        54
          1      0.954     0.983     0.968       946

avg / total      0.922     0.939     0.928      1000



In [44]:
linear_battery(rate_beer_source, beer_args = {"lower": 1, "upper":20})

Loaded: project_data/ratebeer.txt
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.917
             precision    recall  f1-score   support

          0      0.590     0.383     0.465        94
          1      0.938     0.972     0.955       906

avg / total      0.906     0.917     0.909      1000

RF Model
Accuracy: 0.904
             precision    recall  f1-score   support

          0      0.450     0.096     0.158        94
          1      0.913     0.988     0.949       906

avg / total      0.870     0.904     0.875      1000

SGB Model
Accuracy: 0.915
             precision    recall  f1-score   support

          0      0.765     0.138     0.234        94
          1      0.918     0.996     0.955       906

avg / total      0.903     0.915     0.887      1000

SGD Model




Accuracy: 0.907
             precision    recall  f1-score   support

          0      0.510     0.277     0.359        94
          1      0.928     0.972     0.950       906

avg / total      0.889     0.907     0.894      1000

MNB Model
Accuracy: 0.900
             precision    recall  f1-score   support

          0      0.474     0.574     0.519        94
          1      0.955     0.934     0.944       906

avg / total      0.910     0.900     0.904      1000



In [46]:
linear_battery(yelp_data_source)

Loaded: project_data/yelp.csv
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.908
             precision    recall  f1-score   support

          0      0.727     0.620     0.669       150
          1      0.935     0.959     0.947       850

avg / total      0.903     0.908     0.905      1000

RF Model
Accuracy: 0.862
             precision    recall  f1-score   support

          0      0.625     0.200     0.303       150
          1      0.874     0.979     0.923       850

avg / total      0.837     0.862     0.830      1000

SGB Model
Accuracy: 0.874
             precision    recall  f1-score   support

          0      0.816     0.207     0.330       150
          1      0.876     0.992     0.930       850

avg / total      0.867     0.874     0.840      1000

SGD Model




Accuracy: 0.907
             precision    recall  f1-score   support

          0      0.699     0.667     0.683       150
          1      0.942     0.949     0.946       850

avg / total      0.905     0.907     0.906      1000

MNB Model
Accuracy: 0.895
             precision    recall  f1-score   support

          0      0.667     0.600     0.632       150
          1      0.931     0.947     0.939       850

avg / total      0.891     0.895     0.893      1000



In [21]:
def nn_experiment(source_name, args = {}):
    '''
    feature extraction from language model

    source_name is for example "yelp"
    then we feed "yelp_trY.npy" etc as source to np.load
    
    Does both a binary and a full label log reg based on features
    '''
    y = np.load("project_data/{}_trY.npy".format(source_name))
    X = np.load("project_data/{}_trXt.npy".format(source_name))
    vaX = np.load("project_data/{}_vaXt.npy".format(source_name))
    vaY = np.load("project_data/{}_vaY.npy".format(source_name))
    teX = np.load("project_data/{}_teXt.npy".format(source_name))
    teY = np.load("project_data/{}_teY.npy".format(source_name))

    print("All labels classification")
    model = fit_maxent_classifier(X, y)
    predictions = model.predict(teX)
    print('Accuracy: %0.03f' % accuracy_score(teY, predictions))
    print(classification_report(teY, predictions, digits=3))


    print("Binary classification")
    y = [binary_class_func(i, **args) for i in y]
    model = fit_maxent_classifier(X, y)
    predictions = model.predict(teX)
    teY = [binary_class_func(i, **args) for i in teY]
    print('Accuracy: %0.03f' % accuracy_score(teY, predictions))
    print(classification_report(teY, predictions, digits=3))

In [159]:
#currently data is mLSTM featurization on yelp reviews 
nn_experiment("yelp")

All labels classification
Accuracy: 0.510
             precision    recall  f1-score   support

          1      0.593     0.500     0.543        70
          2      0.393     0.438     0.414        80
          3      0.372     0.288     0.325       156
          4      0.511     0.518     0.514       367
          5      0.571     0.627     0.598       327

avg / total      0.505     0.510     0.506      1000

Binary classification
Accuracy: 0.915
             precision    recall  f1-score   support

          0      0.707     0.740     0.723       150
          1      0.954     0.946     0.950       850

avg / total      0.917     0.915     0.916      1000



In [171]:
#amazon instant reviews 
nn_experiment("instant")

All labels classification
Accuracy: 0.623
             precision    recall  f1-score   support

          1      0.727     0.400     0.516        40
          2      0.333     0.229     0.271        35
          3      0.244     0.268     0.256        82
          4      0.368     0.354     0.361       229
          5      0.770     0.808     0.789       614

avg / total      0.618     0.623     0.618      1000

Binary classification
Accuracy: 0.954
             precision    recall  f1-score   support

          0      0.738     0.600     0.662        75
          1      0.968     0.983     0.975       925

avg / total      0.951     0.954     0.952      1000



In [18]:
nn_experiment("instruments")

All labels classification
Accuracy: 0.700
             precision    recall  f1-score   support

          1      0.529     0.375     0.439        24
          2      0.250     0.133     0.174        30
          3      0.345     0.247     0.288        77
          4      0.342     0.358     0.350       176
          5      0.831     0.873     0.852       693

avg / total      0.683     0.700     0.690      1000

Binary classification
Accuracy: 0.963
             precision    recall  f1-score   support

          0      0.707     0.537     0.611        54
          1      0.974     0.987     0.981       946

avg / total      0.960     0.963     0.961      1000



In [30]:
#args passes in lower and upper limit of scores so we can compute threshold
nn_experiment("beer", args = {'lower':1, 'upper':20}) 

All labels classification
Accuracy: 0.179
             precision    recall  f1-score   support

          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.000     0.000     0.000         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.000     0.000     0.000        20
          9      0.000     0.000     0.000        15
         10      0.208     0.143     0.169        35
         11      0.089     0.095     0.092        42
         12      0.165     0.177     0.171        79
         13      0.149     0.214     0.176        84
         14      0.203     0.185     0.194       135
         15      0.178     0.218     0.196       133
         16      0.191     0.200     0.195       150
         17      0.160     0.197     0.176       117
         18      0.268     0.250     0.259       104
   

  'precision', 'predicted', average, warn_for)


Accuracy: 0.899
             precision    recall  f1-score   support

          0      0.443     0.287     0.348        94
          1      0.929     0.962     0.945       906

avg / total      0.883     0.899     0.889      1000



In [24]:
nn_experiment("music")

All labels classification
Accuracy: 0.659
             precision    recall  f1-score   support

          1      0.400     0.143     0.211        14
          2      0.429     0.143     0.214        21
          3      0.375     0.265     0.310        68
          4      0.358     0.320     0.338       228
          5      0.765     0.842     0.801       669

avg / total      0.633     0.659     0.642      1000

Binary classification
Accuracy: 0.967
             precision    recall  f1-score   support

          0      0.536     0.429     0.476        35
          1      0.979     0.987     0.983       965

avg / total      0.964     0.967     0.965      1000



In [47]:
nn_experiment("beer8k", args = {'lower':1, 'upper':20})

All labels classification
Accuracy: 0.154
             precision    recall  f1-score   support

          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.500     0.250     0.333         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.000     0.000     0.000        20
          9      0.000     0.000     0.000        15
         10      0.258     0.229     0.242        35
         11      0.111     0.071     0.087        42
         12      0.122     0.127     0.124        79
         13      0.134     0.179     0.153        84
         14      0.134     0.148     0.141       135
         15      0.167     0.218     0.189       133
         16      0.191     0.173     0.182       150
         17      0.159     0.205     0.179       117
         18      0.158     0.115     0.133       104
   

  'precision', 'predicted', average, warn_for)


Accuracy: 0.894
             precision    recall  f1-score   support

          0      0.419     0.330     0.369        94
          1      0.932     0.953     0.942       906

avg / total      0.884     0.894     0.888      1000



In [51]:
linear_battery(amazon_instant_source, class_func = identity_class_func)

Loaded: project_data/reviews_Amazon_Instant_Video_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.617
             precision    recall  f1-score   support

          1      0.429     0.150     0.222        40
          2      0.067     0.029     0.040        35
          3      0.353     0.293     0.320        82
          4      0.355     0.328     0.341       229
          5      0.738     0.832     0.783       614

avg / total      0.583     0.617     0.595      1000

RF Model
Accuracy: 0.614
             precision    recall  f1-score   support

          1      0.500     0.025     0.048        40
          2      0.000     0.000     0.000        35
          3      0.364     0.098     0.154        82
          4      0.362     0.205     0.262       229
          5      0.663     0.909     0.766       614

avg / total      0.540     0.614     0.545      1000

SGB Model
Accuracy: 0.629
             precision    recall  f1-score   



Accuracy: 0.532
             precision    recall  f1-score   support

          1      0.346     0.225     0.273        40
          2      0.120     0.086     0.100        35
          3      0.263     0.183     0.216        82
          4      0.324     0.646     0.431       229
          5      0.821     0.581     0.681       614

avg / total      0.618     0.532     0.549      1000

MNB Model
Accuracy: 0.603
             precision    recall  f1-score   support

          1      0.348     0.200     0.254        40
          2      0.097     0.086     0.091        35
          3      0.312     0.293     0.302        82
          4      0.352     0.245     0.289       229
          5      0.721     0.834     0.773       614

avg / total      0.566     0.603     0.579      1000



In [52]:
linear_battery(amazon_music_source, class_func = identity_class_func)

Loaded: project_data/reviews_Digital_Music_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.662
             precision    recall  f1-score   support

          1      1.000     0.214     0.353        14
          2      0.200     0.048     0.077        21
          3      0.302     0.191     0.234        68
          4      0.394     0.325     0.356       228
          5      0.750     0.854     0.799       669

avg / total      0.630     0.662     0.638      1000

RF Model
Accuracy: 0.646
             precision    recall  f1-score   support

          1      0.000     0.000     0.000        14
          2      1.000     0.095     0.174        21
          3      0.000     0.000     0.000        68
          4      0.267     0.088     0.132       228
          5      0.677     0.933     0.784       669

avg / total      0.535     0.646     0.559      1000

SGB Model


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Accuracy: 0.690
             precision    recall  f1-score   support

          1      0.500     0.071     0.125        14
          2      0.000     0.000     0.000        21
          3      0.250     0.015     0.028        68
          4      0.541     0.145     0.228       228
          5      0.702     0.979     0.818       669

avg / total      0.617     0.690     0.603      1000

SGD Model




Accuracy: 0.527
             precision    recall  f1-score   support

          1      0.167     0.071     0.100        14
          2      0.286     0.095     0.143        21
          3      0.197     0.191     0.194        68
          4      0.304     0.671     0.418       228
          5      0.859     0.535     0.659       669

avg / total      0.665     0.527     0.554      1000

MNB Model
Accuracy: 0.666
             precision    recall  f1-score   support

          1      0.333     0.286     0.308        14
          2      0.300     0.143     0.194        21
          3      0.304     0.250     0.274        68
          4      0.406     0.399     0.403       228
          5      0.789     0.824     0.806       669

avg / total      0.652     0.666     0.658      1000



In [53]:
linear_battery(amazon_instruments_source, class_func = identity_class_func)

Loaded: project_data/reviews_Musical_Instruments_5.json
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.675
             precision    recall  f1-score   support

          1      0.429     0.125     0.194        24
          2      0.333     0.067     0.111        30
          3      0.373     0.247     0.297        77
          4      0.307     0.290     0.298       176
          5      0.779     0.866     0.820       693

avg / total      0.643     0.675     0.652      1000

RF Model
Accuracy: 0.678
             precision    recall  f1-score   support

          1      0.000     0.000     0.000        24
          2      0.500     0.033     0.062        30
          3      0.600     0.039     0.073        77
          4      0.269     0.102     0.148       176
          5      0.709     0.947     0.811       693

avg / total      0.600     0.678     0.596      1000

SGB Model
Accuracy: 0.705
             precision    recall  f1-score   s



Accuracy: 0.644
             precision    recall  f1-score   support

          1      0.333     0.042     0.074        24
          2      0.000     0.000     0.000        30
          3      0.269     0.416     0.327        77
          4      0.290     0.239     0.262       176
          5      0.789     0.821     0.805       693

avg / total      0.627     0.644     0.631      1000

MNB Model
Accuracy: 0.651
             precision    recall  f1-score   support

          1      0.308     0.167     0.216        24
          2      0.000     0.000     0.000        30
          3      0.298     0.182     0.226        77
          4      0.269     0.244     0.256       176
          5      0.761     0.851     0.804       693

avg / total      0.605     0.651     0.625      1000



In [54]:
linear_battery(yelp_data_source, class_func = identity_class_func)

Loaded: project_data/yelp.csv
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.489
             precision    recall  f1-score   support

          1      0.652     0.429     0.517        70
          2      0.429     0.412     0.420        80
          3      0.385     0.301     0.338       156
          4      0.477     0.488     0.482       367
          5      0.526     0.612     0.566       327

avg / total      0.487     0.489     0.485      1000

RF Model
Accuracy: 0.422
             precision    recall  f1-score   support

          1      0.292     0.100     0.149        70
          2      0.192     0.062     0.094        80
          3      0.260     0.173     0.208       156
          4      0.418     0.575     0.484       367
          5      0.504     0.526     0.515       327

avg / total      0.395     0.422     0.396      1000

SGB Model
Accuracy: 0.489
             precision    recall  f1-score   support

          1      0



Accuracy: 0.480
             precision    recall  f1-score   support

          1      0.600     0.386     0.470        70
          2      0.400     0.075     0.126        80
          3      0.297     0.474     0.365       156
          4      0.497     0.477     0.487       367
          5      0.584     0.606     0.595       327

avg / total      0.494     0.480     0.473      1000

MNB Model
Accuracy: 0.500
             precision    recall  f1-score   support

          1      0.441     0.429     0.435        70
          2      0.397     0.312     0.350        80
          3      0.381     0.276     0.320       156
          4      0.490     0.572     0.528       367
          5      0.587     0.587     0.587       327

avg / total      0.494     0.500     0.494      1000



In [56]:
linear_battery(rate_beer_source, beer_args = {"lower":1, "upper":20}, class_func = identity_class_func)

Loaded: project_data/ratebeer.txt
Number of datapoints loaded: 10000
train: 8100, dev: 900, test: 1000
Log Reg Model
Accuracy: 0.211
             precision    recall  f1-score   support

          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.000     0.000     0.000         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.250     0.150     0.187        20
          9      0.000     0.000     0.000        15
         10      0.091     0.057     0.070        35
         11      0.154     0.143     0.148        42
         12      0.215     0.215     0.215        79
         13      0.215     0.238     0.226        84
         14      0.252     0.230     0.240       135
         15      0.157     0.195     0.174       133
         16      0.210     0.247     0.227       150
         17      0

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Accuracy: 0.158
             precision    recall  f1-score   support

          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.000     0.000     0.000         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.167     0.050     0.077        20
          9      0.000     0.000     0.000        15
         10      0.029     0.029     0.029        35
         11      0.083     0.071     0.077        42
         12      0.202     0.241     0.220        79
         13      0.209     0.274     0.237        84
         14      0.142     0.148     0.145       135
         15      0.164     0.233     0.193       133
         16      0.164     0.187     0.174       150
         17      0.159     0.188     0.173       117
         18      0.149     0.067     0.093       104
         19      0.167     0.

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Accuracy: 0.196
             precision    recall  f1-score   support

          1      0.000     0.000     0.000         0
          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.000     0.000     0.000         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.250     0.050     0.083        20
          9      0.000     0.000     0.000        15
         10      0.000     0.000     0.000        35
         11      0.000     0.000     0.000        42
         12      0.291     0.203     0.239        79
         13      0.196     0.107     0.138        84
         14      0.183     0.170     0.176       135
         15      0.181     0.308     0.228       133
         16      0.212     0.327     0.257       150
         17      0.200     0.325     0.248       117
         18      0.219     0.

  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)


Accuracy: 0.154
             precision    recall  f1-score   support

          1      0.000     0.000     0.000         0
          2      0.000     0.000     0.000         1
          3      0.000     0.000     0.000         2
          4      0.000     0.000     0.000         2
          5      0.000     0.000     0.000         4
          6      0.000     0.000     0.000         7
          7      0.000     0.000     0.000         8
          8      0.125     0.050     0.071        20
          9      0.038     0.067     0.049        15
         10      0.147     0.143     0.145        35
         11      0.120     0.143     0.130        42
         12      0.138     0.101     0.117        79
         13      0.160     0.048     0.073        84
         14      0.179     0.304     0.225       135
         15      0.139     0.105     0.120       133
         16      0.150     0.060     0.086       150
         17      0.183     0.291     0.224       117
         18      0.253     0.