# Definitions

In [1]:
import gensim.downloader as api
def load_model(model_name: str):
    """Return the model that ``api.load`` yields for ``model_name``.

    Thin convenience wrapper: the name is forwarded to ``api.load`` as-is and
    the result is handed back without modification.
    """
    return api.load(model_name)

In [14]:
import csv



def initialize_data(file_name: str):
    """Read the question rows from ``in/<file_name>``.

    The first CSV record is treated as a header and discarded; every
    following record is returned as a list of strings, in file order.

    Args:
        file_name: Name of the CSV file inside the ``in/`` directory.

    Returns:
        list[list[str]]: One list of fields per data record. Empty when the
        file holds no data records (or is completely empty).
    """
    # newline='' is what the csv module asks for: the reader then handles
    # line endings itself, so newlines embedded in quoted fields survive.
    with open(f'in/{file_name}', 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)

        # Drop the header. The default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)

        rows = list(csv_reader)

    # Report records actually collected; reader.line_num counts physical
    # lines and over-counts when a quoted field spans several lines.
    print(f"{len(rows)} rows appended")
    return rows
    

def output_model_performance(model_name: str, model, rows):
    """Write the model's answer to every synonym question to a CSV file.

    For each row the question word (``row[0]``) is compared against every
    candidate (``row[2:]``) with ``model.similarity``; the candidate with the
    highest similarity is the model's guess. Results are written to
    ``out/<model_name>-details.csv`` with the columns
    ``question, answer, guess, label``.

    Labels:
        * ``guess``   - no pair could be scored (``similarity`` raised
          ``KeyError`` for every candidate); the first candidate is reported.
        * ``correct`` - the best-scoring candidate equals ``row[1]``.
        * ``wrong``   - any other outcome.

    Args:
        model_name: Used only to build the output file name.
        model: Object exposing ``similarity(w1, w2)`` that raises ``KeyError``
            for words it cannot score.
        rows: Iterable of rows shaped ``[question, answer, option, ...]``.
            Empty rows (blank CSV lines) are skipped.

    Raises:
        ValueError: If a non-empty row carries no candidate options.
    """
    # newline='' stops the text layer from rewriting the writer's own line
    # terminators (otherwise every row is followed by a blank line on Windows).
    with open(f'out/{model_name}-details.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        # labels are guess, correct or wrong
        writer.writerow(['question', 'answer', 'guess', 'label'])

        for row in rows:
            if not row:
                # csv.reader yields [] for blank lines; nothing to evaluate.
                continue

            options = row[2:]
            if not options:
                raise ValueError(f"row {row!r} has no candidate options")
            question, answer = row[0], row[1]

            # Track the best *scored* candidate. None means "nothing scored
            # yet"; a numeric sentinel such as -1 would be indistinguishable
            # from a genuine similarity of -1.
            best_word = None
            best_score = None
            for option in options:
                try:
                    score = model.similarity(question, option)
                except KeyError:
                    # question word or this option is unknown to the model
                    continue
                # strict '>' keeps the earliest candidate on ties
                if best_score is None or score > best_score:
                    best_word, best_score = option, score

            if best_score is None:
                # Nothing could be scored: fall back to the first option.
                guess, label = options[0], 'guess'
            elif best_word == answer:
                guess, label = best_word, 'correct'
            else:
                guess, label = best_word, 'wrong'

            writer.writerow([question, answer, guess, label])



In [18]:
# reading the details
def analyze_model_results(model_name: str, model, append=False):
    """Summarise ``out/<model_name>-details.csv`` as one row of ``out/analysis.csv``.

    The details file is read (header skipped) and its last column is used as
    the label of each row. The summary row holds:

        * ``model``         - ``model_name``
        * ``size_of_vocab`` - ``len(model)``
        * ``C``             - number of rows labelled ``correct``
        * ``V``             - number of rows not labelled ``guess``
        * ``accuracy``      - ``C / V``, or ``0.0`` when ``V`` is zero

    Args:
        model_name: Selects the details file and is written to the summary.
        model: Object supporting ``len()``.
        append: When true the summary row is appended to an existing
            ``out/analysis.csv``; otherwise the file is rewritten. The header
            is written when rewriting, and also when appending to a file that
            is still empty.
    """
    # reading the details
    with open(f'out/{model_name}-details.csv', 'r', newline='') as f:
        csv_reader = csv.reader(f)
        next(csv_reader, None)  # header; the default tolerates an empty file
        # Blank lines parse as [] and carry no label, so leave them out.
        rows = [row for row in csv_reader if row]

    # Compute everything before touching analysis.csv so a failure here
    # cannot leave a truncated / header-only file behind.
    correct = sum(1 for row in rows if row[-1] == 'correct')
    guessed = sum(1 for row in rows if row[-1] == 'guess')
    answered = len(rows) - guessed
    # No answered question means accuracy is undefined; report 0.0 instead
    # of dividing by zero.
    accuracy = correct / answered if answered else 0.0
    size_of_vocab = len(model)

    # writing the analysis
    write_mode = 'a' if append else 'w'
    with open('out/analysis.csv', write_mode, newline='') as f:
        writer = csv.writer(f)

        # In append mode the stream starts at the end of the file, so a
        # position of 0 means the file is empty and still needs its header.
        if not append or f.tell() == 0:
            writer.writerow(['model', 'size_of_vocab', 'C', 'V', 'accuracy'])

        writer.writerow([model_name, size_of_vocab, correct, answered, accuracy])

# Load Models (ONLY RUN ONCE)

In [4]:
# Load 'word2vec-google-news-300' and keep it in `wv` for the evaluation cell.
wv = load_model(model_name='word2vec-google-news-300')
print('Done.')

In [6]:
# Load 'glove-twitter-200' and keep it in `tw200` for the evaluation cell.
tw200 = load_model(model_name='glove-twitter-200')
print('Done.')

Done.


In [7]:
# Load 'glove-twitter-100' and keep it in `tw100` for the evaluation cell.
tw100 = load_model(model_name='glove-twitter-100')
print('Done.')

Done.


In [9]:
# Load 'glove-wiki-gigaword-200' and keep it in `wi200` for the evaluation cell.
wi200 = load_model(model_name='glove-wiki-gigaword-200')
print('Done.')



IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Done.


In [10]:
# Load 'glove-wiki-gigaword-100' and keep it in `wi100` for the evaluation cell.
wi100 = load_model(model_name='glove-wiki-gigaword-100')
print('Done.')

Done.


In [19]:
data_file = 'synonyms.csv'

# initialize
rows = initialize_data(data_file)

# Models to evaluate, keyed by the name used for their output files.
# Insertion order is the processing order: TASK 1 first, then TASK 2.
models = {
    'word2vec-google-news-300': wv,     # TASK 1
    'glove-twitter-200': tw200,         # TASK 2
    'glove-twitter-100': tw100,
    'glove-wiki-gigaword-200': wi200,
    'glove-wiki-gigaword-100': wi100,
}

# output model performance (one details file per model)
for name, model in models.items():
    output_model_performance(name, model, rows)

# analyze: the first model rewrites out/analysis.csv, the remaining ones
# append their summary row to it
for position, (name, model) in enumerate(models.items()):
    analyze_model_results(name, model, position > 0)



80 rows appended
