In [3]:
import requests
import json
import csv
from scipy.stats import pearsonr

In [4]:
def datamuse(query):
    # http://www.datamuse.com/api/
    url = "https://api.datamuse.com/words?ml=" + query + "&max=75" #max = number of results to return
    response = requests.get(url)
    similar_words = [x['word'] for x in json.loads(response.content)]
    return similar_words

In [5]:
def mc28():
    with open('datasets/sim-eval-master/datasets/mc.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        rows = []
        for row in reader: rows.append(row)
        return rows # [[word1, word2, similarity], ...] 

In [6]:
def stss131():
    # https://www.researchgate.net/publication/262411640_A_new_benchmark_dataset_with_production_methodology_for_short_text_semantic_similarity_algorithms
    with open('datasets/stss-131/stss.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        rows = []
        for row in reader: rows.append(row)
        return rows # [[index, sentence1, sentence2, avg similarity, standard deviation], ...]

In [7]:
def jaccard(word_list1, word_list2):
    intersection = len(set(word_list1).intersection(word_list2))
    union = len(set(word_list1).union(word_list2))
    jaccard_similarity = intersection / union
    return jaccard_similarity

In [118]:
from functools import reduce, partial
from itertools import starmap

def get_ml(w,c):
    """ Get a function for querying the related words """
    return [lambda: datamuse(
       "{word}&max={count}"
       .format(word=w,count=c)
    ),w]

def relations_to_file(fn, word):
    """
    fn: query function
    word
    """
    filename = "./related-words/{}.ml".format(word)
        
    with open(filename, 'w') as f:
        """ 
        Query the related words one at a time 
        when required for writing to file 
        """
        for word in fn():
            f.write(word)
            f.write('\n')
            
    return filename

def collect_ml_getters(dataset_fn, count=30):
    """ Collect the getters for lazy evaluation """
    return reduce(lambda acc, fns: acc + fns, [
        [get_ml(f,count), get_ml(s,count)]
        for f, s, _ 
        in dataset_fn()
    ], [])

def get_and_write_dataset_words_to_files(dataset_fn, count=30):
    """works for whiles that return tuple with three items per row"""
    return list(starmap(
        relations_to_file, 
        collect_ml_getters(dataset_fn, count)
    ))

# write the files with
# response = get_and_write_dataset_words_to_files(mc28)

response = ['./related-words/automobile.ml',
 './related-words/car.ml',
 './related-words/gem.ml',
 './related-words/jewel.ml',
 './related-words/journey.ml',
 './related-words/voyage.ml',
 './related-words/boy.ml',
 './related-words/lad.ml',
 './related-words/coast.ml',
 './related-words/shore.ml',
 './related-words/asylum.ml',
 './related-words/madhouse.ml',
 './related-words/magician.ml',
 './related-words/wizard.ml',
 './related-words/midday.ml',
 './related-words/noon.ml',
 './related-words/furnace.ml',
 './related-words/stove.ml',
 './related-words/food.ml',
 './related-words/fruit.ml',
 './related-words/bird.ml',
 './related-words/cock.ml',
 './related-words/bird.ml',
 './related-words/crane.ml',
 './related-words/implement.ml',
 './related-words/tool.ml',
 './related-words/brother.ml',
 './related-words/monk.ml',
 './related-words/brother.ml',
 './related-words/lad.ml',
 './related-words/crane.ml',
 './related-words/implement.ml',
 './related-words/car.ml',
 './related-words/journey.ml',
 './related-words/monk.ml',
 './related-words/oracle.ml',
 './related-words/cemetery.ml',
 './related-words/woodland.ml',
 './related-words/food.ml',
 './related-words/rooster.ml',
 './related-words/coast.ml',
 './related-words/hill.ml',
 './related-words/forest.ml',
 './related-words/graveyard.ml',
 './related-words/shore.ml',
 './related-words/woodland.ml',
 './related-words/monk.ml',
 './related-words/slave.ml',
 './related-words/coast.ml',
 './related-words/forest.ml',
 './related-words/lad.ml',
 './related-words/wizard.ml',
 './related-words/cord.ml',
 './related-words/smile.ml',
 './related-words/glass.ml',
 './related-words/magician.ml',
 './related-words/rooster.ml',
 './related-words/voyage.ml',
 './related-words/noon.ml',
 './related-words/string.ml']


In [112]:
from os import path
path.exists('./related-words')
with open('./related-words/automobile.ml', 'w') as f:
    f.write('fuk')

In [25]:
word="jou"
count=20
"ml={word}&max={count}".format(word=word, count=count)

'ml=jou&max=20'

[1, 2, 3, 4]


In [41]:
x()

6

In [42]:
datamuse("ml=automobile&max=30")

[]

In [94]:
reduce(lambda acc, g: acc + g, [[0,1],[2,3]], [])

[0, 1, 2, 3]

In [114]:
stss131()

[['66',
  'Would you like to go out to drink with me tonight?',
  "I really don't know what to eat tonight so I might go out somewhere.",
  '1.01',
  '0.77'],
 ['67',
  'I advise you to treat this matter very seriously as it is vital.',
  'You must take this most seriously, it will affect you.',
  '3.38',
  '0.69'],
 ['68',
  'When I was going out to meet my friends there was a delay at the train station.',
  'The train operator announced to the passengers that the train would be delayed.',
  '3.13',
  '0.68'],
 ['69',
  'Does music help you to relax, or does it distract you too much?',
  'Does this sponge look wet or dry to you?',
  '0.1',
  '0.29'],
 ['70',
  'You must realise that you will definitely be punished if you play with the alarm.',
  'He will be harshly punished for setting the fire alarm off.',
  '2.84',
  '0.87'],
 ['71',
  'I will make you laugh so much that your sides ache.',
  'When I tell you this you will split your sides laughing.',
  '3.75',
  '0.38'],
 ['72',
  "