In [1]:
import pandas as pd
import socket
import numpy as np
import textdistance
import time
import tabulate

In [2]:
HOST = 'localhost'
PORT = 65532

In [3]:
def process(text):
    result = pd.DataFrame()
    
    
    metrics_dict = {'cosine':textdistance.cosine,
                    'hamming':textdistance.hamming,
                    'jaro-winkler':textdistance.jaro_winkler,
                    'damerau_levenshtein':textdistance.damerau_levenshtein}
    
    command = text.split('; ')[0]
    words = text.split('; ')[1].split(', ')
    
    if command=='calculate metrics':
        res_dict = {}
        for metric in metrics_dict.keys():
            res_dict['word_to_find_similar'] = words[0]
            res_dict['word_to_compare'] = words[1]
            res_dict[metric] = [metrics_dict[metric](words[0], words[1])]
        try:
            data = pd.read_csv('results_words.csv')
            result = data.append(pd.DataFrame(res_dict)).reset_index(drop=True)
        except:
            result = pd.DataFrame(res_dict)
    
        result = result.drop_duplicates(subset=['word_to_find_similar',
                                       'word_to_compare'], keep='first')
        
        result.to_csv('results_words.csv', index=False)
        
    elif command == 'find similars':
        try:
            data = pd.read_csv('results_words.csv')
            all_words = set(data['word_to_find_similar'])|set(data['word_to_compare'])

            for word in words:
                if word in data['word_to_find_similar'].values:
                    closest_word = data[data.word_to_find_similar == word].sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    res_dict = dict(zip(closest_word.index, closest_word.values))
                    result = result.append(pd.DataFrame.from_records([res_dict]))


                elif word in data['word_to_compare'].values:
                    closest_word = data[data.word_to_compare == word].sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    res_dict = dict(zip(closest_word.index, closest_word.values))
                    result = result.append(pd.DataFrame.from_records([res_dict]))

                else:
                    res_dict = {'word_to_find_similar':word,
                                   'word_to_compare':list(all_words)}

                    for key in metrics_dict.keys():
                        res_dict[key] = list(map(lambda x: metrics_dict[key](word, x),
                                                    list(all_words)))

                    df = pd.DataFrame(res_dict, index=[i for i in range(len(res_dict['word_to_compare']))]
                                     ).sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    result = result.append(df)
                
                result = result.drop_duplicates(subset=['word_to_find_similar',
                                       'word_to_compare'], keep='first')
                
        except:
            return "No data stored yet!"
    return tabulate.tabulate(result.values, result.columns, tablefmt="pipe")

In [4]:
print(process('find similars; scorpions, malyarchuk'))

|   cosine |   damerau_levenshtein |   hamming |   jaro-winkler | word_to_compare   | word_to_find_similar   |
|---------:|----------------------:|----------:|---------------:|:------------------|:-----------------------|
| 0.149071 |                     8 |         9 |       0.437037 | Hello             | scorpions              |
| 0.141421 |                     9 |         9 |       0.433333 | Hello             | malyarchuk             |


In [5]:
def show():
    try:
        data = pd.read_csv('results_words.csv')
        return tabulate.tabulate(data.values, data.columns, tablefmt="pipe")
    except:
        return "No data stored yet!"

In [6]:
def worker(msg):
    if msg=='print':
        return show()
    elif msg=='bye':
        return 'Good Bye!'
    elif msg.split(';')[0]=='calculate metrics' or msg.split(';')[0]=='find similars':
        return process(msg)
    else:
        return "No valid command"

In [7]:
def server(host, port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind((host, port))
        s.listen()
        conn, addr = s.accept()
        with conn:
            print('Connected by', addr)

            data = conn.recv(1024).decode('utf-8')
            
            if data=='Start':
                conn.sendall("Welcome to Text Measurement System!".encode('utf-8'))
                while True:
                    data = conn.recv(1024).decode('utf-8')

                    if not data:
                        print('End Connection')
                        break

                    result = worker(data).encode('utf-8')
                    
                    conn.sendall(result)

In [8]:
server(HOST, PORT)

Connected by ('127.0.0.1', 53116)
End Connection
