In [1]:
import pandas as pd
import socket
import numpy as np
import textdistance
import time

In [2]:
HOST = 'localhost'
PORT = 65534

In [33]:
def process(text):
    result = pd.DataFrame()
    
    
    metrics_dict = {'cosine':textdistance.cosine,
                    'hamming':textdistance.hamming,
                    'jaro-winkler':textdistance.jaro_winkler,
                    'damerau_levenshtein':textdistance.damerau_levenshtein}
    
    command = text.split('; ')[0]
    words = text.split('; ')[1].split(', ')
    
    if command=='calculate metrics':
        res_dict = {}
        for metric in metrics_dict.keys():
            res_dict['word_to_find_similar'] = words[0]
            res_dict['word_to_compare'] = words[1]
            res_dict[metric] = [metrics_dict[metric](words[0], words[1])]
        try:
            data = pd.read_csv('results_words.csv')
            result = data.append(pd.DataFrame(res_dict)).reset_index(drop=True)
        except:
            result = pd.DataFrame(res_dict)
    
        result = result.drop_duplicates(subset=['word_to_find_similar',
                                       'word_to_compare'], keep='first')
        
        result.to_csv('results_words.csv', index=False)
        
    elif command == 'find similars':
        try:
            data = pd.read_csv('results_words.csv')
            all_words = set(data['word_to_find_similar'])|set(data['word_to_compare'])

            for word in words:
                if word in data['word_to_find_similar'].values:
                    closest_word = data[data.word_to_find_similar == word].sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    res_dict = dict(zip(closest_word.index, closest_word.values))
                    result = result.append(pd.DataFrame.from_records([res_dict]))


                elif word in data['word_to_compare'].values:
                    closest_word = data[data.word_to_compare == word].sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    res_dict = dict(zip(closest_word.index, closest_word.values))
                    result = result.append(pd.DataFrame.from_records([res_dict]))

                else:
                    res_dict = {'word_to_find_similar':word,
                                   'word_to_compare':list(all_words)}

                    for key in metrics_dict.keys():
                        res_dict[key] = list(map(lambda x: metrics_dict[key](word, x),
                                                    list(all_words)))

                    df = pd.DataFrame(res_dict, index=[i for i in range(len(res_dict['word_to_compare']))]
                                     ).sort_values(list(metrics_dict.keys()),
                                                ascending = [True, False, True, False]).iloc[0]

                    result = result.append(df)
                
                result = result.drop_duplicates(subset=['word_to_find_similar',
                                       'word_to_compare'], keep='first')
                
        except:
            return "No data stored yet!"
    #return str(dict(zip(result.columns, [result.values[:, i].tolist() for i in range(len(result.columns))])))
    #return tabletext.to_text(result.values)
    return tabulate.tabulate(result.values, result.columns, tablefmt="pipe")

In [34]:
print(process('find similars; scorpions, malyarchuk'))

|   cosine |   damerau_levenshtein |   hamming |   jaro-winkler | word_to_compare   | word_to_find_similar   |
|---------:|----------------------:|----------:|---------------:|:------------------|:-----------------------|
| 0.333333 |                     7 |         9 |       0.407407 | Jaro              | scorpions              |
| 0.782624 |                     4 |        10 |       0.786905 | klymchuk          | malyarchuk             |


In [35]:
def show():
    try:
        data = pd.read_csv('results_words.csv')
        #return str(dict(zip(data.columns, [data.values[:, i].tolist() for i in range(len(data.columns))])))
        #return tabletext.to_text(data.values)
        return tabulate.tabulate(data.values, data.columns, tablefmt="pipe")
    except:
        return "No data stored yet!"

In [36]:
def worker(msg):
    if msg=='print':
        return show()
    elif msg=='bye':
        return 'Good Bye!'
    else:
        return process(msg)

In [37]:
def server(host, port):
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind((host, port))
        s.listen()
        conn, addr = s.accept()
        with conn:
            print('Connected by', addr)

            data = conn.recv(1024).decode('utf-8')
            
            if data=='Start':
                conn.sendall("Welcome to Text Measurement System!".encode('utf-8'))
                while True:
                    data = conn.recv(1024).decode('utf-8')

                    if not data:
                        print('End Connection')
                        break

                    result = worker(data).encode('utf-8')
                    
                    conn.sendall(result)

In [38]:
server(HOST, PORT)

Connected by ('127.0.0.1', 35576)
End Connection


In [5]:
server(HOST, PORT)

Connected by ('127.0.0.1', 54650)
calculate metrics; Hello, hello
End Connection


In [71]:
df

Unnamed: 0,a,b
0,1,4
1,2,5
2,3,6


In [77]:
df.values

array([[1, 4],
       [2, 5],
       [3, 6]])

In [5]:
import tabletext

In [7]:
df = pd.DataFrame({'a':[1,2,3,4],
                  'b':[1,2,5,6]})

In [8]:
print(tabletext.to_text(df.values))

┌───┬───┐
│ 1 │ 1 │
├───┼───┤
│ 2 │ 2 │
├───┼───┤
│ 3 │ 5 │
├───┼───┤
│ 4 │ 6 │
└───┴───┘


In [9]:
def f():
    return (tabletext.to_text(df.values))

In [13]:
f().encode('utf-8')

b'\xe2\x94\x8c\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xac\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x90\n\xe2\x94\x82 1 \xe2\x94\x82 1 \xe2\x94\x82\n\xe2\x94\x9c\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xbc\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xa4\n\xe2\x94\x82 2 \xe2\x94\x82 2 \xe2\x94\x82\n\xe2\x94\x9c\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xbc\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xa4\n\xe2\x94\x82 3 \xe2\x94\x82 5 \xe2\x94\x82\n\xe2\x94\x9c\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xbc\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xa4\n\xe2\x94\x82 4 \xe2\x94\x82 6 \xe2\x94\x82\n\xe2\x94\x94\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\xb4\xe2\x94\x80\xe2\x94\x80\xe2\x94\x80\xe2\x94\x98'

In [23]:
import tabulate

In [30]:
print(tabulate.tabulate(df.values, df.columns, tablefmt="pipe"))

|   a |   b |
|----:|----:|
|   1 |   1 |
|   2 |   2 |
|   3 |   5 |
|   4 |   6 |
