In [1]:
import re
import numpy as np
import json
import pandas as pd
import ast

In [2]:
def vector(component_1,component_2):
    """Return the 2-D displacement between two components.

    Each component is a string of the form ``'TYPE(x,y)'``.  The result is
    ``[x1 - x2, y1 - y2]``, i.e. the position of ``component_1`` minus the
    position of ``component_2``.

    Parameters
    ----------
    component_1, component_2 : str
        Component descriptions such as ``'BUTTON(59,67)'``.

    Returns
    -------
    numpy.ndarray
        Float array ``[dx, dy]``.
    """
    # Raw string: '\(' and '\)' are not valid escapes in a normal literal.
    separators = r'\(|\)|,'
    # Splitting 'TYPE(x,y)' yields ['TYPE', 'x', 'y', ''].
    first = re.split(separators, component_1)
    second = re.split(separators, component_2)
    v_x = float(first[1]) - float(second[1])
    v_y = float(first[2]) - float(second[2])
    return np.array([v_x, v_y])

In [3]:
def cos_sim(a, b):
    """Return the cosine similarity of two vectors.

    Parameters
    ----------
    a, b : array-like
        Vectors of equal length.

    Returns
    -------
    float
        ``a.b / (|a| * |b|)``.  When either vector has zero length the
        direction is undefined; ``0.0`` is returned instead of dividing by
        zero (which would give NaN and a RuntimeWarning).
    """
    dot_product = np.dot(a, b)  # a . b
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)  # |a| * |b|
    if norm_product == 0:
        return 0.0
    return dot_product / norm_product

In [4]:
def get_tail_list(input_comp1,pattern):
    """Return the pattern components that have the same type as ``input_comp1``.

    Components are strings of the form ``'TYPE(x,y)'``.  The type name is
    compared exactly, so ``'TEXT(...)'`` does not select ``'TEXTBOX(...)'``
    entries (a plain substring test would).

    Parameters
    ----------
    input_comp1 : str
        Input component, e.g. ``'BUTTON(59,67)'``.
    pattern : sequence of str
        Components of the pattern being searched.

    Returns
    -------
    list of str
        Matching entries of ``pattern``, in their original order.
    """
    separators = r'\(|\)|,'
    wanted_type = re.split(separators, input_comp1)[0]
    return [
        candidate
        for candidate in pattern
        if re.split(separators, candidate)[0] == wanted_type
    ]

In [5]:
def get_head_list(input_comp2,pattern):
    """Return the pattern components that have the same type as ``input_comp2``.

    Components are strings of the form ``'TYPE(x,y)'``.  The type name is
    compared exactly, so ``'TEXT(...)'`` does not select ``'TEXTBOX(...)'``
    entries (a plain substring test would).

    Parameters
    ----------
    input_comp2 : str
        Input component, e.g. ``'TEXT(32,47)'``.
    pattern : sequence of str
        Components of the pattern being searched.

    Returns
    -------
    list of str
        Matching entries of ``pattern``, in their original order.
    """
    separators = r'\(|\)|,'
    wanted_type = re.split(separators, input_comp2)[0]
    return [
        candidate
        for candidate in pattern
        if re.split(separators, candidate)[0] == wanted_type
    ]

In [6]:
def similarity_of_two_components(input_comp1,input_comp2,pattern):
    """Best directional match of an input pair inside a pattern.

    The displacement between the two input components is compared, by
    cosine similarity, with the displacement of every (tail, head) pair
    taken from ``pattern``: tails are the entries selected by
    ``get_tail_list`` for ``input_comp1``, heads those selected by
    ``get_head_list`` for ``input_comp2``.

    Parameters
    ----------
    input_comp1, input_comp2 : str
        Input components of the form ``'TYPE(x,y)'``.
    pattern : sequence of str
        Components of the candidate pattern.

    Returns
    -------
    float or int
        The largest cosine similarity found, or ``-1`` when there is no
        (tail, head) pair to compare.
    """
    input_vector = vector(input_comp1, input_comp2)
    tail = get_tail_list(input_comp1, pattern)
    head = get_head_list(input_comp2, pattern)
    best = -1
    for tail_comp in tail:
        for head_comp in head:
            # Evaluate the score once per pair instead of twice.
            score = cos_sim(input_vector, vector(tail_comp, head_comp))
            # Strict '<' means a NaN score never replaces the current best.
            if best < score:
                best = score
    return best

In [7]:
def similarity(input_comps,pattern):
    """Sum the pairwise scores of neighbouring input components.

    Every consecutive pair ``(input_comps[k], input_comps[k + 1])`` is
    scored against ``pattern`` with ``similarity_of_two_components`` and the
    scores are added up.  Fewer than two components gives ``0``.
    """
    consecutive_pairs = zip(input_comps, input_comps[1:])
    return sum(
        similarity_of_two_components(first, second, pattern)
        for first, second in consecutive_pairs
    )

In [8]:
# Toy example: two input components and a small pattern, each component
# written as 'TYPE(x,y)'.
input_comps = [
    'BUTTON(59,67)',
    'TEXT(32,47)',
]
p = [
    'AVATAR(164.0,83.0)',
    'BUTTON(716,47)',
    'IMAGE(16.0,16.0)',
    'BUTTON(638.0,47.0)',
    'AVATAR(146.0,83.0)',
    'AVATAR(128.0,83.0)',
    'TEXT(128.0,54.0)',
    'TEXT(196.0,85.0)',
    'TEXT(128.0,21.0)',
]

In [9]:
# Score the two sample components against the sample pattern `p`.
similarity(input_comps,p)

0.8328205790756609

In [10]:
# Load the raw JSON description of the input components.  The `with`
# block closes the file handle as soon as parsing is done.
with open('100000.json') as f:
    input_file = json.load(f)

In [11]:
def get_input_components(file_json):
    """Read a JSON file of components and encode each one as ``'TYPE(x,y)'``.

    The file must contain a list of objects, each providing ``type`` and
    ``data.position.x`` / ``data.position.y``.

    Parameters
    ----------
    file_json : str
        Path of the JSON file to read.

    Returns
    -------
    list of str
        One ``'TYPE(x,y)'`` string per object, in file order.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open(file_json) as f:
        input_file = json.load(f)
    list_input = []
    for item in input_file:
        position = item['data']['position']
        list_input.append(
            '{}({},{})'.format(item['type'], position['x'], position['y'])
        )
    return list_input

In [12]:
# Build the 'TYPE(x,y)' strings for the components stored in 100000.json.
list_input = get_input_components('100000.json')

In [13]:
# Display the encoded input components.
list_input

['BUTTON(820,92)', 'TEXTBOX(443,92)']

In [14]:
def find_patterns(list_input, csv_path='recommendation.csv'):
    """Look up the recommended patterns for a set of input components.

    The CSV must have an ``Inputs`` column and a ``Recommend`` column, both
    holding Python list literals stored as text.  The first row whose
    ``Inputs`` contains exactly the same set of component types as
    ``list_input`` is selected.

    Parameters
    ----------
    list_input : sequence of str
        Input components of the form ``'TYPE(x,y)'``.
    csv_path : str, optional
        Location of the recommendation table.

    Returns
    -------
    list
        The parsed ``Recommend`` value of the matching row, or an empty
        list when no row matches.
    """
    # Only the type name (text before the first '(') takes part in matching.
    wanted_types = {re.split(r'\(|\)|,', comp)[0] for comp in list_input}
    df = pd.read_csv(csv_path)
    row_types = df['Inputs'].apply(ast.literal_eval)
    mask = row_types.apply(lambda types: set(types) == wanted_types)
    recommended = df.loc[mask, 'Recommend']
    if recommended.empty:
        return []
    return ast.literal_eval(recommended.iloc[0])

In [15]:
# Load the pattern records and close the file handle once they are parsed.
with open('patterns_with_position.json') as f:
    source = json.load(f)
# Ids of the patterns recommended for the current input components.
list_patterns = find_patterns(list_input)

In [16]:
# Display the pattern ids returned by find_patterns.
list_patterns

[2722,
 2723,
 319395,
 2725,
 2726,
 2724,
 319595,
 2098731,
 319468,
 319373,
 319570,
 2098714,
 2288601,
 48666,
 473596]

In [17]:
# Index the pattern records by id once, so each recommended pattern is a
# dictionary lookup instead of a scan of `source`.  The first record wins
# if an id appears more than once.
source_by_id = {}
for entry in source:
    source_by_id.setdefault(entry['id'], entry)

simi_list = []
for pattern in list_patterns:
    # Record ids are compared as strings; list_patterns may hold integers.
    entry = source_by_id.get(str(pattern))
    if entry is None:
        # Keep simi_list position-aligned with list_patterns: a pattern
        # with no record gets NaN rather than being skipped.
        simi_list.append(float('nan'))
        continue
    print('recommended pattern',pattern)
    print('source',entry['id'],entry['children'])
    # Compute the score once and reuse it for both the log line and the list.
    score = similarity(list_input,entry['children'])
    print('similarity ',score)
    simi_list.append(score)

recommended pattern 2722
source 2722 ['BUTTON(28,130)', 'TEXTBOX(294,272)', 'TEXT(28,24)', 'DIVIDER(28,241)', 'BUTTON(28,86)', 'TEXTBOX(28,272)', 'TEXTBOX(28,352)', 'TEXT(274,230)', 'TEXTBOX(28,432)', 'BUTTON(28,554)', 'DIVIDER(306,241)', 'BUTTON(28,174)', 'CHECKBOX(28,513)', 'TEXT(187,618)', 'TEXT(46,512)', 'TEXT(335,618)']
similarity  0.0
recommended pattern 2723
source 2723 ['TEXT(30,23)', 'TEXT(31,80)', 'BUTTON(677,92)', 'TEXT(30,110)', 'BUTTON(677,310)', 'BUTTON(677,526)', 'CHECKBOX(663,645)', 'DIVIDER(30,168)', 'BUTTON(677,216)', 'DIVIDER(30,277)', 'DIVIDER(30,386)', 'DIVIDER(30,495)', 'DIVIDER(30,603)', 'TEXT(30,186)', 'TEXT(30,216)', 'TEXT(30,293)', 'TEXT(30,326)', 'TEXT(30,411)', 'TEXT(30,439)', 'TEXT(30,512)', 'TEXT(30,540)', 'TEXT(30,622)', 'TEXT(30,652)', 'TEXTBOX(618,423)']
similarity  0.4970459221108368
recommended pattern 319395
source 319395 ['BUTTON(24,86)', 'BUTTON(24,142)', 'BUTTON(24,198)', 'TEXTBOX(24,304)', 'TEXTBOX(24,384)', 'BUTTON(24,506)', 'TEXT(24,24)', 'DIVI

In [18]:
# One row per (pattern id, similarity score) pair, paired by position.
df = pd.DataFrame(
    data=list(zip(list_patterns, simi_list)),
    columns=['Pattern', 'Similarity'],
)
df

Unnamed: 0,Pattern,Similarity
0,2722,0.0
1,2723,0.497046
2,319395,0.0
3,2725,0.955081
4,2726,1.0
5,2724,0.818245
6,319595,1.0
7,2098731,0.713471
8,319468,1.0
9,319373,0.808939
