In [1]:
import pandas as pd
from ast import literal_eval
from collections import Counter
import os
import csv
import json

import random

import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('list_components.csv')
df['Components'] = df['Components'].apply(literal_eval)

In [3]:
temp = pd.DataFrame(df['Components'].values.tolist()).transpose()

In [4]:
temp = temp.replace(np.nan,'',regex=True)

In [5]:
X = temp.to_numpy()
X = np.array(X)

In [6]:
X

array([['AVATAR', 'TEXT', 'DIVIDER', ..., 'TEXT', 'IMAGE', 'BUTTON'],
       ['BUTTON', 'TEXT', 'SWITCH', ..., 'TEXT', '', 'TABBAR_MENU'],
       ['IMAGE', 'TEXT', 'SIDEBAR_MENU', ..., 'TEXT', '', 'TABBAR_MENU'],
       ...,
       ['', '', '', ..., '', '', ''],
       ['', '', '', ..., '', '', ''],
       ['', '', '', ..., '', '', '']], dtype=object)

In [7]:
classes = np.unique(X)
classes

array(['', 'AVATAR', 'BUTTON', 'CHECKBOX', 'COLLAPSED_SIDEBAR_MENU',
       'COLUMN_BAR_CHART', 'DIVIDER', 'DROPDOWN_BUTTON', 'HEADER_MENU',
       'ICON', 'IMAGE', 'RATING', 'RECTANGLE', 'SIDEBAR_MENU', 'SLIDER',
       'SWITCH', 'TABBAR_MENU', 'TAG', 'TEXT', 'TEXTAREA', 'TEXTBOX'],
      dtype=object)

In [8]:
result = np.zeros((len(X),len(classes)))

In [9]:
result

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [10]:
for i in range(len(X)):
    for j in range(len(X[i])): 
        temp = np.where(classes == X[i][j])    
        idx = temp[0][0]
        result[i][idx] = 1

In [11]:
result = pd.DataFrame(result,columns = classes)
result = result.drop(columns = [''])
result

Unnamed: 0,AVATAR,BUTTON,CHECKBOX,COLLAPSED_SIDEBAR_MENU,COLUMN_BAR_CHART,DIVIDER,DROPDOWN_BUTTON,HEADER_MENU,ICON,IMAGE,RATING,RECTANGLE,SIDEBAR_MENU,SLIDER,SWITCH,TABBAR_MENU,TAG,TEXT,TEXTAREA,TEXTBOX
0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
1,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0
2,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
3,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
4,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
65,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
67,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Building the model
frq_items = apriori(result, min_support = 0.15, use_colnames = True)

# Collecting the inferred rules in a dataframe
rules = association_rules(frq_items, metric ="lift", min_threshold = 1)

In [13]:
rules["antecedents"] = rules["antecedents"].apply(lambda x: list(tuple(x)))
rules["consequents"] = rules["consequents"].apply(lambda x: list(tuple(x)))
rules['merged']= rules["antecedents"] + rules["consequents"]

In [14]:
column_patterns = []
idx = []
for i in range(len(rules['merged'])):
    list_patterns = []
    for j in range(len(df['Components'])):
        if(set(rules['merged'][i]).issubset(set(df['Components'][j]))):
            list_patterns.append(df['Id'][j])
    if(len(list_patterns)==0):
        idx.append(i)
    column_patterns.append(list_patterns)

In [15]:
rules['Patterns'] = pd.Series(column_patterns)
#rules["antecedents"] = rules["antecedents"].apply(lambda x: ','.join(x))

In [16]:
#remove empty value in Patterns column
rules = rules.drop(index = idx).reset_index().drop(columns = 'index')

In [17]:
#combine output patterns
ipts = rules['antecedents'].value_counts().keys()
rcmd = []
for ipt in ipts:
    mask = rules['antecedents'].apply(lambda x: ipt == x)
    temp = rules[mask]
    s=[]
    for v in temp['Patterns'].values:
        s += v
    rcmd.append(list(set(s)))

In [18]:
d = {'Inputs': ipts, 'Recommend': rcmd}
recommendation = pd.DataFrame(d)

In [19]:
rules.to_csv('rules.csv',index = False)
recommendation.to_csv('recommendation.csv',index = False)