In [1]:
import json
import pandas as pd
import numpy as np
import ast
import re
import math
import random

import statsmodels.api as sm
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

### Tiền xử lý source pattern

In [2]:
def parse_children(list_children,x_pattern,y_pattern,result):
    """Flatten a component tree into a list of leaf descriptors.

    ``CONTAINER`` nodes are not emitted themselves; their ``children`` are
    visited recursively with the container's absolute position as the new
    offset. Every other node is appended to ``result`` as a dict with the
    keys ``type``, ``text`` (only when present in the node data), ``width``,
    ``height``, ``x``, ``y`` and ``size``.

    Args:
        list_children: iterable of node dicts, each with ``type`` and ``data``
            (and ``children`` for containers).
        x_pattern: x offset added to every child's ``data.position.x``.
        y_pattern: y offset added to every child's ``data.position.y``.
        result: list that is extended in place.

    Returns:
        The same ``result`` list.

    Raises:
        KeyError: if a node lacks ``type``, ``data``, ``data.position`` or,
            for a DIVIDER, ``data.from`` / ``data.to``.
    """
    for node in list_children:
        node_type = node["type"]
        node_data = node["data"]
        # A missing size is represented as None.
        size_child = node_data.get("size")

        if node_type == "DIVIDER":
            span_x = abs(node_data['to']['x'] - node_data['from']['x'])
            span_y = abs(node_data['to']['y'] - node_data['from']['y'])
            if span_y <= 1:
                # Horizontal line (this branch also wins when both spans are <= 1).
                width_child, height_child = span_x, 1
            elif span_x <= 1:
                # Vertical line.
                width_child, height_child = 1, span_y
            else:
                # Diagonal divider: previously neither branch assigned a size,
                # which raised UnboundLocalError or silently reused the size
                # of the previous child. Fall back to the bounding box.
                width_child, height_child = span_x, span_y
        else:
            # Missing or null dimensions count as 0.
            width_child = node_data.get("width")
            if width_child is None:
                width_child = 0
            height_child = node_data.get("height")
            if height_child is None:
                height_child = 0

        x_child = node_data["position"]["x"] + x_pattern
        y_child = node_data["position"]["y"] + y_pattern

        if node_type == "CONTAINER":
            result = parse_children(node["children"], x_child, y_child, result)
            continue

        # Key order is kept (type, text, width, height, x, y, size) so the
        # serialized JSON stays the same as before.
        entry = {"type": node_type}
        if 'text' in node_data:
            entry["text"] = node_data['text']
        entry["width"] = width_child
        entry["height"] = height_child
        entry["x"] = x_child
        entry["y"] = y_child
        entry["size"] = size_child
        result.append(entry)
    return result

def resize_component_pattern(data):
    """Summarise each stored pattern as a flat record.

    For every entry of ``data`` the result holds the pattern name and id, the
    frame width/height/position read from ``entry["data"]["data"]``, and the
    flattened leaf components produced by ``parse_children``.

    Args:
        data: iterable of pattern dicts with ``name``, ``id`` and ``data``.

    Returns:
        A list with one summary dict per pattern.
    """
    def _describe(entry):
        # Build the summary record for one pattern.
        label = entry["name"]
        identifier = entry['id']
        root = entry["data"]
        frame = root["data"]
        frame_width = frame["width"]
        frame_height = frame["height"]
        origin_x = frame["position"]["x"]
        origin_y = frame["position"]["y"]
        leaves = parse_children(root["children"], origin_x, origin_y, [])
        return {
            "name_pattern": label,
            'id_pattern': identifier,
            "width_pattern": frame_width,
            "height_pattern": frame_height,
            "x_pattern": origin_x,
            "y_pattern": origin_y,
            "info_children": leaves
        }

    return [_describe(entry) for entry in data]


def translate_component(file_json):
    """Convert a pattern export to pattern-relative coordinates and save it.

    Loads the JSON file ``file_json``, flattens it with
    ``resize_component_pattern``, shifts every child so the pattern origin is
    (0, 0), replaces each child's ``x`` / ``y`` with ``(x + width) / 2`` and
    ``(y + height) / 2``, and writes the result to ``position_center.json``
    in the current working directory.

    Args:
        file_json: path of the JSON file to read.

    Returns:
        None. The output is the written file.
    """
    # Context manager so the input handle is closed even if parsing fails
    # (the handle used to be left open).
    with open(file_json) as f:
        data_json = json.load(f)
    json_data = resize_component_pattern(data_json)

    # Move every pattern to the origin and shift its children accordingly.
    for pattern in json_data:
        shift_x = 0 - pattern['x_pattern']
        shift_y = 0 - pattern['y_pattern']
        pattern['x_pattern'] = 0
        pattern['y_pattern'] = 0
        for child in pattern['info_children']:
            child['x'] = shift_x + child['x']
            child['y'] = shift_y + child['y']

    # NOTE(review): (x + width) / 2 is the midpoint between x and the width
    # value, not the box centre x + width / 2. The input-side
    # translate_component defined later in this notebook uses the same
    # expression, so the two must be changed together if this is unintended.
    for pattern in json_data:
        for child in pattern['info_children']:
            child['x'] = (child['x'] + child['width']) / 2
            child['y'] = (child['y'] + child['height']) / 2

    with open('position_center.json', 'w') as f:
        json.dump(json_data, f)

### Thực hiện mô hình recommend pattern

In [3]:
def get_classes(file_json):
    """Collect the distinct component types found in a pattern file.

    Args:
        file_json: path of a JSON file holding a list of patterns, each with
            an ``info_children`` list of component dicts.

    Returns:
        A tuple ``(classes, classes_text)`` of sorted object-dtype numpy
        arrays: all distinct ``type`` values, and the distinct ``type``
        values of components that carry a ``text`` key.
    """
    with open(file_json) as f:
        data_json = json.load(f)

    all_types = set()
    text_types = set()
    for pattern in data_json:
        for child in pattern['info_children']:
            all_types.add(child['type'])
            if 'text' in child:
                text_types.add(child['type'])

    # The previous implementation padded a ragged table with '' and then
    # dropped the first sorted value, assuming it was that padding. When every
    # pattern had the same number of children there was no padding and a real
    # class was dropped. Collecting into sets avoids the padding altogether;
    # '' is still excluded so the result matches the old output otherwise.
    all_types.discard('')
    text_types.discard('')

    classes = np.array(sorted(all_types), dtype=object)
    classes_text = np.array(sorted(text_types), dtype=object)
    return classes,classes_text

In [4]:
def cal_distance_angle(file_json,classes):
    """Compute per-class distance and angle features for every pattern.

    The origin is the top-left corner. For each component the distance is
    ``sqrt(x**2 + y**2)`` and the "angle" is ``x / distance`` (the cosine of
    the angle to the x axis), with 1 used when the distance is zero.

    Args:
        file_json: path of a JSON file holding a list of patterns, each with
            an ``info_children`` list of dicts with ``type``, ``x``, ``y``.
        classes: iterable of component types; it fixes the order of the
            per-class sub-lists.

    Returns:
        ``(distance, angle)``: two lists indexed ``[pattern][class]``, each
        holding a list of floats. Distances are sorted ascending and angles
        descending, independently of each other, so position ``k`` in both
        lists does not necessarily describe the same component.
    """
    # Context manager so the handle is closed (it used to be left open).
    with open(file_json) as f:
        input_file = json.load(f)

    distance = []
    angle = []
    for pattern in input_file:
        children = pattern['info_children']
        d_pattern = []
        a_pattern = []
        for class_i in classes:
            class_distance = []
            class_angle = []
            for child in children:
                if class_i != child['type']:
                    continue
                x_val = float(child['x'])
                y_val = float(child['y'])
                temp_distance = math.sqrt(x_val**2 + y_val**2)
                try:
                    temp_angle = x_val / temp_distance
                except ZeroDivisionError:
                    # Component sits exactly on the origin.
                    temp_angle = 1
                class_distance.append(temp_distance)
                class_angle.append(temp_angle)

            class_distance.sort(reverse=False)
            class_angle.sort(reverse=True)

            d_pattern.append(class_distance)
            a_pattern.append(class_angle)

        distance.append(d_pattern)
        angle.append(a_pattern)
    return distance,angle

In [5]:
def cal_distance_angle_input(list_input,classes):
    """Compute per-class distance and angle features for one component list.

    For each component the distance is ``sqrt(x**2 + y**2)`` and the "angle"
    is ``x / distance``, with 1 used when the distance is zero.

    Args:
        list_input: list of component dicts with ``type``, ``x`` and ``y``.
        classes: iterable of component types; it fixes the order of the
            per-class sub-lists.

    Returns:
        ``(distance, angle)``: two lists with one sub-list per class.
        Distances are sorted ascending and angles descending, independently
        of each other.
    """
    distance = []
    angle = []
    for class_i in classes:
        class_distance = []
        class_angle = []
        for item in list_input:
            if class_i != item['type']:
                continue
            x_val = float(item['x'])
            y_val = float(item['y'])
            temp_distance = math.sqrt(x_val**2 + y_val**2)
            # An explicit zero check replaces the former bare ``except``,
            # which could only ever be triggered by this division by zero.
            if temp_distance == 0:
                temp_angle = 1
            else:
                temp_angle = x_val / temp_distance
            class_distance.append(temp_distance)
            class_angle.append(temp_angle)

        class_distance.sort(reverse=False)
        class_angle.sort(reverse=True)

        distance.append(class_distance)
        angle.append(class_angle)
    return distance,angle

In [6]:
def pad_or_truncate(some_list, target_len,symbol):
    """Return a copy of ``some_list`` cut or right-padded to ``target_len``.

    Longer lists are sliced to ``target_len``; shorter ones are extended with
    ``symbol``.
    """
    head = some_list[:target_len]
    missing = target_len - len(some_list)
    # A non-positive ``missing`` yields an empty padding list.
    padding = [symbol] * missing
    return head + padding

In [7]:
def get_max_range_classes(classes,feature):
    """Return, for each class index, the longest per-class list in ``feature``.

    Args:
        classes: sequence whose length gives the number of class slots.
        feature: list indexed ``[sample][class]`` of lists.

    Returns:
        A list of ints, one per class; 0 where no sample has entries.
    """
    return [
        max((len(sample[slot]) for sample in feature), default=0)
        for slot in range(len(classes))
    ]

In [8]:
def fill_with_symbol(classes,range_classes,feature,symbol):
    """Pad or cut every per-class list of every sample to a fixed width.

    ``feature[sample][k]`` is replaced by ``pad_or_truncate`` of itself with
    target length ``range_classes[k]`` and filler ``symbol``. ``feature`` is
    modified in place and returned. ``classes`` is not used.
    """
    for sample in feature:
        for slot, width in enumerate(range_classes):
            sample[slot] = pad_or_truncate(sample[slot], width, symbol)
    return feature

In [9]:
def fill_with_symbol_input(classes,range_classes,feature_input,symbol):
    """Pad or cut each per-class list of a single input to a fixed width.

    ``feature_input[k]`` is replaced by ``pad_or_truncate`` of itself with
    target length ``range_classes[k]`` and filler ``symbol``.
    ``feature_input`` is modified in place and returned. ``classes`` is not
    used.
    """
    # An empty input is returned untouched, as before.
    if len(feature_input) == 0:
        return feature_input
    # The former inner ``for j in range(len(feature_input))`` loop never used
    # ``j`` and just repeated the same padding; one pass per class is enough.
    for slot, width in enumerate(range_classes):
        feature_input[slot] = pad_or_truncate(feature_input[slot], width, symbol)
    return feature_input

In [10]:
def merge_sublist(feature):
    """Concatenate the per-class lists of each pattern into one row.

    Args:
        feature: list indexed ``[pattern][class]`` of lists.

    Returns:
        ``np.array`` built from one flattened row per pattern.
    """
    rows = [
        [value for group in pattern for value in group]
        for pattern in feature
    ]
    return np.array(rows)

In [11]:
def merge_sublist_input(feature_input):
    """Concatenate the per-class lists of one input into a flat ``np.array``."""
    flat = [value for group in feature_input for value in group]
    return np.array(flat)

In [12]:
def norm_list(l):
    """Min-max scale ``l`` to [0, 1] in place and return it.

    Args:
        l: mutable sequence of numbers (list or 1-D numpy array).

    Returns:
        The same object ``l`` with each value replaced by
        ``(x - min) / (max - min)``. If ``l`` is empty it is returned
        unchanged, and if all values are equal every entry becomes 0.0.
        Both cases previously failed (``ValueError`` from ``min`` and a
        division by zero / NaN respectively).
    """
    if len(l) == 0:
        return l
    xmin = min(l)
    xmax = max(l)
    span = xmax - xmin
    for i, x in enumerate(l):
        # A zero span means there is no spread to scale by.
        l[i] = (x - xmin) / span if span != 0 else 0.0
    return l

In [13]:
def norm_distance(d_array):
    """Apply ``norm_list`` to every row of ``d_array`` in place and return it."""
    for row_index, row in enumerate(d_array):
        d_array[row_index] = norm_list(row)
    return d_array

In [14]:
def cal_distance_angle_pattern_vector(file_pattern):
    """Build the distance+angle feature matrix for every stored pattern.

    Reads the notebook-level global ``classes``. Per-class distance and angle
    lists are padded with 0 to the longest list seen for that class,
    flattened to one row per pattern, the distance rows are min-max scaled,
    and both halves are joined column-wise.

    Args:
        file_pattern: path of the pattern JSON file passed to
            ``cal_distance_angle``.

    Returns:
        ``(distance_angle, range_classes)``: the feature matrix and the
        per-class padded widths.
    """
    raw_distance, raw_angle = cal_distance_angle(file_pattern, classes)

    range_classes = get_max_range_classes(classes, raw_distance)

    padded_distance = fill_with_symbol(classes, range_classes, raw_distance, 0)
    padded_angle = fill_with_symbol(classes, range_classes, raw_angle, 0)

    distance_matrix = norm_distance(merge_sublist(padded_distance))
    angle_matrix = merge_sublist(padded_angle)

    combined = np.concatenate((distance_matrix, angle_matrix), axis=1)
    return combined, range_classes

In [15]:
def cos_sim(a, b):
    """Return the cosine similarity of two vectors.

    Args:
        a, b: 1-D array-likes of equal length.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or 0.0 when either vector has zero norm
        (previously this divided by zero and produced NaN).
    """
    dot_product = np.dot(a, b)  # x . y
    denominator = np.linalg.norm(a) * np.linalg.norm(b)  # |x| * |y|
    if denominator == 0:
        return 0.0
    return dot_product / denominator

In [16]:
def cal_distance_angle_similarity(vector_input,vector_pattern):
    """Score the input vector against every pattern row with ``cos_sim``.

    Returns a list with one similarity per row of ``vector_pattern``, in the
    same order.
    """
    return [cos_sim(vector_input, row) for row in vector_pattern]

In [17]:
def get_header(classes,range_classes):
    """Build column names ``<class><k>`` for ``k`` in ``range(range_classes[i])``.

    Classes are visited in order, so the names line up with the flattened
    per-class feature layout.
    """
    return [
        classes[idx] + str(slot)
        for idx in range(len(classes))
        for slot in range(range_classes[idx])
    ]

In [18]:
def cal_distance_angle_input_vector(list_input):
    """Build the distance+angle feature vector for one list of components.

    Reads the notebook-level globals ``classes`` and ``range_classes``.
    Per-class distance and angle lists are padded with 0 to the widths in
    ``range_classes``, flattened, the distance half is min-max scaled with
    ``norm_list``, and the two halves are concatenated.

    Args:
        list_input: list of component dicts with ``type``, ``x`` and ``y``.

    Returns:
        The concatenated numpy vector (distances first, then angles).
    """
    raw_distance, raw_angle = cal_distance_angle_input(list_input, classes)
    padded_distance = fill_with_symbol_input(classes, range_classes, raw_distance, 0)
    padded_angle = fill_with_symbol_input(classes, range_classes, raw_angle, 0)

    # Flatten each half, then transpose exactly as the original code did.
    distance_vector = merge_sublist_input(padded_distance).T
    angle_vector = merge_sublist_input(padded_angle).T

    scaled_distance = norm_list(distance_vector)

    return np.concatenate((scaled_distance, angle_vector))

In [19]:
def text_component_pattern(file_json,classes_text):
    """Collect the texts of every pattern, grouped by text-bearing class.

    Args:
        file_json: path of a JSON file holding a list of patterns, each with
            an ``info_children`` list of component dicts.
        classes_text: iterable of component types to collect texts for.

    Returns:
        A list indexed ``[pattern][class]`` of lists of ``text`` values.
        Components of a listed type that carry no ``text`` key are skipped;
        previously they raised ``KeyError``.
    """
    # Context manager so the handle is closed (it used to be left open).
    with open(file_json) as f:
        input_file = json.load(f)

    return [
        [
            [
                child['text']
                for child in pattern['info_children']
                if class_i == child['type'] and 'text' in child
            ]
            for class_i in classes_text
        ]
        for pattern in input_file
    ]

In [20]:
def text_pattern_vector(file_json):
    """Build the padded, flattened text matrix for every stored pattern.

    Reads the notebook-level global ``classes_text``. Per-class text lists
    are padded with ``''`` to the longest list seen for that class and then
    flattened to one row per pattern.

    Returns:
        ``(text_matrix, range_classes_text)``: the numpy array from
        ``merge_sublist`` and the per-class padded widths.
    """
    grouped_texts = text_component_pattern(file_json, classes_text)

    range_classes_text = get_max_range_classes(classes_text, grouped_texts)

    padded_texts = fill_with_symbol(classes_text, range_classes_text, grouped_texts, '')

    return merge_sublist(padded_texts), range_classes_text

In [21]:
def text_component_input(list_input,classes_text):
    """Collect the texts of one component list, grouped by class.

    Args:
        list_input: list of component dicts with ``type`` and, optionally,
            ``text``.
        classes_text: iterable of component types to collect texts for.

    Returns:
        A list with one sub-list of ``text`` values per class. Components of
        a listed type that carry no ``text`` key are skipped; previously
        they raised ``KeyError``.
    """
    return [
        [
            item['text']
            for item in list_input
            if class_i == item['type'] and 'text' in item
        ]
        for class_i in classes_text
    ]

In [22]:
def text_input_vector(list_input):
    """Build the padded, flattened text vector for one list of components.

    Reads the notebook-level globals ``classes_text`` and
    ``range_classes_text``. Per-class text lists are padded with ``''`` to
    the widths in ``range_classes_text`` and flattened into a numpy array.
    """
    grouped_texts = text_component_input(list_input, classes_text)
    padded_texts = fill_with_symbol_input(classes_text, range_classes_text, grouped_texts, '')

    # Flatten, then transpose exactly as the original code did.
    flat_texts = merge_sublist_input(padded_texts)
    return flat_texts.T

In [23]:
from difflib import SequenceMatcher

def similar(a,b):
    """Return the case-insensitive ``SequenceMatcher`` ratio of two strings."""
    left = a.lower()
    right = b.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()

In [24]:
def cal_text_similarity(list_text_component_pattern,list_text_component_input):
    """Score the input texts against the texts of every pattern.

    Reads the notebook-level global ``range_classes_text`` (the width of each
    class window in the flattened text vectors) and calls ``similar``.

    For each pattern and each class window, the best ``similar`` score
    between any non-empty input text and any non-empty pattern text in that
    window is taken; the per-class bests are summed.

    Args:
        list_text_component_pattern: 2-D sequence, one flattened text row per
            pattern.
        list_text_component_input: flattened text vector of the input.

    Returns:
        A list with one summed score per pattern.
    """
    text_similarity = []
    for pattern_texts in list_text_component_pattern:
        offset = 0
        total_similar = 0
        for range_i in range_classes_text:
            s_max = 0
            for i in range(offset, offset + range_i):
                input_text = list_text_component_input[i]
                if input_text == '':
                    continue
                for j in range(offset, offset + range_i):
                    pattern_text = str(pattern_texts[j])
                    if pattern_text == '':
                        continue
                    s = similar(str(input_text), pattern_text)
                    if s > s_max:
                        s_max = s
            # Each class occupies exactly ``range_i`` slots of the flattened
            # vector, so the window advances by ``range_i``. The former
            # ``range_i-1`` misaligned every later class and moved the
            # offset backwards for classes of width 0.
            offset += range_i
            total_similar += s_max
        text_similarity.append(total_similar)
    return text_similarity

In [25]:
def get_list_field(file_json,field):
    """Return ``entry[field]`` for every entry of the JSON list in ``file_json``.

    Args:
        file_json: path of a JSON file holding a list of dicts.
        field: key to read from every entry.

    Returns:
        A list of the field values, in file order.

    Raises:
        KeyError: if an entry lacks ``field``.
    """
    # Context manager so the handle is closed (it used to be left open).
    with open(file_json) as f:
        input_file = json.load(f)
    return [entry[field] for entry in input_file]

In [26]:
# Build the notebook-level state that the recommendation functions read as
# globals: id_pattern_list, name_pattern_list, classes, classes_text,
# range_classes, range_classes_text, distance_angle_pattern and
# list_text_component_pattern.
# NOTE(review): 'position_center.json' is written by translate_component(file_json),
# but no visible cell calls it - presumably the file was produced in an earlier
# session; confirm before a Restart & Run All.
id_pattern_list = get_list_field('position_center.json','id_pattern')
name_pattern_list = get_list_field('position_center.json','name_pattern')

# classes / classes_text must exist before the *_vector helpers below run,
# because those helpers read them as globals.
classes,classes_text = get_classes('position_center.json')
# Feature matrices for the stored patterns.
distance_angle_pattern,range_classes = cal_distance_angle_pattern_vector('position_center.json')
list_text_component_pattern,range_classes_text = text_pattern_vector('position_center.json')
# The header is repeated because each row is the distance columns followed by
# the angle columns, both laid out per class.
header = get_header(classes,range_classes)
df = pd.DataFrame (distance_angle_pattern, columns = header+header)
df.to_csv('pattern_vector.csv',index = False)

### Test trên Visily

In [27]:
def parse_children(list_children,x_pattern,y_pattern,result):
    """Flatten a component tree into a list of leaf descriptors.

    This re-definition shadows the ``parse_children`` defined earlier in the
    notebook and applies the same rules.

    ``CONTAINER`` nodes are not emitted themselves; their ``children`` are
    visited recursively with the container's absolute position as the new
    offset. Every other node is appended to ``result`` as a dict with the
    keys ``type``, ``text`` (only when present in the node data), ``width``,
    ``height``, ``x``, ``y`` and ``size``.

    Args:
        list_children: iterable of node dicts, each with ``type`` and ``data``
            (and ``children`` for containers).
        x_pattern: x offset added to every child's ``data.position.x``.
        y_pattern: y offset added to every child's ``data.position.y``.
        result: list that is extended in place.

    Returns:
        The same ``result`` list.

    Raises:
        KeyError: if a node lacks ``type``, ``data``, ``data.position`` or,
            for a DIVIDER, ``data.from`` / ``data.to``.
    """
    for node in list_children:
        node_type = node["type"]
        node_data = node["data"]
        # A missing size is represented as None.
        size_child = node_data.get("size")

        if node_type == "DIVIDER":
            span_x = abs(node_data['to']['x'] - node_data['from']['x'])
            span_y = abs(node_data['to']['y'] - node_data['from']['y'])
            if span_y <= 1:
                # Horizontal line (this branch also wins when both spans are <= 1).
                width_child, height_child = span_x, 1
            elif span_x <= 1:
                # Vertical line.
                width_child, height_child = 1, span_y
            else:
                # Diagonal divider: previously neither branch assigned a size,
                # which raised UnboundLocalError or silently reused the size
                # of the previous child. Fall back to the bounding box.
                width_child, height_child = span_x, span_y
        else:
            # Missing or null dimensions count as 0.
            width_child = node_data.get("width")
            if width_child is None:
                width_child = 0
            height_child = node_data.get("height")
            if height_child is None:
                height_child = 0

        x_child = node_data["position"]["x"] + x_pattern
        y_child = node_data["position"]["y"] + y_pattern

        if node_type == "CONTAINER":
            result = parse_children(node["children"], x_child, y_child, result)
            continue

        # Key order is kept (type, text, width, height, x, y, size).
        entry = {"type": node_type}
        if 'text' in node_data:
            entry["text"] = node_data['text']
        entry["width"] = width_child
        entry["height"] = height_child
        entry["x"] = x_child
        entry["y"] = y_child
        entry["size"] = size_child
        result.append(entry)
    return result

def resize_component_pattern(data):
    """Summarise each input frame as a flat record.

    This re-definition shadows the earlier ``resize_component_pattern`` and
    reads a different layout: the id comes from ``entry["data"]["patternId"]``
    and the geometry directly from ``entry["data"]``; no name is recorded.

    Args:
        data: iterable of frame dicts with ``data`` and ``children``.

    Returns:
        A list with one summary dict per frame, including the flattened leaf
        components produced by ``parse_children``.
    """
    def _describe(entry):
        # Build the summary record for one frame.
        frame = entry["data"]
        identifier = frame['patternId']
        frame_width = frame["width"]
        frame_height = frame["height"]
        origin_x = frame["position"]["x"]
        origin_y = frame["position"]["y"]
        leaves = parse_children(entry["children"], origin_x, origin_y, [])
        return {
            'id_pattern': identifier,
            "width_pattern": frame_width,
            "height_pattern": frame_height,
            "x_pattern": origin_x,
            "y_pattern": origin_y,
            "info_children": leaves
        }

    return [_describe(entry) for entry in data]


def translate_component(data_json):
    """Convert already-loaded frames to frame-relative coordinates.

    This re-definition shadows the earlier file-based ``translate_component``:
    it takes the parsed JSON directly and returns the result instead of
    writing a file.

    Each frame is moved to the origin, its children are shifted by the same
    amount, and each child's ``x`` / ``y`` is then replaced by
    ``(x + width) / 2`` and ``(y + height) / 2``.

    Returns:
        The list produced by ``resize_component_pattern``, updated in place.
    """
    frames = resize_component_pattern(data_json)
    for frame in frames:
        shift_x = 0 - frame['x_pattern']
        shift_y = 0 - frame['y_pattern']
        frame['x_pattern'] = 0
        frame['y_pattern'] = 0
        for child in frame['info_children']:
            # Shift to frame-relative coordinates, then apply the same
            # "(coordinate + size) / 2" step the pattern preprocessing uses.
            # NOTE(review): this is not x + width / 2 - confirm it is intended.
            child['x'] = (shift_x + child['x'] + child['width']) / 2
            child['y'] = (shift_y + child['y'] + child['height']) / 2
    return frames

In [28]:
def get_input_demo_and_recommend(file_name):
    """Recommend the five most similar patterns for the first frame of a file.

    Reads ``<file_name>.json``, converts it with ``translate_component`` and
    scores the first frame's components against every stored pattern. The
    score is the distance/angle cosine similarity plus the text similarity.
    The top five rows are written to ``<file_name>.csv``.

    Relies on the notebook-level globals ``distance_angle_pattern``,
    ``list_text_component_pattern``, ``id_pattern_list`` and
    ``name_pattern_list``.

    Args:
        file_name: path of the input file without the ``.json`` extension.
    """
    # Context manager so the handle is closed (it used to be left open).
    with open(file_name + '.json') as f:
        data_json = json.load(f)
    list_patterns = translate_component(data_json)
    id_pattern = list_patterns[0]['id_pattern']
    list_input = list_patterns[0]['info_children']

    # Distance/angle similarity.
    distance_angle_input = cal_distance_angle_input_vector(list_input)
    distance_angle_si = cal_distance_angle_similarity(distance_angle_input,distance_angle_pattern)
    # Text similarity.
    list_text_component_input = text_input_vector(list_input)
    text_si = cal_text_similarity(list_text_component_pattern,list_text_component_input)
    # Combined score.
    si = np.array(distance_angle_si) + np.array(text_si)
    scores = {'distance_angle_similarity':distance_angle_si,'text_si':text_si,'similarity':si,'Id_recommend':id_pattern_list,'Name_recommend':name_pattern_list,}

    df = pd.DataFrame(scores)
    df = df.sort_values(by = 'similarity',ascending=False)
    df = df.head(5)
    df['Id'] = id_pattern
    df.to_csv(file_name + '.csv',index = False)

In [29]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

def visualize_input(file_name,list_input):
    """Draw every component as a rectangle coloured by its cluster.

    The rectangles are drawn over ``frame.png`` and the figure is saved as
    ``<file_name>_clustering``.

    Args:
        file_name: prefix of the output image path.
        list_input: list of component dicts with ``type``, ``width``,
            ``height``, ``x``, ``y`` and an integer ``cluster``.
    """
    # One colour per cluster index. The first five match the colours used
    # before; a sixth was added because up to 6 clusters can be selected and
    # cluster 5 used to be silently left out of the picture. Indices beyond
    # the table wrap around.
    cluster_colors = ['r', 'blue', 'green', 'yellow', 'purple', 'orange']

    # Context manager so the background image handle is closed after saving.
    with Image.open('frame.png') as im:
        fig, ax = plt.subplots()
        ax.imshow(im)

        for item in list_input:
            type_child = item["type"]
            width_child = item["width"]
            height_child = item["height"]
            x_child = item["x"]
            y_child = item["y"]
            color = cluster_colors[int(item["cluster"]) % len(cluster_colors)]

            rect_child = patches.Rectangle(
                (x_child, y_child), width_child,height_child, linewidth=1, edgecolor=color, facecolor='none')
            ax.add_patch(rect_child)
            # Label the rectangle at its centre.
            rx, ry = rect_child.get_xy()
            cx = rx + rect_child.get_width()/2.0
            cy = ry + rect_child.get_height()/2.0
            ax.annotate(type_child, (cx, cy), color=color,
                    weight='bold', fontsize=3, ha='center', va='center')

        plt.savefig(file_name + '_clustering', bbox_inches='tight')
        plt.close('all')

In [30]:
def export_json(file_name,list_id_recommend,list_name_recommend):   
    """Write the most frequently recommended patterns to a JSON file.

    Args:
        file_name: prefix of the output path; the file written is
            ``file_name + '_recommned_by_split_clusters.json'``.
        list_id_recommend: recommended pattern ids, one per recommendation.
        list_name_recommend: matching pattern names, same length.
    """
    recommend_data = {'id': list_id_recommend,
             'name': list_name_recommend}
    
    df = pd.DataFrame(recommend_data, columns= ['id','name'])
    
    # NOTE(review): this appears to count how often each (id, name) pair
    # occurs; the exact result shape of pivot_table with aggfunc='size' should
    # be confirmed against the installed pandas version.
    recommend_df = df.pivot_table(columns=['id','name'], aggfunc='size')
    # Largest values first, then keep the first five entries.
    recommend_df = recommend_df.sort_values(ascending = False)
    recommend_df = recommend_df[:5]
    # Column 0 (presumably the counts) is dropped so that only the index
    # levels remain as columns.
    recommend_df = recommend_df.to_frame().reset_index().drop(columns = [0])
    recommend_df.to_json(file_name + '_recommned_by_split_clusters.json', orient='index')

In [31]:
def get_input_demo_and_recommend_for_cluster(file_name, random_state=None):
    """Cluster the components of a file and recommend patterns per cluster.

    Reads ``<file_name>.json``, converts it with ``translate_component`` and
    pools the components of all frames. The (x, y) positions are clustered
    with KMeans for 2 to 6 clusters and the clustering with the highest
    positive silhouette score is kept (a single cluster if none qualifies).
    Each cluster is scored against the stored patterns; the top five rows are
    written to ``<file_name>_cluster<i>.csv``, the clusters are drawn with
    ``visualize_input`` and the pooled recommendations are passed to
    ``export_json``.

    Relies on the notebook-level globals ``distance_angle_pattern``,
    ``list_text_component_pattern``, ``id_pattern_list`` and
    ``name_pattern_list``.

    Args:
        file_name: path of the input file without the ``.json`` extension.
        random_state: forwarded to ``KMeans``. The default ``None`` keeps the
            previous unseeded behaviour; pass an int for reproducible runs.
    """
    # Context manager so the handle is closed (it used to be left open).
    with open(file_name + '.json') as f:
        input_file = json.load(f)
    list_patterns = translate_component(input_file)

    list_input = []
    id_pattern = []
    for pattern in list_patterns:
        list_input += pattern['info_children']
        id_pattern.append(pattern['id_pattern'])
    id_pattern = ' | '.join(id_pattern)

    points = [(item['x'], item['y']) for item in list_input]

    range_n_clusters = [2, 3, 4, 5, 6]
    silhouette_max = 0
    num_cluster = 1
    best_labels = None
    for n_clusters in range_n_clusters:
        clusterer = KMeans(n_clusters=n_clusters, random_state=random_state)
        try:
            cluster_labels = clusterer.fit_predict(points)
            silhouette_avg = silhouette_score(points, cluster_labels)
        except ValueError:
            # Too few samples for this cluster count, or the labels cannot be
            # scored; try the next candidate.
            continue
        if silhouette_max < silhouette_avg:
            silhouette_max = silhouette_avg
            num_cluster = n_clusters
            best_labels = cluster_labels

    if best_labels is None:
        # No candidate scored above 0: fall back to a single cluster.
        best_labels = KMeans(n_clusters=num_cluster, random_state=random_state).fit_predict(points)
    # The labels that were actually scored are reused. Previously the model
    # was refit (twice) with a fresh random initialisation, so the final
    # clustering could differ from the one the silhouette score selected.
    identified_clusters = best_labels

    for item, label in zip(list_input, identified_clusters):
        item['cluster'] = label

    visualize_input(file_name,list_input)

    cluster_list = [
        [item for item in list_input if item['cluster'] == cluster_index]
        for cluster_index in range(num_cluster)
    ]

    list_id_recommend = []
    list_name_recommend = []
    for i, cluster_items in enumerate(cluster_list):
        # Distance/angle similarity.
        distance_angle_input = cal_distance_angle_input_vector(cluster_items)
        distance_angle_si = cal_distance_angle_similarity(distance_angle_input,distance_angle_pattern)
        # Text similarity.
        list_text_component_input = text_input_vector(cluster_items)
        text_si = cal_text_similarity(list_text_component_pattern,list_text_component_input)
        # Combined score.
        si = np.array(distance_angle_si) + np.array(text_si)
        scores = {'distance_angle_similarity':distance_angle_si,'text_si':text_si,'similarity':si,'Id_recommend':id_pattern_list,'Name_recommend':name_pattern_list,}
        df = pd.DataFrame(scores)
        df = df.sort_values(by = 'similarity',ascending=False)
        df = df.head(5)
        df['Id_input'] = id_pattern
        df = df.reset_index()
        df.to_csv(f'{file_name}_cluster{i}.csv',index = False)
        list_id_recommend += df['Id_recommend'].to_list()
        list_name_recommend += df['Name_recommend'].to_list()
    export_json(file_name,list_id_recommend,list_name_recommend)

In [32]:
# Run the single-frame recommender for every file name listed in
# 'list_file_name.txt' (one name per line, without the '.json' extension).
# The context manager closes the list file, which used to stay open.
with open('list_file_name.txt', 'r') as file1:
    Lines = file1.readlines()

for line in Lines:
    file_name = line.strip()
    if not file_name:
        # A blank line would otherwise try to open the file '.json'.
        continue
    get_input_demo_and_recommend(file_name)

In [33]:
# Run the cluster-based recommender for every file name listed in
# 'list_file_name_for_cluster.txt' (one name per line, without '.json').
# The context manager closes the list file, which used to stay open.
with open('list_file_name_for_cluster.txt','r') as file2:
    Lines = file2.readlines()

for line in Lines:
    file_name = line.strip()
    if not file_name:
        # A blank line would otherwise try to open the file '.json'.
        continue
    get_input_demo_and_recommend_for_cluster(file_name)