In [132]:
#import basic libraries

import csv
import sys
import os
import math
import numpy as np
import numpy.linalg as npl
import scipy
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as spla
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.metrics import f1_score
from sklearn.metrics import PrecisionRecallDisplay
from sklearn.metrics import (precision_recall_curve,
                             PrecisionRecallDisplay)
from sklearn import metrics
import matplotlib.ticker as mtick
import matplotlib as mpl


In [133]:
# update : (time, x, y, color)
# Path to the tile-placement index CSV. It is bound in this cell, right before
# its first use, so the cell also works on a fresh kernel (Restart & Run All).
update_info_file = "/scratch/yw180/place/data/sorted_tile_placements_idx.csv"

# Maps update id -> (time, x, y, color). The feature cells further down look
# every update id up in this dict.
update_info = dict()
# newline="" is what the csv module asks for when it is handed a file object.
with open(update_info_file, "r", newline="") as f:
    reader = csv.reader(f, delimiter = ',')
    # Skip the first row (presumably a header -- TODO confirm); the default
    # keeps an empty file from raising StopIteration.
    next(reader, None)
    for line in reader:
        up = int(line[0])
        # Column 1 is divided by 1000 and truncated to int; presumably a
        # millisecond timestamp turned into seconds -- TODO confirm.
        time = int(int(line[1]) / 1000)
        # Column 2 is not used by this notebook section.
        x = int(line[3])
        y = int(line[4])
        color = int(line[5])
        update_info[up] = (time, x, y, color)

In [134]:
# build features
# All four inputs live in the same directory; each file is read row by row
# with csv.reader by the feature cells below.
_merge_dir = "/home/yw180/place/data/merged_8/merge_user_emb"

# Per-project rows of update ids (successful / unsuccessful projects).
succ_updates = _merge_dir + "/successful_1.csv"
failed_updates = _merge_dir + "/unsuccessful_1.csv"

# Per-project rows whose length is used as the #users feature.
succ_users = _merge_dir + "/successful_user_1.csv"
failed_users = _merge_dir + "/unsuccessful_user_1.csv"

In [135]:
# csv <#updates,#users,center_point_x, center_point_y, start_time,max_area,color_entropy, successful>
# One inner list per project; the cells below append one column at a time,
# in the order given above.
feature = []

In [136]:
# updates
# Create one feature row per CSV row, successful projects first and failed
# projects after them. The first column is the number of fields in the row,
# i.e. the number of update ids listed for that project.
for updates_path in (succ_updates, failed_updates):
    with open(updates_path, "r") as f:
        feature.extend([len(update_ids)] for update_ids in csv.reader(f, delimiter=','))

In [137]:
# users
# Append the number of fields in each user row as the second column. The row
# index keeps counting across both files, so the failed rows land after the
# successful ones, matching the order in which the rows were created.
row_idx = 0
for users_path in (succ_users, failed_users):
    with open(users_path, "r") as f:
        for user_ids in csv.reader(f, delimiter=','):
            feature[row_idx].append(len(user_ids))
            row_idx += 1

In [138]:
# Path to the tile-placement index CSV. It is opened by the cell that fills
# `update_info` (In [133]), which appears earlier in the notebook than this cell.
update_info_file = "/scratch/yw180/place/data/sorted_tile_placements_idx.csv"

In [139]:
# center x and y, start_time
def _center_and_start(update_ids, update_info):
    """Return (mean x, mean y, start_time) for one project's update ids.

    ``update_ids`` is one CSV row (strings convertible to int) and
    ``update_info`` maps update id -> (time, x, y, color).
    ``start_time`` is the smallest update time, capped from above by
    1491238734 (presumably the end of the event in epoch seconds -- TODO
    confirm). An empty row raises ZeroDivisionError.
    """
    sum_x = 0
    sum_y = 0
    earliest = 1491238734
    for update_id in update_ids:
        record = update_info[int(update_id)]
        sum_x += record[1]
        sum_y += record[2]
        earliest = min(record[0], earliest)
    count = len(update_ids)
    return sum_x / count, sum_y / count, earliest


# Successful projects first, then failed ones; the row index runs across both
# files so each triple is appended to the matching feature row.
row_idx = 0
for updates_path in (succ_updates, failed_updates):
    with open(updates_path, "r") as f:
        for update_ids in csv.reader(f, delimiter=','):
            feature[row_idx].extend(_center_and_start(update_ids, update_info))
            row_idx += 1

In [140]:
def pixels_per_project(line, update_info):
    """Count the distinct (x, y) positions touched by a project's updates.

    Args:
        line: iterable of update ids (anything ``int()`` accepts).
        update_info: mapping from update id to a tuple whose items 1 and 2
            are the x and y coordinates.

    Returns:
        Number of unique (x, y) pairs among the listed updates.

    Raises:
        KeyError: if an update id is missing from ``update_info``.
    """
    records = (update_info[int(item)] for item in line)
    return len({(record[1], record[2]) for record in records})

In [141]:
# max area
# Append the number of distinct pixels each project touched. Successful rows
# come first, then failed rows, with one running index over both files.
row_idx = 0
for updates_path in (succ_updates, failed_updates):
    with open(updates_path, "r") as f:
        for update_ids in csv.reader(f, delimiter=','):
            feature[row_idx].append(pixels_per_project(update_ids, update_info))
            row_idx += 1

In [142]:
def color_entropy_per_project(line, update_info):
    """Shannon entropy (natural log) of the colors used by a project's updates.

    Args:
        line: iterable of update ids (anything ``int()`` accepts).
        update_info: mapping from update id to a tuple whose item 3 is the
            color index; indices are tallied in a histogram of 17 slots.

    Returns:
        ``-sum(p * log(p))`` over the colors that occur, where ``p`` is the
        share of updates that used that color.
    """
    num_colors = 17
    histogram = np.zeros(num_colors)
    for update_id in map(int, line):
        histogram[update_info[update_id][3]] += 1
    # Turn the counts into relative frequencies.
    shares = histogram / np.sum(histogram)
    entropy = 0.
    for share in shares:
        # Colors that never occur contribute nothing to the entropy.
        if share > 0:
            entropy = entropy - share * np.log(share)
    return entropy

In [143]:
# color entropy
# Append each project's color entropy. Successful rows come first, then
# failed rows, with one running index over both files.
row_idx = 0
for updates_path in (succ_updates, failed_updates):
    with open(updates_path, "r") as f:
        for update_ids in csv.reader(f, delimiter=','):
            feature[row_idx].append(color_entropy_per_project(update_ids, update_info))
            row_idx += 1

In [144]:
# succ and failed label
# The feature rows were created from the successful file first and the failed
# file second, so the label boundary is the number of rows in the successful
# file. Count it with the same csv.reader settings used to build the rows
# instead of hard-coding it, so the labels stay right if the input changes.
with open(succ_updates, "r") as f:
    num_succ = sum(1 for _ in csv.reader(f, delimiter=','))

# Last column: 1 for a successful project, 0 for a failed one.
for i in range(0, len(feature)):
    feature[i].append(1 if i < num_succ else 0)

In [145]:
# Display the assembled rows. Each row holds, in append order:
# [#updates, #users, center_x, center_y, start_time, max_area, color_entropy, successful]
feature

[[289392,
  92202,
  525.9611115718471,
  209.03259246972965,
  1490979831,
  30442,
  1.3092230844150816,
  1],
 [277240,
  71043,
  932.65988674073,
  287.7213966238638,
  1490979658,
  44813,
  2.0415968131819158,
  1],
 [197475,
  25266,
  654.8123205469046,
  25.787689580959615,
  1490980257,
  28807,
  2.059055218141376,
  1],
 [369956,
  58110,
  678.9434338137509,
  84.65868373536313,
  1490980371,
  30158,
  1.7612334553498357,
  1],
 [359087,
  82879,
  336.6991509021491,
  832.7379604385567,
  1490980120,
  27308,
  1.848563851329611,
  1],
 [120242,
  40625,
  374.06109346151925,
  436.6044809633905,
  1490979690,
  17114,
  1.8898325839760395,
  1],
 [129193,
  38123,
  578.5215530253187,
  428.1212681801646,
  1490979781,
  28505,
  2.6215730838506306,
  1],
 [613320,
  122515,
  496.7025728820192,
  502.39224874453794,
  1490979957,
  18590,
  1.8656053966709785,
  1],
 [164116,
  49032,
  533.6712691023422,
  747.3219186429111,
  1490980174,
  21305,
  2.214163017861317

In [146]:
# Write the feature rows to disk, one CSV row per project and no header row.
# newline="" lets the csv writer control line endings itself; without it,
# platforms that translate "\n" would emit an extra "\r" on every row.
with open("/home/yw180/place/data/merged_8/merge_user_emb/feature_1.csv", "w", newline="") as file_out:
    writer = csv.writer(file_out, delimiter = ",")
    writer.writerows(feature)