In [19]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from snorkel.learning import GenerativeModel
from scipy import sparse
import matplotlib.pyplot as plt
from functools import partial
from sklearn.metrics import f1_score
from scipy.optimize import minimize
from snorkel.learning.structure import DependencySelector


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Generate Primitives
def has_bike(object_names):
    if ('cycle' in object_names) or ('bike' in object_names) or ('bicycle' in object_names):
        return 1
    else:
        return 0
def has_human(object_names):
    if (('person' in object_names) or ('woman' in object_names) or ('man' in object_names)) \
        and (('bicycle' in object_names) or 'bicycles' in object_names):
        return 1
    else:
        return 0
def has_road(object_names):
    if ('road' in object_names) or ('street' in object_names) or ('concrete' in object_names):
        return 1
    else:
        return 0
def has_cars(object_names):
    if ('car' in object_names) or ('cars' in object_names) or \
        ('bus' in object_names) or ('buses' in object_names) or \
        ('truck' in object_names) or ('trucks' in object_names):
        return 1
    else:
        return 0

In [21]:
from primitive_helpers import bike_human_distance, bike_human_size, bike_human_nums
from data_loader import DataLoader
loader = DataLoader()

def create_primitives(loader):
    m = 7 # number of primitives
    primitive_mtx = np.zeros((loader.train_num,m))

    for i in range(loader.train_num):
        primitive_mtx[i,0] = has_human(loader.train_object_names[i])
        primitive_mtx[i,1] = has_road(loader.train_object_names[i])
        primitive_mtx[i,2] = has_cars(loader.train_object_names[i])
        primitive_mtx[i,3] = has_bike(loader.train_object_names[i])

        primitive_mtx[i,4] = bike_human_distance(loader.train_object_names[i], 
                                                 loader.train_object_x[i], 
                                                 loader.train_object_y[i])

        area = np.multiply(loader.train_object_height[i], loader.train_object_width[i])
        primitive_mtx[i,5] = bike_human_size(loader.train_object_names[i], area)
        primitive_mtx[i,6] = bike_human_nums(loader.train_object_names[i])

    return primitive_mtx
primitive_mtx = create_primitives(loader)

p_keys = {
    'has_human': primitive_mtx[:,0],
    'has_road': primitive_mtx[:, 1],
    'has_cars': primitive_mtx[:, 2],
    'has_bike': primitive_mtx[:, 3],
    'bike_human_distance': primitive_mtx[:, 4],
    'bike_human_size': primitive_mtx[:, 5],
    'bike_human_num': primitive_mtx[:, 6]
   }
pos = list(np.where(loader.train_ground>0)[0])
neg = list(np.where(loader.train_ground<0)[0])[-len(pos):]
chosen_data = pos + neg
print(len(chosen_data))
loader.train_ground = loader.train_ground[chosen_data]
loader.train_num = len(chosen_data)

540


In [40]:
# extract X and Y 
X = np.zeros((len(chosen_data), 512))
from img_to_vec import Img2Vec
import skimage.io as io
from PIL import Image
img2vec = Img2Vec()
print()
for i in range(len(chosen_data)):
    j = chosen_data[i]
    img = io.imread(loader.data[int(loader.train_vg_idx[j])]['url'])
    img = Image.fromarray(img)
    X[i,] = img2vec.get_vec(img)
Y = loader.train_ground




In [64]:
print(X.shape, Y.shape)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split 
X_train, X_test, Y_train, Y_test, chosen_train, chosen_test = train_test_split(X, Y, chosen_data, test_size=0.3, random_state=42)
def metric(labels):
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, labels)
    labels = clf.predict(X_test)
    gen = np.mean(labels == Y_test)
    f1_gen = f1_score(Y_test, labels)
    return gen, f1_gen 
print(metric(Y_train), chosen_train)

(540, 512) (540,)
(0.62962962962962965, 0.61538461538461531) [678, 813, 662, 172, 607, 70, 47, 789, 208, 757, 824, 867, 769, 295, 614, 554, 66, 891, 547, 825, 9, 369, 134, 760, 571, 45, 143, 697, 5, 724, 738, 201, 875, 339, 636, 551, 82, 887, 783, 74, 19, 605, 848, 335, 110, 519, 869, 371, 356, 546, 675, 574, 745, 735, 460, 650, 716, 732, 781, 535, 707, 879, 441, 711, 212, 561, 756, 550, 630, 872, 727, 728, 648, 253, 336, 591, 466, 290, 503, 868, 754, 775, 696, 258, 322, 543, 538, 660, 603, 229, 620, 857, 873, 373, 579, 628, 694, 768, 736, 527, 655, 740, 569, 109, 431, 840, 870, 195, 349, 552, 637, 871, 703, 488, 894, 733, 113, 627, 659, 541, 471, 753, 473, 778, 829, 392, 714, 303, 456, 786, 568, 305, 762, 558, 689, 384, 612, 674, 811, 616, 835, 797, 606, 764, 581, 397, 610, 739, 796, 734, 408, 856, 595, 847, 729, 843, 695, 597, 559, 712, 670, 613, 351, 596, 619, 669, 656, 584, 492, 702, 763, 839, 743, 542, 588, 425, 654, 862, 761, 359, 378, 685, 741, 771, 884, 405, 888, 544, 819, 866,



In [65]:
#x1 = []
#x2 = []
def LF_street(has_human, has_road):
    if has_human >= 1: 
        if has_road >= 1:
            return 1
        else:
            return 0
    return -1

def LF_vehicles(has_human, has_cars):
    if has_human >= 1: 
        if has_cars >= 1:
            return 1
        else:
            return 0
    return -1

# def LF_vehicles(has_human, has_bikes):
#     if has_human >= 1: 
#         if has_bikes >= 1:
#             return 1
#         else:
#             return -1
#     return -1

def LF_has_cars(has_cars):
    if has_cars >= 1:
        return 1
    else:
        return -1 
def LF_has_road(has_road):
    if has_road >= 1:
        return 1
    else:
        return -1 

def LF_distance(has_human, has_bike, bike_human_distance, thre=8):
    if has_human >= 1:
        if has_bike >= 1: 
            #x1.append(bike_human_distance)
            if bike_human_distance <= thre:
                return 1
            else:
                return -1
        else:
            return 0 
    else:
        return 0 
    
def LF_size(has_human, has_bike, bike_human_size, thre=1000):
    if has_human >= 1:
        if has_bike >= 1: 
            #x2.append(bike_human_size)
            if bike_human_size <= thre:
                return 1
            else:
                return 0
    else:
        return -1
    
    
def LF_number(has_human, has_bike, bike_human_num):
    if has_human >= 1:
        if has_bike >= 1: 
            if bike_human_num >= 2:
                return 1
            if bike_human_num >= 1:
                return 0
            if bike_human_num >= 0:
                return 1 
    else:
        return -1

In [86]:
# def f(alpha, L_fns=None, ind=None, weights=None, epochs=None):
#     """L_fns: list of functions
#        l: list of indicator for whether thresholds involve in the particular function 
#     """
#     #alpha = np.exp(alpha)
#     for i in range(len(ind)):
#         L_fns[ind[i]] = partial(L_fns[ind[i]], thre=alpha[i])
        
#     L = np.zeros((len(L_fns),loader.train_num)).astype(int)
    
#     for j in range(len(chosen_data)):
#         i = chosen_data[j]
#         L[0,j] = L_fns[0](p_keys['has_human'][i], p_keys['has_road'][i])
#         L[1,j] = L_fns[1](p_keys['has_human'][i], p_keys['has_cars'][i])
#         L[2,j] = L_fns[2](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_distance'][i])
#         L[3,j] = L_fns[3](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_size'][i])
#         L[4,j] = L_fns[4](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_num'][i])
        
#     L_train = sparse.csr_matrix(L.T)
#     gen_model = GenerativeModel()
#     gen_model.train(L.T, epochs=epochs, decay=0.95, step_size= 0.01/ L.shape[1], reg_param=1e-6)
#     if weights is not None:
#         gen_model.weights = weights
#     train_marginals, likelihood = gen_model.marginals(L_train)
#     labels = 2 * (train_marginals > 0.5) - 1
#     gen = np.mean(labels == loader.train_ground)
#     f1_gen = f1_score(loader.train_ground, labels)
#     if weights is None:
#         return (gen,f1_gen), gen_model.weights, -likelihood 
#     else:
#         return -likelihood 

    
def f(alpha, L_fns=None, ind=None, weights=None, epochs=None):
    """L_fns: list of functions
       l: list of indicator for whether thresholds involve in the particular function 
    """
    #alpha = np.exp(alpha)
    for i in range(len(ind)):
        L_fns[ind[i]] = partial(L_fns[ind[i]], thre=alpha[i])
        
    L = np.zeros((len(L_fns),len(X_train))).astype(int)
    
    for j in range(len(chosen_train)):
        i = chosen_train[j]
#         for i in range(len(L_fns)):
#             L[i,j] = L_fns[i](X[j])
        L[0,j] = L_fns[0](p_keys['has_cars'][i])
        L[1,j] = L_fns[1](p_keys['has_road'][i])
        L[2,j] = L_fns[2](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_distance'][i])
        #L[0,j] = L_fns[0](p_keys['has_human'][i], p_keys['has_road'][i])
#         L[1,j] = L_fns[1](p_keys['has_human'][i], p_keys['has_cars'][i])
#         L[2,j] = L_fns[2](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_distance'][i])
#         L[3,j] = L_fns[3](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_size'][i])
#         L[4,j] = L_fns[4](p_keys['has_human'][i], p_keys['has_bike'][i], p_keys['bike_human_num'][i])
       
    L_train = sparse.csr_matrix(L.T)
    ds = DependencySelector()
    deps = ds.select(L.T, threshold=0.05)
    gen_model = GenerativeModel()
    gen_model.train(L.T, deps=deps, epochs=epochs, decay=0.95, step_size= 0.01/ L.shape[1], reg_param=1e-6)
    if weights is not None:
        gen_model.weights = weights
    train_marginals, likelihood = gen_model.marginals(L_train)
    labels = 2 * (train_marginals > 0.5) - 1
    gen = np.mean(labels == Y_train)
    f1_gen = f1_score(Y_train, labels)
    if weights is None:
        return (gen,f1_gen,labels), gen_model.weights, -likelihood 
    else:
        return -likelihood 

    
    
    
    

In [90]:
#L_fns = [LF_street, LF_vehicles, LF_distance, LF_size, LF_number]
L_fns = [LF_has_cars, LF_has_cars, LF_distance]
ind = [2]
lower = 0
upper = 50 
sim_n = 10 
tmp = np.random.uniform(lower, upper, (len(ind),sim_n))
acc = [[0,0] for _ in range(sim_n)]
acc_baseline = [[0,0] for _ in range(sim_n)]
f1 = [[0,0] for _ in range(sim_n)]
f1_baseline = [[0,0] for _ in range(sim_n)]
one_d = range(100)
print(tmp)

[[ 8.57765597  4.63131153  1.30423208  1.55270378  1.05851542  9.65078869
   0.2949134   1.59573973  0.03772962  0.13680092]]


In [92]:
# baseline 
for i in range(10):
    tmp_alpha = tmp[:,i]
    ans, _, _ = f(tmp_alpha, L_fns=L_fns, ind=ind, epochs=1000)
    acc_baseline[i][0], f1_baseline[i][0] = ans[0:2]
    print(ans[0:2])
    labels = ans[2]
    acc_baseline[i][1], f1_baseline[i][1] = metric(labels)


(0.65343915343915349, 0.67167919799498743)
(0.64814814814814814, 0.65984654731457804)
(0.64814814814814814, 0.65454545454545454)
(0.64814814814814814, 0.65454545454545454)
(0.64814814814814814, 0.65454545454545454)
(0.65079365079365081, 0.66999999999999993)
(0.64550264550264547, 0.64921465968586389)
(0.64814814814814814, 0.65454545454545454)
(0.64550264550264547, 0.64921465968586389)
(0.64550264550264547, 0.64921465968586389)
0.648148148148 0.656735154255


In [93]:

alpha = [1]
#alpha = [20,5000]
init = np.zeros((2,1))
init[:,0] = [1,10]
# init[:,1] = [1, 10, 100]

iters = 5
for i in range(iters):
    ans, weights, fvalue = f(alpha, L_fns=L_fns, ind=ind, epochs=200)
    print(fvalue)
    f_new = partial(f, L_fns=L_fns, ind=ind, epochs=0, weights=weights)
    alpha = minimize(f_new, alpha, options={'initial_simplex':init}, method='Nelder-Mead').x
    print(f_new(alpha=alpha), f_new(alpha=[20]))
    print(alpha)
    print(ans[0:2]) 
    
    


-626.191237369
-686.136262116 -652.269011658
[ 172.]
(0.64814814814814814, 0.65454545454545454)
-695.641541392
-695.677725937 -658.479324787
[ 163.]
(0.67989417989417988, 0.71794871794871795)
-695.763483625
-695.763483625 -658.522406967
[ 163.]
(0.67724867724867721, 0.71495327102803741)
-695.763483625
-695.763483625 -658.522406967
[ 163.]
(0.67724867724867721, 0.71495327102803741)
-695.763483625
-695.763483625 -658.522406967
[ 163.]
(0.67724867724867721, 0.71495327102803741)


In [None]:
print(a)