In [1]:
import numpy as np
import pandas as pd
"""import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib.image as mpimg"""
from PIL import Image
import io
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from functools import reduce
import os
from scipy.spatial.distance import cosine
import pickle
import cvxpy as cp



In [2]:
!pip install cvx



In [3]:
def avg_hash_img(pth):
    """Compute an average-hash style fingerprint for the image at ``pth``.

    The image is shrunk to 10x10 pixels and converted to 1-bit mode; each
    pixel is then compared with the mean pixel value to build a bit string,
    which is rendered as reversed, upper-case hexadecimal.

    Args:
        pth: Anything accepted by ``PIL.Image.open`` (typically a file path).

    Returns:
        str: The hash. Leading zero bits of the bit string are lost in the
        integer conversion, so the length of the result can vary.
    """
    from PIL import Image
    with Image.open(pth) as img:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter,
        # so previously stored hashes remain comparable.
        img = img.resize((10, 10), Image.LANCZOS).convert("1")  # shrink and reduce colors
        pixel_data = list(img.getdata())
        avg_pixel = sum(pixel_data) / len(pixel_data)
        bits = "".join('1' if px >= avg_pixel else '0' for px in pixel_data)
        # hex() -> strip the '0x' prefix -> reverse -> upper-case
        return hex(int(bits, 2))[2:][::-1].upper()
    
def use_pkl(mode, pkl_path, file_to_pkl = None):
    """Read from, overwrite, or append to a pickle file.

    Args:
        mode: ``'rb'`` to read, ``'wb'`` to overwrite, ``'ab'`` to append.
            Any other value makes the call a no-op that returns ``None``.
        pkl_path: Path of the pickle file.
        file_to_pkl: Object to dump in the write/append modes.

    Returns:
        In ``'rb'`` mode, every object pickled into the file concatenated
        along axis 0 (the first object must be a ``numpy.ndarray`` or a
        ``pandas.DataFrame``). ``None`` in all other cases.

    Note:
        ``pickle.load`` can execute arbitrary code; only read trusted files.
    """
    if mode not in ('rb', 'ab', 'wb'):
        return None

    with open(pkl_path, mode) as handle:
        if mode != 'rb':
            pickle.dump(file_to_pkl, handle)
            return None

        first = pickle.load(handle)
        assert type(first) == np.ndarray or type(first) == pd.DataFrame

        # A file written with repeated 'ab' calls holds several pickles
        # back to back; keep loading until the end of the file.
        chunks = [first]
        while True:
            try:
                chunks.append(pickle.load(handle))
            except EOFError:
                break

    if type(first) == np.ndarray:
        return np.concatenate(chunks, axis = 0)
    return pd.concat(chunks, axis=0)

In [4]:
# Load the two tables the recommender works on. use_pkl concatenates every
# chunk that was pickled into each file. Only load pickle files you trust.
recommender_df = use_pkl(mode='rb', pkl_path='models_by_dim/embeddings.pkl')
hashtag_features = use_pkl(mode='rb', pkl_path='models_by_dim/hashtags.pkl')

In [92]:

def prepare_image(img_path, dims = (224, 224, 3), where='local'):
    """Read an image file and return it as a normalised tensor of shape ``dims``.

    Args:
        img_path: Path of the image file.
        dims: Target ``(height, width, channels)``.
        where: Storage location of the image; only ``'local'`` is handled.

    Returns:
        A float32 tensor of shape ``dims`` with pixel values mapped from
        0..255 to -1..1, or ``None`` when ``where`` is not ``'local'``.
    """
    height, width, channels = dims
    if where != 'local':
        return None
    raw = tf.io.read_file(img_path)
    # Let the decoder produce the requested channel count: grayscale is
    # replicated to RGB and an alpha channel is dropped, instead of blindly
    # tripling whatever channel count the file happens to have.
    # expand_animations=False keeps animated GIFs 3-D (first frame only).
    img = tf.image.decode_image(raw, channels=channels, expand_animations=False)
    img = tf.cast(img, tf.float32)
    img = (img / 127.5) - 1  # normalize
    return tf.image.resize(img, (height, width))


def extract_features(image, nn):
    """Run one image through ``nn`` and return its feature vector.

    Args:
        image: Object exposing ``.numpy()`` (e.g. an eager tensor).
        nn: Model exposing ``.predict(batch)``.

    Returns:
        The first row of ``nn.predict`` for a batch holding only ``image``.
    """
    # Add a leading batch axis so the model sees a batch of size one.
    single_batch = image.numpy()[np.newaxis, ...]
    return nn.predict(single_batch)[0]

def prepare_img(image_path, where='local'):
    """Turn an image file into the reduced embedding used for neighbour search.

    Pipeline: ``prepare_image`` -> ``extract_features`` with the global
    ``neural_network`` -> ``pca_model.transform`` -> flattened vector.

    NOTE(review): ``pca_model`` is read as a global and is not defined in the
    visible notebook cells; it presumably is a fitted transformer loaded
    elsewhere - confirm it exists before calling this on a fresh kernel.

    Args:
        image_path: Path of the image file.
        where: Storage location, forwarded to ``prepare_image``.

    Returns:
        1-D array produced by ``pca_model.transform`` for this image.

    Raises:
        ValueError: If ``prepare_image`` cannot handle ``where``.
    """
    # Forward the caller's `where` instead of silently forcing 'local'.
    prep_image = prepare_image(image_path, where=where)
    if prep_image is None:
        raise ValueError(f"unsupported image location: {where!r}")
    deep_features = extract_features(prep_image, neural_network)
    return pca_model.transform(deep_features.reshape(1, -1)).reshape(-1)

def find_neighbor_vectors(pic, k=5, recommender_df=recommender_df):
    """Return the ``k`` stored images closest to ``pic`` by cosine distance.

    Args:
        pic: Embedding to compare against each row's ``deep_features``.
        k: Number of rows to return.
        recommender_df: Table with a ``deep_features`` column. The default is
            the global table as it was bound when this function was defined.

    Returns:
        A copy of the ``k`` nearest rows, sorted by an added ``dist`` column.
    """
    distances = recommender_df['deep_features'].apply(lambda emb: cosine(emb, pic))
    ranked = recommender_df.assign(dist=distances).sort_values(by='dist')
    return ranked.head(k)

def generate_hashtags(pic, return_uf = False, min_recs = 10):
    """Recommend hashtags for an image embedding.

    The ``features`` vectors of the 5 nearest stored images are averaged, and
    every hashtag is scored by the dot product of its own ``features`` vector
    with that average.

    Side effect: the scores are written to the ``dot_product`` column of the
    global ``hashtag_features`` table (overwritten on every call).

    Args:
        pic: Embedding of the query image.
        return_uf: When true, also return the averaged neighbour features.
        min_recs: Number of top-scoring hashtags to return.

    Returns:
        ``(tags, avg_features)`` where ``tags`` is a list of hashtag strings
        and ``avg_features`` is ``None`` unless ``return_uf`` is true.
    """
    neighbours = find_neighbor_vectors(pic, k=5, recommender_df=recommender_df)

    # Average the neighbours' feature vectors into one "user" vector.
    neighbour_vectors = [vec for vec in neighbours.features.values]
    avg_features = np.mean(np.asarray(neighbour_vectors), axis=0)

    def score(tag_vector):
        return np.asarray(tag_vector).dot(avg_features)

    hashtag_features['dot_product'] = hashtag_features['features'].apply(score)

    # Highest-scoring hashtags first.
    ranked = hashtag_features.sort_values(by='dot_product', ascending=False)
    tags = ranked.head(min_recs)['tag'].to_list()

    return tags, (avg_features if return_uf else None)

def get_results(path_to_pic, add_to_model = False, new_ind = None,
                pkl_path = 'models_by_dim/embeddings.pkl'):
    """Recommend hashtags for an image, optionally storing it in the model.

    Requires the global ``recommender_df`` (embeddings) and
    ``hashtag_features`` (hashtags) tables to be loaded.

    If the image hash is already in ``recommender_df`` its stored embedding is
    reused; otherwise the embedding is computed with ``prepare_img``.

    Args:
        path_to_pic: Path of the image file.
        add_to_model: When true and the image is new, add a row for it to
            ``recommender_df`` (in place) and append that row to ``pkl_path``.
        new_ind: Value returned as the index when the image is neither found
            nor added.
        pkl_path: Pickle file that receives newly added rows. Defaults to the
            file the notebook loads ``recommender_df`` from, so that added
            rows are picked up on the next load.

    Returns:
        ``(tags, index)``: recommended tags and the index label of the image
        in ``recommender_df`` (needed to attach user hashtags later).
    """
    im_hash = avg_hash_img(path_to_pic)

    known = recommender_df[recommender_df['image_hash'] == im_hash]
    in_db = len(known) > 0

    if in_db:
        embedding = known['deep_features'].values[0]
    else:
        embedding = prepare_img(path_to_pic)

    tags, user_feat = generate_hashtags(embedding, return_uf = add_to_model)

    if in_db:
        new_ind = known.index[0]

    elif add_to_model:
        new_ind = max(recommender_df.index) + 1 if len(recommender_df.index) else 0

        # Every value is wrapped in a one-element list so that v[0] below is
        # the cell value itself (a bare string would yield its first character).
        props = {'image_hash' : [im_hash], 'hashtags' : [tags],
                 'deep_features' : [embedding], 'features' : [user_feat]}

        to_pkl = pd.DataFrame(props, index = [new_ind])

        # Mutate the global table in place (no rebinding). The first write
        # creates the row; the later writes place each list/array into a
        # single cell.
        for k, v in props.items():
            recommender_df.at[new_ind, k] = v[0]

        use_pkl('ab', pkl_path, file_to_pkl = to_pkl)

    return tags, new_ind

def add_hashtags_to_db(picture_df_index, str_of_hashtags, vector_len = 50, eps = 1e-2,
                       pkl_path = 'models_by_dim/hashtags.pkl'):
    """Fit hashtag vectors so the given tags score higher for one picture.

    For each tag a linear program is solved: maximise the dot product between
    the picture's ``features`` and the tag vector, while keeping the tag's dot
    product with every other picture within ``eps`` of its current value.

    Existing tags are updated in the global ``hashtag_features`` table (in
    memory only - the updated vector is not written to the pickle file).
    Tags that are not yet known are collected, appended to
    ``hashtag_features`` and appended to ``pkl_path``.

    NOTE(review): a tag that is not yet in ``hashtag_features`` has no
    constraints, so its linear objective is unbounded; the solver is then
    expected to return no vector and the tag is skipped. A norm bound is
    probably needed for new tags - confirm the intended behaviour.

    Args:
        picture_df_index: Index label of the picture in ``recommender_df``.
        str_of_hashtags: Iterable of tag names without the leading ``#``.
        vector_len: Length of a hashtag vector.
        eps: Allowed drift of the tag's score on the other pictures.
        pkl_path: Pickle file that receives newly created tags. Defaults to
            the file the notebook loads ``hashtag_features`` from.

    Raises:
        AssertionError: If ``picture_df_index`` is not in ``recommender_df``.
    """
    global hashtag_features

    assert picture_df_index in recommender_df.index

    # Order-preserving de-duplication keeps the ids of new tags deterministic.
    unique_tags = list(dict.fromkeys(str_of_hashtags))

    target_features = recommender_df.at[picture_df_index, 'features']

    # Feature rows of every other picture; identical for all tags, so build once.
    other_features = [recommender_df.at[index, 'features']
                      for index in recommender_df.index
                      if index != picture_df_index]
    A = np.array(other_features)

    props = {'features' : [], 'id' : [], 'tag' : []}

    curr_index = max(hashtag_features.index) + 1 if len(hashtag_features.index) else 0

    for hashtag in unique_tags:

        hashtag = '#' + hashtag

        matches = hashtag_features[hashtag_features['tag'] == hashtag]

        # Compare against None explicitly: index label 0 is a valid hit.
        h_index = matches.index[0] if len(matches) > 0 else None

        hashtag_vector = cp.Variable((vector_len, ))

        constraints = []

        if h_index is not None and len(other_features) > 0:
            current_vector = hashtag_features.at[h_index, 'features']
            # Current score of this tag on each of the other pictures.
            b = np.array([row @ current_vector.T for row in other_features])
            constraints = [A @ hashtag_vector <= b + eps,
                           b - eps <= A @ hashtag_vector]

        problem = cp.Problem(cp.Maximize(target_features @ hashtag_vector), constraints)

        problem.solve()

        solution = hashtag_vector.value

        # No solution (e.g. infeasible or unbounded problem): leave this tag alone.
        if solution is None: continue

        if h_index is not None:
            hashtag_features.at[h_index, 'features'] = solution
        else:
            props['features'].append(solution)
            props['tag'].append(hashtag)
            props['id'].append(curr_index)
            curr_index += 1

    if props['id']:
        to_pkl = pd.DataFrame(props, index = props['id'])

        hashtag_features = pd.concat([hashtag_features, to_pkl])

        use_pkl('ab', pkl_path, file_to_pkl = to_pkl)

In [10]:
# Feature extractor: ImageNet-pretrained MobileNetV2 without its classifier
# head, followed by global average pooling to get one vector per image.
img_shape = (224, 224, 3)

base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=img_shape)
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

neural_network = tf.keras.Sequential(layers=[base_model, global_average_layer])

In [113]:
# Pick the sample image to tag. The second uncommented assignment overrides
# the first, so only the 'tattoo' image is actually used.
# NOTE(review): absolute local Windows paths - not portable to other machines.
path_to_img = r'D:\fixed_data\fixed_data\summer\_Post Ca1Q3LfhLKM_2022-03-08_06-08-49_UTC.jpg'
#path_to_img = r'D:\fixed_data\fixed_data\love\_Post CcfTC9OrTUl_2022-04-18_10-27-23_UTC_1.jpg'
path_to_img = r'D:\fixed_data\fixed_data\tattoo\_Post CdBwlO2vFa2_2022-05-01_19-39-39_UTC.jpg'
# add_to_model defaults to False, so new_img_index stays None unless the image
# hash is already present in recommender_df.
recom_tags, new_img_index = get_results(path_to_img)

In [114]:
# Display the hashtags recommended for path_to_img.
recom_tags

['#fashion',
 '#likeforlikes',
 '#model',
 '#music',
 '#happy',
 '#cute',
 '#picoftheday',
 '#beauty',
 '#style',
 '#nofilter']

In [111]:
usr_hashtags = 'model beauty' # the user should enter hashtags only in this form

In [112]:
# Split the whitespace-separated input into a list of tag names; the function
# itself prefixes each one with '#'.
# NOTE: rebinding usr_hashtags from str to list makes this cell non-idempotent;
# re-run the cell that defines the string before re-running this one.
usr_hashtags = usr_hashtags.split()
# new_img_index must be an index label present in recommender_df, otherwise
# the assertion inside add_hashtags_to_db fails.
add_hashtags_to_db(new_img_index, usr_hashtags)

In [57]:
# Inspect the embeddings table (columns: image_hash, hashtags, deep_features, features).
recommender_df

Unnamed: 0_level_0,image_hash,hashtags,deep_features,features
image_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7086,454AA240948248A444AA459AA,"[#photography, #photooftheday, #smile, #fashio...","[5.882194995880127, -0.2370794713497162, 2.885...","[0.32798207, 0.07688697, 0.84293115, 0.2175694..."
1828,AF7BDBB55D4D6B27DC65BACFF,"[#love, #art, #foryou, #healthylifestyle, #exp...","[-1.8345609903335571, 2.8283426761627197, 5.58...","[0.4288217, -0.28197622, 0.5414387, 0.3370693,..."
8523,FFFFFFFFFFFAFBFFFFFFFDFFF,[#foodporn],"[-5.465549468994141, 5.820854187011719, 4.0939...","[-0.0026790556, -0.3919861, -0.04752933, 0.132..."
12361,0860539A5844EA4A27B82B4BD,[#instamood],"[3.5241565704345703, 0.36587613821029663, -1.0...","[-0.14015087, 0.013544979, -0.27844265, 0.2826..."
7591,80DDEFFF7BFFF7DBEFBC7FEFA,"[#fitness, #academia, #fit]","[3.1401686668395996, 1.776392936706543, -1.935...","[-0.26365972, -0.08887135, -0.15122034, 0.1204..."
...,...,...,...,...
13123,EF20455DABA3FC9F3BA590945,[#life],"[-2.20752215385437, 9.932308197021484, 2.21486...","[-0.26728785, 0.14979428, 0.092606515, 0.02256..."
19648,24040441A02413255DFD3BEFF,"[#photooftheday, #sports, #instalove, #basketb...","[-8.943175315856934, 6.463971138000488, -0.098...","[0.13210227, 0.41727182, -0.041841343, -0.1275..."
9845,42525A25910B49482845B8671,[#happy],"[6.141420364379883, -1.9803180694580078, 0.050...","[-0.28683725, -0.103673086, -0.15897422, 0.041..."
10799,5702018040045A40674B5AEEF,[#instadaily],"[3.5415308475494385, -2.96679949760437, 3.5645...","[0.005479329, -0.11904847, -0.07229346, -0.028..."


In [22]:
# Remove the hashtag row with index label 1234 from the in-memory table.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the label is already gone.
hashtag_features.drop(index = 1234, inplace = True, errors = 'ignore')

In [115]:
# Score of '#beauty' for the picture: dot product of the two feature vectors.
recommender_df.at[new_img_index, 'features'] @ hashtag_features.loc[hashtag_features['tag'] == '#beauty', 'features'].iloc[0].T

0.012104370370379428

In [116]:
# Score of '#model' for the picture: dot product of the two feature vectors.
recommender_df.at[new_img_index, 'features'] @ hashtag_features.loc[hashtag_features['tag'] == '#model', 'features'].iloc[0].T

0.02307794236917885

In [106]:
# Show the stored feature vector of the '#tattoo' hashtag.
hashtag_features.loc[hashtag_features['tag'] == '#tattoo', 'features'].iloc[0]

array([-1.94173011e-03,  7.94576675e-03, -1.56590863e-02, -1.58128187e-02,
        8.59699248e-04,  1.59424289e-03, -9.70232211e-03,  1.79261321e-04,
        2.91500957e-03,  1.16909468e-02,  1.32688194e-02, -2.00407389e-02,
       -6.68882306e-03, -1.65787900e-02,  6.61496696e-05, -3.00379792e-03,
        8.53128637e-03,  4.28541947e-03,  1.09570143e-02,  1.61566250e-02,
       -1.08955094e-02,  1.16876365e-02,  4.90742868e-03,  1.66359474e-02,
        2.35087836e-02, -3.51989329e-03,  7.99301737e-03,  8.05896683e-03,
       -4.62536580e-03, -2.64432413e-03, -6.56887848e-03, -1.47258760e-03,
       -9.77266727e-03, -1.56688933e-03,  2.54001679e-02, -4.63402378e-03,
        5.62924561e-03, -8.15451705e-03,  7.67098445e-03,  7.21694107e-03,
       -2.66708971e-03,  9.74296001e-05,  1.13586292e-03,  7.10431367e-03,
       -7.01534111e-03,  5.68182440e-03,  1.85779232e-02, -2.28698725e-02,
        1.49190855e-02, -7.38033686e-03])

In [81]:
# Scratch check left over from debugging: this expression is always False.
None is not None

False

In [15]:
# Dot product of the `features` vector stored under label 68 in recommender_df
# with the `features` vector stored under label 68 in hashtag_features.
# NOTE(review): the two tables appear to be indexed independently (image id
# vs. hashtag id), so the shared label 68 is presumably a coincidence - confirm.
recommender_df.at[68, 'features'] @ hashtag_features.at[68, 'features'].T

0.0004191944

In [16]:
# Show the hashtag row stored under index label 68.
hashtag_features.loc[68]

features       [0.0006249155, 0.0005484236, -0.0015651134, -0...
id                                                            68
tag                                                        #band
dot_product                                             0.001747
Name: 68, dtype: object