In [1]:
from gensim.models.ldamodel import LdaModel
import pandas as pd

from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary

from gensim.models import TfidfModel

from collections import Counter

# Adidas

In [2]:
def createLDA(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, eta_val=0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val, eta = eta_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

def createLDA_TFIDF(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, eta_val=0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    
    model = TfidfModel(corpus)  # fit model

    tfidf_corpus = model[corpus]  # apply model to the first corpus document

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val,eta = eta_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

## Only Captions

In [3]:
data = pd.read_csv("./Adidas/adidas_caption_tf.csv", index_col=0)
print(data.columns)
column_name = 'caption_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'leomessi',
       'coreyrichproduction', 'baller', 'feel', 'unscripte', 'lelylob',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'eiger',
       'argentinas', 'collide', 'caption', 'caption_list'],
      dtype='object', length=2469)


In [4]:
n_topics=5
alpha_val=0.7
eta_val = 0.7
top_n =30

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val = eta_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val=eta_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.002*"city" + 0.002*"play" + 0.002*"creator" + 0.002*"go" + 0.002*"story" + 0.002*"head" + 0.002*"street" + 0.002*"watch" + 0.002*"futurecraft" + 0.002*"worldwide"
0.003*"never" + 0.002*"head" + 0.002*"go" + 0.002*"creator" + 0.002*"athlete" + 0.002*"ever" + 0.002*"speedtakes" + 0.002*"mean" + 0.002*"story" + 0.002*"collection"
0.003*"store" + 0.002*"together" + 0.002*"tap" + 0.002*"win" + 0.002*"hometeam" + 0.002*"'s" + 0.002*"drop" + 0.002*"life" + 0.002*"help" + 0.002*"pack"
0.002*"story" + 0.002*"store" + 0.002*"together" + 0.002*"'s" + 0.002*"champion" + 0.002*"celebrate" + 0.002*"training" + 0.002*"ever" + 0.002*"start" + 0.002*"drop"
0.002*"ever" + 0.002*"never" + 0.002*"creator" + 0.002*"creativity" + 0.002*"join" + 0.002*"performance" + 0.002*"go" + 0.002*"'s" + 0.002*"store" + 0.002*"speedtakes"

------------------------LDA with TFIDF ----------------------- 

0.003*"'s" + 0.002*"pack" + 0.002*"history" + 0

## Only Image Labels

In [9]:
data = pd.read_csv("./Adidas/adidas_label_tf.csv", index_col=0)
print(data.columns)
column_name = 'labels_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['farmworker', 'photograph', 'symbol', 'portrait', 'eyewear', 'toy',
       'flag', 'kickflip', 'concrete', 'bicycles',
       ...
       'leg', 'tire', 'music', 'sportswear', 'serveware', 'bookcase',
       'astronomical', 'freeze', 'labels', 'labels_list'],
      dtype='object', length=670)


In [11]:
n_topics=3
alpha_val=0.7
eta_val = 0.7
top_n =10

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val = eta_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val=eta_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.018*"leg" + 0.018*"thigh" + 0.017*"human" + 0.017*"gesture" + 0.013*"happy" + 0.013*"street" + 0.013*"player" + 0.012*"blue" + 0.012*"footwear" + 0.011*"art"
0.018*"street" + 0.018*"footwear" + 0.016*"sport" + 0.014*"uniform" + 0.012*"outdoor" + 0.012*"human" + 0.011*"leg" + 0.011*"event" + 0.011*"player" + 0.011*"gesture"
0.017*"-" + 0.015*"leg" + 0.015*"thigh" + 0.013*"art" + 0.012*"happy" + 0.012*"gesture" + 0.012*"t" + 0.011*"footwear" + 0.010*"shirt" + 0.010*"blue"

------------------------LDA with TFIDF ----------------------- 

0.017*"-" + 0.015*"happy" + 0.014*"art" + 0.013*"sport" + 0.013*"blue" + 0.013*"thigh" + 0.012*"street" + 0.011*"leg" + 0.011*"uniform" + 0.011*"gesture"
0.018*"thigh" + 0.018*"leg" + 0.018*"footwear" + 0.014*"gesture" + 0.014*"knee" + 0.013*"human" + 0.012*"body" + 0.011*"street" + 0.011*"player" + 0.010*"happy"
0.018*"human" + 0.015*"gesture" + 0.015*"leg" + 0.015*"street" + 0.012*"f

## Image Labels + Caption

In [7]:
data = pd.read_csv("./Adidas/adidas_img_cap_tf.csv", index_col=0)
print(data.columns)
column_name = 'img_cap_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'concrete',
       'leomessi', 'coreyrichproduction', 'feel', 'baller', 'unscripte',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'bookcase',
       'eiger', 'argentinas', 'collide', 'img_cap_list'],
      dtype='object', length=2925)


In [8]:
n_topics=5
alpha_val=0.7
eta_val = 0.7
top_n =30

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val = eta_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, eta_val=eta_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.005*"shirt" + 0.004*"t" + 0.004*"new" + 0.004*"body" + 0.003*"shorts" + 0.003*"entertainment" + 0.003*"nature" + 0.003*"event" + 0.003*"white" + 0.003*"adidasoriginal"
0.004*"neck" + 0.004*"world" + 0.003*"event" + 0.003*"arm" + 0.003*"body" + 0.003*"leisure" + 0.003*"nature" + 0.003*"shirt" + 0.003*"forehead" + 0.003*"eyebrow"
0.004*"design" + 0.004*"world" + 0.004*"electric" + 0.004*"black" + 0.003*"outdoor" + 0.003*"white" + 0.003*"--" + 0.003*"shoulder" + 0.003*"head" + 0.003*"body"
0.004*"black" + 0.004*"outdoor" + 0.003*"shorts" + 0.003*"and" + 0.003*"new" + 0.003*"--" + 0.003*"leisure" + 0.003*"arm" + 0.003*"shirt" + 0.003*"sneakers"
0.004*"in" + 0.004*"body" + 0.003*"short" + 0.003*"nature" + 0.003*"and" + 0.003*"plant" + 0.003*"outdoor" + 0.003*"smile" + 0.003*"shorts" + 0.003*"design"

------------------------LDA with TFIDF ----------------------- 

0.003*"new" + 0.003*"body" + 0.003*"plant" + 0.003*"world