In [92]:
from gensim.models.ldamodel import LdaModel
import pandas as pd

from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary

from gensim.models import TfidfModel

from collections import Counter

# Adidas

In [120]:
def createLDA(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

def createLDA_TFIDF(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    
    model = TfidfModel(corpus)  # fit model

    tfidf_corpus = model[corpus]  # apply model to the first corpus document

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

## Only Captions

In [140]:
data = pd.read_csv("./Adidas/adidas_caption_tf.csv", index_col=0)
print(data.columns)
column_name = 'caption_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'leomessi',
       'coreyrichproduction', 'baller', 'feel', 'unscripte', 'lelylob',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'eiger',
       'argentinas', 'collide', 'caption', 'caption_list'],
      dtype='object', length=2469)


In [141]:
print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=5, alpha_val=0.5, top_n = 30)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=5, alpha_val=0.5, top_n = 30)


------------------------LDA without TFIDF ----------------------- 

0.005*"store" + 0.004*"champion" + 0.004*"drop" + 0.004*"creator" + 0.003*"online" + 0.003*"pack" + 0.003*"collection" + 0.003*"kickoftheweek" + 0.003*"worldwide" + 0.003*"build"
0.004*"head" + 0.004*"x" + 0.004*"collection" + 0.003*"start" + 0.003*"hometeam" + 0.003*"city" + 0.003*"look" + 0.003*"together" + 0.003*"wave" + 0.003*"futurecraft"
0.007*"story" + 0.004*"ever" + 0.004*"inspire" + 0.003*"tap" + 0.003*"creator" + 0.003*"store" + 0.003*"next" + 0.003*"together" + 0.003*"go" + 0.003*"shoe"
0.005*"speedtakes" + 0.005*"creativity" + 0.004*"never" + 0.004*"well" + 0.003*"'s" + 0.003*"ever" + 0.003*"impossibleisnothe" + 0.003*"play" + 0.003*"head" + 0.003*"share"
0.006*"'s" + 0.005*"go" + 0.004*"tap" + 0.004*"drop" + 0.004*"win" + 0.003*"play" + 0.003*"1st" + 0.003*"ever" + 0.003*"fast" + 0.003*"street"

------------------------LDA with TFIDF ----------------------- 

0.005*"creator" + 0.005*"store" + 0.004*"'s" +

## Only Image Labels

In [137]:
data = pd.read_csv("./Adidas/adidas_label_tf.csv", index_col=0)
print(data.columns)
column_name = 'labels_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['farmworker', 'photograph', 'symbol', 'portrait', 'eyewear', 'toy',
       'flag', 'kickflip', 'concrete', 'bicycles',
       ...
       'leg', 'tire', 'music', 'sportswear', 'serveware', 'bookcase',
       'astronomical', 'freeze', 'labels', 'labels_list'],
      dtype='object', length=670)


In [139]:
print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=3, alpha_val=0.5, top_n = 30)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=3, alpha_val=0.5, top_n = 10)


------------------------LDA without TFIDF ----------------------- 

0.019*"plant" + 0.017*"event" + 0.014*"entertainment" + 0.012*"design" + 0.011*"electric" + 0.011*"nature" + 0.011*"short" + 0.010*"shorts" + 0.010*"sneaker" + 0.010*"automotive"
0.016*"t" + 0.015*"and" + 0.015*"muscle" + 0.014*"arm" + 0.014*"eyebrow" + 0.012*"outerwear" + 0.011*"shorts" + 0.011*"joint" + 0.010*"forehead" + 0.010*"chin"
0.018*"black" + 0.018*"in" + 0.018*"white" + 0.017*"nature" + 0.015*"sneakers" + 0.014*"water" + 0.012*"grey" + 0.012*"shorts" + 0.012*"leisure" + 0.012*"and"

------------------------LDA with TFIDF ----------------------- 

0.028*"leg" + 0.023*"footwear" + 0.021*"human" + 0.019*"happy" + 0.018*"thigh" + 0.017*"blue" + 0.017*"knee" + 0.015*"outdoor" + 0.015*"people" + 0.014*"gesture"
0.026*"street" + 0.021*"gesture" + 0.019*"uniform" + 0.018*"thigh" + 0.013*"-" + 0.013*"player" + 0.013*"sport" + 0.012*"shirt" + 0.011*"muscle" + 0.011*"human"
0.021*"art" + 0.017*"-" + 0.017*"sport" + 0.

## Image Labels + Caption

In [143]:
data = pd.read_csv("./Adidas/adidas_img_cap_tf.csv", index_col=0)
print(data.columns)
column_name = 'img_cap_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'concrete',
       'leomessi', 'coreyrichproduction', 'feel', 'baller', 'unscripte',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'bookcase',
       'eiger', 'argentinas', 'collide', 'img_cap_list'],
      dtype='object', length=2925)


In [144]:
print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=3, alpha_val=0.5, top_n = 30)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=3, alpha_val=0.5, top_n = 10)


------------------------LDA without TFIDF ----------------------- 

0.006*"outdoor" + 0.006*"shorts" + 0.006*"new" + 0.005*"design" + 0.005*"plant" + 0.005*"world" + 0.005*"adidasoriginal" + 0.004*"shirt" + 0.004*"smile" + 0.004*"nature"
0.005*"body" + 0.005*"neck" + 0.004*"leisure" + 0.004*"cool" + 0.004*"t" + 0.004*"arm" + 0.004*"week" + 0.004*"short" + 0.004*"chin" + 0.003*"black"
0.005*"event" + 0.005*"white" + 0.005*"body" + 0.005*"shirt" + 0.005*"electric" + 0.005*"black" + 0.004*"neck" + 0.004*"--" + 0.004*"world" + 0.004*"eyebrow"

------------------------LDA with TFIDF ----------------------- 

0.007*"sky" + 0.006*"heretocreate" + 0.006*"happy" + 0.006*"create" + 0.005*"blue" + 0.005*"see" + 0.005*"-" + 0.005*"new" + 0.005*"footwear" + 0.004*"street"
0.011*"leg" + 0.007*"sky" + 0.007*"thigh" + 0.007*"footwear" + 0.006*"street" + 0.006*"player" + 0.006*"human" + 0.006*"gesture" + 0.005*"outdoor" + 0.005*"uniform"
0.007*"create" + 0.007*"street" + 0.007*"gesture" + 0.007*"heret