In [1]:
from gensim.models.ldamodel import LdaModel
import pandas as pd

from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary

from gensim.models import TfidfModel

from collections import Counter

# Adidas

In [2]:
def createLDA(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

def createLDA_TFIDF(column_name, im_cap_tf, n_topics = 5, alpha_val = 0.5, top_n = None):
    
    if top_n != None:
        l = []
        for i in im_cap_tf[column_name]:
            l+=i
    
        wc = Counter(l)
        wc.most_common()
        
        mc_adidas = wc.most_common()[top_n:]
        mc_adidas_list = [i[0] for i in mc_adidas]
        
        texts = im_cap_tf[column_name].apply(lambda x : list(set(x).intersection(set(mc_adidas_list))))
    
    else:
        texts = im_cap_tf[column_name]
        
    # Create a corpus from a list of texts
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    
    model = TfidfModel(corpus)  # fit model

    tfidf_corpus = model[corpus]  # apply model to the first corpus document

    # Train the model on the corpus.
    lda = LdaModel(corpus, num_topics=n_topics, alpha=alpha_val, id2word = dictionary)
    
    for p in lda.print_topics(num_topics=n_topics, num_words=10):
        print(p[1])
    
    return lda

## Only Captions

In [3]:
data = pd.read_csv("./Adidas/adidas_caption_tf.csv", index_col=0)
print(data.columns)
column_name = 'caption_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'leomessi',
       'coreyrichproduction', 'baller', 'feel', 'unscripte', 'lelylob',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'eiger',
       'argentinas', 'collide', 'caption', 'caption_list'],
      dtype='object', length=2469)


In [4]:
n_topics=5
alpha_val=0.5
top_n =30

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.005*"never" + 0.005*"story" + 0.004*"ever" + 0.004*"creator" + 0.004*"play" + 0.003*"narrate" + 0.003*"tap" + 0.003*"hometeam" + 0.003*"together" + 0.003*"history"
0.005*"creativity" + 0.004*"start" + 0.003*"ultraboost" + 0.003*"celebrate" + 0.003*"'s" + 0.003*"creator" + 0.003*"step" + 0.003*"together" + 0.003*"inspire" + 0.003*"build"
0.006*"speedtakes" + 0.005*"champion" + 0.004*"go" + 0.004*"ready" + 0.004*"pack" + 0.003*"collection" + 0.003*"store" + 0.003*"life" + 0.003*"'s" + 0.003*"win"
0.007*"store" + 0.005*"head" + 0.004*"drop" + 0.004*"futurecraft" + 0.004*"online" + 0.003*"home" + 0.003*"hometeam" + 0.003*"never" + 0.003*"adidasfootball" + 0.003*"go"
0.004*"performance" + 0.004*"'s" + 0.003*"find" + 0.003*"street" + 0.003*"collection" + 0.003*"ever" + 0.003*"help" + 0.003*"x" + 0.003*"training" + 0.003*"story"

------------------------LDA with TFIDF ----------------------- 

0.005*"speedtakes" + 0.005*"d

## Only Image Labels

In [5]:
data = pd.read_csv("./Adidas/adidas_label_tf.csv", index_col=0)
print(data.columns)
column_name = 'labels_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['farmworker', 'photograph', 'symbol', 'portrait', 'eyewear', 'toy',
       'flag', 'kickflip', 'concrete', 'bicycles',
       ...
       'leg', 'tire', 'music', 'sportswear', 'serveware', 'bookcase',
       'astronomical', 'freeze', 'labels', 'labels_list'],
      dtype='object', length=670)


In [6]:
n_topics=3
alpha_val=0.5
top_n = 10

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.016*"street" + 0.016*"footwear" + 0.015*"gesture" + 0.014*"-" + 0.013*"blue" + 0.013*"people" + 0.012*"shorts" + 0.012*"plant" + 0.012*"thigh" + 0.012*"shirt"
0.024*"happy" + 0.021*"gesture" + 0.017*"footwear" + 0.016*"street" + 0.015*"human" + 0.013*"forehead" + 0.013*"art" + 0.012*"chin" + 0.011*"jaw" + 0.011*"neck"
0.032*"leg" + 0.025*"thigh" + 0.019*"human" + 0.018*"knee" + 0.016*"sport" + 0.015*"uniform" + 0.015*"-" + 0.015*"art" + 0.015*"player" + 0.014*"short"

------------------------LDA with TFIDF ----------------------- 

0.026*"thigh" + 0.023*"gesture" + 0.020*"leg" + 0.017*"blue" + 0.017*"human" + 0.017*"art" + 0.017*"happy" + 0.013*"electric" + 0.012*"people" + 0.012*"in"
0.018*"sport" + 0.016*"player" + 0.014*"happy" + 0.013*"art" + 0.013*"cloud" + 0.012*"shorts" + 0.012*"leisure" + 0.012*"leg" + 0.011*"street" + 0.010*"-"
0.023*"footwear" + 0.018*"street" + 0.017*"leg" + 0.016*"outdoor" + 0.016*"-" + 

## Image Labels + Caption

In [7]:
data = pd.read_csv("./Adidas/adidas_img_cap_tf.csv", index_col=0)
print(data.columns)
column_name = 'img_cap_list'
data[column_name] = data[column_name].apply(lambda x : eval(x))

Index(['fearless', 'strongerforit', 'coding', 'jaque1212', 'concrete',
       'leomessi', 'coreyrichproduction', 'feel', 'baller', 'unscripte',
       ...
       'writer', 'historical', 'distance', 'without', 'ritaora', 'bookcase',
       'eiger', 'argentinas', 'collide', 'img_cap_list'],
      dtype='object', length=2925)


In [8]:
n_topics=7
alpha_val=0.5
top_n = 30

print('\n------------------------LDA without TFIDF ----------------------- \n')
lda_cap = createLDA(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)

print('\n------------------------LDA with TFIDF ----------------------- \n')
lda_cap_tfidf = createLDA_TFIDF(column_name=column_name, im_cap_tf=data, n_topics=n_topics, alpha_val=alpha_val, top_n=top_n)


------------------------LDA without TFIDF ----------------------- 

0.008*"nature" + 0.007*"world" + 0.007*"in" + 0.006*"design" + 0.005*"grass" + 0.005*"entertainment" + 0.005*"white" + 0.004*"plant" + 0.004*"adidasoriginal" + 0.004*"event"
0.007*"t" + 0.006*"shirt" + 0.006*"design" + 0.006*"black" + 0.006*"smile" + 0.006*"--" + 0.005*"electric" + 0.005*"short" + 0.005*"world" + 0.004*"white"
0.006*"shorts" + 0.006*"joint" + 0.006*"shirt" + 0.006*"nature" + 0.005*"outdoor" + 0.005*"body" + 0.005*"in" + 0.005*"available" + 0.004*"t" + 0.004*"recreation"
0.005*"event" + 0.005*"short" + 0.005*"body" + 0.005*"electric" + 0.004*"world" + 0.004*"plant" + 0.004*"active" + 0.004*"cloud" + 0.004*"new" + 0.004*"adidasoriginal"
0.005*"outdoor" + 0.005*"arm" + 0.005*"body" + 0.005*"black" + 0.005*"event" + 0.005*"neck" + 0.005*"shirt" + 0.004*"cloud" + 0.004*"plant" + 0.004*"available"
0.006*"design" + 0.006*"and" + 0.006*"outdoor" + 0.005*"waist" + 0.004*"athletic" + 0.004*"event" + 0.004*"shor