## Imports 📦

In [1]:
import pandas as pd
import numpy as np
import emoji
import warnings
import nltk
from nltk.corpus import stopwords    
import string
import statistics

In [2]:
from matplotlib import pyplot as plt

In [3]:
warnings.filterwarnings('ignore')

## Load Data 💿

In [4]:
df = pd.read_csv("Data/cleaned_by_language.csv")

In [5]:
en_df = df[df['language'] == 'en']
es_df = df[df['language'] == 'es']

In [6]:
en_comment = en_df['text only'].to_list()
es_comment = es_df['text only'].to_list()
en_comment_with_emoji = en_df['text'].to_list()
es_comment_with_emoji = es_df['text'].to_list()

In [7]:
nltk.download('punkt')
nltk.download('stopwords')
en_stop_words = set(stopwords.words('english'))
es_stop_words = set(stopwords.words('spanish'))

def filter_comments(comment_list, stop_words):
    filtered_comments = []

    for comment in comment_list:
        # remove word starts with @
        comment = " ".join(filter(lambda x:x[0]!='@', comment.split()))
    
        # remove punctuations
        comment = comment.translate(str.maketrans('', '', string.punctuation))

        # remove stopwords
        words = nltk.word_tokenize(comment)
        filtered_comment = [word for word in words if word.lower() not in stop_words]
        filtered_comment = ' '.join(filtered_comment)
        filtered_comment = comment
        filtered_comments.append(filtered_comment)
    return filtered_comments

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/yuhsinhuang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/yuhsinhuang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [8]:
en_filtered_comments = filter_comments(en_comment, en_stop_words)
es_filtered_comments = filter_comments(es_comment, es_stop_words)
en_with_emoji_filtered_comments = filter_comments(en_comment_with_emoji, en_stop_words)
es_with_emoji_filtered_comments = filter_comments(es_comment_with_emoji, es_stop_words)

## Text Embeddings using LASER - this pretrained model supports cross-lingual tasks and embeds in setence-level

In [9]:
pip install laserembeddings

Note: you may need to restart the kernel to use updated packages.


In [10]:
# run this in terminal first: python -m laserembeddings download-models
from laserembeddings import Laser

laser = Laser()
en_text_array = laser.embed_sentences(en_filtered_comments, lang='en')
es_text_array = laser.embed_sentences(es_filtered_comments, lang='es')
en_array = laser.embed_sentences(en_with_emoji_filtered_comments, lang='en')
es_array = laser.embed_sentences(es_with_emoji_filtered_comments, lang='es')

In [11]:
en_text_array.shape

(280, 1024)

## Emoji Embeddings using Emoji2Vec

In [12]:
from gensim.models import KeyedVectors

# Load pretrained emoji embeddings
emoji_model = KeyedVectors.load_word2vec_format('emoji2vec.bin', binary=True)

def emoji_embedding(df):
    emoji_list_list = df['emoji list'].to_list()

    # Initialize a list to store emoji embeddings
    emoji_embedding = []

    for emoji_list in emoji_list_list:
        emoji_list_embedding = []  # Initialize a list for embeddings of each emoji list
        for emoji in emoji_list:
            try:
                emoji_list_embedding.append(emoji_model[emoji])
                
            except KeyError:
                pass

            # if len(emoji_list_embedding) != 0:
            # emoji_list_embedding = np.concatenate(emoji_list_embedding, axis=0)
        emoji_embedding.append(emoji_list_embedding)
    
    # average
    res = []
    for embeddings in emoji_embedding:
        average_embedding = np.mean(embeddings, axis=0)
        # padded arrays with zero emojis with a length 300 array filled with zeros
        if average_embedding is None or np.isnan(average_embedding).all():
            res.append(np.zeros(300))
        else:
            res.append(average_embedding)
    
    return np.array(res)

# en_max_size = max(len(arr) for arr in en_emoji_embedding)
# en_padded_arrays = [np.pad(arr, (0, en_max_size - len(arr)), 'constant') for arr in en_emoji_embedding]
# en_emoji_array = np.vstack(en_padded_arrays)

In [13]:
en_emoji_array = emoji_embedding(en_df)
es_emoji_array = emoji_embedding(es_df)

In [14]:
es_emoji_array.shape

(280, 300)

## Concatenate the text embeddings and emoji embeddings 

In [15]:
# Concatenate along columns (horizontally)
en_embeddings = np.concatenate((en_text_array, en_emoji_array), axis=1)
es_embeddings = np.concatenate((es_text_array, es_emoji_array), axis=1)

In [16]:
en_embeddings.shape

(280, 1324)

## Variable names 📇
- en_text_array: LASER embeddings on text only (shape:280x1024)
- en_emoji_array: emoji2vec embeddings on emoji only (shape: 280x300)
- en_embeddings: concatenate en_text_array and en_emoji_array (shape: 280, 1324)
- en_array: LASER embeddings on text+emoji (shape: 280x1024)
- (replace en by es to get result from Spanish dataset)

## Hierarchical Clustering 🪜

In [103]:
from scipy.cluster.hierarchy import linkage, fcluster

def apply_hierarchical_clustering(lang_embeddings, lang_df):
    # Calculate the linkage matrix
    linkage_matrix = linkage(lang_embeddings, method='ward')

    # Perform hierarchical clustering and obtain cluster labels
    cluster_labels = fcluster(linkage_matrix, t=3, criterion='distance') 

    # Create a DataFrame to store the clustering results
    df_clusters = pd.DataFrame({
        "text": lang_df['text'],
        "cluster": cluster_labels
    })

    return df_clusters.sort_values(by='cluster')

In [104]:
# Perform hierarchical clustering
en_linkage_matrix = apply_hierarchical_clustering(en_embeddings, en_df)
en_linkage_matrix

Unnamed: 0,text,cluster
0,@xidiwldnxjqpspqpskxls morroco is a siblingcou...,1
328,@nacho.cagiao imagine riding off the back of a...,1
326,Wow so much fun 😂,1
309,@richard.escobarc by 😂😂🇲🇦💪,1
308,They themselves said it portugal sui😂😂,1
...,...,...
164,@_hey.its.sup_ we did it 😉🙌,8
322,@afaseleccion come back match💥💥💥,8
321,@eugeneolderman ...dear sunshine under the pro...,8
22,@glg_sky yeah I’m sure Qatar being ranked fina...,8


In [107]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 1]['text'].tolist()

['@xidiwldnxjqpspqpskxls morroco is a siblingcountry aswell. 20% of them live in Belgium😂.',
 '@nacho.cagiao imagine riding off the back of a game from 2014 😂😂',
 'Wow so much fun 😂',
 '@richard.escobarc by 😂😂🇲🇦💪',
 'They themselves said it portugal sui😂😂',
 '@spasho05 😂😂😂😂',
 '@fine.kln it took you two days to think how to come back at me lol and do you even call this “fine” thing a joke 😂 honey 🤭😂',
 'Gol salto olivier giround,offside😂',
 'Right back at you 😂😂',
 'Man i Park 300 bags of cement in an hour that’s my job so don’t let us talk enjoy your money in peace 😂',
 '@izayahlopez1118 omg so sad an upset little inferior American 😂😂',
 'Thought is was sniper’s rifle at first 😂',
 '@ban_all_mondays verpiss dich goodbye😂',
 'Japan played better 😢',
 '@rya._.ntz23 and now denmark are out 😂',
 '@88godiswithme 😂😂😂😂😂😂😂 congratulations!!',
 '@anchan_0722_ 😂😂😂',
 '@mr_yasser25 yeah but u didn’t qualify 😂😂😂😂😂 try again in the next 4yrs 😂😂😂',
 'All I watch was pessi missing a pen a 3 big chan

In [108]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 2]['text'].tolist()

['the real blue lock🙌🔥🔥😍',
 'The big and champions France in Qatar2022🔥',
 'That throw was lit👏🔥',
 'Congratulations 🔥👏',
 'Wow very gorgeous ❤️🔥😍',
 '🇹🇳 ❤️🔥 should be proud 👏',
 'Cool🔥',
 'Mbappe on another level in this world cup🔥🔥🔥❤️',
 'The France 🇫🇷 is on Fire 🔥',
 'Best player 🔥🔥',
 'We love you hazards🔥🔥🔥',
 "Thank you for entertaining me Japan, I'm waiting for the excitement again in four years👏❤️\u200d🔥🇯🇵🇯🇵❤️\u200d🔥",
 'dats literally my dad 🔥',
 'Th rise of asian football ❤️❤️❤️proudly all Asian can say japan is a Asian football country...,🔥🔥🔥',
 '@beatbox_musicality For real bro🔥🔥🔥',
 'So much ❤️❤️❤️❤️ for CR7🔥🔥',
 'This nikka sick wit it🔥😂',
 'Messi wins the this exciting wc!!! 👏🔥❤️\u200d🔥',
 'Argentina vs France 👏🔥👏🔥👏 Greatest rivalry',
 'Well played Morocco’s number 7 a good play maker enjoyed his movments 🔥 see you next time fight for 3rd 👏',
 '@itisjuanderful AAJAA POR NO PERMITIR GAYS EN LA COPA MUNDIAL YO LO VEO EXCELENTE👏🔥',
 'Go Croatia!!🔥🔥',
 'Uff he was the best 🔥

In [119]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 3]['text'].tolist()

['hand ball 👏',
 '@fabreezy_ 👏👏👏 this is yours???',
 'Definitely!!! 👏👏👏👏👏',
 'respect 👏',
 'The Bluetooth goal 👏',
 '@porti_elias Its True buddy👏👏',
 'He is completely right! 👏',
 'The Best goal of the world cup👏👏👏⚽️',
 'Best of luck upcoming final game @afaseleccion 👏👏👏🏆',
 '@reemasul_r3 there u go I told u guys yall can win too, good job 👏🏻👏🏻 u guys won fair and square, dont feel low for ur team. They are good enough alrdy 👏🏻',
 '@k.mbappe proud of you👏',
 'Netherlands👏👏👏 and also cricket',
 '@itsabdelfatah you were right 👏',
 'Just an amazing team great job 👏🏻👏🏻👏🏻👏🏻👏🏻👏🏻👏🏻👏🏻❤️❤️❤️❤️',
 '@pr1ncesa_mar1 never! Korea is much better team! 👏']

In [120]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 4]['text'].tolist()

['Did he score in this video 🤣',
 '@_msomali_wao and guess what morocco also lost🤣🤣🤣🤣🤣🤣',
 'Yep, ney is gone🤣',
 '@5400.seconds are u there bro? 🤣',
 'Portugal humiliated you Shaq🤣🤣🤣\n￼',
 "@gousasmokingoniran I think that Biranvand's feet made Christian Polsic not have children anymore 🤣🤣🤣",
 '@leobrajkovic2 Airport, this way ✈️🤣',
 '@_francechy_ 🤣🤣🤣',
 '@realestate_topg what happened 🤣🤣',
 '@mitchellpolak  it was lucky win.... Argentine will destroy you 🤣🤣',
 '@goborm hehe Habibi🤣🤣',
 'Priceless!! 🇨🇲\n🤣🤣🤣',
 '@pictures_for_stickers Brazil 🇧🇷🛫🏠🤣🤣🤣🤣',
 '@twinty_999  airport this way 🤣👍🏻',
 "@tiago.rendas2 bro I do know football but yesterday match that wasn't an offside lol 🤣🤣🤣what cause the camera showed his leg over the line but have you seen the last defender in the box 🤣🤣🤣go and learn football then come back chat to me"]

In [121]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 5]['text'].tolist()

['bro peru wasnt even in the wc💀',
 'the last one 💀',
 '@__oresec_ yo I’m back, we won against Spain 💀 so good luck against Brazil',
 'They kost💀',
 '@ludmilamasc someone DIDNT watch the match. shut up bruh💀.',
 'All out lets add new people\n\nWhere is Son?\nHome💀💀']

In [122]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 6]['text'].tolist()

['Proudly Ghanaian 🇬🇭 🇬🇭 🇬🇭 🇬🇭 🇬🇭 ❤️',
 '@the_real_thorstinho #onetwo 🤭🤭🤭🤭🤭',
 '@smithswith_ ❤️so good!!',
 '@symon9001 hello ? 🤟🏽🇦🇷 some body here ?',
 'Brazils back that way⬅️',
 '@goodthanksyourself nice goal❤️',
 '@sonam_la24 in ur dreams... Croatia!!!🇭🇷❤️✌️',
 '@mess_91_ @aiden44w home and happy🥳🥳🤌🏻',
 'Proudly Moroccan 🇲🇦🇲🇦🇲🇦🇲🇦❤️❤️❤️❤️',
 'He gave us the best goal of the World Cup so far ❤️',
 'Son🥺',
 'Mount❤️',
 'TERANGA boys 🇸🇳🇸🇳❤️',
 '@maidenles.s its game not war ☺️',
 'The second golden age for the socceroos 🇦🇺🟡🟢',
 'Follow m’y account please ❤️',
 '@zarbassir what I love to ear ❤️',
 'They are so cute \U0001f979\U0001f979',
 '@sarou_lifestyle and you were right , they made history ♥️♥️',
 'The best version ever ❤️ #qatar @qatar',
 'So sweet❤️ Hope Korea and Japan both do our best in the next game☺️',
 'Beauty of Qatar ❤️',
 'Italy >>❤️',
 'Loveee this ❤️',
 'Appreciate to the Admin❤️ who always updated us']

In [123]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 7]['text'].tolist()

['Missing Italy in the world cup😍😍😍🇮🇹🇮🇹🇮🇹',
 'oooh jungkook and his song 😍And congratulations on the victory of the French',
 'HE SO FINE OMLL 😍',
 'Best World Cup ever 😍',
 "Congrates Ghana!! Great job u guys did.😍 But I still don't understand why did the referee decided that way. 🥲",
 '@hassan.ahmed.07 6 Games 5 goals 3 assist now😍',
 "@el_dado1004 ya ya ya, croatia lost to spain 6-0 in 2018. You are the kid who couldn't handle the facts and couldnt help yourself but to run your fingers through my comment. Stay mad bestie 😍",
 'Congratulations to Japan, Morocco, Brazil, South Korea and Qatar 😍',
 'France ❤️😍and Argentina 😍❤️❤️',
 'Follow @__footycontent_ 😍',
 "Looking forward to Hakimi vs MBappe'😍❤️ May the best win!"]

In [124]:
en_linkage_matrix[en_linkage_matrix['cluster'] == 8]['text'].tolist()

['Dude… YOUR team was so rude and had poor sportsmanship. This new generation think they are entitled😡 Any other country would have been honored to be in the world cup to play on the same field with these GREAT players from 🇭🇷 \n\nI hope this humbles them. Canada player were acting like little brats. Croatia shut them up and put them on time out. Literally OUT😂💪🏻🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🏆🏆🏆🏆⚽️⚽️⚽️🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷🇭🇷',
 'I know for sure I’ll be missing the World Cup , No game days already feel weird 😒',
 'Hii dear mera name is Hansraj hai main jaipur se hu main apni study ke sath sath online partime work pichhle 2 months se kar raha hu agar aap bhi apni study ke saath saath partime ya full time work karna chahte ho contact me 6375438113 know the details 💯🚀🤔🤔',
 'Meet you in final🙌🏻',
 '@yousselhak ok will see with France 😝😘',
 'Cancel this game. 😒😒',
 'The best and only goat 🐐',
 'CR7 ❤️🙌',
 "@manas.f.rollins bruh that's the Belgium flag 🤦",
 'Who’s that first guy? His answer ✨',
 '@kunchan_13 And th

## Spanish

In [109]:
pip install googletrans==4.0.0-rc1

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
Collecting httpx==0.13.3
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[K     |████████████████████████████████| 55 kB 9.8 MB/s  eta 0:00:01
[?25hCollecting chardet==3.*
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 13.1 MB/s eta 0:00:01
[?25hCollecting hstspreload
  Downloading hstspreload-2023.1.1-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 34.8 MB/s eta 0:00:01
[?25hCollecting idna==2.*
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 20.3 MB/s eta 0:00:01
Collecting httpcore==0.9.*
  Downloading httpcore-0.9.1-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 8.8 MB/s  eta 0:00:01
[?25hCollecting rfc3986<2,>=1.3
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)
Collecting h11<0.10,>=0.8
  Downloading h11-0.9.0-py2

In [110]:
from googletrans import Translator

# Initialize the translator
translator = Translator()

# Function to translate a text
def translate_text(text, target_language='en'):
    try:
        translation = translator.translate(text, dest=target_language)
        return translation.text
    except Exception as e:
        return str(e)

In [111]:
es_linkage_matrix = apply_hierarchical_clustering(es_embeddings, es_df)

es_linkage_matrix['translated_text'] = es_linkage_matrix['text'].apply(translate_text, target_language='en')

es_linkage_matrix

Unnamed: 0,text,cluster,translated_text
255,y mechico?😢,1,and mechico? 😢
497,Jamás pensé que un mundial del cuál tantas per...,1,I never thought that a World Cup from which so...
190,@fifaworldcup Ecuador se merecía el pase 😢,1,@fifaworldcup Ecuador deserved the pass 😢
250,Aún duele😢,1,It still hurts
474,Casillas lloron😢,1,Casillas Lloron😢
...,...,...,...
275,Pride? 🏳️‍🌈?,7,Pride? 🏳️‍🌈?
558,Julian & Enzo 🇦🇷💙,7,Julian & Enzo 🇦🇷💙
433,Cómo planio😂🙌,7,How plan
431,"Está todo cocinado , Messi campeón último mund...",7,"Everything is cooked, Messi World Cup Champion 🤑🤫"


In [112]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 1]['translated_text'].tolist()

['and mechico? 😢',
 'I never thought that a World Cup from which so many people expected to see quality is being so manipulated and fixed how it is, we wantedOut, even the referees seem bought because they have been a non -existent penalty to favor the teams they wanted to see in the final, how sad is the world of Qatar 2022 😢😢😢.',
 '@fifaworldcup Ecuador deserved the pass 😢',
 'It still hurts',
 'Casillas Lloron😢',
 'The final is played by 2 teams ...... What great disappointment this World Cup 😢',
 'You deserved to win Japan 😢',
 'How are my blacks shit 😢',
 '@guiguelpaulo 😢',
 '@mdsojid364 cries Brazilian 😢',
 'Neymar da Silva Santos Junior 😢',
 'Our last game in a World Cup 🇨🇴😢']

In [113]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 2]['translated_text'].tolist()

['It moves more a cockroach with Baygon😂',
 'Q pass the riot in Qatar 😂',
 '@lucaasperee Texas is worth pure vrga\nWe are from the most territory countries in the world\nBut in Las Malvinas you speak English 😂😂😂😂',
 'What happened is that the phenomenon of now no longer plays as before the cago😂😂😂😂',
 'Croatia 😂😂😂😂😂',
 '😂 Ronaldo',
 '@_ab_do_r.c.A Morocco outside the FIFA on day 6😂😂😂',
 'Ecuador made them danced and supposedly this was as the best player 😂 After they annul',
 '@Swapichi do this then and I believe you.😂',
 '@life_is_sedd How is that?😂',
 'Poor guy 😂',
 'Hahahaha or want to find the search engine 😂',
 "@diegooo_menendezz there was no error in the VAR, they don't cry dines😂😂😂😂",
 'I hope you support your own worldcup 😂',
 '@jomiguelrivera The goal of the century did it to Italy😂',
 '@b.local 😂😂😂👏',
 'Those of Saudi Arabia want to die were happy because they won the first game 😂😂😂 come on Argentina 🇦🇷🇦🇷',
 'Shit 😂😂😂😂😂😂😂😂',
 'Go there 😂',
 '@r_cotic27 your croatia taking be

In [114]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 3]['translated_text'].tolist()

['😍ñ🔥🔥🔥',
 '🔥🔥🔥👏🙌 mexico 🇲🇽',
 'The trios are always good bro 🔥😉',
 '@hilla_loaiza what level 🔥🔥🔥',
 "Messias returned 🇦🇷🇦🇷🇦🇷🇦🇷🇦🇷🇦🇷 Let's go selection 🔥🔥🔥",
 "What's up the one of this lord🔥",
 'It was an exquisite party 🔥',
 'Come on Argentina 🔥🔥',
 'Argentina 🔥💚',
 'Ronaldo🔥🔥🔥🔥',
 'Aidoun 🔥❤️',
 'CommOn argentina..you can do it 🔥❤️🔥',
 'Gano Argentina 🔥🔥🔥❤️❤️❤️',
 'Goncalo Ramos 🔥🔥👏👏',
 'The best in the world!❤️\u200d🔥❤️\u200d🔥',
 '@marqu3si yo bro chill 👏🔥',
 'Life changes when it begins to focus on you🎶🔥',
 'All my faith for Argentina 🔥',
 'Valencia🔥🔥🔥',
 'It will be to break the curse🔥🔥🔥',
 '@hoonjy you, if those shots did not get stuck in the Uffff stick🔥',
 '@sandra_cariel camereron 100% 🔥🔥',
 'Argentina, Alvares🔥',
 '@Borenia Grs !!!🔥🙌❤️ Soccer is football',
 '🔥Argentina.3 🔥 France 1🔥',
 'We love you @yosoy8a 🇲🇽🔥',
 'The master magician🔥🔥🤍👏🏻⚡',
 'Ronaldo🙌🔥',
 'With every teacher 🔥🇨🇷',
 'Argentina 😍❤️🔥🔥',
 'Roberto 🔥🔥',
 "Let's go Spain 🔥",
 'Argentina goes for the third World C

In [115]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 4]['translated_text'].tolist()

['What will Qatar do with so many stages after the World Cup?🤔',
 'The goal of the century 🇨🇷\U0001fae1',
 "🤏🤏🤏🤏🤏 I didn't want to lose the level ..",
 'the eldest 🤩🤩',
 'There is no other fans the same.The best in the world that proud to have been born in Argentina ❤️🇦🇷🇦🇷',
 "Argentina feed !!!!!Let's fuck bla ining",
 "Let's go Morocco ❤️",
 'How big is Lukita 🇭🇷❤',
 "Let's go Ecuador ❤️❤️",
 "Let's go Peru ❤️",
 '@Luisitorres_ Yes 🥵',
 '🤫 traki just, everything is still said',
 '❤️ What good steam France',
 "@lovely_angel_kh I don't understand you but Chill🤙",
 'Go Ecuador 🇪🇨❤️',
 '@ Jctorr130 1-1🤫🤫🤫🤫',
 'Paquetá 🥵',
 '🇦🇷 ♥ ️❤️ Airport, Modric.',
 '@jonathanardi_ The friad chest is coming 🥶 little is missing',
 'Change the flag, we are Ecuador 🇪🇨☝🏻',
 '@_Ompoficial__Alejandro who is the best, continue to celebrate ahead of time 🤫',
 '@bcaudana22 I wait until the World Cups of all the American continent selections somados less equal to the titles of Brazil \U0001fae1',
 'Morocco❤️',


In [116]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 5]['translated_text'].tolist()

["We are thicerly that if one can, they turn to when they left the death group in 2014 when they all gave them for dead and the dead turned out to be others.Let's go even if my team is USA but my heart won it in that 2014 World Cup.But it is my USA are you for me.❤️",
 'Mexican pride ❤️👏🏻🇲🇽',
 'Ecuador present 🇪🇨👏👏',
 'Maradona help Messi to achieve triumph ❤️👏👏👏',
 '👏👏We go with everything, Strong',
 '👏👏🙌🙌Ecuador',
 'Congratulations to Ecuador for winning the slaves 👏🏼👏🏼',
 'Moroccos👏👏👐👐',
 'Better publish the moments of canceled goals 👏👏',
 'The best in the world 👏🏻 😍',
 'Argentina 🇦🇷 Leo 👏👏👏👏',
 'Double breast 😂👏',
 'The best of all ❤️👏',
 '15 World Cups 👏👏👏',
 'A great my uncle Ril or 👏👏👏 @dorian_ybaceta @lopezillo.public @hugo_charcape03 @hanrykux03',
 'The best in Portugal 👏👏',
 '@hassan.ahmed.07 And how many penalties?that they have not even been fouls to El.👏👏',
 '@caro2653 👏👏👏',
 'Great the mathematician',
 '@Maxirodriguez11Oficial genius !!!👏👏👏👏💙🤍💙',
 'The best midfielder 👏🏻'

In [117]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 6]['translated_text'].tolist()

['😍ronaldo',
 'Proudly Ecuadorians 😍',
 'Eden hazard comes back😍😍😍',
 'gracias 😍',
 "I have always liked Ghana's uniforms😍",
 '❤️❤️❤️ located Ecuadorians 😍😍😍😍😍',
 'I really love my idol Portugal - Cristiano Ronaldo.😍',
 '@Btito_.Avak Best goalkeeper in the world, proceeds to lose 7 to 0😍😍😍',
 'Love the France shirt 😍😍😍 🇺🇾🇨🇵',
 'Lol Casillas!!! 😍😍']

In [118]:
es_linkage_matrix[es_linkage_matrix['cluster'] == 7]['translated_text'].tolist()

["@axl_sash3 I don't get arg.Am.But we are talking about Ecuador, right?The selection is today 💙 win or die.",
 '@Hichemourad13 Is this individual still thought about?Ecuador is great 🇪🇨😉😉',
 'Poetry in photos, poetry in motion🇦🇷🥂',
 "@fifaWorldcup 🇪🇨 He played much better 😎 They canceled a legitimate goal 😠 I had to have won the game 👏 But let's go 🇪🇨 If you can and you can always 💪",
 'For me it would also be like seeing Santa Claus!We love you Leo 🇦🇷⚽️💪🥰',
 '@letigallo My respect for women everywhere🙌 💕',
 '@carolinacbarbos Bono🇲🇦👊',
 'Pure trunks defending and another good coconut 🤷🏽\u200d♂️.',
 'The best player in the world 🌎 is called Messi ❤️🙌',
 '@Charlywolfph naaaaa, it is only an opinion that follows from logic and common sense ... I\'m not interested in being DT although I thank you very much ... "Machine" 👍🏼😏',
 'Pele 3 World Cups Argentina 2 🤣🤣🤣',
 'North Korea 😮 won',
 'ECUADOR IN DA HOUSE 🔥🔥🔥🔥🔥🔥🇪🇨🇪🇨🇪🇨🇪🇨🇪🇨🇪🇨🇪🇨🙌🙌🙌🙌🙌🙌🙌🙌 #ecuadorarrechohp',
 '"Maurice I can\'t move it, move 