In [1]:
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import IPython.display as display
from IPython.display import HTML

In [29]:
GITHUB_RENDER = False

In [62]:
def basic_line_plotly(plot_df, x, y, color_col, xt, yt, title, WIDTH, HEIGHT):
    fig = px.line(plot_df, x=x, y=y, color=color_col, labels={'x': xt, 'y': yt})
    fig.update_layout(title_x=0.5, width=WIDTH, height=HEIGHT, yaxis=dict(title="Count", tickformat=',d', 
            tickfont=dict(size=16, family="Courier New, monospace"),
            title_font=dict(size=16)), xaxis=dict(
            tickfont=dict(size=16, family="Courier New, monospace"),
            title_font=dict(size=16)
        ), xaxis_title_font=dict(size=16, family="Courier New, monospace"),
        yaxis_title_font=dict(size=16, family="Courier New, monospace"),
        legend=dict(
            font=dict(size=16, family="Courier New, monospace"),
            title_font=dict(size=16, family="Courier New, monospace"),
            orientation="h", 
            yanchor="bottom",
            y=-0.4, 
            xanchor="center",
            x=0.5
        ),
        margin=dict(b=100)
    )
    fig.update_xaxes(range=['2023-07-01', '2023-07-31'])
    if GITHUB_RENDER:
        image_bytes = pio.to_image(fig, format='png')
        display.display(display.Image(image_bytes))
    else:
        fig.show()

# Reddit

In [64]:
df = pd.read_csv('../out/new_filtered_df_60_final.csv', index_col=0)
df.head()

Unnamed: 0,reddit image,imkg image,score
0,meme/s96e69g5t7ab1.jpg,https://i.imgflip.com/2twnaq.jpg,0.868993
1,meme/e9gksc03f7ab1.jpg,https://i.imgflip.com/544pnd.jpg,0.730533
2,meme/t8jgr364z6ab1.jpg,https://i.imgflip.com/3h4dth.jpg,0.624141
3,meme/ivr37kvgl6ab1.jpg,https://i.imgflip.com/5secj1.jpg,0.657292
4,meme/wjgmc07y76ab1.png,https://i.imgflip.com/2xmijc.jpg,0.835818


In [65]:
imkg_templates = pd.read_csv('../graph/out/imkg_templates.tsv', sep='\t')
node1 = list(imkg_templates['node1'].values)
node2 = list(imkg_templates['node2'].values)
df_imkg_nodes = list(df['imkg image'].values)
df_imkg_nodes = [x.split('/')[-1].split('.')[0] for x in df_imkg_nodes]
back_dict = {}
final_dict = {}
for x, y in tqdm(zip(node1, node2)):
    back_dict[x.split(':')[-1]] = y

1326032it [00:01, 1295389.64it/s]


In [66]:
# Add timestamp information
modified_df = pd.DataFrame(columns=["reddit image", "imkg image", "score", "timestamp", "reddit score", "IMKG Class"])
for row in tqdm(df.iterrows()):
    reddit_image = row[1]['reddit image']
    ri_cat = reddit_image.split('/')[0]+"_transformed.csv"
    df_file = pd.read_csv('../data/final_files/s3_df_{}'.format(ri_cat))
    ts = df_file[df_file['image_location'] == reddit_image]['timestamp'].values[0]
    reddit_score = df_file[df_file['image_location'] == reddit_image]['score'].values[0]
    
    imkg_image = row[1]['imkg image']
    imkg_image = imkg_image.split('/')[-1].split('.')[0]
    imkg_image = back_dict[imkg_image].split('/')[-1]
    imkg_image = imkg_image.split(':')[-1]
    
    modified_df.loc[len(modified_df)] = [row[1]['reddit image'], row[1]['imkg image'], row[1]['score'], ts, reddit_score, imkg_image]

1953it [01:23, 23.33it/s]


In [67]:
modified_df.shape

(1953, 6)

In [68]:
modified_df.head()

Unnamed: 0,reddit image,imkg image,score,timestamp,reddit score,IMKG Class
0,meme/s96e69g5t7ab1.jpg,https://i.imgflip.com/2twnaq.jpg,0.868993,2023-07-05 21:32:17,4.0,Pimples-Zero
1,meme/e9gksc03f7ab1.jpg,https://i.imgflip.com/544pnd.jpg,0.730533,2023-07-05 20:13:27,6.0,Giga-Chad
2,meme/t8jgr364z6ab1.jpg,https://i.imgflip.com/3h4dth.jpg,0.624141,2023-07-05 18:44:06,10.0,BestBetter-Blurst
3,meme/ivr37kvgl6ab1.jpg,https://i.imgflip.com/5secj1.jpg,0.657292,2023-07-05 17:27:24,1.0,Gus-Fring-we-are-not-the-same
4,meme/wjgmc07y76ab1.png,https://i.imgflip.com/2xmijc.jpg,0.835818,2023-07-05 16:12:57,11.0,big-yellow-ball-and


In [69]:
modified_df['IMKG Class'].value_counts()

IMKG Class
Drake-Hotline-Bling                       43
Press-button-hard-choice                  30
Afraid-To-Ask-Andy                        30
Mother-Ignoring-Kid-Drowning-In-A-Pool    28
undertaker                                26
                                          ..
3rd-Place-Celebration                      1
1950s-Middle-Finger                        1
Shrek-Fiona-Harold-Donkey                  1
what-is-my-purpose                         1
salt-bae                                   1
Name: count, Length: 535, dtype: int64

In [70]:
# Popular IMKG class
popular_imkg_class = ["Drake-Hotline-Bling", "Press-button-hard-choice", "Afraid-To-Ask-Andy", \
                 "Mother-Ignoring-Kid-Drowning-In-A-Pool", "undertaker"]

In [71]:
# Filter rest of the classes
popular_df = modified_df[modified_df['IMKG Class'].isin(popular_imkg_class)]
popular_df.shape

(157, 6)

In [72]:
popular_df.head()

Unnamed: 0,reddit image,imkg image,score,timestamp,reddit score,IMKG Class
20,meme/ctm1aw9wkz9b1.jpg,https://i.imgflip.com/46hivo.jpg,0.801495,2023-07-04 17:51:43,6.0,Mother-Ignoring-Kid-Drowning-In-A-Pool
21,meme/013ku8ie7z9b1.jpg,https://i.imgflip.com/101or6.jpg,0.80086,2023-07-04 16:36:04,4.0,Afraid-To-Ask-Andy
23,meme/s1vcopevtx9b1.jpg,https://i.imgflip.com/431kiz.jpg,0.820349,2023-07-04 11:58:28,3238.0,Press-button-hard-choice
26,meme/x3c8vrscsu9b1.jpg,https://i.imgflip.com/101or6.jpg,0.65499,2023-07-04 01:44:38,22.0,Afraid-To-Ask-Andy
27,meme/zlrx6ymd3u9b1.jpg,https://i.imgflip.com/1gndfw.jpg,0.710332,2023-07-03 23:24:37,2.0,Drake-Hotline-Bling


## Daily trend analysis

In [73]:
popular_df['Date'] = pd.to_datetime(popular_df['timestamp'])
popular_df['Date'] = pd.to_datetime(popular_df['Date'])
popular_df.set_index('Date', inplace=True)
popular_weekly_df = popular_df.groupby('IMKG Class').resample('W-MON').size().reset_index(name='Counts')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [74]:
basic_line_plotly(popular_weekly_df, x='Date', y='Counts', \
                  color_col='IMKG Class', xt='Date', \
                  yt='Count', title='Temporal Spread of Memes in Reddit', \
                  WIDTH=1000, HEIGHT=600)

# Discord

In [75]:
df = pd.read_csv('../out/new_discord_filtered_df_60_final.csv', index_col=0)
df.head()

Unnamed: 0,discord image,imkg image,score,timestamp,channel
0,https://images-ext-1.discordapp.net/external/W...,https://i.imgflip.com/48k2zx.jpg,0.678281,2023-07-31T23:43:57.965+00:00,TheDungeon
1,https://images-ext-1.discordapp.net/external/D...,https://i.imgflip.com/4jf1mm.jpg,0.707667,2023-07-31T23:43:10.035+00:00,TheDungeon
2,https://images-ext-2.discordapp.net/external/v...,https://i.imgflip.com/3lklq9.jpg,0.890164,2023-07-31T23:26:23.419+00:00,TheDungeon
3,https://images-ext-2.discordapp.net/external/n...,https://i.imgflip.com/34apmh.jpg,0.609027,2023-07-31T22:36:52.764+00:00,TheDungeon
4,https://images-ext-2.discordapp.net/external/D...,https://i.imgflip.com/44u5fm.jpg,0.658732,2023-07-31T22:15:57.541+00:00,TheDungeon


In [76]:
df.shape

(1494, 5)

In [77]:
imkg_templates = pd.read_csv('../graph/out/imkg_templates.tsv', sep='\t')
node1 = list(imkg_templates['node1'].values)
node2 = list(imkg_templates['node2'].values)
df_imkg_nodes = list(df['imkg image'].values)
df_imkg_nodes = [x.split('/')[-1].split('.')[0] for x in df_imkg_nodes]
back_dict = {}
final_dict = {}
for x, y in tqdm(zip(node1, node2)):
    back_dict[x.split(':')[-1]] = y

1326032it [00:01, 1325912.19it/s]


In [78]:
# Add timestamp information
modified_df = pd.DataFrame(columns=["discord image", "imkg image", "score", "timestamp", "IMKG Class"])
for row in tqdm(df.iterrows()):
    imkg_image = row[1]['imkg image']
    imkg_image = imkg_image.split('/')[-1].split('.')[0]
    imkg_image = back_dict[imkg_image].split('/')[-1]
    imkg_image = imkg_image.split(':')[-1]
    modified_df.loc[len(modified_df)] = [row[1]['discord image'], row[1]['imkg image'], row[1]['score'], row[1]['timestamp'], imkg_image]

1494it [00:01, 977.17it/s]


In [79]:
modified_df['IMKG Class'].value_counts()

IMKG Class
Drake-Hotline-Bling                               30
Press-button-hard-choice                          27
Afraid-To-Ask-Andy                                27
Happy--Shock                                      23
undertaker                                        20
                                                  ..
This-Little-Manuever-is-Gonna-Cost-us-51-Years     1
party-loner                                        1
Sad-Keanu                                          1
See-Nobody-Cares                                   1
Think-Mark-Think                                   1
Name: count, Length: 447, dtype: int64

In [80]:
# Popular IMKG class
popular_imkg_class = ["Drake-Hotline-Bling", "Press-button-hard-choice", "Afraid-To-Ask-Andy", \
                 "Happy--Shock", "undertaker"]

In [81]:
# Filter rest of the classes
popular_df = modified_df[modified_df['IMKG Class'].isin(popular_imkg_class)]
popular_df.shape

(127, 5)

In [82]:
popular_df.head()

Unnamed: 0,discord image,imkg image,score,timestamp,IMKG Class
1,https://images-ext-1.discordapp.net/external/D...,https://i.imgflip.com/4jf1mm.jpg,0.707667,2023-07-31T23:43:10.035+00:00,undertaker
2,https://images-ext-2.discordapp.net/external/v...,https://i.imgflip.com/3lklq9.jpg,0.890164,2023-07-31T23:26:23.419+00:00,Press-button-hard-choice
7,https://images-ext-1.discordapp.net/external/p...,https://i.imgflip.com/53y256.jpg,0.652669,2023-07-31T21:07:34.06+00:00,Happy--Shock
19,https://images-ext-1.discordapp.net/external/m...,https://i.imgflip.com/101or6.jpg,0.824065,2023-07-31T10:38:22.329+00:00,Afraid-To-Ask-Andy
33,https://images-ext-1.discordapp.net/external/s...,https://i.imgflip.com/1023mz.jpg,0.818152,2023-07-30T22:26:38.397+00:00,Afraid-To-Ask-Andy


## Daily trend analysis

In [83]:
popular_df['timestamp'] = popular_df['timestamp'].str.slice(0, 19)
popular_df['Date'] = pd.to_datetime(popular_df['timestamp'])
popular_df.set_index('Date', inplace=True)
popular_weekly_df = popular_df.groupby('IMKG Class').resample('W-MON').size().reset_index(name='Counts')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [84]:
basic_line_plotly(popular_weekly_df, x='Date', y='Counts', \
                  color_col='IMKG Class', xt='Date', \
                  yt='Count', title='Temporal Spread of Memes in Discord', \
                  WIDTH=1000, HEIGHT=600)