# Analyze Real World Social Media Data: Truth Social

In [1]:
import os
import sys
import json

import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# print(notebook_dir)
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../'))

from data_processing import DataProcessing

In [49]:
pd.set_option('max_colwidth', 800)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

## Load Data

In [2]:
base_path = os.path.join(notebook_dir, '../', 'data/open_measures/', 'truth_social_raw_data/')
files = os.listdir(base_path)

dfs = []

for file in tqdm(files):
    full_path = os.path.join(notebook_dir, '../', 'data/open_measures/', 'truth_social_raw_data/', file)
    df = DataProcessing.load_from_file(full_path, 'csv')
    dfs.append(df)

dfs

100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 7/7 [00:00<00:00, 213.63it/s]


[     Unnamed: 0                                            account bookmarked  \
 0             0  {'acct': 'ImBackBitchs', 'display_name': 'Just...      False   
 1             1  {'acct': 'NFL', 'display_name': 'NFL', 'id': '...      False   
 2             2  {'acct': 'freelovepatriot', 'display_name': 'b...      False   
 3             3  {'acct': 'WillieO2', 'display_name': 'Billy O'...      False   
 4             4  {'acct': 'reneeras', 'display_name': 'Renee in...      False   
 ..          ...                                                ...        ...   
 155         155  {'acct': 'Artbyunclejunior', 'display_name': '...      False   
 156         156  {'acct': 'michaelfunk', 'display_name': 'Funks...      False   
 157         157  {'acct': 'vickieski', 'display_name': 'Vickie ...      False   
 158         158  {'acct': 'billrogers76', 'display_name': 'W A ...      False   
 159         159  {'acct': 'nicolespink70', 'display_name': 'Nik...      False   
 
              

## Data Processing

+ Data stored as dfs[df_1, df_2, ... n].
+ Concat all DFs to one.
+ `data_range_per_file` is the index 0 to n_1 belongs to df_1, 0_n_2 belongs to n_2, etc.

In [3]:
df = DataProcessing.concat_dfs(dfs)
df.rename(columns={"Unnamed: 0": "data_range_per_file"}, inplace=True)
df

Unnamed: 0,data_range_per_file,account,bookmarked,card,collected_by,content,content_cleaned,created_at,datatype,downvotes_count,...,labels,langs,reply,embeds,bridgyOriginalText,bridgyOriginalUrl,indexedAt,sequence,openmeasures_meta,rkey_str
0,0,"{'acct': 'ImBackBitchs', 'display_name': 'Just...",False,,smat-scrapy-crawlers,<p>I‚Äôm so tired of hearing about Colin Kaeper...,I‚Äôm so tired of hearing about Colin Kaepernic...,2022-04-24T20:52:32.230+00:00,post,0.0,...,,,,,,,,,,
1,1,"{'acct': 'NFL', 'display_name': 'NFL', 'id': '...",False,"{'author_name': '', 'author_url': '', 'blurhas...",smat-scrapy-crawlers,<p>Burrow: Bengals 'know what it takes' now to...,Burrow: Bengals 'know what it takes' now to wi...,2022-05-17T20:50:14.967+00:00,post,0.0,...,,,,,,,,,,
2,2,"{'acct': 'freelovepatriot', 'display_name': 'b...",False,"{'author_name': '', 'author_url': '', 'blurhas...",smat-scrapy-crawlers,"<p><a href=""https://www.instagram.com/reel/Cha...",https://www.instagram.com/reel/Cha-AWmst5v/?ig...,2022-09-07T17:26:19.589+00:00,post,0.0,...,,,,,,,,,,
3,3,"{'acct': 'WillieO2', 'display_name': 'Billy O'...",False,,smat-scrapy-crawlers,<p>The NFL will now play flag football in lieu...,The NFL will now play flag football in lieu of...,2022-09-26T23:32:51.382+00:00,post,,...,,,,,,,,,,
4,4,"{'acct': 'reneeras', 'display_name': 'Renee in...",False,,smat-scrapy-crawlers,<p>Well performance plays show otherwise. Jus...,Well performance plays show otherwise. Just w...,2022-10-03T01:47:01.368+00:00,comment,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
922,155,"{'acct': 'Artbyunclejunior', 'display_name': '...",False,"{'author_name': '', 'author_url': '', 'blurhas...",smat-scrapy-crawlers,<p>Washington Red Skins&apos; Playoff Run Ends...,Washington Red Skins&apos; Playoff Run Ends in...,2025-01-27T17:29:45.435+00:00,comment,,...,,,,,,,,,,
923,156,"{'acct': 'michaelfunk', 'display_name': 'Funks...",False,,smat-scrapy-crawlers,<p>Don‚Äôt get to excited buddy. The refs will m...,Don‚Äôt get to excited buddy. The refs will make...,2025-01-27T20:32:47.859+00:00,comment,0.0,...,,,,,,,,,,
924,157,"{'acct': 'vickieski', 'display_name': 'Vickie ...",False,,smat-scrapy-crawlers,"<p>For some time, I feel the message to the pu...","For some time, I feel the message to the publi...",2025-02-08T00:19:42.311+00:00,post,,...,,,,,,,,,,
925,158,"{'acct': 'billrogers76', 'display_name': 'W A ...",False,,smat-scrapy-crawlers,<p>It is just boys playing a gameüèàüòú<br/>How mu...,It is just boys playing a gameüèàüòúHow much do NF...,2025-02-08T07:08:42.339+00:00,post,,...,,,,,,,,,,


In [None]:
def user_json_to_df(df):
    """
    Users in 'account' col is stored as str'json', so need to convert and split the keys : cols and values to entries.
    
    """
    users = []
    error_processing_users = []
    
    for idx, row in df.iterrows():
        account = row['account']
        content = row['content_cleaned']
        date_time = row['created_at']
        datatype = row['datatype']
        if idx <= 3:
            print(f"Index: {idx}\n\tAccount: {account}\n\tContent: {content}\n\tDate: {date_time}\n\tType of Post: {datatype}\n")
        
        try:
            json_to_str = account.replace("'", '"') # Convert single quotes to double quotes to make it valid JSON
            json_obj = json.loads(json_to_str) # Parse the JSON string
            user_series = pd.Series(json_obj)
            user_series['content'] = content
            update_date_time = pd.to_datetime(date_time)
            user_series['date_with_time'] = update_date_time
            user_series['type_of_post'] = datatype
            users.append(user_series)
        except ValueError as e:
            user_to_error = (account, e)
            error_processing_users.append(user_to_error)
            continue
        except AttributeError as e:
            user_to_error = (account, e)
            error_processing_users.append(user_to_error)
            continue
            
        users_df = pd.concat(users, axis=1)
    return users_df.T, error_processing_users

In [50]:
user_account_info_df, error_processing_users = user_json_to_df(df)
user_account_info_df

Index: 0
	Account: {'acct': 'ImBackBitchs', 'display_name': 'Just Another White Guy', 'id': '107911908216773717', 'username': 'ImBackBitchs'}
	Content: I‚Äôm so tired of  hearing about Colin Kaepernick he‚Äôs not good enough to win a Super Bowl he‚Äôs not good enough to make the playoffs has nothing to do with his skin color he sucks also why would he want to be a slave didn‚Äôt he say NFL was a slave
	Date: 2022-04-24T20:52:32.230+00:00
	Type of Post: post

Index: 1
	Account: {'acct': 'NFL', 'display_name': 'NFL', 'id': '107771700550828158', 'username': 'NFL'}
	Content: Burrow: Bengals 'know what it takes' now to win https://www.espn.com/nfl/story/_/id/33934040/joe-burrow-says-cincinnati-bengals-know-takes-now-win-run-super-bowl
	Date: 2022-05-17T20:50:14.967+00:00
	Type of Post: post

Index: 2
	Account: {'acct': 'freelovepatriot', 'display_name': 'boomhauer88', 'id': '107848917774818876', 'username': 'freelovepatriot'}
	Content: https://www.instagram.com/reel/Cha-AWmst5v/?igshid=YmMy

Unnamed: 0,acct,display_name,id,username,content,date_with_time,type_of_post
0,ImBackBitchs,Just Another White Guy,107911908216773717,ImBackBitchs,I‚Äôm so tired of hearing about Colin Kaepernick he‚Äôs not good enough to win a Super Bowl he‚Äôs not good enough to make the playoffs has nothing to do with his skin color he sucks also why would he want to be a slave didn‚Äôt he say NFL was a slave,2022-04-24 20:52:32.230000+00:00,post
1,NFL,NFL,107771700550828158,NFL,Burrow: Bengals 'know what it takes' now to win https://www.espn.com/nfl/story/_/id/33934040/joe-burrow-says-cincinnati-bengals-know-takes-now-win-run-super-bowl,2022-05-17 20:50:14.967000+00:00,post
2,freelovepatriot,boomhauer88,107848917774818876,freelovepatriot,https://www.instagram.com/reel/Cha-AWmst5v/?igshid=YmMyMTA2M2Y=,2022-09-07 17:26:19.589000+00:00,post
3,WillieO2,Billy O,107845009115085392,WillieO2,"The NFL will now play flag football in lieu of the Pro Bowl, which in essence has become a game of touch football over the years anyway. The continued rule changes for roughing the QB, like landing on the QB with too much force, continue to make a mockery of the game of football. So, they may as well just put a flag belt on the QB's during league games, eliminate playoffs &amp; the Super Bowl, give everyone a participation trophy &amp; call it a day. All further adding to the pussification of America!!",2022-09-26 23:32:51.382000+00:00,post
4,reneeras,Renee in Texas,107854478793524619,reneeras,"Well performance plays show otherwise. Just wait, after this little time out, he will have something else happen and out again.When DAK came to the Cowboys, he was my favorite (even bought his jersey).Been too many disappointments. It would be great to see Dallas for playoffs and Super Bowl üôè",2022-10-03 01:47:01.368000+00:00,comment
...,...,...,...,...,...,...,...
469,Artbyunclejunior,ArtbyUncleJR,107834646100522511,Artbyunclejunior,"Washington Red Skins&apos; Playoff Run Ends in Defeat, but Begins a New Era. The Red Skins reflect on their season following their loss against the Philadelphia Eagles. Costly mistakes ended the Washington Red Skins&apos; Super Bowl dreams in a 55-23 loss at the hands of the Philadelphia Eagles during the 2024-2025 NFL Playoffs.Maybe the DeI mayor will learn how to spell soon: https://youtu.be/5s0pxJLM_7M?si=Ga1qZ7x6zZbl26gG",2025-01-27 17:29:45.435000+00:00,comment
470,michaelfunk,Funkster,107837755896978391,michaelfunk,Don‚Äôt get to excited buddy. The refs will make sure you guys don‚Äôt win the Super Bowl. That‚Äôs how the NFL rolls. They missed so many calls in the playoffs. They threatened the Bills with giving the Chefs a touchdown. Can you imagine if they did. You are a stand up guy. Kick some KC Ass. Please.,2025-01-27 20:32:47.859000+00:00,comment
471,vickieski,Vickie Dembinski,107834840758287063,vickieski,"For some time, I feel the message to the public which many people know, the NFL, NBA etc are all manipulated. Lower terms ""rigged"", anywhere there is big money, we all know who it is controlled by. Like the Romans said, ""Make them an arena"". Only a few big players, making the big money (hush money) get the play book way before the season starts and what is going to happen and to make it happen. I believe many college sports are also, esp football $$$ .. but does not involve the players, it involves the coaches and the college, based on making the right plays or plays to make it fail on purpose. Lets sit back and watch.. plus the Super Bowl is the highest human trafficking event all year. Since White Hats have been in control, they have been scooping up many criminals involved t...",2025-02-08 00:19:42.311000+00:00,post
472,billrogers76,W A R Liberty,107910638877822210,billrogers76,"It is just boys playing a gameüèàüòúHow much do NFL players get paid in the playoffs? The CBA also spells out how much players are paid in each *round of the postseason. Here are the üòµ‚Äçüí´bonuses for the 2024 regular season:\t‚Ä¢\tDivision winner: $54,500\t‚Ä¢\tWild Card/first-round *bye: $49,500\t‚Ä¢\tDivisional Round: $54,500\t‚Ä¢\tConference Championship: $77,000 \t‚Ä¢\tSuper Bowl *losing team: $96,000\t‚Ä¢\tSuper Bowl winning team: $171,000",2025-02-08 07:08:42.339000+00:00,post


In [47]:
error_processing_users

[('{\'acct\': \'EricLaraTrumpJr\', \'display_name\': "Hunter\'s Laptop", \'id\': \'108330713294059048\', \'username\': \'EricLaraTrumpJr\'}',
  json.decoder.JSONDecodeError("Expecting ',' delimiter: line 1 column 53 (char 52)")),
 ('{\'acct\': \'EricLaraTrumpJr\', \'display_name\': "Hunter\'s Laptop", \'id\': \'108330713294059048\', \'username\': \'EricLaraTrumpJr\'}',
  json.decoder.JSONDecodeError("Expecting ',' delimiter: line 1 column 53 (char 52)")),
 ('{\'acct\': \'EricLaraTrumpJr\', \'display_name\': "Hunter\'s Laptop", \'id\': \'108330713294059048\', \'username\': \'EricLaraTrumpJr\'}',
  json.decoder.JSONDecodeError("Expecting ',' delimiter: line 1 column 53 (char 52)")),
 ('{\'acct\': \'EricLaraTrumpJr\', \'display_name\': "Hunter\'s Laptop", \'id\': \'108330713294059048\', \'username\': \'EricLaraTrumpJr\'}',
  json.decoder.JSONDecodeError("Expecting ',' delimiter: line 1 column 53 (char 52)")),
 ('{\'acct\': \'EricLaraTrumpJr\', \'display_name\': "Hunter\'s Laptop", \'id\':

In [51]:
filt_comments = (user_account_info_df['type_of_post'] == 'comment')
user_comments_df = user_account_info_df[filt_comments]
user_comments_df

Unnamed: 0,acct,display_name,id,username,content,date_with_time,type_of_post
4,reneeras,Renee in Texas,107854478793524619,reneeras,"Well performance plays show otherwise. Just wait, after this little time out, he will have something else happen and out again.When DAK came to the Cowboys, he was my favorite (even bought his jersey).Been too many disappointments. It would be great to see Dallas for playoffs and Super Bowl üôè",2022-10-03 01:47:01.368000+00:00,comment
5,KnightinArmor,KnightinArmor,108338464491105640,KnightinArmor,NFL &amp; NBA are DOA to us....some college football every other weekend - that &amp; a few bowl game playoffs. College basketball tourney for the Cinderella factor !,2022-11-15 21:42:54.197000+00:00,comment
11,FJIFJB,FJB,107835286059680530,FJIFJB,"The playoffs to the Super Bowl Sunday ü§î NFL &amp; media are starting to worry! They can NO LONGER hide the truth about how unsafe the jabs are! So many are finally waking up, tho very sad for those who actually took the jabs. üò¢ so many vaxxed are injured &amp; many #suddendeath üò°",2023-01-05 13:48:13.019000+00:00,comment
14,RealNYALLO,STOP STUPIDING,107854388557166965,RealNYALLO,"Doug Peterson brings PHI it‚Äôs 1st SUPERBOWL championship ever! Builds the deepest roster in the league. And he‚Äôs fired for it for not being the right optic. So he goes to JAC and takes a 1-15 team to the playoffs. PHI, even in their moment of such success this year, should be ashamed of themselves for their lack of gratitude and appreciation. I will boycott the Super Bowl if PHI plays KC and ‚Äúfuture HOFer Pat Mahommes‚Äù as ESPN anointed him in his first year as a starter. So DISRESPECTFUL!",2023-01-08 04:45:08.094000+00:00,comment
24,systemaze,systemaze,108476853925128734,systemaze,I bet everyone in the NFL playoffs and Super Bowl will be required to take ivermectin and sign an NDA.,2023-01-15 02:48:40.026000+00:00,comment
...,...,...,...,...,...,...,...
448,WinstonO1984,WinstonO1984,108320349986512790,WinstonO1984,"Just like in Politics &quot;It&apos;s the Economy Stupid.&quot; For the NFL &quot;It&apos;s the Ratings Stupid&quot;. BIG Ratings = BIG $. BIG Stories = BIG $. BIG Markets with Popular Teams = BIG $. We will NEVER see a Jacksonville Jaguars vs Arizona Cardinals Super Bowl. It is not Plays, or Players, or even Teams...IT IS OFFICIATING. Prime Example- Detroit Lions last second Touchdown, Beating Dallas Cowboys...being called back..Dallas Wins, the Officiating crew moves on in the Playoffs. Kinda hard to beat the opposing team, and the officials...especially when you can call HOLDING or PASS INTERFERENCE on ANY Given Play!",2024-02-17 21:26:34.540000+00:00,comment
452,guessonly,,110995789501741229,guessonly,"MAYOR DUGGAN: Detroit just hosted the largest NFL Draft in history, the Tigers are back in the playoffs, the Lions are headed to the Super Bowl, crime is down and our population is growing. Lots of cities should be like Detroit. And we did it all without Trump‚Äôs help.",2024-10-10 23:29:44.501000+00:00,comment
469,Artbyunclejunior,ArtbyUncleJR,107834646100522511,Artbyunclejunior,"Washington Red Skins&apos; Playoff Run Ends in Defeat, but Begins a New Era. The Red Skins reflect on their season following their loss against the Philadelphia Eagles. Costly mistakes ended the Washington Red Skins&apos; Super Bowl dreams in a 55-23 loss at the hands of the Philadelphia Eagles during the 2024-2025 NFL Playoffs.Maybe the DeI mayor will learn how to spell soon: https://youtu.be/5s0pxJLM_7M?si=Ga1qZ7x6zZbl26gG",2025-01-27 17:29:45.435000+00:00,comment
470,michaelfunk,Funkster,107837755896978391,michaelfunk,Don‚Äôt get to excited buddy. The refs will make sure you guys don‚Äôt win the Super Bowl. That‚Äôs how the NFL rolls. They missed so many calls in the playoffs. They threatened the Bills with giving the Chefs a touchdown. Can you imagine if they did. You are a stand up guy. Kick some KC Ass. Please.,2025-01-27 20:32:47.859000+00:00,comment


In [52]:
filt_posts = (user_account_info_df['type_of_post'] == 'post')
user_posts_df = user_account_info_df[filt_posts]
user_posts_df

Unnamed: 0,acct,display_name,id,username,content,date_with_time,type_of_post
0,ImBackBitchs,Just Another White Guy,107911908216773717,ImBackBitchs,I‚Äôm so tired of hearing about Colin Kaepernick he‚Äôs not good enough to win a Super Bowl he‚Äôs not good enough to make the playoffs has nothing to do with his skin color he sucks also why would he want to be a slave didn‚Äôt he say NFL was a slave,2022-04-24 20:52:32.230000+00:00,post
1,NFL,NFL,107771700550828158,NFL,Burrow: Bengals 'know what it takes' now to win https://www.espn.com/nfl/story/_/id/33934040/joe-burrow-says-cincinnati-bengals-know-takes-now-win-run-super-bowl,2022-05-17 20:50:14.967000+00:00,post
2,freelovepatriot,boomhauer88,107848917774818876,freelovepatriot,https://www.instagram.com/reel/Cha-AWmst5v/?igshid=YmMyMTA2M2Y=,2022-09-07 17:26:19.589000+00:00,post
3,WillieO2,Billy O,107845009115085392,WillieO2,"The NFL will now play flag football in lieu of the Pro Bowl, which in essence has become a game of touch football over the years anyway. The continued rule changes for roughing the QB, like landing on the QB with too much force, continue to make a mockery of the game of football. So, they may as well just put a flag belt on the QB's during league games, eliminate playoffs &amp; the Super Bowl, give everyone a participation trophy &amp; call it a day. All further adding to the pussification of America!!",2022-09-26 23:32:51.382000+00:00,post
6,TruthSports,TruthSports,107837755353057899,TruthSports,Biggest Question Each NFL Super Bowl Contender Needs to Answer Before the Playoffshttps://bleacherreport.com/articles/10056596-biggest-question-each-nfl-super-bowl-contender-needs-to-answer-before-the-playoffs,2022-11-24 16:48:00.761000+00:00,post
...,...,...,...,...,...,...,...
466,streetglidepohl,john,107856236890130785,streetglidepohl,RT @PowerDownMediaAlex Marlow: Follow the Money‚ÄîNFL Rigging Playoffs for the Chiefs So Taylor Swift Will Attend Super Bowl https://www.breitbart.com/clips/2025/01/24/alex-marlow-follow-the-money-nfl-rigging-playoffs-for-the-chiefs-so-taylor-swift-will-attend-super-bowl/,2025-01-26 21:00:44.119000+00:00,post
467,Gutterman70,Gutterman70,112994154379087050,Gutterman70,"C'mon NFL is a rigged thru sports betting bullshit sport, the Philadelphia Eagles should be crushing jerkoff Washington (Assholes) Redskins Oops Commanders, because Traitor Joe Duh Biden name changed after his crazy dog üêï, the Eagles are allowing the commander's to score it's so obvious, fu NFL and like in 60s, 70s, 80s, 90s teams that where heading into playoffs played their 1st string players all the way to Super Bowl üèà, after Kansas City, pulled there best players in playoffs first game, to avoid any injuries f that, I am also sick of bad refs, and all the bullshit celebration after scoring, overpaid jerkoffs, and stop the sports betting its obviously the issue, do it or lose fans, KABISH",2025-01-26 22:36:52.972000+00:00,post
468,victoria_56,Victoria,109875466344682540,victoria_56,I hear NFL playoffs are rigged and the two teams going to the Super Bowl will be the Eagles and Chiefs. I would like to see the Eagles and Bills,2025-01-26 23:41:34.560000+00:00,post
471,vickieski,Vickie Dembinski,107834840758287063,vickieski,"For some time, I feel the message to the public which many people know, the NFL, NBA etc are all manipulated. Lower terms ""rigged"", anywhere there is big money, we all know who it is controlled by. Like the Romans said, ""Make them an arena"". Only a few big players, making the big money (hush money) get the play book way before the season starts and what is going to happen and to make it happen. I believe many college sports are also, esp football $$$ .. but does not involve the players, it involves the coaches and the college, based on making the right plays or plays to make it fail on purpose. Lets sit back and watch.. plus the Super Bowl is the highest human trafficking event all year. Since White Hats have been in control, they have been scooping up many criminals involved t...",2025-02-08 00:19:42.311000+00:00,post
