# Analyze Real World Social Media Data: Bluesky

1. WEBSITE: [About Bluesky via Open Measures](https://openmeasures.io/platform)

In [1]:
import os
import sys

from tqdm import tqdm

# Resolve the notebook's working directory; the data-loading cell builds its
# input path relative to this value.
notebook_dir = os.getcwd()

# Make the parent directory importable so the project-local module below can
# be found. The membership check keeps the cell idempotent: re-running it does
# not keep appending duplicate entries to sys.path.
parent_dir = os.path.join(notebook_dir, '../')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from data_processing import DataProcessing

## Load Data

In [2]:
# Directory that holds the raw Bluesky exports, relative to the notebook.
base_path = os.path.join(notebook_dir, '../', 'data/open_measures/', 'bluesky_raw_data/')

# os.listdir() returns entries in arbitrary order and also lists hidden files
# and sub-directories (e.g. .DS_Store, .ipynb_checkpoints). Sort the names and
# keep only regular, non-hidden files so that the load order -- and therefore
# the row order after concatenation -- is the same on every run and machine.
files = sorted(
    name
    for name in os.listdir(base_path)
    if not name.startswith('.') and os.path.isfile(os.path.join(base_path, name))
)

# One DataFrame per input file, in sorted file-name order.
dfs = []

for file in tqdm(files):
    full_path = os.path.join(base_path, file)
    df = DataProcessing.load_from_file(full_path, 'csv')
    dfs.append(df)

dfs

100%|██████████| 7/7 [00:00<00:00, 65.99it/s]


[     Unnamed: 0               $type                            author  \
 0             0  app.bsky.feed.post  did:plc:mcb6n67plnrlx4lg35natk2b   
 1             1  app.bsky.feed.post  did:plc:mcb6n67plnrlx4lg35natk2b   
 2             2  app.bsky.feed.post  did:plc:a2ijicgkwatuhlfss36lvwfx   
 3             3  app.bsky.feed.post  did:plc:mcb6n67plnrlx4lg35natk2b   
 4             4  app.bsky.feed.post  did:plc:mcb6n67plnrlx4lg35natk2b   
 ..          ...                 ...                               ...   
 651         651  app.bsky.feed.post  did:plc:cywy7vw3shrn7vp3ybgfrx33   
 652         652  app.bsky.feed.post  did:plc:nrr6yppar26qag7p2q3rawp7   
 653         653  app.bsky.feed.post  did:plc:m67kp6uoter7aeftq5nkzncm   
 654         654  app.bsky.feed.post  did:plc:eyaz2kbzyxmg5hgkhb3w7s25   
 655         655  app.bsky.feed.post  did:plc:5jycdvkvabnon545dxcisari   
 
                                                    cid  \
 0    zdpuAykzkHo8uZtsa4qZyQTtSePWRbHoKAJgxYwxqcrTx

## Data Processing

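+ The loaded data is stored as a list of DataFrames: `dfs = [df_1, df_2, ..., df_n]`.
+ All DataFrames are concatenated into a single DataFrame.
+ `data_range_per_file` is each row's index within its original file: values 0 to n_1 belong to df_1, values 0 to n_2 belong to df_2, etc.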

In [3]:
# Combine the per-file frames into one, then give the leftover per-file index
# column ("Unnamed: 0") a descriptive name. rename() returns a new frame, so
# no in-place mutation is needed.
df = DataProcessing.concat_dfs(dfs).rename(
    columns={"Unnamed: 0": "data_range_per_file"}
)
df

Unnamed: 0,data_range_per_file,$type,author,cid,createdAt,embed,facets,indexedAt,sequence,text,...,langs,reply,tags,bridgyOriginalText,bridgyOriginalUrl,openmeasures_meta,rkey,rkey_str,Query Params,Site
0,0,app.bsky.feed.post,did:plc:mcb6n67plnrlx4lg35natk2b,zdpuAykzkHo8uZtsa4qZyQTtSePWRbHoKAJgxYwxqcrTxx6fi,2024-01-13T23:50:40.300023+00:00,"{'$type': 'app.bsky.embed.images', 'images': [...","[{'$type': 'app.bsky.richtext.facet', 'feature...",2024-01-13T23:50:40.805738,570168565.0,Here's the Top 10 Trending Words over the Past...,...,,,,,,,,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
1,1,app.bsky.feed.post,did:plc:mcb6n67plnrlx4lg35natk2b,zdpuAt42bPRcafad5PjKbsxSXBETMXPbP7Tb8H5Mu3NV78QCw,2024-01-15T04:20:38.045294+00:00,"{'$type': 'app.bsky.embed.images', 'images': [...","[{'$type': 'app.bsky.richtext.facet', 'feature...",2024-01-15T04:26:24.470523,574404975.0,Here's the Top 10 Trending Words over the Past...,...,,,,,,,,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
2,2,app.bsky.feed.post,did:plc:a2ijicgkwatuhlfss36lvwfx,bafyreibsxwnoso4kq6oltibk3xzbhqbjtlx7bmwxi73rs...,2024-01-17T18:52:28Z,"{'$type': 'app.bsky.embed.external', 'external...","[{'$type': 'app.bsky.richtext.facet', 'feature...",,,Does defense even matter in the NFL postseason...,...,,,,,,,,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
3,3,app.bsky.feed.post,did:plc:mcb6n67plnrlx4lg35natk2b,zdpuAuxtMWrau1PvrZQC7mC1DokTWBdYsK4mCJQyCnySk5aWb,2024-01-21T04:30:46.145649+00:00,"{'$type': 'app.bsky.embed.images', 'images': [...","[{'$type': 'app.bsky.richtext.facet', 'feature...",2024-01-21T04:30:47.004301,596638410.0,Here's the Top 10 Trending Words over the Past...,...,,,,,,,,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
4,4,app.bsky.feed.post,did:plc:mcb6n67plnrlx4lg35natk2b,zdpuArf8hkuDPBSJgJcwyhc1Du712poHWk73dRam3TuVDzgy4,2024-01-21T23:20:45.897184+00:00,"{'$type': 'app.bsky.embed.images', 'images': [...","[{'$type': 'app.bsky.richtext.facet', 'feature...",2024-01-21T23:20:46.653710,599632070.0,Here's the Top 10 Trending Words over the Past...,...,,,,,,,,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4516,625,app.bsky.feed.post,did:plc:cywy7vw3shrn7vp3ybgfrx33,bafyreian7dv3sdwf35zyhpix3zknyu4c2omfo2qsbvxhs...,2025-02-08T18:53:17.000Z,"{'$type': 'app.bsky.embed.external', 'external...",,,,,...,,,,"<figure>\n <img alt=""Josh Allen, Buffalo ...",https://www.sbnation.com/golf/2025/2/8/2436163...,"{'collected_by': 'jetstream', 'last_scraped_ts...",,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
4517,626,app.bsky.feed.post,did:plc:nrr6yppar26qag7p2q3rawp7,bafyreigsq3sbskb7hwcafektwr7p5i75v2gwgd7a675ii...,2025-02-08T20:00:00.000Z,"{'$type': 'app.bsky.embed.external', 'external...",,,,,...,,,['Esportes'],“Com minha camiseta dos Eagles pendurada na po...,https://tribunaonline.com.br/esportes/taylor-s...,"{'collected_by': 'jetstream', 'last_scraped_ts...",,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
4518,627,app.bsky.feed.post,did:plc:m67kp6uoter7aeftq5nkzncm,bafyreigxo3xazujg34iw32q72dphpcsen665hebk4dwsr...,2025-02-08T22:00:00.720Z,"{'$type': 'app.bsky.embed.images', 'images': [...",[{'features': [{'$type': 'app.bsky.richtext.fa...,,,The Chiefs' George Karlaftis ranks No. 1 in pr...,...,,,,,,"{'collected_by': 'jetstream', 'last_scraped_ts...",,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
4519,628,app.bsky.feed.post,did:plc:eyaz2kbzyxmg5hgkhb3w7s25,bafyreigtzpa4bsolb5qkuc35735wcxgr4xsee3q46hgax...,2025-02-08T22:12:51.918Z,"{'$type': 'app.bsky.embed.external', 'external...",[{'features': [{'$type': 'app.bsky.richtext.fa...,,,www.bothteamsplayhard.wordpress.com/2025/02/08...,...,['en'],,,,,"{'collected_by': 'jetstream', 'last_scraped_ts...",,,{'term': '(NFL OR nfl) AND (playoffs) AND (Sup...,bluesky
