In [1]:
import os
import sys
import requests
import json

import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from tqdm import tqdm

# Shared font-size constant (presumably for plot axis labels; it is not used in the visible cells — TODO confirm).
AXIS_FONT_SIZE = 16

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# print(notebook_dir)
# Add the parent directory to the system path
# (presumably so the project-local modules imported right after this can be found — verify the repo layout).
sys.path.append(os.path.join(notebook_dir, '../'))

from data_processing import DataProcessing
from real_data_acquisition import OpenMeasuresDirector
from text_generation_models import TextGenerationModelFactory

In [2]:
# Widen text columns so long post bodies are readable when frames are displayed.
# The fully-qualified option key is used on purpose: abbreviated keys are resolved by
# pattern matching and fail once more than one option matches the pattern.
pd.set_option('display.max_colwidth', 800)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

## Create prompt

In [3]:
# Prompt fragment: definition of a prediction as the 4-tuple (<p_s>, <p_t>, <p_d>, <p_o>).
# It is interpolated verbatim into `prompt`, so the text (including whitespace) is runtime data.
prediction_properties = """a prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:

    1. <p_s>, any source entity in the sports domain.
        - Can be a person (with a name) or a sports domain person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc), civilian.
        - Can only be an organization that is associated with the sports prediction.
    2. <p_t>, any target entity in the sports domain.
	    - Can be a person (with a name) or a sports person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc).
        - Can only be an organization that is associated with the sports prediction.
    3. <p_d>, date or time range when <p> is expected to come to fruition or when one should observe the <p>.
        - Forecast can range from a second to anytime in the future.
        - Answers the questions: "How far to go out from today?" or "Where to stop?".
    4. <p_o>, sports prediction outcome.
        - Details relevant details such as outcome, a quantifiable metric, or slope.
        - Some example outcomes are the following: score, touchdown, goal, points, win, lose, etc.
"""

# Prompt fragment: six sentence templates showing how the four prediction properties
# (<p_s>, <p_t>, <p_d>, <p_o>) can be arranged; interpolated verbatim into `prompt`.
prediction_structures = """Here are how some sports predictions are structured:
    - sports template 1: <p_s> forecasts that the <p_o> at <p_t> potentially decrease in <p_d>.
    - sports template 2: On <p_d>, <p_s> speculates the <p_o> at <p_t> will likely increase.
    - sports template 3: <p_s> predicts on <p_d>, the <p_t> <p_o> may rise.
    - sports template 4: According to <p_s>, the <p_o> at <p_t> would fall in <p_d>.
    - sports template 5: In <p_d>, <p_s> envisions that <p_t> <p_o> has some probability to remain stable.
    - sports template 6: <p_t> <p_o> should stay same <p_d>, according to <p_s>. 
"""

# Prompt fragment: three example sentences for each of the six templates; interpolated
# verbatim into `prompt`. The wording is runtime prompt text and is intentionally left as written.
sport_examples = """Here are some corresponding examples of sports predictions:
    - sport examples for template 1:
        1. Coach Lisa Martinez predicts that the touchdown rate at the Miami Dolphins will fall in 2020 of October.
        2. Analyst Mark Johnson forecasts that the goal average at Manchester United will stay the same in November 2025.
        3. Ryan forecasts win percentage he has for soccer will go up in 12/25/2016.
    - sport examples for template 2:
        1. On Sep 20, 2100, Coach Maria Lopez suggests that the score average at the Chicago Bulls is climbing.
        2. On 9/12/2025, Analyst David Kim anticipates the touchdown rate at the Kansas City Chiefs will likely surge.
        3. On October 8, 2123, Detravious foresees that the win probability he has for rugby is expected to trend downward.
    - sport examples for template 3:
        1. Coach Elena Ruiz predicts on 9/22/2025, the goal count at Real Madrid will climb.
        2. Analyst Marcus Lee forecasts that on Sep 30, 2055, the point average at the Golden State Warriors will be higher.
        3. George Jr. estimates that on October 15, 2035, the win ratio for games he has will disimprove.
    - sport examples for template 4:
        1. According to Coach Sarah Nguyen, the scoring average at the Dallas Mavericks is expected to dip in Sep 2021.
        2. According to Analyst Trevor Simmons, the touchdown rate at the Green Bay Packers will increase in 10/2025.
        3. According to Manchester United, the win percentage at Manchester United is projected to drop in October 2034.
    - sport examples for template 5:
        1. In 9/2025, Coach Miguel Torres envisions that the goal average at Paris Saint-Germain will hold steady.
        2. In October 2056, Analyst Fiona Bennett anticipates that the win rate at the Toronto Raptors will decrease slightly.
        3. In Sep 2086, Calvin foresees that the points per game he has in football will gradually increase.
    - sport examples for template 6:
        1. The goal count at Liverpool FC will surge in Sep 2012, according to Coach Daniel Alvarez.
        2. The win percentage at the Chicago Bears will taper off in October 2025, according to Analyst Priya Sharma.
        3. The scoring average on Arnolds footbal team will remain steady in 10/2034, according to Arnold.
"""

# Prompt fragment: extra constraints appended to the end of the query-generation prompt.
sport_requirements = """- Should be based on real-world sports.
    - Suppose the time when <p> was made is during any season of sports.
    - Include reports from all sports professionals, coaches, or any type of sport entity.
"""

# Hand-written boolean query: it is quoted inside `prompt` as the starting point to improve,
# and it is also the query string passed to the data-acquisition call later in this notebook.
initial_query_string = """(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)"""

In [4]:
# Assemble the query-generation prompt from the fragments defined in the previous cell.
# The text is sent as-is to an LLM, so it is runtime data; the trailing bare `prompt`
# expression displays the assembled string as the cell output.
prompt = f"""Generate a query string using boolean logic and keywords (related to sports predictions) to search a database. I define {prediction_properties} 
{prediction_structures}
{sport_examples}
These sports predictions can be found in social media data at large. My task here is to query the site to find relatable sentences (that aren't predictions) and prediction sentences). 
My initial query string: {initial_query_string}. Don't use brackets to wrap words nor to use quotation marks to wrap words. 
I need you to generate an improved (better prediction precision) query string taking into consideration the above along with {sport_requirements} \n Don't generate anything other than a new/imporved query string!
"""
prompt

'Generate a query string using boolean logic and keywords (related to sports predictions) to search a database. I define a prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n    1. <p_s>, any source entity in the sports domain.\n        - Can be a person (with a name) or a sports domain person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc), civilian.\n        - Can only be an organization that is associated with the sports prediction.\n    2. <p_t>, any target entity in the sports domain.\n\t    - Can be a person (with a name) or a sports person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc).\n        - Can only be an organization that is associated with the sports prediction.\n    3. <p_d>, date or time range when <p> is expected to come to fruition or when one should observe the <p>.\n        - Forecast 

## Query for data

- For the query string, either have the user define `initial_query_string` or have any LLM in `text_generation_models.py` generate one via the prompt. Either way, the system is set up for user feedback: check the query string (and the URL used to fetch the data). If you are happy with it, type 'agree'. If not, add details; the details will be appended to the old prompt.

In [9]:
# Configuring parameters
terms_for_query = OpenMeasuresDirector
# 'user' -> the hand-written initial_query_string is used (the run log reports "USER SPECIFY QUERY STRINGS")
query_string_by = 'user'
limit = 1000

# Date window.
# NFL playoffs (The NFL playoffs for the 2024 season began on January 11, 2025, and concluded with Super Bowl LIX on February 9 at Caesars Superdome in New Orleans, Louisiana | WIKI)
since = '2024-09-05'
until = '2025-02-09'

# Elasticsearch across all fields
esquery = 'query_string'

# Sites to query. Alternatives tried previously:
#   ["tiktok_comment", "bluesky", "truth_social"]
#   ["truth_social"]
sites = ["bluesky", "truth_social"]

# One request per site; results are appended in the same order as `sites`.
hits_per_site_dfs = []
for site in sites:
    hits_for_site_df = OpenMeasuresDirector.construct_from_dataset(
        query_string=initial_query_string,
        query_string_by=query_string_by,
        limit=limit,
        site=site,
        start_date=since,
        end_date=until,
        querytype=esquery,
    )
    hits_per_site_dfs.append(hits_for_site_df)

### RESET ###
### USER SPECIFY QUERY STRINGS ###
	Query String: (<class 'str'>, '(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)')

### SET QUERY ###
	Query's URL: http://api.smat-app.com/content?term=(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)&limit=1000&site=bluesky&since=2024-09-05&until=2025-02-09&querytype=query_string

### GET RAW HITS ###
	Hits: 200
Hits retrieved:
                  $type                            author  \
0    app.bsky.feed.post  did:plc:kqbsolnzpw3ixx2ea65qfd3s   
1    app.bsky.feed.post  did:plc:2ita5o32hmjxhnzg4a7sxe73   
2    app.bsky.feed.post  did:plc:37lm46sbrzz622ij4vucrceo   
3    app.bsky.feed.post  did:plc:pn7bt5fej5fep3uywocvak5d   
4    app.bsky.feed.post  did:plc:ltt57jg2sdeqzyqikirkf66s   
..                  ...                               ...   
625  app.bsky.feed.post  did:plc:cywy7vw3shrn7vp3ybgfrx33   
626  app.bsky.feed.post  did:plc:nrr6yppar26qag7p2q3rawp7   
627  app.bsky.feed.post  did:plc:m67kp6uoter7aeftq5nkzncm   
628  app.

In [10]:
# Inspect the raw hits: one entry per queried site, in the order of `sites`.
# Each entry is later passed to DataProcessing.concat_dfs, i.e. it is itself a collection of frames.
hits_per_site_dfs

[[                  $type                            author  \
  0    app.bsky.feed.post  did:plc:kqbsolnzpw3ixx2ea65qfd3s   
  1    app.bsky.feed.post  did:plc:2ita5o32hmjxhnzg4a7sxe73   
  2    app.bsky.feed.post  did:plc:37lm46sbrzz622ij4vucrceo   
  3    app.bsky.feed.post  did:plc:pn7bt5fej5fep3uywocvak5d   
  4    app.bsky.feed.post  did:plc:ltt57jg2sdeqzyqikirkf66s   
  ..                  ...                               ...   
  625  app.bsky.feed.post  did:plc:cywy7vw3shrn7vp3ybgfrx33   
  626  app.bsky.feed.post  did:plc:nrr6yppar26qag7p2q3rawp7   
  627  app.bsky.feed.post  did:plc:m67kp6uoter7aeftq5nkzncm   
  628  app.bsky.feed.post  did:plc:eyaz2kbzyxmg5hgkhb3w7s25   
  629  app.bsky.feed.post  did:plc:5jycdvkvabnon545dxcisari   
  
                                                                                                                                                                                                                                                 

- In the JSON I'm saving, collect more metadata — use all columns given by each site.
1. Time for each query using the query process with Dr. Grant.


In [11]:
# NOTE(review): this repeats the inspection cell above and prints the same nested list of
# frames again; it is a candidate for removal (or for a compact summary) — confirm.
hits_per_site_dfs

[[                  $type                            author  \
  0    app.bsky.feed.post  did:plc:kqbsolnzpw3ixx2ea65qfd3s   
  1    app.bsky.feed.post  did:plc:2ita5o32hmjxhnzg4a7sxe73   
  2    app.bsky.feed.post  did:plc:37lm46sbrzz622ij4vucrceo   
  3    app.bsky.feed.post  did:plc:pn7bt5fej5fep3uywocvak5d   
  4    app.bsky.feed.post  did:plc:ltt57jg2sdeqzyqikirkf66s   
  ..                  ...                               ...   
  625  app.bsky.feed.post  did:plc:cywy7vw3shrn7vp3ybgfrx33   
  626  app.bsky.feed.post  did:plc:nrr6yppar26qag7p2q3rawp7   
  627  app.bsky.feed.post  did:plc:m67kp6uoter7aeftq5nkzncm   
  628  app.bsky.feed.post  did:plc:eyaz2kbzyxmg5hgkhb3w7s25   
  629  app.bsky.feed.post  did:plc:5jycdvkvabnon545dxcisari   
  
                                                                                                                                                                                                                                                 

In [20]:
# Pair every queried site with its hits so each frame is looked up by site NAME.
# Positional indices (0, 1, ...) silently point at the wrong site, or raise IndexError,
# as soon as the `sites` list is edited.
hits_by_site = dict(zip(sites, hits_per_site_dfs))

# tiktok_dfs = hits_by_site["tiktok_comment"]
# tiktok_df = DataProcessing.concat_dfs(tiktok_dfs)

bluesky_dfs = hits_by_site["bluesky"]
bluesky_df = DataProcessing.concat_dfs(bluesky_dfs)

true_social_dfs = hits_by_site["truth_social"]
true_social_df = DataProcessing.concat_dfs(true_social_dfs)
# Expose the cleaned post body under 'text', the column detect_predictions_with_llms reads.
true_social_df['text'] = true_social_df['content_cleaned']
true_social_df.tail(3)

Unnamed: 0,account,bookmarked,card,collected_by,content,content_cleaned,created_at,datatype,downvotes_count,emojis,...,tv,reblog,editable,edited_at,version,in_reply_to,title,openmeasures_media,Query Params,Site
20,"{'acct': 'vickieski', 'display_name': 'Vickie Dembinski', 'id': '107834840758287063', 'username': 'vickieski'}",False,,smat-scrapy-crawlers,"<p>For some time, I feel the message to the public which many people know, the NFL, NBA etc are all manipulated. Lower terms ""rigged"", anywhere there is big money, we all know who it is controlled by. Like the Romans said, ""Make them an arena"". Only a few big players, making the big money (hush money) get the play book way before the season starts and what is going to happen and to make it happen. I believe many college sports are also, esp football $$$ .. but does not involve the players, it involves the coaches and the college, based on making the right plays or plays to make it fail on purpose. Lets sit back and watch.. plus the Super Bowl is the highest human trafficking event all year. Since White Hats have been in control, they have been scooping up many criminals involve...","For some time, I feel the message to the public which many people know, the NFL, NBA etc are all manipulated. Lower terms ""rigged"", anywhere there is big money, we all know who it is controlled by. Like the Romans said, ""Make them an arena"". Only a few big players, making the big money (hush money) get the play book way before the season starts and what is going to happen and to make it happen. I believe many college sports are also, esp football $$$ .. but does not involve the players, it involves the coaches and the college, based on making the right plays or plays to make it fail on purpose. Lets sit back and watch.. plus the Super Bowl is the highest human trafficking event all year. 
Since White Hats have been in control, they have been scooping up many criminals involved t...",2025-02-08T00:19:42.311+00:00,post,,[],...,,,,,,,,"[{'_hash': '00d0f292df771a8839799409f19c9fddc9b94101', 'thumbnail_hash': '2869e7dc97b3797481e043643dd0ad8fca89e31a', 'thumbnail_mimetype': 'image/jpeg', 'mimetype': 'image/jpeg', 'source_id': '113965398687669786', 'source_url': 'https://static-assets-1.truthsocial.com/tmtg:prime-ts-assets/media_attachments/files/113/965/398/687/669/786/original/335b082a95bbbd03.jpg', 'enrichments': [{'service': 'blip', 'calculated_date': '2025-02-08T06:36:03.195116+00:00', 'type': 'image_caption', 'value': 'a screenshot of a screenshot of a screenshot of a game'}, {'service': 'tesseract', 'calculated_date': '2025-02-08T06:36:03.195128+00:00', 'type': 'OCR', 'value': 'Cy LE a ea Rea CoA Mie e Tan ean eal as (-L FATMMEsTo) (G7 W-Es3) ee eer} Q'}]}, {'_hash': '0a2eabdb13cbe2b6e94b1d772c7cdf9f8ee1a8e2', 't...","{'term': '(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)', 'limit': 1000, 'site': 'truth_social', 'since': '2024-09-05', 'until': '2025-02-09', 'querytype': 'query_string', 'model': 'user'}",truth_social
21,"{'acct': 'billrogers76', 'display_name': 'W A R Liberty', 'id': '107910638877822210', 'username': 'billrogers76'}",False,,smat-scrapy-crawlers,"<p>It is just boys playing a gameüèàüòú<br/>How much do NFL players get paid in the playoffs? The CBA also spells out how much players are paid in each *round of the postseason. Here are the üòµ‚Äçüí´bonuses for the 2024 regular season:<br/>\t‚Ä¢\tDivision winner: $54,500<br/>\t‚Ä¢\tWild Card/first-round *bye: $49,500<br/>\t‚Ä¢\tDivisional Round: $54,500<br/>\t‚Ä¢\tConference Championship: $77,000 <br/>\t‚Ä¢\tSuper Bowl *losing team: $96,000<br/>\t‚Ä¢\tSuper Bowl winning team: $171,000</p>","It is just boys playing a gameüèàüòúHow much do NFL players get paid in the playoffs? The CBA also spells out how much players are paid in each *round of the postseason. Here are the üòµ‚Äçüí´bonuses for the 2024 regular season:\t‚Ä¢\tDivision winner: $54,500\t‚Ä¢\tWild Card/first-round *bye: $49,500\t‚Ä¢\tDivisional Round: $54,500\t‚Ä¢\tConference Championship: $77,000 \t‚Ä¢\tSuper Bowl *losing team: $96,000\t‚Ä¢\tSuper Bowl winning team: $171,000",2025-02-08T07:08:42.339+00:00,post,,[],...,,,,,,,,,"{'term': '(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)', 'limit': 1000, 'site': 'truth_social', 'since': '2024-09-05', 'until': '2025-02-09', 'querytype': 'query_string', 'model': 'user'}",truth_social
22,"{'acct': 'nicolespink70', 'display_name': 'Nikki', 'id': '112194771027584744', 'username': 'nicolespink70'}",False,,smat-scrapy-crawlers,"<p>Really wish you would have Elon look into the referees that are in the pockets for KC. They should have never even made it to the playoffs. Many fans are disgusted with NFL and helping KC because of Taylor Swift! Many won‚Äôt watch Super Bowl because of it and are loosing fans. Remember when the fans boycotted NFL because of disrespect while our National Anthem was being played, and they did it for you as well. Please look into this so NFL doesn‚Äôt ruin the love of the great American game.Thank you for everything you do. You are Gods blessing.‚úùÔ∏èüôè‚ô•Ô∏è</p>","Really wish you would have Elon look into the referees that are in the pockets for KC. They should have never even made it to the playoffs. Many fans are disgusted with NFL and helping KC because of Taylor Swift! Many won‚Äôt watch Super Bowl because of it and are loosing fans. Remember when the fans boycotted NFL because of disrespect while our National Anthem was being played, and they did it for you as well. Please look into this so NFL doesn‚Äôt ruin the love of the great American game.Thank you for everything you do. You are Gods blessing.‚úùÔ∏èüôè‚ô•Ô∏è",2025-02-08T09:15:11.973+00:00,comment,0.0,[],...,,,False,,1.0,,,,"{'term': '(NFL OR nfl) AND (playoffs) AND (Super Bowl LIX)', 'limit': 1000, 'site': 'truth_social', 'since': '2024-09-05', 'until': '2025-02-09', 'querytype': 'query_string', 'model': 'user'}",truth_social


In [None]:
# save
# NOTE(review): the other save_to_json call in this notebook passes (data, save_dir, site);
# confirm that this single-argument form is supported — this cell has no execution count.

DataProcessing.save_to_json(bluesky_df)

## Detect Prediction Label with LLM + Majority Vote

In [13]:
tgmf = TextGenerationModelFactory()

# Groq Cloud (https://console.groq.com/docs/overview)
# Instances are created in the listed order and bound to one name per model id.
(
    gemma_29b_generation_model,
    llama_318b_instant_generation_model,
    llama_3370b_versatile_generation_model,
    llama_guard_4_12b_generation_model,
) = (
    tgmf.create_instance(model_id)
    for model_id in (
        'gemma2-9b-it',
        'llama-3.1-8b-instant',
        'llama-3.3-70b-versatile',
        'meta-llama/llama-guard-4-12b',
    )
)

# Voters used for the majority vote, in query order.
# NOTE(review): llama-guard-4-12b looks like a safety-classification model; confirm it
# actually answers with the prediction labels requested in the labelling prompt.
models = [
    gemma_29b_generation_model,
    llama_318b_instant_generation_model,
    llama_3370b_versatile_generation_model,
    llama_guard_4_12b_generation_model,
]

# Alternative voter sets:
# models = [gemma_29b_generation_model, llama_318b_instant_generation_model, llama_3370b_versatile_generation_model]
# models = [gemma_29b_generation_model, llama_318b_instant_generation_model, llama_guard_4_12b_generation_model]

In [14]:
def count_label(current_label, label_to_counts):
    """
    Add one vote for a label to a running label -> count tally.

    Parameters
    ----------
    current_label : str
        Label produced by one LLM; leading/trailing whitespace is stripped before counting.
    label_to_counts : dict
        Data with the model_label : count. Updated in place.

    Returns
    -------
    None
        Only update dictionary: label_to_counts
    """
    current_label = current_label.strip()
    # A label that has not been seen yet starts at 0, so its first vote stores 1.
    label_to_counts[current_label] = label_to_counts.get(current_label, 0) + 1

In [15]:
def get_majority_vote(data):
    """
    Given a dictionary of label counts, compute the majority vote.

    Parameters
    ----------
    data : dict
        Data with the model_label : count

    Returns
    -------
    str
        - 'None as no labels were provided' when ``data`` is empty
          (previously this raised ValueError from ``max()``).
        - 'None as all are different labels' when the highest count is 1.
        - Otherwise the label with the highest count. When several labels share
          that count, the one that appears last in ``data`` is returned.
    """
    # No votes at all: report that explicitly instead of crashing on max() of an empty sequence.
    if not data:
        return 'None as no labels were provided'

    mode = max(data.values())
    if mode == 1:
        return 'None as all are different labels'

    majority_vote_label = None
    for key, val in data.items():
        if val == mode:
            # No early exit on purpose: the last label holding the top count wins a tie.
            majority_vote_label = key

    return majority_vote_label

In [16]:
def detect_predictions_with_llms(df: pd.DataFrame, notebook_dir: str, site: str) -> list:
    """
    Ask every LLM in the module-level ``models`` list whether each sentence in ``df`` is a
    prediction, and attach the majority-vote label to every per-model record.

    Parameters
    ----------
    df : pd.DataFrame
        Data with the sentences we want to label in a 'text' column. All other columns
        are carried along as per-sentence metadata.
    notebook_dir : str
        The location of this notebook, so we can save files using relative paths.
        Output goes to ``<parent of notebook_dir>/data/open_measures/<site>s``.
    site : str
        Source of the data ('tiktok', 'bluesky', 'truth social', 'llm generated', etc).

    Returns
    -------
    list of dict
        One record per (sentence, model, label line) with the keys
        'sentence', 'model', 'label', 'meta_data' and 'majority_vote'.

    Notes
    -----
    - Every non-empty line of a model response is counted as its own label and vote.
    - The records accumulated so far are passed to ``DataProcessing.save_to_json``
      after every batch of sentences.
    - A sentence for which no model returns any non-empty line contributes no records
      and is skipped, instead of failing in the majority-vote step.
    """
    labels = []
    batch_size = 10
    show_data = 1  # counter used to echo only the first three sentences

    sentences = DataProcessing.df_to_list(df, col='text')
    meta_data_df = df.drop(columns=['text'])

    # The output directory does not depend on the batch, so resolve it once.
    save_dir = os.path.join(os.path.dirname(notebook_dir), 'data', 'open_measures', f"{site}s")

    for batch_idx in tqdm(range(0, len(sentences), batch_size)):
        batch = sentences[batch_idx:batch_idx+batch_size]
        for sentences_idx, sentence in enumerate(batch):
            label_to_count = {}
            # Row lookup is positional: assumes df_to_list keeps one entry per row,
            # in row order — TODO confirm against DataProcessing.
            sent_meta_data_series = meta_data_df.iloc[batch_idx + sentences_idx]
            sent_meta_data = sent_meta_data_series.to_dict()
            if show_data <= 3:
                print(f">>>Sentence: {sentence}")
                show_data += 1
            prompt = f"Given this sentence ({sentence}), state if the sentence is a prediction, not a prediction, or not enough information. Also, if it is a prediction, state that it is prediction along with the prediction domain if any are finance, health, weather, policy, sports, or miscellaneous. Do not explain or provide any other details. Remember, your responses are discrete corresponding to the states in the list; [prediction-finance, prediction-health, prediction-weather, prediction-policy, prediction-sports, prediction-miscellaneous, not a prediction, not enough information]."

            sentence_labels = []
            for model in models:
                input_prompt = model.user(prompt)
                raw_text_llm_generation = model.chat_completion([input_prompt])

                for line in raw_text_llm_generation.split("\n"):
                    if line.strip():
                        label = line.strip()
                        count_label(label, label_to_count)

                        sentence_labels.append({
                            "sentence": sentence,
                            "model": model.__name__(),
                            "label": label,
                            "meta_data": sent_meta_data
                        })

            # Nothing was returned for this sentence: there is no record to annotate,
            # and taking a majority over an empty tally would fail.
            if not sentence_labels:
                continue

            majority_vote_label = get_majority_vote(label_to_count)

            # Attach majority vote to each model's label
            for entry in sentence_labels:
                entry["majority_vote"] = majority_vote_label
                labels.append(entry)

        print()
        # Save after every batch. `labels` holds everything collected so far,
        # not only the records of the current batch.
        DataProcessing.save_to_json(labels, save_dir, site)
    return labels

In [17]:
# tiktok_site = tiktok_df['Site'][0]
# llms_generated_for_tiktok_comments = detect_predictions_with_llms(tiktok_df, notebook_dir, tiktok_site)
# llms_generated_for_tiktok_comments_df = pd.DataFrame(llms_generated_for_tiktok_comments).rename(columns={'sentence': 'Text', 'model': 'Model', 'label': 'Label'})
# llms_generated_for_tiktok_comments_df['Site'] = tiktok_site
# llms_generated_for_tiktok_comments_df.head(7)

In [18]:
# Maps the record keys produced by detect_predictions_with_llms to display column names.
updated_column_names = {
    'sentence': 'Text',
    'model': 'Model',
    'label': 'Label',
    'meta_data': 'Meta Data',
    'majority_vote': 'Majority Vote',
}

In [21]:
# Read the site name from the first row by POSITION. `bluesky_df['Site'][0]` is a label
# lookup: it raises KeyError when no row is labelled 0 and returns a Series when several
# rows share that label, which can happen after frames are concatenated.
bluesky_site = bluesky_df['Site'].iloc[0]
# bluesky_site = 'bluesky'
llms_generated_for_bluesocial_comments = detect_predictions_with_llms(bluesky_df, notebook_dir, bluesky_site)
llms_generated_for_bluesocial_comments_df = pd.DataFrame(llms_generated_for_bluesocial_comments).rename(columns=updated_column_names)
llms_generated_for_bluesocial_comments_df.head(7)

  0%|          | 0/63 [00:00<?, ?it/s]

>>>Sentence: Yeah, Jones (whom I loathe) has this Super Bowl winners or nothing attitude and it's so stupid. If your team makes it to playoffs give em some props. They are just not Super Bowl good and never will be. #NFL
>>>Sentence: FWIW: During the Super-Bowl era, 255 teams have started 0-3; six of them have gone on to make the playoffs. The last 0-3 team to make the playoffs was the 2018 Houston Texans, who were quarterbacked by Deshaun Watson.
>>>Sentence: @AdamSchefter tweeted
FWIW: During the Super-Bowl era, 255 teams have started 0-3; six of them have gone on to make the playoffs. The last 0-3 team to make the playoffs was the 2018 Houston Texans, who were quarterbacked by Deshaun Watson. https://t.co/GM5pclYr0G


  2%|‚ñè         | 1/63 [00:11<11:56, 11.56s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-61.json


  3%|‚ñé         | 2/63 [00:22<11:18, 11.13s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-62.json


  5%|‚ñç         | 3/63 [00:34<11:34, 11.58s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-63.json


  6%|‚ñã         | 4/63 [00:57<15:51, 16.12s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-64.json


  8%|‚ñä         | 5/63 [01:20<17:50, 18.46s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-65.json


 10%|‚ñâ         | 6/63 [01:44<19:30, 20.53s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-66.json


 11%|‚ñà         | 7/63 [02:06<19:36, 21.02s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-67.json


 13%|‚ñà‚ñé        | 8/63 [02:29<19:38, 21.43s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-68.json


 14%|‚ñà‚ñç        | 9/63 [02:51<19:30, 21.69s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-69.json


 16%|‚ñà‚ñå        | 10/63 [03:14<19:31, 22.10s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-70.json


 17%|‚ñà‚ñã        | 11/63 [03:36<19:16, 22.24s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-71.json


 19%|‚ñà‚ñâ        | 12/63 [03:59<18:59, 22.34s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-72.json


 21%|‚ñà‚ñà        | 13/63 [04:22<18:49, 22.60s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-73.json


 22%|‚ñà‚ñà‚ñè       | 14/63 [04:45<18:37, 22.81s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-74.json


 24%|‚ñà‚ñà‚ñç       | 15/63 [05:08<18:11, 22.74s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-75.json


 25%|‚ñà‚ñà‚ñå       | 16/63 [05:30<17:37, 22.49s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-76.json


 27%|‚ñà‚ñà‚ñã       | 17/63 [05:52<17:04, 22.27s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-77.json


 29%|‚ñà‚ñà‚ñä       | 18/63 [06:14<16:36, 22.15s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-78.json


 30%|‚ñà‚ñà‚ñà       | 19/63 [06:36<16:14, 22.14s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-79.json


 32%|‚ñà‚ñà‚ñà‚ñè      | 20/63 [06:58<15:51, 22.14s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-80.json


 33%|‚ñà‚ñà‚ñà‚ñé      | 21/63 [07:21<15:37, 22.32s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-81.json


 35%|‚ñà‚ñà‚ñà‚ñç      | 22/63 [07:44<15:24, 22.56s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-82.json


 37%|‚ñà‚ñà‚ñà‚ñã      | 23/63 [08:06<15:02, 22.56s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-83.json


 38%|‚ñà‚ñà‚ñà‚ñä      | 24/63 [08:29<14:46, 22.72s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-84.json


 40%|‚ñà‚ñà‚ñà‚ñâ      | 25/63 [08:52<14:18, 22.60s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-85.json


 41%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 26/63 [09:13<13:45, 22.30s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-86.json


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 27/63 [09:36<13:28, 22.45s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-87.json


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 28/63 [10:01<13:27, 23.06s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-88.json


 46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 29/63 [10:25<13:17, 23.45s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-89.json


 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 30/63 [10:48<12:52, 23.42s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-90.json


 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 31/63 [11:11<12:25, 23.30s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-91.json


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 32/63 [11:35<12:04, 23.39s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-92.json


 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 33/63 [11:58<11:40, 23.34s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-93.json


 54%|█████▍    | 34/63 [12:24<11:36, 24.02s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-94.json


 56%|█████▌    | 35/63 [12:45<10:52, 23.29s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-95.json


 57%|█████▋    | 36/63 [13:07<10:18, 22.91s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-96.json


 59%|█████▊    | 37/63 [13:30<09:53, 22.83s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-97.json


 60%|██████    | 38/63 [13:52<09:23, 22.54s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-98.json


 62%|██████▏   | 39/63 [14:16<09:13, 23.04s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-99.json


 63%|██████▎   | 40/63 [14:38<08:42, 22.71s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-100.json


 65%|██████▌   | 41/63 [15:01<08:21, 22.78s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-101.json


 67%|██████▋   | 42/63 [15:25<08:07, 23.19s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-102.json


 68%|██████▊   | 43/63 [15:49<07:45, 23.28s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-103.json


 70%|██████▉   | 44/63 [16:11<07:16, 22.97s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-104.json


 71%|███████▏  | 45/63 [16:34<06:54, 23.00s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-105.json


 73%|███████▎  | 46/63 [16:56<06:26, 22.74s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-106.json


 75%|███████▍  | 47/63 [17:19<06:04, 22.80s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-107.json


 76%|███████▌  | 48/63 [17:41<05:39, 22.65s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-108.json


 78%|███████▊  | 49/63 [18:03<05:14, 22.47s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-109.json


 79%|███████▉  | 50/63 [18:28<05:01, 23.19s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-110.json


 81%|████████  | 51/63 [18:51<04:38, 23.20s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-111.json


 83%|████████▎ | 52/63 [19:14<04:13, 23.06s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-112.json


 84%|████████▍ | 53/63 [19:37<03:48, 22.86s/it]


	Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-113.json


 84%|████████▍ | 53/63 [20:29<03:51, 23.19s/it]


RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for model `llama-3.3-70b-versatile` in organization `org_01jf12p7h2f9d8jj9h5fxm2h5d` service tier `on_demand` on tokens per day (TPD): Limit 100000, Used 100014, Requested 145. Please try again in 2m18.065s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

In [None]:
# Take the site label from the first row of `true_social_df`.
# `.iloc[0]` selects by position, so this works even when the frame's index
# does not contain the label 0 (e.g. after filtering or concatenating without
# `reset_index`), where the label-based `['Site'][0]` would raise KeyError.
true_social_site = true_social_df['Site'].iloc[0]

# NOTE(review): `detect_predictions_with_llms` is defined earlier in the
# notebook; it presumably calls the LLM once per row, so this cell can be slow
# and may hit provider rate limits -- confirm before re-running.
llms_generated_for_true_social_comments = detect_predictions_with_llms(true_social_df, notebook_dir, true_social_site)

# Tabulate the returned records and apply the column renames in `updated_column_names`.
llms_generated_for_true_social_comments_df = pd.DataFrame(llms_generated_for_true_social_comments).rename(columns=updated_column_names)

# Preview the first 7 rows.
llms_generated_for_true_social_comments_df.head(7)

In [None]:
# Display the complete frame (the preceding cell previews only the first 7 rows via `.head(7)`).
llms_generated_for_true_social_comments_df