In [1]:
import os
import sys
import requests
import json

import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from tqdm import tqdm

AXIS_FONT_SIZE = 16

# Directory the notebook kernel is running in; later cells pass it on to
# build the output location for saved label files.
notebook_dir = os.getcwd()
# Make the modules that live one level above the notebook importable.
# The path is normalised and only appended when it is missing, so re-running
# this cell does not pile up duplicate (or '../'-suffixed) sys.path entries.
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

from data_processing import DataProcessing
from real_data_acquisition import OpenMeasuresDirector
from text_generation_models import TextGenerationModelFactory

In [2]:
# Notebook-wide pandas display settings: allow up to 800 characters per cell
# and lift the column/row truncation limits so retrieved posts show in full.
pd.set_option("display.max_colwidth", 800)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

## Generate Query Terms

In [3]:
# Building blocks for the LLM prompt that generates the search query.
# NOTE: everything inside these triple-quoted strings is sent to the model
# verbatim, so whitespace and wording are part of the runtime behaviour.

# Definition of a sports prediction as a 4-tuple: source, target, date, outcome.
prediction_properties = """a prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:

    1. <p_s>, any source entity in the sports domain.
        - Can be a person (with a name) or a sports domain person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc), civilian.
        - Can only be an organization that is associated with the sports prediction.
    2. <p_t>, any target entity in the sports domain.
	    - Can be a person (with a name) or a sports person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc).
        - Can only be an organization that is associated with the sports prediction.
    3. <p_d>, date or time range when <p> is expected to come to fruition or when one should observe the <p>.
        - Forecast can range from a second to anytime in the future.
        - Answers the questions: "How far to go out from today?" or "Where to stop?".
    4. <p_o>, sports prediction outcome.
        - Details relevant details such as outcome, a quantifiable metric, or slope.
        - Some example outcomes are the following: score, touchdown, goal, points, win, lose, etc.
"""

# Six sentence templates showing how the four properties can be arranged.
prediction_structures = """Here are how some sports predictions are structured:
    - sports template 1: <p_s> forecasts that the <p_o> at <p_t> potentially decrease in <p_d>.
    - sports template 2: On <p_d>, <p_s> speculates the <p_o> at <p_t> will likely increase.
    - sports template 3: <p_s> predicts on <p_d>, the <p_t> <p_o> may rise.
    - sports template 4: According to <p_s>, the <p_o> at <p_t> would fall in <p_d>.
    - sports template 5: In <p_d>, <p_s> envisions that <p_t> <p_o> has some probability to remain stable.
    - sports template 6: <p_t> <p_o> should stay same <p_d>, according to <p_s>. 
"""

# Three example sentences for each of the six templates above.
sport_examples = """Here are some corresponding examples of sports predictions:
    - sport examples for template 1:
        1. Coach Lisa Martinez predicts that the touchdown rate at the Miami Dolphins will fall in 2020 of October.
        2. Analyst Mark Johnson forecasts that the goal average at Manchester United will stay the same in November 2025.
        3. Ryan forecasts win percentage he has for soccer will go up in 12/25/2016.
    - sport examples for template 2:
        1. On Sep 20, 2100, Coach Maria Lopez suggests that the score average at the Chicago Bulls is climbing.
        2. On 9/12/2025, Analyst David Kim anticipates the touchdown rate at the Kansas City Chiefs will likely surge.
        3. On October 8, 2123, Detravious foresees that the win probability he has for rugby is expected to trend downward.
    - sport examples for template 3:
        1. Coach Elena Ruiz predicts on 9/22/2025, the goal count at Real Madrid will climb.
        2. Analyst Marcus Lee forecasts that on Sep 30, 2055, the point average at the Golden State Warriors will be higher.
        3. George Jr. estimates that on October 15, 2035, the win ratio for games he has will disimprove.
    - sport examples for template 4:
        1. According to Coach Sarah Nguyen, the scoring average at the Dallas Mavericks is expected to dip in Sep 2021.
        2. According to Analyst Trevor Simmons, the touchdown rate at the Green Bay Packers will increase in 10/2025.
        3. According to Manchester United, the win percentage at Manchester United is projected to drop in October 2034.
    - sport examples for template 5:
        1. In 9/2025, Coach Miguel Torres envisions that the goal average at Paris Saint-Germain will hold steady.
        2. In October 2056, Analyst Fiona Bennett anticipates that the win rate at the Toronto Raptors will decrease slightly.
        3. In Sep 2086, Calvin foresees that the points per game he has in football will gradually increase.
    - sport examples for template 6:
        1. The goal count at Liverpool FC will surge in Sep 2012, according to Coach Daniel Alvarez.
        2. The win percentage at the Chicago Bears will taper off in October 2025, according to Analyst Priya Sharma.
        3. The scoring average on Arnolds footbal team will remain steady in 10/2034, according to Arnold.
"""

# Extra constraints on the predictions the generated query should target.
sport_requirements = """- Should be based on real-world sports.
    - Suppose the time when <p> was made is during any season of sports.
    - Include reports from all sports professionals, coaches, or any type of sport entity.
"""

# Seed query term that the LLM is asked to improve on.
initial_query_string = """NBA"""

In [4]:
# Assemble the instruction for the LLM that writes the boolean search query:
# the prediction definition, the templates, the examples, the seed query and
# the extra requirements are interpolated into one prompt string.
prompt = f"""Generate a query string using boolean logic and keywords (related to sports predictions) to search a database. I define {prediction_properties} 
{prediction_structures}
{sport_examples}
These sports predictions can be found in social media data at large. My task here is to query the site to find relatable sentences (that aren't predictions) and prediction sentences). 
My initial query string: {initial_query_string}. Don't use brackets to wrap words nor to use quotation marks to wrap words. 
I need you to generate an improved (better prediction precision) query string taking into consideration the above along with {sport_requirements} \n Don't generate anything other than a new/imporved query string!
"""

# Echo the fully rendered prompt so it can be inspected before it is sent.
print(f"Prompt: {prompt}")

Prompt: Generate a query string using boolean logic and keywords (related to sports predictions) to search a database. I define a prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:

    1. <p_s>, any source entity in the sports domain.
        - Can be a person (with a name) or a sports domain person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc), civilian.
        - Can only be an organization that is associated with the sports prediction.
    2. <p_t>, any target entity in the sports domain.
	    - Can be a person (with a name) or a sports person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc).
        - Can only be an organization that is associated with the sports prediction.
    3. <p_d>, date or time range when <p> is expected to come to fruition or when one should observe the <p>.
        - Forecast can

In [5]:
# Query configuration for the Open Measures (SMAT) content endpoint.
# Parameter reference: https://api.smat-app.com/docs#/default/content_content_get
terms_for_query = OpenMeasuresDirector
query_string_by = 'llm'
limit = 1000
since = '2024-08-26'  # start of the search window
until = '2025-02-26'  # end of the search window
esquery = 'query_string'  # Elasticsearch across all fields

# Sites to search; narrow the list (e.g. to ["truth_social"]) for a quick run.
sites = ["tiktok_comment", "bluesky", "truth_social"]

# One result per site, collected in the same order as `sites`.
hits_per_site_dfs = []
for site in sites:
    hits_for_site_df = OpenMeasuresDirector.construct_from_dataset(
        query_string=prompt,
        query_string_by=query_string_by,
        limit=limit,
        site=site,
        start_date=since,
        end_date=until,
        querytype=esquery,
    )
    hits_per_site_dfs.append(hits_for_site_df)

### RESET ###
### LLM GENERATE QUERY STRINGS ###
	Query String: sports prediction AND (score OR touchdown OR goal OR points OR win OR lose) AND (NBA OR NFL OR MLB OR NHL OR soccer OR football OR tennis OR golf OR basketball OR baseball OR hockey)

### SET QUERY ###
	Query's URL: http://api.smat-app.com/content?term=sports prediction AND (score OR touchdown OR goal OR points OR win OR lose) AND (NBA OR NFL OR MLB OR NHL OR soccer OR football OR tennis OR golf OR basketball OR baseball OR hockey)&limit=1000&site=tiktok_comment&since=2024-08-26&until=2025-02-26&querytype=query_string

### REGENERATE QUERY ###

------

	Regenerations: 0
	Hits: 200
Hits retrieved:
        author            author_id  author_pin             aweme_id  \
0  geannyfendy  7253345897873851399       False  7466701767862963474   

                   cid  collect_stat          collected_by comment_language  \
0  7466820936068547345             0  smat-scrapy-crawlers               ms   

  comment_post_item_ids  cre

In [6]:
hits_per_site_dfs

[[        author            author_id  author_pin             aweme_id  \
  0  geannyfendy  7253345897873851399       False  7466701767862963474   
  
                     cid  collect_stat          collected_by comment_language  \
  0  7466820936068547345             0  smat-scrapy-crawlers               ms   
  
    comment_post_item_ids  create_time datatype  digg_count image_list  \
  0                  None   1738504743  comment           0       None   
  
     is_author_digged  is_comment_translatable label_list  no_show  \
  0             False                     True       None    False   
  
                                                                                                                                                                                                                                                                                                                                                                                                      

- In the JSON I'm saving, collect more metadata: use all columns given by each site.
1. Record the time for each query, using the query process with Dr. Grant.


In [7]:
len(hits_per_site_dfs)

3

In [None]:
# `hits_per_site_dfs` follows the order of `sites`:
# index 0 = tiktok_comment, 1 = bluesky, 2 = truth_social.
tiktok_dfs, bluesky_dfs, true_social_dfs = (hits_per_site_dfs[i] for i in range(3))

# Merge each site's retrieved frames into a single frame per site.
tiktok_df = DataProcessing.concat_dfs(tiktok_dfs)
bluesky_df = DataProcessing.concat_dfs(bluesky_dfs)
true_social_df = DataProcessing.concat_dfs(true_social_dfs)

# The Truth Social frame gets a `text` column copied from `content_cleaned`,
# because the labelling step reads the `text` column.
true_social_df['text'] = true_social_df['content_cleaned']
true_social_df.tail(3)

In [9]:
true_social_df["Query Params"][0]

{'term': 'sports prediction OR forecast OR speculate OR predict OR envision OR project OR anticipate OR estimate OR suggest OR "outcome will" OR "will likely" OR "may rise" OR "should stay" OR "is expected to"  AND (NBA OR NFL OR MLB OR NHL OR soccer OR football OR tennis OR golf OR basketball OR cricket OR rugby)',
 'limit': 1000,
 'site': 'truth_social',
 'since': '2024-08-26',
 'until': '2025-02-26',
 'querytype': 'query_string',
 'query_string': 'Generate a query string using boolean logic and keywords (related to sports predictions) to search a database. I define a prediction <p> = (<p_s>, <p_t>, <p_d>, <p_o>), where it consists of the following four properties:\n\n    1. <p_s>, any source entity in the sports domain.\n        - Can be a person (with a name) or a sports domain person such as a sports reporter, sports analyst, sports expert, sports top executive, sports senior level person, etc), civilian.\n        - Can only be an organization that is associated with the sports pr

In [10]:
len(bluesky_df.loc[0:, 'text'].to_list())

84

In [73]:
# Factory that builds the text-generation model wrappers used for labelling.
tgmf = TextGenerationModelFactory()

# Models served through Groq Cloud (https://console.groq.com/docs/overview)
gemma_29b_generation_model = tgmf.create_instance('gemma2-9b-it')
llama_318b_instant_generation_model = tgmf.create_instance('llama-3.1-8b-instant')
llama_3370b_versatile_generation_model = tgmf.create_instance('llama-3.3-70b-versatile')
llama_guard_4_12b_generation_model = tgmf.create_instance('meta-llama/llama-guard-4-12b')

# Models queried for every sentence. The 70B model is instantiated above but
# is currently left out of the active list.
# NOTE(review): in the recorded outputs llama-guard answers "safe" instead of
# one of the requested prediction labels — confirm it belongs in this list.
models = [
    gemma_29b_generation_model,
    llama_318b_instant_generation_model,
    llama_guard_4_12b_generation_model,
]

In [None]:
import json
import os


def get_next_file_number(directory, prefix, extensions=('.json', '.log', '.csv')):
    """
    Return the next free sequence number for files named ``<prefix>-<N><ext>``.

    Scans ``directory`` for entries that start with ``<prefix>-`` and end with
    one of ``extensions``, reads the integer ``N`` that sits between the dash
    and the first dot, and returns the highest ``N`` found plus one.

    Args:
        directory: Directory to scan.
        prefix: File-name prefix in front of the dash (e.g. ``"siteA"`` for
            ``siteA-3.json``).
        extensions: A single suffix or an iterable of suffixes to accept.

    Returns:
        int: ``max(N) + 1``; ``1`` when no file matches or when ``directory``
        does not exist yet.
    """
    # str.endswith only accepts a str or a tuple, so normalise lists/sets.
    if not isinstance(extensions, str):
        extensions = tuple(extensions)

    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        # Nothing has been written there yet, so numbering starts at 1.
        return 1

    # Require the dash explicitly: otherwise "siteA-7.json" or "site_9.json"
    # would be counted for prefix "site" (as -7 and 9 respectively).
    stem_start = f"{prefix}-"
    numbers = []
    for name in names:
        if not (name.startswith(stem_start) and name.endswith(extensions)):
            continue
        # Assumes format: prefix-N.ext (e.g., siteA-3.json)
        number_part = name[len(stem_start):].split('.')[0]
        if number_part.isdecimal():
            numbers.append(int(number_part))
    return max(numbers, default=0) + 1


def save_to_json(data, path, prefix):
    """
    Saves data to a JSON file with an incremented filename based on existing files.

    The file is written as ``<path>/<prefix>-<N>.json`` where ``N`` comes from
    ``get_next_file_number``. The target directory is created when missing.

    Args:
        data: JSON-serialisable object to write.
        path: Directory that receives the file.
        prefix: File-name prefix placed in front of ``-<N>.json``.

    Returns:
        str: Full path of the file that was written.
    """
    # Create the output directory up front; without this the first save into
    # a new directory fails before anything is written.
    os.makedirs(path, exist_ok=True)

    next_number = get_next_file_number(path, prefix)
    file_name = f"{prefix}-{next_number}.json"
    file_path = os.path.join(path, file_name)

    # ensure_ascii=False keeps emoji and non-Latin text readable in the file.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"\tSaved to {file_path}")
    return file_path




In [67]:
# Give our prediction definition for context

In [None]:
def detect_predictions_with_llms(df: pd.DataFrame, notebook_dir: str, site: str,
                                 batch_size: int = 20, generation_models=None):
    """
    Ask each LLM whether every sentence in ``df['text']`` is a prediction.

    Every sentence is sent to every model with a fixed classification prompt.
    Each non-blank line of a model's reply becomes one record, so a single
    (sentence, model) pair can yield several records. After each batch the
    records collected so far are written to a new numbered JSON file under
    ``<parent of notebook_dir>/data/open_measures/<site>s``.

    Args:
        df: Frame whose ``text`` column holds the sentences to label.
        notebook_dir: Notebook directory; its parent is the root of the
            output location.
        site: Site name, stored on every record and used for the output
            sub-directory and file prefix.
        batch_size: Number of sentences processed between two checkpoints.
        generation_models: Models to query. Defaults to the notebook-level
            ``models`` list when omitted.

    Returns:
        list[dict]: Records with keys ``sentence``, ``model``, ``label`` and
        ``site``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Fall back to the notebook-level list so existing three-argument calls keep working.
    llms = models if generation_models is None else generation_models

    # The checkpoint directory does not depend on the batch, so resolve it once.
    save_dir = os.path.join(os.path.dirname(notebook_dir), 'data', 'open_measures', f"{site}s")

    sentences = DataProcessing.df_to_list(df, col='text')
    labels = []
    max_previews = 3  # only the first few sentences are echoed as a sanity check
    previews_shown = 0

    for batch_start in tqdm(range(0, len(sentences), batch_size)):
        batch = sentences[batch_start:batch_start + batch_size]
        for sentence in tqdm(batch):
            if previews_shown < max_previews:
                # "\t" replaces the invalid escape "\>" (a SyntaxWarning on recent Pythons).
                print(f"\tSentence: {sentence} --- {site}\n")
                previews_shown += 1
            prompt = f"Given this sentence ({sentence}), state if the sentence is a prediction, not a prediction, or not enough information. Also, if it is a prediction, state the prediction domain if any are finance, health, weather, policy, sports, or miscellaneous. Do not explain or provide any other details. Remember, your responses are discrete corresponding to the states in the list; [prediction, not a prediction, not enough information]."
            for model in llms:
                # presumably wraps the prompt as a user-role message — confirm in text_generation_models
                input_prompt = model.user(prompt)
                raw_text_llm_generation = model.chat_completion([input_prompt])
                model_name = model.__name__()
                # One record per non-blank reply line.
                for line in raw_text_llm_generation.split("\n"):
                    if line.strip():
                        labels.append({"sentence": sentence, "model": model_name, "label": line, "site": site})
        # Checkpoint after every batch. `labels` is cumulative, so each file
        # holds every record produced so far, not only this batch's records.
        save_to_json(labels, save_dir, site)
    return labels

In [75]:
# Label the TikTok comments with every active model and tabulate the records.
tiktok_site = tiktok_df['Site'][0]
llms_generated_for_tiktok_comments = detect_predictions_with_llms(
    tiktok_df, notebook_dir, tiktok_site
)
# The records keep their lowercase `site` key; a capitalised `Site` column is
# added on top of it.
llms_generated_for_tiktok_comments_df = (
    pd.DataFrame(llms_generated_for_tiktok_comments)
    .rename(columns={'sentence': 'Text', 'model': 'Model', 'label': 'Label'})
    .assign(Site=tiktok_site)
)
llms_generated_for_tiktok_comments_df.head(7)

  0%|          | 0/1 [00:00<?, ?it/s]

	Sentence: pahang 1-2 sabah 🔥🔥🔥🔥 --- tiktok_comment





	Sentence: pahang 1-2 sabah 🔥🔥🔥🔥 --- tiktok_comment



100%|██████████| 2/2 [00:05<00:00,  2.56s/it]
100%|██████████| 1/1 [00:05<00:00,  5.11s/it]

Site: tiktok_comment
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/tiktok_comments/tiktok_comment-1.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/tiktok_comments/tiktok_comment-1.json





Unnamed: 0,Text,Model,Label,site,Site
0,pahang 1-2 sabah 🔥🔥🔥🔥,gemma2-9b-it,not enough information,tiktok_comment,tiktok_comment
1,pahang 1-2 sabah 🔥🔥🔥🔥,llama-3.1-8b-instant,not enough information,tiktok_comment,tiktok_comment
2,pahang 1-2 sabah 🔥🔥🔥🔥,meta-llama/llama-guard-4-12b,safe,tiktok_comment,tiktok_comment
3,pahang 1-2 sabah 🔥🔥🔥🔥,gemma2-9b-it,not enough information,tiktok_comment,tiktok_comment
4,pahang 1-2 sabah 🔥🔥🔥🔥,llama-3.1-8b-instant,not enough information,tiktok_comment,tiktok_comment
5,pahang 1-2 sabah 🔥🔥🔥🔥,meta-llama/llama-guard-4-12b,safe,tiktok_comment,tiktok_comment


In [77]:
# Label the Bluesky posts with every active model and tabulate the records.
# The site name is read from the first row of the frame's `Site` column.
bluesky_site = bluesky_df['Site'][0]
llms_generated_for_bluesocial_comments = detect_predictions_with_llms(
    bluesky_df, notebook_dir, bluesky_site
)
llms_generated_for_bluesocial_comments_df = pd.DataFrame(
    llms_generated_for_bluesocial_comments
).rename(columns={'sentence': 'Text', 'model': 'Model', 'label': 'Label'})
llms_generated_for_bluesocial_comments_df.head(7)

  0%|          | 0/5 [00:00<?, ?it/s]

	Sentence: British Columbia’s population is projected to reach 7.9 million by 2046, up 44% from 2023 --- bluesky





	Sentence: This is the lineup for the Boston Bruins in their preseason game against the New York Rangers. If Poitras is playing wing and is projected to play 2nd line RW with Coyle and Marchand, then Lettieri and Lysell are in the fight for 3rd line RW. Duran is reportedly a dark horse for that slot too.
#NHL --- bluesky





	Sentence: fantasy football update: i'm starting to think that being born close to 9/11 or 4/20 might make you worse at football, not better

the mr. blue skyren williams are now 0-3, and the josh allen parsons project is winning this week only because my opponent is projected to get 64 points --- bluesky



100%|██████████| 20/20 [00:33<00:00,  1.70s/it]
 20%|██        | 1/5 [00:33<02:15, 33.99s/it]

Site: bluesky
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-0.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-0.json


100%|██████████| 20/20 [00:43<00:00,  2.19s/it]
 40%|████      | 2/5 [01:17<01:59, 39.77s/it]

Site: bluesky
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-1.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-1.json


100%|██████████| 20/20 [01:01<00:00,  3.10s/it]
 60%|██████    | 3/5 [02:19<01:39, 49.90s/it]

Site: bluesky
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-2.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-2.json


100%|██████████| 20/20 [00:56<00:00,  2.84s/it]
 80%|████████  | 4/5 [03:16<00:52, 52.64s/it]

Site: bluesky
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-3.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-3.json


100%|██████████| 4/4 [00:12<00:00,  3.16s/it]
100%|██████████| 5/5 [03:29<00:00, 41.85s/it]

Site: bluesky
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-4.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/blueskys/bluesky-4.json





Unnamed: 0,Text,Model,Label,site
0,"British Columbia’s population is projected to reach 7.9 million by 2046, up 44% from 2023",gemma2-9b-it,prediction,bluesky
1,"British Columbia’s population is projected to reach 7.9 million by 2046, up 44% from 2023",llama-3.1-8b-instant,prediction,bluesky
2,"British Columbia’s population is projected to reach 7.9 million by 2046, up 44% from 2023",llama-3.1-8b-instant,domain: policy,bluesky
3,"British Columbia’s population is projected to reach 7.9 million by 2046, up 44% from 2023",meta-llama/llama-guard-4-12b,safe,bluesky
4,"This is the lineup for the Boston Bruins in their preseason game against the New York Rangers. If Poitras is playing wing and is projected to play 2nd line RW with Coyle and Marchand, then Lettieri and Lysell are in the fight for 3rd line RW. Duran is reportedly a dark horse for that slot too.\n#NHL",gemma2-9b-it,prediction,bluesky
5,"This is the lineup for the Boston Bruins in their preseason game against the New York Rangers. If Poitras is playing wing and is projected to play 2nd line RW with Coyle and Marchand, then Lettieri and Lysell are in the fight for 3rd line RW. Duran is reportedly a dark horse for that slot too.\n#NHL",gemma2-9b-it,sports,bluesky
6,"This is the lineup for the Boston Bruins in their preseason game against the New York Rangers. If Poitras is playing wing and is projected to play 2nd line RW with Coyle and Marchand, then Lettieri and Lysell are in the fight for 3rd line RW. Duran is reportedly a dark horse for that slot too.\n#NHL",llama-3.1-8b-instant,"prediction, sports",bluesky


In [62]:
# llms_generated_for_bluesocial_comments_df

In [78]:
# Label the Truth Social posts with every active model and tabulate the records.
# The site name is read from the first row of the frame's `Site` column.
true_social_site = true_social_df['Site'][0]
llms_generated_for_true_social_comments = detect_predictions_with_llms(
    true_social_df, notebook_dir, true_social_site
)
llms_generated_for_true_social_comments_df = pd.DataFrame(
    llms_generated_for_true_social_comments
).rename(columns={'sentence': 'Text', 'model': 'Model', 'label': 'Label'})
llms_generated_for_true_social_comments_df.head(7)

  0%|          | 0/6 [00:00<?, ?it/s]

	Sentence: Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html --- truth_social





	Sentence: Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html --- truth_social





	Sentence: Nordi Mukiele set to join Bayer Leverkusen on a season-long loanhttps://sports.yahoo.com/nordi-mukiele-set-join-bayer-175400290.html --- truth_social



100%|██████████| 20/20 [00:36<00:00,  1.81s/it]
 17%|█▋        | 1/6 [00:36<03:01, 36.25s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-0.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-0.json


100%|██████████| 20/20 [00:52<00:00,  2.62s/it]
 33%|███▎      | 2/6 [01:28<03:03, 45.79s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-1.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-1.json


100%|██████████| 20/20 [00:58<00:00,  2.92s/it]
 50%|█████     | 3/6 [02:27<02:34, 51.57s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-2.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-2.json


100%|██████████| 20/20 [00:53<00:00,  2.67s/it]
 67%|██████▋   | 4/6 [03:20<01:44, 52.32s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-3.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-3.json


100%|██████████| 20/20 [00:53<00:00,  2.68s/it]
 83%|████████▎ | 5/6 [04:14<00:52, 52.80s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-4.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-4.json


100%|██████████| 1/1 [00:02<00:00,  2.74s/it]
100%|██████████| 6/6 [04:17<00:00, 42.84s/it]

Site: truth_social
The json file is saving at: /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-5.json
Saved to /Users/detraviousjamaribrinkley/Documents/Development/research_labs/uf_ds/predictions/data/open_measures/truth_socials/truth_social-5.json





Unnamed: 0,Text,Model,Label,site
0,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,gemma2-9b-it,not a prediction,truth_social
1,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,llama-3.1-8b-instant,prediction,truth_social
2,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,llama-3.1-8b-instant,sports,truth_social
3,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,meta-llama/llama-guard-4-12b,safe,truth_social
4,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,gemma2-9b-it,not a prediction,truth_social
5,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,llama-3.1-8b-instant,prediction,truth_social
6,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,llama-3.1-8b-instant,sports,truth_social


In [None]:
llms_generated_for_true_social_comments_df

Unnamed: 0,Text,Model,Label
0,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,gemma2-9b-it,not a prediction
1,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,llama-3.1-8b-instant,prediction
2,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,llama-3.1-8b-instant,sports
3,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,llama-3.3-70b-versatile,Not a prediction
4,Real Madrid youngster has a ‘more than interesting’ offer from new suitors in La Ligahttps://sports.yahoo.com/real-madrid-youngster-more-interesting-145000880.html,meta-llama/llama-guard-4-12b,safe
5,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,gemma2-9b-it,not a prediction
6,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,llama-3.1-8b-instant,prediction
7,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,llama-3.1-8b-instant,sports
8,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,llama-3.3-70b-versatile,not a prediction
9,Leverkusen reach agreement with PSG for Nordi Mukielehttps://sports.yahoo.com/leverkusen-reach-agreement-psg-nordi-165100770.html,meta-llama/llama-guard-4-12b,safe
