In [1]:
import datetime
import duckdb
import enum
import json
import ollama
import os
import pandas as pd
import pydantic
import requests
import time
import tqdm
from typing import Dict, List

In [32]:
# Name of the model handed to `ollama.chat` for every extraction request in this notebook.
MODEL = "gemma3:1b"


# Allowed values for the `expected_workload_change` field of ExpectedWorkloadChange.
# Every member's value is the same string as its name, and the set matches the
# values the prompt text lists for this field.
class ExpectedWorkloadChangeEnum(enum.Enum):
    much_higher = "much_higher"
    somewhat_higher = "somewhat_higher"
    similar = "similar"
    somewhat_lower = "somewhat_lower"
    much_lower = "much_lower"
    # Sentinel for reports where no workload change can be determined.
    unknown = "unknown"


# Allowed values for the `reason_category` field of ExpectedWorkloadChange.
# Every member's value is the same string as its name; the meaning of each
# category is spelled out for the model in the prompt text.
class ReasonCategoryEnum(enum.Enum):
    performance = "performance"
    injury = "injury"
    opponent = "opponent"
    anomaly = "anomaly"
    rumor = "rumor"
    # Sentinel for reports where the reason cannot be determined.
    unknown = "unknown"


# Shape of the structured answer requested from the model: the JSON schema
# generated from this class is passed as `format=` to `ollama.chat`.
# NOTE(review): documented with comments rather than a class docstring because a
# docstring may be emitted as a description in the generated JSON schema and so
# change what is sent to the model -- confirm before converting.
class ExpectedWorkloadChange(pydantic.BaseModel):
    # Name of the player the report is about.
    target_player: str
    # Direction/magnitude of the expected workload change.
    expected_workload_change: ExpectedWorkloadChangeEnum
    # Primary reason given for the expected change.
    reason_category: ReasonCategoryEnum


def strip_code_block(raw: str) -> str:
    """Remove a surrounding Markdown code fence from a model response.

    Handles responses of the form ```` ```json ... ``` ```` or ```` ``` ... ``` ````
    (with or without surrounding whitespace/newlines) as well as responses that
    carry no fence at all.

    Args:
        raw: The raw text returned by the model.

    Returns:
        The text between the fences with leading/trailing whitespace removed.
        Unfenced input is returned unchanged apart from whitespace trimming.
    """
    fence = "```"
    language_tag = "json"

    # Trim first so a trailing newline after the closing fence does not hide it.
    text = raw.strip()

    # Remove the opening fence as an exact prefix. `str.lstrip("```json")` would
    # instead strip any run of the characters `, j, s, o, n -- corrupting
    # unfenced payloads such as `null`.
    if text.startswith(fence):
        text = text[len(fence):]
        # The language tag is only meaningful directly after an opening fence.
        if text[:len(language_tag)].lower() == language_tag:
            text = text[len(language_tag):]

    # Remove the closing fence as an exact suffix.
    if text.endswith(fence):
        text = text[:-len(fence)]

    return text.strip()


def extract_expected_workload_change(prompt_template: str, report_content: str) -> Dict:
    """Ask the model for the expected workload change described in one report.

    Args:
        prompt_template: A `str.format` template containing a `{report_content}`
            placeholder.
        report_content: Text of the news report to substitute into the template.

    Returns:
        The model's reply parsed from JSON.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON after fence removal.
    """
    filled_prompt = prompt_template.format(report_content=report_content)
    user_message = {"role": "user", "content": filled_prompt}

    # The pydantic-generated schema is passed as `format` so the reply is
    # requested in the ExpectedWorkloadChange shape.
    response = ollama.chat(
        model=MODEL,
        messages=[user_message],
        format=ExpectedWorkloadChange.model_json_schema(),
    )

    # Strip a possible Markdown code fence before parsing.
    return json.loads(strip_code_block(response.message.content))


def process_report(prompt_template: str, r: Dict) -> Dict:
    """Run the workload-change extraction for one report and flatten the result.

    Args:
        prompt_template: A `str.format` template containing a `{report_content}`
            placeholder.
        r: A report record; its "description" value is used as the report text
            and its "report_id" value is copied to the output.

    Returns:
        A dict with the keys "report_id", "target_player",
        "expected_workload_change" and "reason_category". Any field missing
        from the model's reply falls back to the "Unknown"/"unknown" sentinel
        used for that field.
    """
    report_content = r.get("description")
    expected = extract_expected_workload_change(
        prompt_template=prompt_template,
        report_content=report_content,
    )
    return {
        "report_id": r.get("report_id"),
        "target_player": expected.get("target_player", "Unknown"),
        "expected_workload_change": expected.get("expected_workload_change", "unknown"),
        # Fall back to the schema's "unknown" value (not None) so all three
        # extracted fields handle a missing key the same way.
        "reason_category": expected.get("reason_category", "unknown"),
    }


def get_report_predictions(prompt_template: str, reports: List[Dict]) -> List[Dict]:
    """Extract a prediction for every report, showing a progress bar.

    Args:
        prompt_template: Template forwarded to `process_report` for each report.
        reports: Report records to process, in order.

    Returns:
        One prediction dict per report, in the same order as `reports`.
    """
    # Lazy generator: each report is processed only as tqdm pulls the next item,
    # so the bar advances once per completed report.
    predictions = (process_report(prompt_template, report) for report in reports)
    progress = tqdm.tqdm(predictions, total=len(reports))
    return list(progress)

In [33]:
# Sample news report used below as the "description" of a hand-built report record
# to try the extraction on a single input. The text is kept verbatim.
CONTENT = """
Mike DeFabo of The Athletic reported on Wednesday that Pittsburgh Steelers quarterback Aaron Rodgers spoke highly of running back Jaylen Warren, saying that "We might have to keep giving (Warren) him more opportunities in the pass game, because every time he touches it, something good happens." Warren has always been a solid fantasy asset in the passing game in his career, with 133 receptions in four NFL seasons; however, last Sunday against the Seattle Seahawks, he was very productive through the air, hauling in all four of his targets for 86 yards, for a total of 134 all-purpose yards on 18 touches (14 on the ground and four through the air), This is an excellent sign for Warren fantasy owners, as rookie third round pick, Kaleb Johnson, does not appear to be getting any opportunities of the backfield. His only other competition for opportunities is Kenneth Gainwell, who went from a 54% snap share in Week 1 to a 42% snap share in Week 2. Warren should be considered a solid RB2 heading into Week 3 against the New England Patriots.
"""

In [40]:
# Few-shot prompt template for the extraction. It is filled with `str.format`, so
# `{report_content}` is the single placeholder and the literal braces of the JSON
# examples are doubled (`{{` / `}}`) to come out as single braces after formatting.
# The value lists in the instructions mirror ExpectedWorkloadChangeEnum and
# ReasonCategoryEnum.
PROMPT = """
# Instructions

You are an expert in semantics and NFL football.

Read the following news report about an NFL player and extract structured data that captures what
the report thinks will happen to that player's workload in the next game.

Your output should be JSON with the following fields:
- "target_player"
  - Out of all the players named in the report, this should be the name of the player whose workload is
    discussed the most
  - Output "Unknown" if the report is not about a player's workload or if the player cannot be determined
- "expected_workload_change"
  - The expected change in workload (carries, targets) for the target player
  - Value can be: "much_higher", "somewhat_higher", "similar", "somewhat_lower", "much_lower", or "unknown"
- "reason_category"
  - Categorization of the primary reason for the expected workload change
  - Value can be one of the following:
    - "performance" because of the target player or another player's strong or weak play
    - "injury" because of an injury to or return from injury by the target player or another player
    - "opponent" because of the strength or weakness of the team they are playing against next
    - "anomaly" because the previous workload was out of the ordinary
    - "rumor" because of hearsay about players, coaches, or strategy
    - "unknown" if the reason cannot be determined

# Examples

## Example #1

News Report:
Chicago Bears RB James Lefever said that WR Antonio Gibson is playing at the top of his game. Expect him to get more targets next week.

Output:
{{
 "target_player": "Antonio Gibson",
 "expected_workload_change": "somewhat_higher",
 "reason_category": "performance"
}}

## Example #2

News Report:
Arizona Cardinals RB Trent Mcduffy had 18 carries for 135 yards and a touchdown last night against the Seahawks. He should be a solid RB1/2 next week against the Giants.

Output:
{{
 "target_player": "Trent Mcduffy",
 "expected_workload_change": "similar",
 "reason_category": "performance"
}}

## Example #3

News Report:
Cleveland Browns RB Marcus Higgins had two touchdowns on the ground, one through the air, and even a passing touchdown as he found WR Preston Smith for the game-winning score in overtime. Can't expect this kind of performance every game, but Higgins will continue to take the lion's share of rushes when the Browns return from their bye and face the Houston Texans in week 13.

Output:
{{
 "target_player": "Marcus Higgins",
 "expected_workload_change": "somewhat_lower",
 "reason_category": "anomaly"
}}

# Your Turn

News Report:
{report_content}

Output:
"""

In [41]:
# Try the full pipeline on the single sample report; the returned dict is the
# cell's displayed output.
process_report(
    prompt_template=PROMPT,
    r={"report_id": 42, "description": CONTENT},
)

{'report_id': 42,
 'target_player': 'Jaylen Warren',
 'expected_workload_change': 'similar',
 'reason_category': 'performance'}