In [43]:
import datetime
import duckdb
import enum
import json
import ollama
import os
import pandas as pd
import pydantic
import requests
import time
import tqdm
from typing import Dict, List

In [78]:
# Ollama model tag passed as `model=` to ollama.chat() in extract_expected_workload_change.
MODEL = "gemma3:1b"
# Alternative model tag; uncomment to switch models.
# MODEL = "gpt-oss:20b"


# Allowed values for the "expected_workload" field of ExpectedWorkloadReport.
# The members mirror the options that PROMPT lists for "expected_workload"
# ("high", "medium", "low", "unknown").
# Documented with comments rather than a docstring so that nothing written here
# can end up in the JSON schema generated from the report model.
class ExpectedWorkloadEnum(enum.Enum):
    high = "high"
    medium = "medium"
    low = "low"
    unknown = "unknown"


# Allowed values for the "reason_category" field of ExpectedWorkloadReport.
# The members must stay in sync with the options that PROMPT lists for
# "reason_category": the JSON schema built from this enum is what constrains the
# model's reply, so a category that is described in the prompt but missing here
# can never be returned. "rumor" was such a category and is now included.
# Documented with comments rather than a docstring so that nothing written here
# can end up in the JSON schema generated from the report model.
class ReasonCategoryEnum(enum.Enum):
    performance = "performance"
    injury = "injury"
    opponent = "opponent"
    anomaly = "anomaly"
    rumor = "rumor"
    unknown = "unknown"


# Structured result requested from the model for one news report.
# Its JSON schema (model_json_schema()) is passed as `format=` to ollama.chat().
# Documented with comments rather than a docstring so the generated schema is
# not altered by the documentation.
class ExpectedWorkloadReport(pydantic.BaseModel):
    # Name of the player the report is about; PROMPT asks for "Unknown" when undeterminable.
    target_player: str
    # Expected workload bucket for the next game.
    expected_workload: ExpectedWorkloadEnum
    # Primary reason given for the expected workload.
    reason_category: ReasonCategoryEnum


def strip_code_block(raw: str) -> str:
    """Remove an optional Markdown code fence wrapped around a model reply.

    Handles replies of the form "```json ... ```" or "``` ... ```", with or
    without surrounding whitespace. Text that is not fenced is returned
    unchanged apart from leading/trailing whitespace being trimmed.

    Note: the fence and language tag are removed as exact prefixes/suffixes.
    ``str.lstrip``/``str.rstrip`` are deliberately not used for this because
    their argument is a *set of characters*, which would also eat legitimate
    leading characters of the payload (e.g. the "n" of ``null``).

    Args:
        raw: The raw text returned by the model.

    Returns:
        The payload with the surrounding fence, language tag and outer
        whitespace removed.
    """
    fence = "```"
    tag = "json"

    text = raw.strip()
    if text.startswith(fence):
        text = text[len(fence):]
        # Drop the optional language tag that directly follows the opening fence.
        if text[:len(tag)].lower() == tag:
            text = text[len(tag):]
    if text.endswith(fence):
        text = text[:-len(fence)]
    return text.strip()


def extract_expected_workload_change(prompt_template: str, report_content: str) -> Dict:
    """Send one report to the configured model and return its parsed JSON reply.

    The template is filled in with ``str.format`` (placeholder
    ``{report_content}``), sent as a single user message, and the reply is
    constrained by the JSON schema of ``ExpectedWorkloadReport``. The call
    duration, the reply's ``thinking`` attribute and the parsed JSON are
    printed along the way.

    Args:
        prompt_template: Prompt text containing a ``{report_content}`` placeholder.
        report_content: The news report to insert into the template.

    Returns:
        The model reply decoded with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON once any code
            fence has been stripped.
    """
    filled_prompt = prompt_template.format(report_content=report_content)
    user_turn = {"role": "user", "content": filled_prompt}

    response = ollama.chat(
        model=MODEL,
        messages=[user_turn],
        format=ExpectedWorkloadReport.model_json_schema(),
        # think="low"
    )
    reply_text = response.message.content

    # The division turns total_duration into the seconds figure printed below.
    duration_secs = response.total_duration / 1000000000
    print(f"Total Duration: {duration_secs:.1f} secs")
    print()
    print(response.message.thinking)

    parsed = json.loads(strip_code_block(reply_text))
    print()
    print(json.dumps(parsed))
    return parsed


def process_report(prompt_template: str, r: Dict) -> Dict:
    """Run the extraction for a single report and flatten it into one record.

    Args:
        prompt_template: Prompt text forwarded to extract_expected_workload_change.
        r: Report record; its "description" is sent to the model and its
            "report_id" is copied into the result.

    Returns:
        A dict with keys "report_id", "target_player", "expected_workload" and
        "reason_category". Fields missing from the model reply fall back to
        "Unknown" (player) or "unknown" (workload, reason).
    """
    prediction = extract_expected_workload_change(
        prompt_template=prompt_template,
        report_content=r.get("description"),
    )

    # (field name, fallback used when the model reply lacks the field)
    field_fallbacks = (
        ("target_player", "Unknown"),
        ("expected_workload", "unknown"),
        ("reason_category", "unknown"),
    )
    record = {"report_id": r.get("report_id")}
    for field, fallback in field_fallbacks:
        record[field] = prediction.get(field, fallback)
    return record


def get_report_predictions(prompt_template: str, reports: List[Dict]) -> List[Dict]:
    """Process every report in order, showing a tqdm progress bar.

    Args:
        prompt_template: Prompt text forwarded to process_report for each report.
        reports: Report records to process; must support ``len()``.

    Returns:
        One result record per report, in input order.
    """
    # Lazy generator: each report is processed only when tqdm pulls the next item,
    # so the bar advances as results are produced.
    pending = (process_report(prompt_template, report) for report in reports)
    progress = tqdm.tqdm(pending, total=len(reports))
    return list(progress)

In [79]:
# Sample news report used as the "description" in the single-report call to process_report.
# NOTE(review): this text matches the "Example 3" report embedded in PROMPT, so the result
# for it shows the model reproducing a few-shot example, not handling an unseen report.
CONTENT = """
New York Giants running back Devin Singletary had 14 carries for 47 yards in Week 12's 34-27 loss in overtime to the Detroit Lions. While the 14 carries appear encouraging, the 3.36 YPC does not. Additionally, Tyrone Tracy Jr. played 71 percent of the snaps and out-touched Singletary 23 to 14. Additionally, Tracy Jr. continues to be a running back involved in the passing game and getting a majority of the carries, with Singletary being brought in as a goal-line or change-of-pace back. Given Singletary's 14 carries and the possibility of being the team's goal-line back, he is still worth a hold in fantasy, but not someone that can confidently be started unless we see a shift in snap percentage or Tracy were to go down with an injury. Week 13 presents a challenging task: going to Foxboro to take on New England's stiff run defense, which has allowed the fewest fantasy points to opposing running backs this season. Singletary will not be a recommended start in Week 13, but could be held on fantasy managers' bench if they have the spot available.
"""

In [80]:
# Few-shot prompt template for the workload extraction.
# It is filled in with str.format(), so:
#   - {report_content} is the only placeholder and receives the report text;
#   - literal braces in the example outputs are escaped as {{ and }}.
# The example outputs use double-quoted keys and values so that they are valid JSON,
# matching the "Your output should be JSON" instruction.
PROMPT = """
# Instructions

You are an expert in NFL football and semantics.

Read the following news report about an NFL player and extract structured data that captures what the report
thinks will happen to that player's workload in the next game.

Your output should be JSON with the following fields:
- "target_player"
  - Out of all the players named in the report, this should be the name of the player whose workload is
    discussed the most
  - Output "Unknown" if the report is not about a player's workload or if the player cannot be determined
- "expected_workload"
  - The expected workload in the next game (carries, targets) for the target player
  - Value can be one of the following:
    - "high" if significant workload (20+ touches), such as being the lead player in their role
    - "medium" if workload is in the middle (10-20 touches), such as being split with another key player
    - "low" if even less workload than that (<10 touches) or no workload at all
    - "unknown" if it cannot be determined
- "reason_category"
  - Categorization of the primary reason for the expected workload change
  - Value can be one of the following:
    - "performance" because of the target player or another player's strong or weak play
    - "injury" because of an injury to or return from injury by the target player or another player
    - "opponent" because of the strength or weakness of the team they are playing against next
    - "anomaly" because the previous workload was out of the ordinary
    - "rumor" because of hearsay about players, coaches, or strategy
    - "unknown" if the reason cannot be determined

# Examples

## Example 1

Report:
Denver Broncos running back J.K. Dobbins delivered a solid performance in Week 3 despite his team's last-second 23-20 loss to the Los Angeles Chargers. Dobbins was effective on the ground, rushing for 83 yards and a touchdown on 11 carries, including a 41-yard run that helped set up his score. He also recorded one catch for zero yards. After three games, Dobbins has been highly productive, averaging 5.4 yards per carry with over 220 rushing yards and three touchdowns. He seems to be secured as the lead back and is likely to continue seeing a significant workload as rookie R.J. Harvey struggles to find his rhythm. Dobbins can be viewed as a reliable RB2 option moving forward.

Output:
{{
  "target_player": "J.K. Dobbins",
  "expected_workload": "high",
  "reason_category": "performance"
}}

## Example 2

Report:
Baltimore Ravens head coach John Harbaugh said on Monday that running back Justice Hill (neck) will be out three to four weeks with a disc issue in his neck, but there's still a chance that he will return this year, according to Jonas Shaffer of The Baltimore Sun. Hill injured his neck in practice early last week and landed on Injured Reserve before the Thanksgiving Day loss to the Cincinnati Bengals on Thursday night. The 28-year-old pass-catching back will miss at least three more games, but he will be eligible to return from IR in Week 17 on the road at Lambeau Field against the Green Bay Packers. Keaton Mitchell was starting to take on a bigger role behind starter Derrick Henry even before Hill was injured, so if he does return before the end of the 2025 regular season, Hill could be Baltimore's RB3 on passing downs. He only has 18 rushing attempts on the year for 93 yards and two TDs, adding 169 receiving yards and one score.

Output:
{{
  "target_player": "Justice Hill",
  "expected_workload": "low",
  "reason_category": "injury"
}}

## Example 3

Report:
New York Giants running back Devin Singletary had 14 carries for 47 yards in Week 12's 34-27 loss in overtime to the Detroit Lions. While the 14 carries appear encouraging, the 3.36 YPC does not. Additionally, Tyrone Tracy Jr. played 71 percent of the snaps and out-touched Singletary 23 to 14. Additionally, Tracy Jr. continues to be a running back involved in the passing game and getting a majority of the carries, with Singletary being brought in as a goal-line or change-of-pace back. Given Singletary's 14 carries and the possibility of being the team's goal-line back, he is still worth a hold in fantasy, but not someone that can confidently be started unless we see a shift in snap percentage or Tracy were to go down with an injury. Week 13 presents a challenging task: going to Foxboro to take on New England's stiff run defense, which has allowed the fewest fantasy points to opposing running backs this season. Singletary will not be a recommended start in Week 13, but could be held on fantasy managers' bench if they have the spot available.

Output:
{{
  "target_player": "Devin Singletary",
  "expected_workload": "medium",
  "reason_category": "opponent"
}}

# Your Turn

Report:
{report_content}

Output:
"""

In [81]:
# Smoke test: run the extraction on the single sample report; the returned record is the cell's output.
process_report(prompt_template=PROMPT, r={ "report_id": 42, "description": CONTENT })

Total Duration: 1.6 secs

None

{"target_player": "Devin Singletary", "expected_workload": "medium", "reason_category": "opponent"}


{'report_id': 42,
 'target_player': 'Devin Singletary',
 'expected_workload': 'medium',
 'reason_category': 'opponent'}