In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re
import dspy
import json
import time
from datetime import datetime
from zoneinfo import ZoneInfo  # Python 3.9+
import numpy as np
import os
from dotenv import load_dotenv

# Load .env from project root (install python-dotenv if needed: pip install python-dotenv)
load_dotenv()
# OpenAI key used by dspy; allow several variable names for flexibility
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') or os.getenv('OPENAI_KEY') or os.getenv('OPENAI')
if not OPENAI_API_KEY:
    raise RuntimeError('OpenAI API key not found. Set OPENAI_API_KEY in your .env file.')

In [7]:
BASE_URL = "https://www.prnewswire.com/news-releases/all-public-company-news/"

def try_request(url, timeout=10):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        return r if r.status_code == 200 else None
    except:
        return None

def extract_time_and_clean_title(raw_title: str):
    # Matches "09:50 ETTitle..." or "9:05 AM ET Title..."
    m = re.match(r"^(\d{1,2}:\d{2}\s*(?:[AP]M\s*)?ET)\s*(.*)", raw_title, re.IGNORECASE)
    if m:
        return m.group(1).strip(), m.group(2).strip()
    return None, raw_title.strip()

def get_article_content(article_url: str):
    resp = try_request(article_url)
    if not resp:
        return ""
    soup = BeautifulSoup(resp.content, 'html.parser')
    paragraphs = [
        p.get_text(strip=True)
        for p in soup.find_all('p')
        if len(p.get_text(strip=True)) > 50
    ]
    content = "\n\n".join(paragraphs[:5])

    # Trim at DISCLAIMER (case-insensitive)
    cut = content.upper().find("DISCLAIMER")
    if cut != -1:
        content = content[:cut].strip()
    return content

def flag_has_exchange(text: str):
    pattern = re.compile(r'(NYSE|DOW|NASDAQ)', re.IGNORECASE)
    return 1 if pattern.search(text or "") else 0

def fetch_new_until_seen(df_base: pd.DataFrame, max_pages=10, sleep_between=0.2):
    """
    Stream today's items (America/Chicago). Stop at the first already-seen link.
    Returns a DataFrame with only the newly fetched rows (same schema as df_base).
    """
    now_ct = datetime.now(ZoneInfo("America/Chicago"))
    year, month, day = now_ct.year, now_ct.month, now_ct.day
    date_str = f"{year:04d}-{month:02d}-{day:02d}"
    print(f"Fetching PR Newswire items for {date_str}...")

    # ✅ Only keep today's links for faster lookup
    if df_base is not None and not df_base.empty and "link" in df_base.columns and "date" in df_base.columns:
        seen_links = set(df_base.loc[df_base["date"] == date_str, "link"].astype(str))
    else:
        seen_links = set()

    collected = []
    seen_this_run = set()  # avoid duplicates within the same crawl
    stop = False

    for page in range(1, max_pages + 1):
        if stop:
            break

        list_url = f"{BASE_URL}?month={month:02d}&day={day:02d}&year={year}&page={page}&pagesize=100"
        resp = try_request(list_url)
        if not resp:
            break

        soup = BeautifulSoup(resp.content, "html.parser")
        anchors = soup.find_all("a", href=re.compile(r"^/news-releases/.*\.html$"))
        if not anchors:
            break

        for a in anchors:
            href = a.get("href")
            if not href:
                continue
            full_url = "https://www.prnewswire.com" + href

            # ✅ Early stop: first already-seen *today* link => stop scanning
            if full_url in seen_links:
                stop = True
                break

            if full_url in seen_this_run:
                continue
            seen_this_run.add(full_url)

            raw_title = a.get_text(strip=True)
            news_time, title = extract_time_and_clean_title(raw_title)

            content = get_article_content(full_url)
            has_exch = flag_has_exchange(content)

            collected.append({
                "date": date_str,
                "time": news_time,
                "title": title,
                "link": full_url,
                "content": content,
                "has_exchange": has_exch,
            })

            time.sleep(sleep_between)

    return pd.DataFrame(collected)

def main(df_base: pd.DataFrame):
    """
    df_base must have columns:
    ['date','time','title','link','content','has_exchange']
    """
    # Ensure schema (in case caller passes empty/partial df)
    required_cols = ['date','time','title','link','content','has_exchange']
    if df_base is None or df_base.empty:
        df_base = pd.DataFrame(columns=required_cols)
    else:
        for c in required_cols:
            if c not in df_base.columns:
                df_base[c] = None

    df_new = fetch_new_until_seen(df_base, max_pages=10, sleep_between=0.15)
    if df_new.empty:
        print("No new items found (hit an already-seen link right away).")
        return df_base

    df_out = pd.concat([df_base, df_new], ignore_index=True)
    print(f"Added {len(df_new)} new items. Total now: {len(df_out)}")
    return df_out, df_new

# ---------- Example usage ----------
# Initialize once (or load your existing df_base)
columns = ['date','time','title','link','content','has_exchange']
df_base = pd.DataFrame(columns=columns)

# Update df_base efficiently with early-stop scanning
df_out, df_new = main(df_base)
print(df_new.head())


Fetching PR Newswire items for 2025-09-14...
Added 200 new items. Total now: 200
         date      time                                              title  \
0  2025-09-14  12:00 ET  LNTH INVESTOR ALERT: Bronstein, Gewirtz & Gros...   
1  2025-09-14  10:38 ET  ADNOC Gas wird in den FTSE Emerging Index aufg...   
2  2025-09-14  08:30 ET  SHAREHOLDER REMINDER: Faruqi & Faruqi, LLP Inv...   
3  2025-09-14  00:24 ET  Supermicro begint met bulkleveringen van NVIDI...   
4  2025-09-14      None  Sep 13, 2025, 23:57 ETParkland Corporation anu...   

                                                link  \
0  https://www.prnewswire.com/news-releases/lnth-...   
1  https://www.prnewswire.com/news-releases/adnoc...   
2  https://www.prnewswire.com/news-releases/share...   
3  https://www.prnewswire.com/news-releases/super...   
4  https://www.prnewswire.com/news-releases/parkl...   

                                             content  has_exchange  
0  NEW YORK,Sept. 14, 2025/PRNewswire/ -- At

In [8]:
test_news = df_new[df_new['has_exchange'] == 1]['content'][5]
print(test_news)

KeyError: 5

In [None]:
test_news = """VANCOUVER, BC, Sept. 10, 2025 /PRNewswire/ -- Corporate treasury companies surge an average of 150% within 24 hours of announcing crypto adoption strategies, according to a 2025 Animoca Brands report[1], as digital asset treasuries amass $113 billion in Bitcoin stockpiles through September 2025[2]. The explosive momentum reflects a fundamental shift from traditional cash reserves to strategic cryptocurrency accumulation, with Crypto.com reporting over 90 public companies now holding Bitcoin on their balance sheets[3] as corporate America embraces digital assets as inflation hedges and growth catalysts. This treasury transformation is helping to position companies embracing it, including CEA Industries, Inc. (NASDAQ: BNC), Kindly MD, Inc. (NASDAQ: NAKA), Metaplanet Inc. (OTCQX: MTPLF), The Smarter Web Company PLC (OTCQB: TSWCF), and Cipher Mining Inc. (NASDAQ: CIFR).

Institutional demand accelerates as BlackRock's Bitcoin ETF attracted $289.8 million in fresh inflows on September 4 alone[4], while Strategy's treasury model has delivered 257% returns by aggressively accumulating over 582,000 BTC worth $62 billion[5]. The convergence of regulatory clarity, ETF accessibility, and corporate adoption creates unprecedented supply-demand dynamics, with analysts projecting continued institutional accumulation as pension funds and sovereign wealth entities prepare their own digital asset allocations, thus rewarding companies that establish strategic positions before the broader institutional wave arrives.

CEA Industries (NASDAQ: BNC) recently strengthened its institutional foundation through the appointment of Dr. Russell Read, Ph.D., CFA, as a non-executive board member, further cementing its transformation into a premier BNB treasury operation. Dr. Read's extensive background managing capital at CalPERS, Alaska Permanent Fund Corporation, and Gulf Investment Corporation—where he oversaw hundreds of billions across global markets—brings heavyweight institutional expertise to the Colorado-based firm's aggressive BNB accumulation strategy.

This leadership enhancement coincides with CEA Industries' rapid expansion of its BNB position, which has grown to 388,888 tokens worth approximately $330 million. The company maintains its ambitious goal of securing 1% of BNB's total circulating supply by early 2026, representing a concentrated bet on the world's most actively used blockchain network for daily transactions.

"Since the announcement of their BNB Treasury, CEA Industries has swiftly established itself as a global leader in digital asset treasury management," said Dr. Read. "I look forward to working with David [Namdar, CEO of CEA Industries (BNC)] and the Board to further strengthen governance, expand institutional engagement, and position CEA Industries for long-term success."

BNC's strategic approach centers entirely on BNB's foundational role within the most utilized blockchain ecosystem for everyday transactions and decentralized finance operations. Instead of diversifying across multiple digital assets, the company has committed exclusively to BNB's ecosystem potential, believing this focused strategy maximizes exposure to network effects while enabling direct participation in on-chain yield opportunities.

This concentrated approach emerged from a transformative private placement of $500 million that completely restructured the company from its previous operations into a pure-play BNB treasury vehicle. The funding round drew over 140 institutional and crypto-native investors, including Pantera Capital, Arche Capital, ExodusPoint Capital Management, and Blockchain.com, with Cantor Fitzgerald & Co. acting as exclusive placement agent and lead financial advisor.

BNB's attraction lies in its distinctive blend of utility and deflationary mechanics. The token powers millions of daily transactions while delivering staking rewards and benefiting from quarterly supply burns through automated reduction mechanisms. Unlike speculative digital assets, BNB demonstrates consistent economic utility across trading infrastructure, payment networks, and decentralized application environments.

CEA Industries' leadership team merges profound crypto expertise with traditional finance experience. CEO David Namdar previously co-founded Galaxy Digital and helped develop institutional crypto trading infrastructure. Dr. Read's addition provides sovereign wealth fund management expertise spanning decades and multiple regions. Hans Thomas of 10X Capital oversees treasury operations, contributing public company and capital markets knowledge to the BNB accumulation framework.

The company fills a critical market void for U.S. investors lacking direct BNB access through traditional brokerage platforms. BNC delivers regulated market exposure to BNB's performance without requiring cryptocurrency wallets, exchange registrations, or technical blockchain expertise. Investors gain BNB ecosystem exposure through conventional equity ownership in a NASDAQ-listed entity.

Current market dynamics favor BNC's strategy, with BNB recently approaching $900 price levels[6] while the broader BNB Chain ecosystem maintains over $120 billion in total market capitalization[7]. Should warrant exercises reach their maximum potential of $750 million in additional capital, CEA Industries could accumulate BNB holdings exceeding $1.25 billion in aggregate value.

BNC represents a strategic wager on blockchain infrastructure adoption within traditional financial systems. For investors seeking regulated exposure to cryptocurrency markets without direct digital asset ownership, CEA Industries provides institutional-grade access to one of the most actively utilized blockchain networks in global finance.

CONTINUED… Read this and more news for CEA Industries at: https://equity-insider.com/2025/08/13/beat-wall-street-to-the-trade-that-500-million-just-backed/

Kindly MD, Inc. (NASDAQ: NAKA) has made a significant strategic investment with its subsidiary Nakamoto Holdings Inc. committing up to $30 million to Metaplanet Inc.'s (OTCQX: MTPLF), international equity financing, marking Nakamoto's largest single investment to date and its first in an Asian public company with a Bitcoin treasury strategy.

The investment in Japan's first and leading Bitcoin treasury company is expected to be funded on September 16, 2025, with proceeds allocated primarily to Bitcoin purchases. This transaction reinforces KindlyMD's position as both a healthcare services provider and Bitcoin treasury vehicle following its August 2025 merger with Nakamoto Holdings Inc.

"Metaplanet has established itself as a leader in Japan's Bitcoin landscape through its commitment to advancing financial innovation and driving the global adoption of Bitcoin," said David Bailey, Chairman and CEO of KindlyMD. "By positioning Bitcoin as the cornerstone of its financial approach, Metaplanet has become one of Japan's leading public companies and a global leader in corporate Bitcoin strategies. We are proud to support their mission and believe this investment will further strengthen the global network of companies placing Bitcoin at the center of institutional finance."

Metaplanet continues to pioneer Japan's Bitcoin-backed fixed income market with shareholders recently authorizing two classes of perpetual preferred shares designed to optimize long-term Bitcoin accumulation. The strategic partnership unites KindlyMD's healthcare expertise with Nakamoto's vision of integrating Bitcoin into global capital markets, creating a diversified entity focused on both healthcare innovation and Bitcoin treasury management.

The Smarter Web Company PLC (OTCQB: TSWCF) has purchased an additional 30 Bitcoin at an average price of £83,404.85 per Bitcoin ($112,846 per Bitcoin), bringing total holdings to 2,470 Bitcoin as part of "The 10 Year Plan" ongoing treasury policy. The London-based company has achieved an impressive Year-to-Date BTC Yield of 56,796% on its treasury and a 30 Day BTC Yield of 18%, with total Bitcoin purchases now valued at £203,580,051. The web design and development company has adopted Bitcoin as a core component of its financial strategy while maintaining approximately £400,000 in net cash available for future Bitcoin deployment.

The company's board considers Bitcoin to be a strategic store of value and growth vehicle for reserves, though they acknowledge the high-risk nature and volatility associated with cryptocurrency investments. The Smarter Web Company continues to explore opportunities through organic growth and corporate acquisitions while integrating its Bitcoin Treasury Policy into its overall business strategy.

With Bitcoin forming what the company believes is a core part of the future global financial system, the firm maintains transparency with investors regarding both the opportunities and risks associated with its substantial cryptocurrency holdings.

Cipher Mining Inc. (NASDAQ: CIFR) has produced 241 BTC in August while maintaining 1,414 BTC in treasury holdings after selling 42 BTC as part of regular treasury management processes, with the company's Black Pearl Phase I facility accounting for approximately 39% of total Bitcoin production.

The industrial-scale Bitcoin mining operation deployed 115,000 mining rigs generating 23.0 EH/s of operating hashrate at 17.3 J/TH fleet efficiency, positioning the company as a significant Bitcoin accumulator through mining operations. Cipher continues scaling production with Black Pearl Phase I expected to reach approximately 10 EH/s by the end of the third quarter, bringing total self-mining hashrate to approximately 23.5 EH/s.

The company focuses on developing and operating industrial-scale data centers for Bitcoin mining and HPC hosting, aiming to be a market leader in innovation and a hosting partner to major HPC companies. With Black Pearl Phase I steadily ramping production as new rigs continue delivery, Cipher maintains its dual strategy of Bitcoin accumulation through mining operations while building infrastructure capabilities for the expanding digital asset ecosystem.

The company's treasury management approach balances Bitcoin retention with operational cash flow needs, positioning Cipher as both a Bitcoin miner and strategic holder of the digital asset."""

In [None]:
import dspy
lm = dspy.LM('openai/gpt-5-mini',
             api_key=OPENAI_API_KEY,
             temperature= 1.0, 
             max_tokens=16000)
lm(messages=[{"role": "user", 
              "content": "What is this company? Price range in short run? Opening in early September, Coyote Creek will offer affordable single-family homes inFort Lupton\n\nFORT\xa0LUPTON, Colo.,Sept. 10, 2025/PRNewswire/ -- Century Communities, Inc. (NYSE:CCS)—a top national homebuilder, industry leader in online home sales, and featured on America's Most Trustworthy Companies and World's Most Trustworthy Companies by Newsweek—is excited to announce the Company's upcoming return to Coyote Creek inFort Lupton, boasting homesites located adjacent to the 18-hole Coyote Creek Golf Course.\n\nA Grand Opening celebration will take place onSaturday, September 13, from11 a.m. to 3 p.m.Attendees will enjoy light refreshments, the opportunity to win a prize giveaway, and will be among the first to tour the community's brand-new, professionally decorated model home—showcasing the community's two-story Empress floor plan.\n\nA Grand Opening celebration will take place onSaturday, September 13, from11 a.m. to 3 p.m.Attendees will enjoy light refreshments, the opportunity to win a prize giveaway, and will be among the first to tour the community's brand-new, professionally decorated model home—showcasing the community's two-story Empress floor plan.\n\nLearn more and join the community interest list atwww.CenturyCommunities.com/CoyoteCreekCO."}])


In [None]:
dspy.configure(
    lm=dspy.LM(
        model='openai/gpt-5-mini',
        api_key=OPENAI_API_KEY,
        temperature=1.0,
        max_tokens = 16000
    )
)

class price_prediction_based_on_news(dspy.Signature):
    """
    You are a senior finance analyst. I am passing you the news of a public company. 
    {
      "company": "company name or 'N/A'",
      "ticker": "company ticker or 'N/A'",
      "ai_comments": "focus on stock performance",
      "short_run_days": <float or NaN> number of days of the short run,
      "short_run_range_low_percent": <float or NaN> lower end prediction of Numeric value in percentage (%) only and can be negative,
      "short_run_range_high_percent": <float or NaN> higher end prediction of Numeric value in percentage (%) only and can be negative,
      "long_run_range_percent": <float or NaN> long run prediction of Numeric value in percentage (%) only and can be negative
    }
    Include ONLY occurrences that match the target concept using provided aliases and units.
    Do NOT include explanations; JSON array ONLY.
    """
    text: str = dspy.InputField(desc="source text snippet")
    records_json: str = dspy.OutputField(desc="JSON array of record objects; no prose")
    
pred = dspy.ChainOfThought(price_prediction_based_on_news)(text=test_news)

In [None]:
# raw = getattr(pred, "records_json", "[]") or "[]"
# arr = json.loads(raw)
# all_records = []
# if arr:
#     for item in arr:
#         output = {
#                     'company': item.get('company'),
#                     'ticker': item.get('ticker', np.nan),
#                     'short_run_days': item.get('short_run_days', np.nan),
#                     'short_run_range_low_percent': item.get('short_run_range_low_percent', np.nan),
#                     'short_run_range_high_percent': item.get('short_run_range_high_percent', np.nan),
#                     'long_run_range_percent': item.get('long_run_range_percent', np.nan),
#                     'ai_comments': item.get('ai_comments', np.nan)
#                     }
#         all_records.append(output)

In [None]:
# len(all_records)

In [None]:
# arr

In [None]:
print(df_base.shape[0])
df_focus = df_base[df_base['has_exchange'] == 1]
print(df_focus.shape[0])

In [None]:
print(df_base.shape[0])
df_focus = df_base[df_base['has_exchange'] == 1]
print(df_focus.shape[0])

cnt = 0
for idx, row in df_focus.iterrows():
    cnt += 1
    all_records = []
    if cnt <= 2:
        print(f"Index: {idx}")
        print(f"Title: {row['title']}")
        print(f"Link: {row['link']}")
        print("-" * 40)
        pred = dspy.ChainOfThought(price_prediction_based_on_news)(text=row['content'])
        raw = getattr(pred, "records_json", "[]") or "[]"
        arr = json.loads(raw)
        if arr:
            for item in arr:
                output = {  'date': row['date'],
                            'time': row['time'],
                            'title': row['title'],
                            'link': row['link'],
                            'content': row['content'],
                            'company': item.get('company'),
                            'ticker': item.get('ticker', np.nan),
                            'short_run_days': item.get('short_run_days', np.nan),
                            'short_run_range_low_percent': item.get('short_run_range_low_percent', np.nan),
                            'short_run_range_high_percent': item.get('short_run_range_high_percent', np.nan),
                            'long_run_range_percent': item.get('long_run_range_percent', np.nan),
                            'ai_comments': item.get('ai_comments', np.nan)
                            }
                all_records.append(output)
        continue
    else:
        break
    


In [None]:
all_records

In [None]:
df_focus.head()