In [1]:
import os

from dotenv import load_dotenv

# Load .env into os.environ so the settings below are visible to this kernel.
load_dotenv()

# Sanity check: show which LangSmith project the environment points at.
langsmith_project = os.environ.get("LANGSMITH_PROJECT")
print(langsmith_project)

news-agent


In [6]:
from typing import List,Optional

from pydantic import BaseModel, Field


class Company(BaseModel):
    """One extracted news item about a company.

    Every field defaults to None, so a record with missing pieces still
    validates instead of failing the whole extraction.
    """

    # Modelled as its own class so that multiple entities can be extracted as a list.
    name: Optional[str] = Field(default=None, description="Name of the company")
    ticker: Optional[str] = Field(default=None, description="Ticker symbol of the company")
    title: Optional[str] = Field(default=None, description="Title of the news article")
    # Optional[...] keeps the annotation consistent with the None default and
    # lets an explicit null value pass validation, like the sibling fields.
    description: Optional[List[str]] = Field(default=None, description="News content description below the title")
    link: Optional[str] = Field(default=None, description="Link to the news article")

class News(BaseModel):
    """Top-level extraction result: the list of per-company news items."""

    # Optional[...] so the annotation matches the None default; consumers must
    # treat a missing list as "nothing extracted".
    company: Optional[List[Company]] = Field(
        default=None, description="Extracted data about companies' news."
    )

In [7]:
from scrapegraphai.graphs import SmartScraperMultiGraph
from datetime import date

# Single source of truth for the page to scrape; used in both the prompt and `source`.
link = "https://theedgemalaysia.com/categories/Corporate"

# Scraper graph: the prompt tells the LLM which articles to keep, `source`
# lists the pages to fetch, and `schema` constrains the answer to the News model.
smart_scraper_multi_graph = SmartScraperMultiGraph(
   prompt=f"""You are a website scraper and you are now asked to extract news that is related to Malaysia/ Bursa Related news from the link provided. Link is {link}. You should only extract news from today's date.
   Date for today is {date.today().isoformat()}.
   Clues that shows that the news are Malaysia/ Bursa related: 
   1. It is being published by theedgemalaysia.com.
   2. The news is about companies listed in Bursa.
   
   Clues that shows that the news are NOT Malaysia/ Bursa related:
   1. The news is about companies not listed in Bursa.
   2. The news is about countries other than Malaysia.

   Extract the news that is related to Malaysia/ Bursa Related news only.
   """,
   # Reuse `link` so the prompt and the fetched page cannot drift apart.
   source=[link],
   config={
      "llm": {"model": "openai/gpt-5-nano"},
      # NOTE(review): headless is disabled — confirm this is intended outside of debugging.
      "headless": False,
      "verbose": True,
      "loader_kwargs": {
         # Pagination / click control (new, supported by ChromiumLoader after patch)
         # NOTE(review): "text='Load More'" is listed twice; kept as-is because the
         # patched loader's handling of repeated selectors is not visible here — confirm.
         "click_selectors": [
            "div.LoadMoreButton_btnWrapper__CtkKX",
            "div.LoadMoreButton_btnWrapper__CtkKX > span",
            "text='Load More'",
            "text='Load More'",
         ],
         "max_clicks": 3,
         "item_selector": "div.NewsList_newsListItemWrap__XovMP",
         "wait_after_click": 1000,  # milliseconds
      },
   },
   schema=News,
)

In [4]:
# Event-loop setup followed by the actual scraper run.
# Restart the kernel before running this cell (original author's instruction).
# Requires `smart_scraper_multi_graph` to have been defined by another cell first.
import sys, asyncio
# On Windows only: switch asyncio to the Proactor event loop policy.
if sys.platform.startswith("win"):
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

# NOTE(review): nest_asyncio is presumably applied because the notebook kernel
# already runs its own event loop — confirm.
import nest_asyncio
nest_asyncio.apply()

# Run the scraper graph (the original note says this code path uses Playwright)
# and keep its return value in `result` for the following cells.
result = smart_scraper_multi_graph.run()

--- Executing GraphIterator Node with batchsize 16 ---
processing graph instances:   0%|          | 0/1 [00:00<?, ?it/s]--- Executing Fetch Node ---
--- (Fetching HTML from: https://theedgemalaysia.com/categories/Corporate) ---
--- Executing ParseNode Node ---
--- Executing GenerateAnswer Node ---
processing graph instances: 100%|██████████| 1/1 [00:52<00:00, 52.97s/it]
--- Executing MergeAnswers Node ---


{'company': [{'name': 'Lianson Fleet Group Bhd', 'ticker': 'NA', 'description': ['Brokers Digest: Local Equities - Lianson Fleet Group Bhd, UWC Bhd, EWI Capital Bhd, ViTrox Corp Bhd Lianson Fleet Group Bhd Fair value RM2.40 BUY.'], 'link': 'https://theedgemalaysia.com/node/771196'}, {'name': 'UWC Bhd', 'ticker': 'NA', 'description': ['Brokers Digest: Local Equities - Lianson Fleet Group Bhd, UWC Bhd, EWI Capital Bhd, ViTrox Corp Bhd Lianson Fleet Group Bhd Fair value RM2.40 BUY.'], 'link': 'https://theedgemalaysia.com/node/771196'}, {'name': 'EWI Capital Bhd', 'ticker': 'NA', 'description': ['Brokers Digest: Local Equities - Lianson Fleet Group Bhd, UWC Bhd, EWI Capital Bhd, ViTrox Corp Bhd Lianson Fleet Group Bhd Fair value RM2.40 BUY.'], 'link': 'https://theedgemalaysia.com/node/771196'}, {'name': 'ViTrox Corp Bhd', 'ticker': 'KL:VITROX', 'description': ['Brokers Digest: Local Equities - Lianson Fleet Group Bhd, UWC Bhd, EWI Capital Bhd, ViTrox Corp Bhd Lianson Fleet Group Bhd Fair v

In [9]:
# Display the value returned by the scraper run as this cell's output.
result

{'company': [{'name': 'ViTrox Corp Bhd',
   'ticker': 'KL:VITROX',
   'description': ['Brokers Digest: Local Equities - Lianson Fleet Group Bhd, UWC Bhd, EWI Capital Bhd, ViTrox Corp Bhd Lianson Fleet Group Bhd Fair value RM2.40 BUY.'],
   'link': 'https://theedgemalaysia.com/node/771196'},
  {'name': 'Panasonic Manufacturing Malaysia Bhd',
   'ticker': 'KL:PANAMY',
   'description': ['Panasonic Manufacturing Malaysia seeks new edge amid price wars.'],
   'link': 'https://theedgemalaysia.com/node/771164'},
  {'name': 'Gamuda Bhd',
   'ticker': 'KL:GAMUDA',
   'description': ['HLIB: Singapore private condo project could add up to RM520m in profits to Gamuda.'],
   'link': 'https://theedgemalaysia.com/node/772104'},
  {'name': 'Selangor Dredging Bhd',
   'ticker': 'KL:SDRED',
   'description': ['Selangor Dredging’s landed mid-rise project in Taman Melawati 80% taken up.'],
   'link': 'https://theedgemalaysia.com/node/772101'},
  {'name': 'Eco World Development Group Bhd',
   'ticker': 'K

In [None]:
from extract_original_news import extract_from_results

# Read the company list once and defensively: the 'company' key may be absent
# or None when nothing was extracted, which previously raised before the
# "N/A" fallback could apply.
companies = result.get('company') or []
sample_url = companies[0].get('link') if companies else 'N/A'
print(f"Found {len(companies)} items to extract (sample url: {sample_url})")

# Pass the scraper result to the extractor, asking for output in
# 'scraper_originals.xlsx'; the returned frame is previewed below.
df = extract_from_results(result, output_xlsx='scraper_originals.xlsx')
print('Extraction finished. Preview:')
display(df.head())

Found 8 items to extract (sample url: https://theedgemalaysia.com/node/771196)


Fetching: 100%|██████████| 8/8 [00:08<00:00,  1.10s/it]
2025-09-30 22:35:06,374 INFO Saved output to scraper_originals.xlsx (rows=8)

2025-09-30 22:35:06,374 INFO Saved output to scraper_originals.xlsx (rows=8)


Extraction finished. Preview:


Unnamed: 0,url,status_code,title,text,publication_date,source_domain,notes,source_pdf,company_name,company_ticker,orig_description
0,https://theedgemalaysia.com/node/771196,,"Lianson Fleet Group Bhd, UWC Bhd, EWI Capital ...","This article first appeared in Capital, The Ed...",2025-09-30T14:30:00+08:00,theedgemalaysia.com,newspaper3k,,ViTrox Corp Bhd,KL:VITROX,[Brokers Digest: Local Equities - Lianson Flee...
1,https://theedgemalaysia.com/node/771164,,Panasonic Manufacturing Malaysia seeks new edg...,This article first appeared in The Edge Malays...,2025-09-30T15:00:00+08:00,theedgemalaysia.com,newspaper3k,,Panasonic Manufacturing Malaysia Bhd,KL:PANAMY,[Panasonic Manufacturing Malaysia seeks new ed...
2,https://theedgemalaysia.com/node/772104,,HLIB: Singapore private condo project could ad...,KUALA LUMPUR (Sept 30): Gamuda Bhd (KL:GAMUDA)...,2025-09-30T11:36:34+08:00,theedgemalaysia.com,newspaper3k,,Gamuda Bhd,KL:GAMUDA,[HLIB: Singapore private condo project could a...
3,https://theedgemalaysia.com/node/772101,,Selangor Dredging’s landed mid-rise project in...,KUALA LUMPUR (Sept 30): Selangor Dredging Bhd ...,2025-09-30T11:22:22+08:00,theedgemalaysia.com,newspaper3k,,Selangor Dredging Bhd,KL:SDRED,[Selangor Dredging’s landed mid-rise project i...
4,https://theedgemalaysia.com/node/772116,,EcoWorld signs two MOUs to bring Japan SMEs to...,KUALA LUMPUR (Sept 30): Eco World Development ...,2025-09-30T12:27:57+08:00,theedgemalaysia.com,newspaper3k,,Eco World Development Group Bhd,KL:ECOWLD,[Eco World signs two MOUs to bring Japan SMEs ...


: 