## **Importing resources**

In [None]:
# Suppress FutureWarning messages to avoid cluttering the output
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Install the google-colab-selenium package to use Selenium within Google Colab
%pip install -q google-colab-selenium

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.3/486.3 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import google_colab_selenium as gs      # for setting up Selenium in Google Colab

import urllib.request                   # for opening and reading URLs
import requests                         # to make HTTP requests for retrieving web content
from bs4 import BeautifulSoup           # for parsing HTML and extracting data
import pandas as pd                     # for data manipulation in DataFrames

from datetime import date, datetime, timedelta               # to work with date objects
import time                             # for time-related functions like pausing execution
import sys                              # to manage system-specific parameters
import re                               # for working with regular expressions

import random

In [None]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Used 'showscreen' to display screenshots of the site while scrolling (might not be necessary)
# Shows ModuleNotFoundError: No module named 'selenium_profiles'
# from selenium_profiles.utils.colab_utils import display, showscreen, show_html

## **Mounting to Google Drive**

In [None]:
# Mount Google Drive at /content/gdrive so later cells can read and write
# files under that path. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Folder on the mounted Drive that every CSV path below is prefixed with.
# Keep the trailing slash: the paths are built by plain string concatenation
# (e.g. f"{dir}{site}_..."), not with os.path.join / pathlib.
# NOTE(review): the name `dir` shadows Python's built-in dir() for the rest of
# the notebook; it is left unchanged here because other cells reference it.
dir = '/content/gdrive/MyDrive/CSCI 199/Methodology/Datasets/Raw/'

## **Extract Source Function**

In [None]:
def extract_source(url, timeout=30):
    """Download ``url`` with a fixed Chrome-like User-Agent header.

    Parameters
    ----------
    url : str
        Address of the page to request.
    timeout : float, optional
        Seconds ``requests`` may wait on the server before raising
        (default 30). Without a timeout a stalled connection would block
        the cell indefinitely.

    Returns
    -------
    requests.Response or None
        The response returned by ``requests.get`` on success. ``None`` when
        the request raised; the error is printed first. Previously this path
        crashed with ``UnboundLocalError`` because ``source`` was never bound.
    """
    agent = {"User-Agent": "Chrome/105.0.0.0"}
    try:
        return requests.get(url, headers=agent, timeout=timeout)
    except Exception as e:
        # Report in the same format the rest of the notebook uses.
        error_type, error_obj, error_info = sys.exc_info()
        print(f'ERROR FOR LINK: {url}')
        print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')
        return None

# **News Scraping**

## **Abante News Online (03/2023 - 05/2023)**

### **Site Scraper**

In [None]:
# --- Settings for this scraping run ---------------------------------------
site = "Abante"                      # prefix used in the output file name
start_year, start_month = 2023, 5    # newest archive month to visit (May 2023)
total_articles_to_scrape = 5000      # stop once this many links are collected
articles_scraped = 0                 # running count of collected links

# Empty table that the scraped (title, link, date) rows are appended to
abanteData = pd.DataFrame(columns=['Statement', 'Link', 'Date'])

# Chrome WebDriver session created through google_colab_selenium
driver = gs.Chrome()

def safe_get(driver, url, retries=3):
    """Load ``url`` in ``driver``, retrying on failure with a growing delay.

    Parameters
    ----------
    driver : object
        Anything exposing ``get(url)`` (here: the Selenium WebDriver).
    url : str
        Page to open.
    retries : int, optional
        Maximum number of attempts (default 3).

    Returns
    -------
    bool
        ``True`` as soon as one attempt succeeds, ``False`` when every
        attempt raised (or ``retries`` is 0).

    Notes
    -----
    A successful load is followed by a random 10-30 s pause to throttle the
    crawl. A failed attempt is followed by ``2**attempt`` plus 5-15 s of
    jitter, but only when another attempt will follow. The final failure
    no longer sleeps or announces a retry that never happens.
    """
    for attempt in range(retries):
        try:
            print(f"Attempt {attempt + 1}/{retries} for URL: {url}")
            driver.get(url)
        except Exception as e:
            if attempt + 1 >= retries:
                # Last attempt: nothing left to wait for.
                print(f"Attempt {attempt + 1}/{retries} failed: {e}.")
                break
            wait_time = (2 ** attempt) + random.uniform(5, 15)  # Exponential backoff
            print(f"Retry {attempt + 1}/{retries} failed: {e}. Retrying in {wait_time:.2f} sec...")
            time.sleep(wait_time)
        else:
            # Slow the crawl down so consecutive requests are spaced out.
            wait_time = random.uniform(10, 30)
            print(f"Waiting {wait_time:.2f} seconds before next request...")
            time.sleep(wait_time)
            return True

    print(f"Skipping {url} after {retries} failed attempts.")
    return False

# Crawl the monthly archive pages (https://www.abante.com.ph/<year>/<month>/page/<n>)
# backward in time, from start_year/start_month down to January 2020, until
# total_articles_to_scrape links have been collected.

# A month is abandoned after this many pages in a row fail to load. Without a
# cap, a dead connection would make the page counter climb forever.
max_failed_pages_in_a_row = 5

# Every scraped row is collected here and turned into a DataFrame once.
scraped_rows = []

try:
    for year in range(start_year, 2019, -1):  # 2019 is exclusive: last year visited is 2020
        first_month = start_month if year == start_year else 12
        for month in range(first_month, 0, -1):  # Reverse month order
            current_month = f"{month:02d}"  # Format month as 01, 02, etc.
            page = 1
            failed_pages_in_a_row = 0

            while articles_scraped < total_articles_to_scrape:
                try:
                    print('Processing page:', page, 'Year:', year, 'Month:', current_month)
                    url = f'https://www.abante.com.ph/{year}/{current_month}/page/{page}'
                    print(f'URL: {url}')

                    # safe_get() retries and throttles; False means the page never loaded.
                    if not safe_get(driver, url):
                        failed_pages_in_a_row += 1
                        if failed_pages_in_a_row >= max_failed_pages_in_a_row:
                            print(f'Giving up on {year}/{current_month}: '
                                  f'{failed_pages_in_a_row} consecutive pages failed to load.')
                            break
                        page += 1
                        continue
                    failed_pages_in_a_row = 0

                    # Wait for articles to load. A timeout here is caught below
                    # and ends the current month.
                    WebDriverWait(driver, 120).until(EC.visibility_of_element_located((By.CLASS_NAME, 'post')))

                    # Get the page source and parse it
                    source = driver.page_source
                    soup = BeautifulSoup(source, 'html.parser')
                    links = soup.find('div', {'class': 'page-content'}).find_all('article', {'class': 'post'})
                    print(f'Found {len(links)} articles on page {page}.')

                    if len(links) == 0:
                        # No articles on this page: the month is exhausted.
                        break

                    for j in links:
                        if articles_scraped >= total_articles_to_scrape:
                            break

                        try:
                            title_link = j.find('h2', {'class': 'entry-title'}).find('a')
                            Statement = title_link.text.strip()
                            Link = title_link['href'].strip()
                            Date = ''  # Not extracted here; left blank

                            scraped_rows.append({'Statement': Statement, 'Link': Link, 'Date': Date})
                            articles_scraped += 1

                        except Exception as e:
                            # A malformed article is reported and skipped.
                            error_type, error_obj, error_info = sys.exc_info()
                            print(f'ERROR FOR LINK: {url}')
                            print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

                    page += 1

                except Exception as e:
                    # Any page-level error ends the current month.
                    error_type, error_obj, error_info = sys.exc_info()
                    print(f'ERROR FOR LINK: {url}')
                    print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')
                    break

            # If article limit is reached, stop iterating months
            if articles_scraped >= total_articles_to_scrape:
                break

        # ...and stop iterating years as well
        if articles_scraped >= total_articles_to_scrape:
            break

except Exception as e:
    error_type, error_obj, error_info = sys.exc_info()
    print(f'ERROR: {error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

finally:
    try:
        # Built here so that rows collected before an error or a manual
        # interrupt still end up in abanteData.
        if scraped_rows:
            abanteData = pd.concat([abanteData, pd.DataFrame(scraped_rows)], ignore_index=True)
    finally:
        # Close the driver
        driver.quit()

# Drop duplicates and reset index
abanteData = abanteData.drop_duplicates().reset_index(drop=True)

# Save the file
filename = f"{dir}{site}_{date.today()}_NEWS_LinkList.csv"
abanteData.to_csv(filename, index=False)

# Display the first few rows
print(abanteData.head())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processing page: 1 Year: 2023 Month: 05
URL: https://www.abante.com.ph/2023/05/page/1
Attempt 1/3 for URL: https://www.abante.com.ph/2023/05/page/1
Retry 1/3 failed: HTTPConnectionPool(host='localhost', port=45635): Read timed out. (read timeout=120). Retrying in 6.93 sec...
Attempt 2/3 for URL: https://www.abante.com.ph/2023/05/page/1
Waiting 18.29 seconds before next request...
Found 10 articles on page 1.
Processing page: 2 Year: 2023 Month: 05
URL: https://www.abante.com.ph/2023/05/page/2
Attempt 1/3 for URL: https://www.abante.com.ph/2023/05/page/2
Waiting 15.97 seconds before next request...
Found 10 articles on page 2.
Processing page: 3 Year: 2023 Month: 05
URL: https://www.abante.com.ph/2023/05/page/3
Attempt 1/3 for URL: https://www.abante.com.ph/2023/05/page/3
Waiting 19.73 seconds before next request...
Found 10 articles on page 3.
Processing page: 4 Year: 2023 Month: 05
URL: https://www.abante.com.ph/2023/05/page/4
Attempt 1/3 for URL: https://www.abante.com.ph/2023/05/pag

### **Article Scraper (Test)**

In [None]:
# Smoke test: scrape one known article and print each extracted field.

url = 'https://www.abante.com.ph/2024/08/31/inflation-pinabagal-ng-malakas-na-piso/'
soup = BeautifulSoup(extract_source(url).text, 'html.parser')

# Section label and headline share the same class string; only the tag differs.
heading_class = 'elementor-heading-title elementor-size-default'

Section = soup.find('div', class_=heading_class).text.strip()
print(Section)

Title = soup.find('h1', class_=heading_class).text.strip()
print(Title)

author_widget = soup.find(
    'div',
    class_='elementor-element elementor-element-0255291 elementor-widget elementor-widget-heading',
)
Author = author_widget.find('div').find('h2').text.strip()
print(Author)

date_item = soup.find(
    'span',
    class_='elementor-icon-list-text elementor-post-info__item elementor-post-info__item--type-date',
)
Date = date_item.find('time').text.strip()
print(Date)

# Article body: every <p> inside the post-content widget, one per line.
content_widget = soup.find('div', attrs={'data-widget_type': 'theme-post-content.default'})
textList = content_widget.find('div').find_all("p")
body = "".join(f"{paragraph.text}\n" for paragraph in textList)
print(body)

NEWS
Inflation pinabagal ng malakas na piso
Abante News
August 31, 2024
Maaaring bumagal ang pagtaas ng inflation nitong Agosto dahil sa malakas na piso, pagbaba sa presyo ng produktong petrolyo at mas murang bigas, karne at isda, ayon sa pagtaya ng Bangko Sentral ng Pilipinas (BSP).
Sa tantiya ng BSP, ang inflation nitong Agosto ay naglalaro lamang sa pagitan ng 3.2 hanggang 4 percent, mas mababa sa 4.4 percent naitala noong Hulyo.
Malalaman lamang ito kapag nilabas na ng Philippine Statistics Authority (PSA) ang opisyal na inflation data sa Setyembre 5.
“Higher electricity rates and higher prices for agricultural commodities, owing to unfavorable weather conditions, are the primary sources of upward price pressures for the month,” ayon sa BSP.
“These factors are expected to be offset by lower domestic oil prices as well as lower rice, fish and meat prices along with the peso appreciation,” dugtong pa nila.
Kung magkakatotoo ang prediksiyon ng BSP, magkakaroon ng katuwiran ang desisyo

### **Article Scraper**

In [None]:
def abanteArticleScraper(url):
    """Scrape one Abante article page into a flat row.

    Parameters
    ----------
    url : str
        Address of the article.

    Returns
    -------
    list
        ``[Section, Title, Author, Date, body]`` on success. ``Author`` is
        ``""`` when the author widget is missing. An empty list is returned
        when the page cannot be fetched or a required element is absent;
        the error is printed. ``row`` is initialised before the ``try`` so
        this failure path no longer raises ``UnboundLocalError``.
    """
    row = []
    try:
        soup = BeautifulSoup(extract_source(url).text, 'html.parser')

        # Section label and headline share the same class string.
        heading_class = 'elementor-heading-title elementor-size-default'
        Section = soup.find('div', {'class': heading_class}).text.strip()
        Title = soup.find('h1', {'class': heading_class}).text.strip()

        try:
            Author = soup.find('div', {'class':'elementor-element elementor-element-0255291 elementor-widget elementor-widget-heading'}).find('div').find('h2').text.strip()
        except AttributeError:
            # Author widget not present on this page; treated as optional.
            Author = ""
        except Exception as e:
            error_type, error_obj, error_info = sys.exc_info()
            print(f'ERROR FOR LINK: {url}')
            print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')
            Author = ""

        Date = soup.find('span', {'class': 'elementor-icon-list-text elementor-post-info__item elementor-post-info__item--type-date'}).find('time').text.strip()

        # Body text: every <p> of the post-content widget, newline-terminated.
        textList = soup.find('div', attrs={'data-widget_type':'theme-post-content.default'}).find('div').find_all("p")
        body = "".join(t.text + "\n" for t in textList)

        row.extend((Section, Title, Author, Date, body))

    except Exception as e:
        error_type, error_obj, error_info = sys.exc_info()
        print(f'ERROR FOR LINK: {url}')
        print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

    return row

# Scrape every article listed in today's link list and save the result.
# NOTE(review): both paths embed date.today(), so this only finds the link
# list when it was produced on the same calendar day. The later section of
# this notebook builds the same file names, so a same-day run of both
# sections would overwrite these files. Confirm the intended workflow.
site = "Abante"
filename = f"{dir}{site}_{date.today()}_NEWS.csv"

df = pd.read_csv(f'{dir}{site}_{date.today()}_NEWS_LinkList.csv')

rows_list = []
for index, url in df["Link"].items():
    print(index, url)
    # One list per article; empty when the scraper reported an error.
    rows_list.append(abanteArticleScraper(url))

abanteData2 = pd.DataFrame(rows_list, columns=["Section", "Title", "Author", "Date", "Text"])

# Save file
abanteData2.to_csv(filename)
abanteData2.head()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
174 https://www.abante.com.ph/2023/05/29/juday-bet-ipalit-kay-charo-sa-mmk/
175 https://www.abante.com.ph/2023/05/29/britney-spears-nanay-nagkaayos-na/
176 https://www.abante.com.ph/2023/05/29/chie-hinampas-ng-alon/
177 https://www.abante.com.ph/2023/05/29/ivana-namigay-ng-datung-grocery-sa-mga-di-mahadera/
178 https://www.abante.com.ph/2023/05/29/richard-bibitbitin-si-sarah-sa-dubai-uk-at-madrid/
179 https://www.abante.com.ph/2023/05/29/rex-kinabog-mga-kalaban-sa-the-clash/
180 https://www.abante.com.ph/2023/05/29/romnick-hanga-sa-pagiging-marespeto-ng-kathniel/
181 https://www.abante.com.ph/2023/05/29/james-reid-alden-richards-bet-tikman-ng-seksing-tiktoker/
182 https://www.abante.com.ph/2023/05/29/abante-front-page-balita-ngayong-mayo-29-2023/
ERROR FOR LINK: https://www.abante.com.ph/2023/05/29/abante-front-page-balita-ngayong-mayo-29-2023/
AttributeError occurred on Line 21: 'NoneType' object has no attribute 'find'


Unnamed: 0,Section,Title,Author,Date,Text
0,ABANTV,AbanTV – Showbiz | Planong paglipat ng Eat Bul...,,"May 31, 2023","PLANONG PAGLIPAT NG EAT BULAGA SA TV5, SUMINGA..."
1,NEWS,Mga negosyante mula Bangkok type mamuhunan sa PH,Abante News,"May 31, 2023",Maglalagak ng $2.5 billion o P140 bilyon na pa...
2,NEWS,Marcos nanawagan na pabakunahan mga tsikiting,Abante News,"May 31, 2023",Nanawagan si Pangulong Ferdinand “Bongbong” Ma...
3,NEWS,Tulfo nakahanda 120 panukalang batas,Abante News,"May 31, 2023",Itutulak ni ACT-CIS party-list Rep. Erwin Tulf...
4,NEWS,BBM inarbor mga ‘tambaloslos’ kay Sara,Abante News,"May 31, 2023",NAGPAABOT ng pagbati si Pangulong Ferdinand “B...


## **Abante News Online (02/2023 and earlier)**

### **Site Scraper**

In [None]:
# --- Settings for this scraping run ---------------------------------------
site = "Abante"                      # prefix used in the output file name
start_year, start_month = 2023, 2    # newest archive month to visit (February 2023)
total_articles_to_scrape = 6500      # stop once this many links are collected
articles_scraped = 0                 # running count of collected links

# Empty table that the scraped (title, link, date) rows are appended to
abanteData = pd.DataFrame(columns=['Statement', 'Link', 'Date'])

# Chrome WebDriver session created through google_colab_selenium
driver = gs.Chrome()

def safe_get(driver, url, retries=3):
    """Load ``url`` in ``driver``, retrying on failure with a growing delay.

    Parameters
    ----------
    driver : object
        Anything exposing ``get(url)`` (here: the Selenium WebDriver).
    url : str
        Page to open.
    retries : int, optional
        Maximum number of attempts (default 3).

    Returns
    -------
    bool
        ``True`` as soon as one attempt succeeds, ``False`` when every
        attempt raised (or ``retries`` is 0).

    Notes
    -----
    A successful load is followed by a random 10-30 s pause to throttle the
    crawl. A failed attempt is followed by ``2**attempt`` plus 5-15 s of
    jitter, but only when another attempt will follow. The final failure
    no longer sleeps or announces a retry that never happens.
    """
    for attempt in range(retries):
        try:
            print(f"Attempt {attempt + 1}/{retries} for URL: {url}")
            driver.get(url)
        except Exception as e:
            if attempt + 1 >= retries:
                # Last attempt: nothing left to wait for.
                print(f"Attempt {attempt + 1}/{retries} failed: {e}.")
                break
            wait_time = (2 ** attempt) + random.uniform(5, 15)  # Exponential backoff
            print(f"Retry {attempt + 1}/{retries} failed: {e}. Retrying in {wait_time:.2f} sec...")
            time.sleep(wait_time)
        else:
            # Slow the crawl down so consecutive requests are spaced out.
            wait_time = random.uniform(10, 30)
            print(f"Waiting {wait_time:.2f} seconds before next request...")
            time.sleep(wait_time)
            return True

    print(f"Skipping {url} after {retries} failed attempts.")
    return False

# Crawl the monthly archive pages (https://www.abante.com.ph/<year>/<month>/page/<n>)
# backward in time, from start_year/start_month down to January 2020, until
# total_articles_to_scrape links have been collected.

# A month is abandoned after this many pages in a row fail to load. Without a
# cap, a dead connection would make the page counter climb forever.
max_failed_pages_in_a_row = 5

# Every scraped row is collected here and turned into a DataFrame once.
scraped_rows = []

try:
    for year in range(start_year, 2019, -1):  # 2019 is exclusive: last year visited is 2020
        first_month = start_month if year == start_year else 12
        for month in range(first_month, 0, -1):  # Reverse month order
            current_month = f"{month:02d}"  # Format month as 01, 02, etc.
            page = 1
            failed_pages_in_a_row = 0

            while articles_scraped < total_articles_to_scrape:
                try:
                    print('Processing page:', page, 'Year:', year, 'Month:', current_month)
                    url = f'https://www.abante.com.ph/{year}/{current_month}/page/{page}'
                    print(f'URL: {url}')

                    # safe_get() retries and throttles; False means the page never loaded.
                    if not safe_get(driver, url):
                        failed_pages_in_a_row += 1
                        if failed_pages_in_a_row >= max_failed_pages_in_a_row:
                            print(f'Giving up on {year}/{current_month}: '
                                  f'{failed_pages_in_a_row} consecutive pages failed to load.')
                            break
                        page += 1
                        continue
                    failed_pages_in_a_row = 0

                    # Wait for articles to load. A timeout here is caught below
                    # and ends the current month.
                    WebDriverWait(driver, 120).until(EC.visibility_of_element_located((By.CLASS_NAME, 'post')))

                    # Get the page source and parse it
                    source = driver.page_source
                    soup = BeautifulSoup(source, 'html.parser')
                    links = soup.find('div', {'class': 'page-content'}).find_all('article', {'class': 'post'})
                    print(f'Found {len(links)} articles on page {page}.')

                    if len(links) == 0:
                        # No articles on this page: the month is exhausted.
                        break

                    for j in links:
                        if articles_scraped >= total_articles_to_scrape:
                            break

                        try:
                            title_link = j.find('h2', {'class': 'entry-title'}).find('a')
                            Statement = title_link.text.strip()
                            Link = title_link['href'].strip()
                            Date = ''  # Not extracted here; left blank

                            scraped_rows.append({'Statement': Statement, 'Link': Link, 'Date': Date})
                            articles_scraped += 1

                        except Exception as e:
                            # A malformed article is reported and skipped.
                            error_type, error_obj, error_info = sys.exc_info()
                            print(f'ERROR FOR LINK: {url}')
                            print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

                    page += 1

                except Exception as e:
                    # Any page-level error ends the current month.
                    error_type, error_obj, error_info = sys.exc_info()
                    print(f'ERROR FOR LINK: {url}')
                    print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')
                    break

            # If article limit is reached, stop iterating months
            if articles_scraped >= total_articles_to_scrape:
                break

        # ...and stop iterating years as well
        if articles_scraped >= total_articles_to_scrape:
            break

except Exception as e:
    error_type, error_obj, error_info = sys.exc_info()
    print(f'ERROR: {error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

finally:
    try:
        # Built here so that rows collected before an error or a manual
        # interrupt still end up in abanteData.
        if scraped_rows:
            abanteData = pd.concat([abanteData, pd.DataFrame(scraped_rows)], ignore_index=True)
    finally:
        # Close the driver
        driver.quit()

# Drop duplicates and reset index
abanteData = abanteData.drop_duplicates().reset_index(drop=True)

# Save the file
filename = f"{dir}{site}_{date.today()}_NEWS_LinkList.csv"
abanteData.to_csv(filename, index=False)

# Display the first few rows
print(abanteData.head())

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Processing page: 1 Year: 2023 Month: 02
URL: https://www.abante.com.ph/2023/02/page/1
Attempt 1/3 for URL: https://www.abante.com.ph/2023/02/page/1
Retry 1/3 failed: HTTPConnectionPool(host='localhost', port=49099): Read timed out. (read timeout=120). Retrying in 7.60 sec...
Attempt 2/3 for URL: https://www.abante.com.ph/2023/02/page/1
Waiting 24.88 seconds before next request...
Found 10 articles on page 1.
Processing page: 2 Year: 2023 Month: 02
URL: https://www.abante.com.ph/2023/02/page/2
Attempt 1/3 for URL: https://www.abante.com.ph/2023/02/page/2
Waiting 28.70 seconds before next request...
Found 10 articles on page 2.
Processing page: 3 Year: 2023 Month: 02
URL: https://www.abante.com.ph/2023/02/page/3
Attempt 1/3 for URL: https://www.abante.com.ph/2023/02/page/3
Waiting 20.75 seconds before next request...
Found 10 articles on page 3.
Processing page: 4 Year: 2023 Month: 02
URL: https://www.abante.com.ph/2023/02/page/4
Attempt 1/3 for URL: https://www.abante.com.ph/2023/02/pag

### **Article Scraper (Test)**

In [None]:
# Smoke test: scrape one known article and print each extracted field.

url = 'https://www.abante.com.ph/2024/08/31/inflation-pinabagal-ng-malakas-na-piso/'
soup = BeautifulSoup(extract_source(url).text, 'html.parser')

# Section label and headline share the same class string; only the tag differs.
heading_class = 'elementor-heading-title elementor-size-default'

Section = soup.find('div', class_=heading_class).text.strip()
print(Section)

Title = soup.find('h1', class_=heading_class).text.strip()
print(Title)

author_widget = soup.find(
    'div',
    class_='elementor-element elementor-element-0255291 elementor-widget elementor-widget-heading',
)
Author = author_widget.find('div').find('h2').text.strip()
print(Author)

date_item = soup.find(
    'span',
    class_='elementor-icon-list-text elementor-post-info__item elementor-post-info__item--type-date',
)
Date = date_item.find('time').text.strip()
print(Date)

# Article body: every <p> inside the post-content widget, one per line.
content_widget = soup.find('div', attrs={'data-widget_type': 'theme-post-content.default'})
textList = content_widget.find('div').find_all("p")
body = "".join(f"{paragraph.text}\n" for paragraph in textList)
print(body)

NEWS
Inflation pinabagal ng malakas na piso
Abante News
August 31, 2024
Maaaring bumagal ang pagtaas ng inflation nitong Agosto dahil sa malakas na piso, pagbaba sa presyo ng produktong petrolyo at mas murang bigas, karne at isda, ayon sa pagtaya ng Bangko Sentral ng Pilipinas (BSP).
Sa tantiya ng BSP, ang inflation nitong Agosto ay naglalaro lamang sa pagitan ng 3.2 hanggang 4 percent, mas mababa sa 4.4 percent naitala noong Hulyo.
Malalaman lamang ito kapag nilabas na ng Philippine Statistics Authority (PSA) ang opisyal na inflation data sa Setyembre 5.
“Higher electricity rates and higher prices for agricultural commodities, owing to unfavorable weather conditions, are the primary sources of upward price pressures for the month,” ayon sa BSP.
“These factors are expected to be offset by lower domestic oil prices as well as lower rice, fish and meat prices along with the peso appreciation,” dugtong pa nila.
Kung magkakatotoo ang prediksiyon ng BSP, magkakaroon ng katuwiran ang desisyo

### **Article Scraper**

In [None]:
def abanteArticleScraper(url):
    """Scrape one Abante article page into a flat row.

    Parameters
    ----------
    url : str
        Address of the article.

    Returns
    -------
    list
        ``[Section, Title, Author, Date, body]`` on success. ``Author`` is
        ``""`` when the author widget is missing. An empty list is returned
        when the page cannot be fetched or a required element is absent;
        the error is printed. ``row`` is initialised before the ``try`` so
        this failure path no longer raises ``UnboundLocalError``.
    """
    row = []
    try:
        soup = BeautifulSoup(extract_source(url).text, 'html.parser')

        # Section label and headline share the same class string.
        heading_class = 'elementor-heading-title elementor-size-default'
        Section = soup.find('div', {'class': heading_class}).text.strip()
        Title = soup.find('h1', {'class': heading_class}).text.strip()

        try:
            Author = soup.find('div', {'class':'elementor-element elementor-element-0255291 elementor-widget elementor-widget-heading'}).find('div').find('h2').text.strip()
        except AttributeError:
            # Author widget not present on this page; treated as optional.
            Author = ""
        except Exception as e:
            error_type, error_obj, error_info = sys.exc_info()
            print(f'ERROR FOR LINK: {url}')
            print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')
            Author = ""

        Date = soup.find('span', {'class': 'elementor-icon-list-text elementor-post-info__item elementor-post-info__item--type-date'}).find('time').text.strip()

        # Body text: every <p> of the post-content widget, newline-terminated.
        textList = soup.find('div', attrs={'data-widget_type':'theme-post-content.default'}).find('div').find_all("p")
        body = "".join(t.text + "\n" for t in textList)

        row.extend((Section, Title, Author, Date, body))

    except Exception as e:
        error_type, error_obj, error_info = sys.exc_info()
        print(f'ERROR FOR LINK: {url}')
        print(f'{error_type.__name__} occurred on Line {error_info.tb_lineno}: {e}')

    return row

# Scrape every article listed in today's link list and save the result.
# NOTE(review): both paths embed date.today(), so this only finds the link
# list when it was produced on the same calendar day. The earlier section of
# this notebook builds the same file names, so a same-day run of both
# sections would overwrite those files. Confirm the intended workflow.
site = "Abante"
filename = f"{dir}{site}_{date.today()}_NEWS.csv"

df = pd.read_csv(f'{dir}{site}_{date.today()}_NEWS_LinkList.csv')

rows_list = []
for index, url in df["Link"].items():
    print(index, url)
    # One list per article; empty when the scraper reported an error.
    rows_list.append(abanteArticleScraper(url))

abanteData2 = pd.DataFrame(rows_list, columns=["Section", "Title", "Author", "Date", "Text"])

# Save file
abanteData2.to_csv(filename)
abanteData2.head()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
1686 https://www.abante.com.ph/2023/01/29/dedma-sa-patutsada-ni-bea-julia-gerald-todo-yakapan-halikan-sa-ig/
1687 https://www.abante.com.ph/2023/01/29/ginto-sa-france-ej-obiena-patuloy-ang-bagsik-sa-2023/
1688 https://www.abante.com.ph/2023/01/29/umanib-sa-uniteam-bamboo-isinusuka-ng-kakampink/
1689 https://www.abante.com.ph/2023/01/29/aspin-ginawang-vendor-ng-tinapa/
1690 https://www.abante.com.ph/2023/01/29/kahit-40-anyos-na-nonito-donaire-jr-may-ibubuga-pa-rin/
1691 https://www.abante.com.ph/2023/01/29/staff-pinababa-ng-senador-sa-chopper-sa-delikadong-lugar/
1692 https://www.abante.com.ph/2023/01/29/jessy-luis-niyabang-ganda-ni-isabella/
1693 https://www.abante.com.ph/2023/01/29/joel-embiid-atakeng-mvp-kontra-denver/
1694 https://www.abante.com.ph/2023/01/29/gm-wesley-so-tabla-uli-sa-round-12-ng-tata-steel/
1695 https://www.abante.com.ph/2023/01/29/daniel-herbert-binawalang-dumikit-kina-kathryn-ruffa/
1696 https://www

Unnamed: 0,Section,Title,Author,Date,Text
0,NEWS,Transport group: Ginipit nila kami! Bautista t...,Abante News,"February 28, 2023",Sinisisi ng mga transport group si Department ...
1,NEWS,Bagitong mambabatas hilig magpalibre,Abante News,"February 28, 2023",Sino raw itong mambabatas ang hilig magpalibre...
2,NEWS,"Makati, Parañaque, Pasay 3 araw walang tubig",Abante News,"February 28, 2023",Makakaranas ng water interruption sa loob ng 3...
3,NEWS,‘Pinas kikita ng P8B sa durian export,Abante News,"February 28, 2023",Tinatayang aabot sa $150 milyon o higit P8 bil...
4,NEWS,Trabaho sa mga purdoy isasabatas,Abante News,"February 28, 2023",Inaprubahan ng House committee on rural develo...
