In [1]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
# Start a Firefox WebDriver session and load the BBC News landing page.
url = 'https://www.bbc.com/news'
browser = webdriver.Firefox()
browser.get(url)
# Echo the page title as a quick confirmation that the page was loaded.
print(f"Successfully visited: {browser.title}")


Successfully visited: BBC News - Breaking news, video and the latest top stories from the U.S. and around the world


In [5]:
# Shut down the WebDriver session.
# NOTE(review): the cells below still use `browser`, so on a top-to-bottom
# run this cell must be executed last (after scraping is finished), not here.
browser.quit()

Dismissing the cookie consent banner


In [3]:
# The consent dialog is rendered inside an iframe that appears shortly after
# the page loads, so wait for it instead of looking it up immediately.
consent_wait = WebDriverWait(browser, 10)
consent_wait.until(
    EC.frame_to_be_available_and_switch_to_it(
        (By.CSS_SELECTOR, "iframe[id^='sp_message_iframe']")
    )
)
try:
    # Accept the dialog once the button can actually be clicked.
    button1 = consent_wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "button[title='I agree']"))
    )
    button1.click()
finally:
    # Always return to the main document, even if the click failed, so later
    # cells do not keep searching inside the consent iframe.
    browser.switch_to.default_content()


Changing the region section


In [26]:
# Open the "Europe" section. Wait until the link is clickable rather than
# looking it up immediately, so the cell does not race the page rendering.
button2 = WebDriverWait(browser, 10).until(
    EC.element_to_be_clickable((By.LINK_TEXT, 'Europe'))
)
button2.click()


In [13]:

def _collect_article_cards(driver):
    """Return one record per article card on the page currently loaded in ``driver``.

    Every ``<a>`` element is inspected. Anchors without a ``card-headline`` or
    ``card-description`` child (menus, headers, footers, ...) are skipped.
    """
    records = []
    for anchor in driver.find_elements(By.TAG_NAME, 'a'):
        try:
            headline = anchor.find_element(By.CSS_SELECTOR, "[data-testid='card-headline']").text
            summary = anchor.find_element(By.CSS_SELECTOR, "[data-testid='card-description']").text
            href = anchor.get_attribute('href')
        except WebDriverException:
            # Not an article card (or the element could not be read): skip it.
            # Only Selenium errors are ignored, so Ctrl-C and programming
            # errors still surface instead of being silently swallowed.
            continue
        records.append({
            'Headlines': headline,
            'Summary': summary,
            'URL': href})
    return records


def scrape_via_next_button(driver, num_pages, regions=None):
    """Scrape article cards from several region sections, following the pager.

    For each region the matching navigation link is clicked, then up to
    ``num_pages`` result pages are scraped by clicking the
    "Go to page N" pagination buttons.

    Parameters
    ----------
    driver : selenium WebDriver
        Session that already has the news page open.
    num_pages : int
        Maximum number of result pages to scrape per region.
    regions : iterable of str, optional
        Link texts of the region sections to visit. Defaults to the eight
        regions that were previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Columns ``Headlines``, ``Summary`` and ``URL``, one row per card found.
        Rows are not de-duplicated. The columns are present even when nothing
        was scraped.
    """
    if regions is None:
        regions = ['US & Canada', 'UK', 'Africa', 'Asia', 'Australia', 'Europe', 'Latin America', 'Middle East']

    wait = WebDriverWait(driver, 10)
    data = []
    for region in regions:
        print (f'Currently proccessing {region}')
        try:
            region_btn = wait.until(EC.element_to_be_clickable((By.LINK_TEXT, region)))
            region_btn.click()
            # Give the section page time to render before scraping it.
            time.sleep(3)
        except Exception as e:
            print(f"Could not click region '{region}'. Skipping... Error: {e}")
            continue

        for i in range(1, num_pages + 1):
            print(f"--- Processing Page {i} ---")
            # 1. Scrape the current page. Use the driver that was passed in,
            #    not a notebook-level global.
            data.extend(_collect_article_cards(driver))

            # 2. Move to the next page (there is nothing to click after the last one).
            if i < num_pages:
                try:
                    next_btn = wait.until(EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, f"button[aria-label='Go to page {i+1}']")))
                    # Bring the pager into view before clicking it.
                    driver.execute_script("arguments[0].scrollIntoView();", next_btn)
                    next_btn.click()
                    # Give the next page of cards time to replace the old ones.
                    time.sleep(2)
                except Exception as e:
                    # This region has fewer pages than requested: go to the next region.
                    print(f"Could not find next button: {e}")
                    break

    # Pin the columns so an empty scrape still yields the expected schema.
    df = pd.DataFrame(data, columns=['Headlines', 'Summary', 'URL'])
    return df
    



In [18]:
# Scrape up to 12 result pages for every region using the open browser session.
df = scrape_via_next_button(driver=browser, num_pages=12)

Currently proccessing US & Canada
--- Processing Page 1 ---
--- Processing Page 2 ---
--- Processing Page 3 ---
--- Processing Page 4 ---
--- Processing Page 5 ---
--- Processing Page 6 ---
--- Processing Page 7 ---
--- Processing Page 8 ---
--- Processing Page 9 ---
--- Processing Page 10 ---
--- Processing Page 11 ---
--- Processing Page 12 ---
Currently proccessing UK
--- Processing Page 1 ---
--- Processing Page 2 ---
--- Processing Page 3 ---
--- Processing Page 4 ---
--- Processing Page 5 ---
--- Processing Page 6 ---
--- Processing Page 7 ---
--- Processing Page 8 ---
--- Processing Page 9 ---
--- Processing Page 10 ---
--- Processing Page 11 ---
--- Processing Page 12 ---
Currently proccessing Africa
--- Processing Page 1 ---
--- Processing Page 2 ---
--- Processing Page 3 ---
--- Processing Page 4 ---
--- Processing Page 5 ---
--- Processing Page 6 ---
--- Processing Page 7 ---
--- Processing Page 8 ---
--- Processing Page 9 ---
--- Processing Page 10 ---
--- Processing Page 1

In [19]:
# Show the scraped articles (columns: Headlines, Summary, URL).
df 

Unnamed: 0,Headlines,Summary,URL
0,Why haven't more Americans faced charges in th...,"Sarah Smith explains why so far, no Americans,...",https://www.bbc.com/news/videos/ce8w279kz6xo
1,Two sisters among those killed in Lake Tahoe a...,More is being learned about the victims even a...,https://www.bbc.com/news/articles/c4g02nxx2plo
2,USA superstar Liu wins women's skating Olympic...,American superstar Alysa Liu adds Olympic figu...,https://www.bbc.com/sport/articles/c33jz026mg3o
3,'Who's next?' - American lawmakers call for 'j...,US lawmakers urge their government to follow t...,https://www.bbc.com/news/articles/c86yj2vjp5go
4,US clinch ice hockey gold with overtime winner,Megan Keller scores in overtime to clinch Wint...,https://www.bbc.com/sport/ice-hockey/articles/...
...,...,...,...
2727,Watch: BBC in Tehran for first time since prot...,"Lyse Doucet reports from Iran, where she says ...",https://www.bbc.com/news/videos/c5yr82796j4o
2728,Watch: Inside Gaza hospital struggling to prov...,More aid has been allowed into Gaza since the ...,https://www.bbc.com/news/videos/c62wpd7wj3ro
2729,Inside Syrian camp holding wives and children ...,"Kurdish-run prisons hold about 8,000 suspected...",https://www.bbc.com/news/videos/c3ve0xd451go
2730,BBC visits UN compound Israel is demolishing i...,John Sudworth says the sounds of heavy machine...,https://www.bbc.com/news/videos/cj0n127y9eqo


In [20]:
# Print a boolean Series with one entry per row of `df`; True marks a row
# that pandas reports as a duplicate.
print(df.duplicated())

0       False
1       False
2       False
3       False
4       False
        ...  
2727     True
2728     True
2729     True
2730     True
2731    False
Length: 2732, dtype: bool


In [None]:
# Keep a single row per headline and renumber the rows from 0.
df_clean = df.drop_duplicates(subset=['Headlines'], ignore_index=True)


In [36]:
# Show the frame after duplicate headlines have been dropped.
df_clean

Unnamed: 0,Headlines,Summary,URL
0,Why haven't more Americans faced charges in th...,"Sarah Smith explains why so far, no Americans,...",https://www.bbc.com/news/videos/ce8w279kz6xo
1,Two sisters among those killed in Lake Tahoe a...,More is being learned about the victims even a...,https://www.bbc.com/news/articles/c4g02nxx2plo
2,USA superstar Liu wins women's skating Olympic...,American superstar Alysa Liu adds Olympic figu...,https://www.bbc.com/sport/articles/c33jz026mg3o
3,'Who's next?' - American lawmakers call for 'j...,US lawmakers urge their government to follow t...,https://www.bbc.com/news/articles/c86yj2vjp5go
4,US clinch ice hockey gold with overtime winner,Megan Keller scores in overtime to clinch Wint...,https://www.bbc.com/sport/ice-hockey/articles/...
...,...,...,...
872,Iran leader says protesters are vandals trying...,The largest demonstrations in years have left ...,https://www.bbc.com/news/articles/c4g49djqqjgo
873,Protesters take to the streets of Tehran on Fr...,"The protests, now in their 13th night, erupted...",https://www.bbc.com/news/videos/cj6w82wg416o
874,Why are there huge protests going on in Iran?,Anti-government protests in Iran have continue...,https://www.bbc.com/news/videos/cje1v852evlo
875,Trump's warning looms over Iran protests response,Iranian authorities appear to be unusually res...,https://www.bbc.com/news/articles/c23r4yeyxl9o


In [37]:
# Write the de-duplicated articles to disk; the header row is written by
# default and the row index is omitted.
df_clean.to_csv('BBC_news.csv', index=False)