In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_imdb_top_100_indian_movies(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        movie_container = soup.find('div', class_='lister-list')
        
        names = []
        ratings = []
        years = []
        
        for movie in movie_container.find_all('div', class_='lister-item-content'):
            name = movie.h3.a.text.strip()
            names.append(name)
            
            rating = movie.find('span', class_='ipl-rating-star__rating').text.strip()
            ratings.append(float(rating))
            
            year = movie.h3.find('span', class_='lister-item-year').text.strip('()')
            years.append(year)
        
        df = pd.DataFrame({'Name': names, 'Rating': ratings, 'Year': years})
        
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

url = "https://www.imdb.com/list/ls056092300/"
imdb_top_100_df = scrape_imdb_top_100_indian_movies(url)
print(imdb_top_100_df)

                                 Name  Rating  Year
0                     Ship of Theseus     8.0  2012
1                              Iruvar     8.4  1997
2                     Kaagaz Ke Phool     7.8  1959
3   Lagaan: Once Upon a Time in India     8.1  2001
4                     Pather Panchali     8.2  1955
..                                ...     ...   ...
95                        Apur Sansar     8.4  1959
96                        Kanchivaram     8.2  2008
97                    Monsoon Wedding     7.3  2001
98                              Black     8.1  2005
99                            Deewaar     8.0  1975

[100 rows x 3 columns]


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_peachmode_products(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        product_container = soup.find_all('div', class_='product-item')
        
        names = []
        prices = []
        discounts = []
        
        for product in product_container:
            name = product.find('a', class_='product-title').text.strip()
            names.append(name)
            
            price = product.find('span', class_='money').text.strip()
            prices.append(price)
            
            discount_tag = product.find('span', class_='discount')
            discount = discount_tag.text.strip() if discount_tag else 'No Discount'
            discounts.append(discount)
        
        df = pd.DataFrame({'Product Name': names, 'Price': prices, 'Discount': discounts})
        
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

url = "https://peachmode.com/search?q=bags"
peachmode_products_df = scrape_peachmode_products(url)
print(peachmode_products_df)


Empty DataFrame
Columns: [Product Name, Price, Discount]
Index: []


In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_odi_teams_rankings(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        teams_data = []
        for team in soup.find_all('tr', class_='rankings-block__banner')[:1] + soup.find_all('tr', class_='table-body')[:9]:
            team_name = team.find('span', class_='u-hide-phablet').text.strip()
            matches = team.find_all('td', class_='table-body__cell')[0].text.strip()
            points = team.find_all('td', class_='table-body__cell')[1].text.strip()
            rating = team.find('td', class_='table-body__cell u-text-right rating').text.strip()
            
            teams_data.append({'Team': team_name, 'Matches': matches, 'Points': points, 'Rating': rating})
        
        df = pd.DataFrame(teams_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

def scrape_odi_batsmen_rankings(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        batsmen_data = []
        for batsman in soup.find_all('tr', class_='rankings-block__banner')[:1] + soup.find_all('tr', class_='table-body')[:9]:
            player_name = batsman.find('div', class_='rankings-block__banner--name').text.strip()
            team = batsman.find('div', class_='rankings-block__banner--nationality').text.strip()
            rating = batsman.find('div', class_='rankings-block__banner--rating').text.strip()
            
            batsmen_data.append({'Player': player_name, 'Team': team, 'Rating': rating})
        
        df = pd.DataFrame(batsmen_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

def scrape_odi_bowlers_rankings(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        bowlers_data = []
        for bowler in soup.find_all('tr', class_='rankings-block__banner')[:1] + soup.find_all('tr', class_='table-body')[:9]:
            player_name = bowler.find('div', class_='rankings-block__banner--name').text.strip()
            team = bowler.find('div', class_='rankings-block__banner--nationality').text.strip()
            rating = bowler.find('div', class_='rankings-block__banner--rating').text.strip()
            
            bowlers_data.append({'Player': player_name, 'Team': team, 'Rating': rating})
        
        df = pd.DataFrame(bowlers_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage:
odi_teams_url = "https://www.icc-cricket.com/rankings/mens/team-rankings/odi"
odi_batsmen_url = "https://www.icc-cricket.com/rankings/mens/player-rankings/odi/batting"
odi_bowlers_url = "https://www.icc-cricket.com/rankings/mens/player-rankings/odi/bowling"

odi_teams_df = scrape_odi_teams_rankings(odi_teams_url)
odi_batsmen_df = scrape_odi_batsmen_rankings(odi_batsmen_url)
odi_bowlers_df = scrape_odi_bowlers_rankings(odi_bowlers_url)

# Display the DataFrames
print("ODI Teams Rankings:")
print(odi_teams_df)

print("\nODI Batsmen Rankings:")
print(odi_batsmen_df)

print("\nODI Bowlers Rankings:")
print(odi_bowlers_df)


ODI Teams Rankings:
Empty DataFrame
Columns: []
Index: []

ODI Batsmen Rankings:
Empty DataFrame
Columns: []
Index: []

ODI Bowlers Rankings:
Empty DataFrame
Columns: []
Index: []


In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_patreon_posts(url):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        posts_data = []
        
        # Iterate through each post
        for post in soup.find_all('div', class_='post-container'):
            # Get post heading
            heading = post.find('h2', class_='post-title').text.strip()
            
            # Get post date
            date = post.find('time', class_='post-date')['datetime'].strip()
            
            # Get post content
            content = post.find('div', class_='post-description').text.strip()
            
            # Get the link for the YouTube video
            youtube_link = post.find('a', class_='post-attachment')['href']
            
            # Extract likes for the associated YouTube video
            youtube_likes = scrape_youtube_likes(youtube_link)
            
            posts_data.append({'Heading': heading, 'Date': date, 'Content': content, 'YouTube Likes': youtube_likes})
        
        df = pd.DataFrame(posts_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

def scrape_youtube_likes(youtube_link):
    # Send a GET request to the YouTube link
    response = requests.get(youtube_link)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extract likes from the YouTube page
        likes = soup.find('button', class_='yt-uix-button-icon yt-uix-button-icon-subscribe yt-uix-button-icon-large').text.strip()
        
        return likes
    
    else:
        print(f"Failed to retrieve YouTube likes. Status code: {response.status_code}")
        return None

# Example usage:
patreon_url = "https://www.patreon.com/coreyms"
patreon_posts_df = scrape_patreon_posts(patreon_url)

# Display the DataFrame
print("Patreon Posts:")
print(patreon_posts_df)

Patreon Posts:
Empty DataFrame
Columns: []
Index: []


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_nobroker_houses(url, localities):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        houses_data = []
        
        for locality in localities:
            locality_url = f"{url}property/sale/{locality}"
            locality_response = requests.get(locality_url)
            
            if locality_response.status_code == 200:
                locality_soup = BeautifulSoup(locality_response.text, 'html.parser')
                
                for house in locality_soup.find_all('div', class_='card'):
                    title = house.find('h2', class_='heading-6').text.strip()
                    
                    location = house.find('div', class_='nb__2NPHR').text.strip()
                    
                    area = house.find('div', class_='nb__3oNyC').text.strip()
                    
                    emi_tag = house.find('div', class_='font-semi-bold heading-6', text='₹')
                    emi = emi_tag.text.strip() if emi_tag else 'N/A'
                    
                    price = house.find('div', class_='nb__2NPHR').find_next_sibling('div', class_='font-semi-bold heading-6').text.strip()
                    
                    houses_data.append({'Title': title, 'Location': location, 'Area': area, 'EMI': emi, 'Price': price})
            
            else:
                print(f"Failed to retrieve data for {locality}. Status code: {locality_response.status_code}")
        
        df = pd.DataFrame(houses_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage:
nobroker_url = "https://www.nobroker.in/"
localities_list = ['indira-nagar', 'jayanagar', 'rajaji-nagar']

nobroker_houses_df = scrape_nobroker_houses(nobroker_url, localities_list)

print("Nobroker Houses Details:")
print(nobroker_houses_df)

Nobroker Houses Details:
Empty DataFrame
Columns: []
Index: []


In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_bewakoof_bestsellers(url, num_products=10):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        products_data = []
        
        # Iterate through the first 10 product containers
        for product in soup.find_all('div', class_='productCardWrapper')[:num_products]:
            # Get product name
            name = product.find('div', class_='productCardDetail').h3.text.strip()
            
            # Get product price
            price = product.find('div', class_='productCardDetail').find('span', class_='originalPrice').text.strip()
            
            # Get product image URL
            image_url = product.find('div', class_='productCardImgWrapper').img['src']
            
            products_data.append({'Name': name, 'Price': price, 'Image URL': image_url})
        
        df = pd.DataFrame(products_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage:
bewakoof_url = "https://www.bewakoof.com/bestseller?sort=popular"
bewakoof_bestsellers_df = scrape_bewakoof_bestsellers(bewakoof_url)

# Display the DataFrame
print("Bewakoof Bestsellers:")
print(bewakoof_bestsellers_df)

Bewakoof Bestsellers:
Empty DataFrame
Columns: []
Index: []


In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_cnbc_headlines(url, num_headlines=5):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        headlines_data = []
        
        for headline in soup.find_all('li', class_='CardList-story'):
            heading = headline.find('div', class_='Card-headline').text.strip()
            
            date = headline.find('time', class_='Card-timestamp').text.strip()
            
            news_link = headline.find('a')['href']
            
            headlines_data.append({'Heading': heading, 'Date': date, 'News Link': news_link})
            
            if len(headlines_data) == num_headlines:
                break
        
        df = pd.DataFrame(headlines_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage:
cnbc_url = "https://www.cnbc.com/world/?region=world"
cnbc_headlines_df = scrape_cnbc_headlines(cnbc_url, num_headlines=5)

# Display the DataFrame
print("CNBC World Headlines:")
print(cnbc_headlines_df)

CNBC World Headlines:
Empty DataFrame
Columns: []
Index: []


In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_most_downloaded_articles(url, num_articles=5):
    response = requests.get(url)
    
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        
        articles_data = []
        
        # Iterate through the first 5 articles
        for article in soup.find_all('div', class_='article-info')[:num_articles]:
            # Get paper title
            title = article.find('h5', class_='card-title').text.strip()
            
            # Get date
            date = article.find('span', class_='meta-date').text.strip()
            
            # Get author
            author = article.find('span', class_='meta-author').text.strip()
            
            articles_data.append({'Paper Title': title, 'Date': date, 'Author': author})
        
        df = pd.DataFrame(articles_data)
        return df
    
    else:
        print(f"Failed to retrieve data. Status code: {response.status_code}")
        return None

# Example usage:
keaipublishing_url = "https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/"
most_downloaded_articles_df = scrape_most_downloaded_articles(keaipublishing_url, num_articles=5)

# Display the DataFrame
print("Most Downloaded Articles in Artificial Intelligence in Agriculture:")
print(most_downloaded_articles_df)

Most Downloaded Articles in Artificial Intelligence in Agriculture:
Empty DataFrame
Columns: []
Index: []
