# 1)	Write a python program to display IMDB’s Top rated 100 Indian movies’ data https://www.imdb.com/list/ls056092300/ (i.e. name, rating, year ofrelease) and make data frame. 

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_imdb_top_100_indian_movies():
    # URL of the IMDb list with movie details
    url = "https://www.imdb.com/list/ls056092300/"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all movie entries
        movie_entries = soup.find_all('div', class_='lister-item-content')

        # Initialize lists to store movie details
        names = []
        ratings = []
        years = []

        # Extract data for each movie
        for movie in movie_entries:
            # Movie name
            name = movie.h3.a.text.strip()
            names.append(name)

            # Movie rating
            rating = movie.find('span', class_='ipl-rating-star__rating').text.strip()
            ratings.append(rating)

            # Year of release
            year = movie.find('span', class_='lister-item-year').text.strip('()')
            years.append(year)

        # Create a DataFrame
        df = pd.DataFrame({'Name': names, 'Rating': ratings, 'Year of Release': years})

        return df
    else:
        print("Failed to fetch the webpage. Status code:", response.status_code)
        return None

# Call the function to scrape IMDb top 100 Indian movies' details
imdb_top_100_indian_movies_df = scrape_imdb_top_100_indian_movies()

# Print the DataFrame
print(imdb_top_100_indian_movies_df)


                                 Name Rating Year of Release
0                     Ship of Theseus      8            2012
1                              Iruvar    8.4            1997
2                     Kaagaz Ke Phool    7.8            1959
3   Lagaan: Once Upon a Time in India    8.1            2001
4                     Pather Panchali    8.2            1955
..                                ...    ...             ...
95                        Apur Sansar    8.4            1959
96                        Kanchivaram    8.2            2008
97                    Monsoon Wedding    7.3            2001
98                              Black    8.1            2005
99                            Deewaar      8            1975

[100 rows x 3 columns]


# 4)	Write a python program to scrape details of all the posts from https://www.patreon.com/coreyms .Scrape the heading, date, content and the likes for the video from the link for the youtube video from the post. 

In [5]:
import requests
from bs4 import BeautifulSoup

def scrape_patreon_posts():
    # URL of the webpage with posts
    url = "https://www.patreon.com/coreyms"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all post entries
        post_entries = soup.find_all('div', class_='post-container')

        # Initialize lists to store post details
        headings = []
        dates = []
        contents = []
        likes = []

        # Extract data for each post
        for post in post_entries:
            # Heading
            heading = post.find('h2', class_='post-header__title').text.strip()
            headings.append(heading)

            # Date
            date = post.find('time', class_='date').text.strip()
            dates.append(date)

            # Content
            content = post.find('div', class_='post-content').text.strip()
            contents.append(content)

            # Likes (if available)
            like = post.find('span', class_='icon-like-filled')
            if like:
                likes.append(like.next_sibling.strip())
            else:
                likes.append('Not available')

        # Return the lists of post details
        return headings, dates, contents, likes
    else:
        print("Failed to fetch the webpage. Status code:", response.status_code)
        return None, None, None, None

# Call the function to scrape post details
headings, dates, contents, likes = scrape_patreon_posts()

# Print the details
for i in range(len(headings)):
    print("Heading:", headings[i])
    print("Date:", dates[i])
    print("Content:", contents[i])
    print("Likes:", likes[i])
    print()

# 5)	Write a python program to scrape house details from mentioned URL. It should include house title, location, area, EMI and price from https://www.nobroker.in/ .Enter three localities which are Indira Nagar, Jayanagar, Rajaji Nagar. 

In [8]:
import requests
from bs4 import BeautifulSoup

def scrape_house_details(locality):
    # URL of the webpage with house details
    url = f"https://www.nobroker.in/property/sale/{locality}/?searchParam=W3sibGF0IjoxOS4zNDU0NzY3LCJsb24iOjc3LjY0NjAwOTUsInBsYWNlSWQiOiJDaElKMmJFcHhzQkJPSlNSV3loemM0cENlOWwiLCJwbGFjZU5hbWUiOiJDaElKMmJFcHhzQkJPSlNSV3loemM0cENlOWwifV0=&radius=2.0"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all house entries
        house_entries = soup.find_all('div', class_='card')

        # Initialize lists to store house details
        titles = []
        locations = []
        areas = []
        emis = []
        prices = []

        # Extract data for each house
        for house in house_entries:
            # Title
            title = house.find('h2', class_='heading-6 font-semi-bold nb__1AShY').text.strip()
            titles.append(title)

            # Location
            location = house.find('div', class_='nb__2CMjv').text.strip()
            locations.append(location)

            # Area
            area = house.find('div', class_='nb__3oNyC').text.strip()
            areas.append(area)

            # EMI
            emi = house.find('div', class_='font-semi-bold heading-6', text='₹').next_sibling.strip()
            emis.append(emi)

            # Price
            price = house.find('div', class_='font-semi-bold heading-6').text.strip()
            prices.append(price)

        # Return the lists of house details
        return titles, locations, areas, emis, prices
    else:
        print(f"Failed to fetch the webpage for {locality}. Status code:", response.status_code)
        return None, None, None, None, None

# List of localities
localities = ['indira-nagar', 'jayanagar', 'rajaji-nagar']

# Scrape house details for each locality
for locality in localities:
    print(f"Scraping house details for {locality.capitalize()}:")
    titles, locations, areas, emis, prices = scrape_house_details(locality)
    
    # Print the details
    for i in range(len(titles)):
        print("Title:", titles[i])
        print("Location:", locations[i])
        print("Area:", areas[i])
        print("EMI:", emis[i])
        print("Price:", prices[i])
        print()


Scraping house details for Indira-nagar:
Scraping house details for Jayanagar:
Scraping house details for Rajaji-nagar:


# 6)	Write a python program to scrape first 10 product details which include product name , price , Image URL from https://www.bewakoof.com/bestseller?sort=popular . 

In [10]:
import requests
from bs4 import BeautifulSoup

def scrape_product_details():
    # URL of the webpage with product details
    url = "https://www.bewakoof.com/bestseller?sort=popular"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all product entries
        product_entries = soup.find_all('div', class_='productCardWrapper-2RwRzX')

        # Initialize lists to store product details
        product_names = []
        prices = []
        image_urls = []

        # Extract data for each product (up to first 10)
        for product in product_entries[:10]:
            # Product name
            name = product.find('p', class_='productCardName').text.strip()
            product_names.append(name)

            # Price
            price = product.find('p', class_='productCardPrice').text.strip()
            prices.append(price)

            # Image URL
            image_div = product.find('div', class_='productCardImageWrapper-2N53yh')
            image_url = image_div.find('img')['src']
            image_urls.append(image_url)

        # Return the lists of product details
        return product_names, prices, image_urls
    else:
        print("Failed to fetch the webpage. Status code:", response.status_code)
        return None, None, None

# Call the function to scrape product details
product_names, prices, image_urls = scrape_product_details()

# Print the details
for i in range(len(product_names)):
    print("Product Name:", product_names[i])
    print("Price:", prices[i])
    print("Image URL:", image_urls[i])
    print()

# 7)	Please visit https://www.cnbc.com/world/?region=world and scrap- a) headings b)	date c)	News link 


In [19]:
import requests
from bs4 import BeautifulSoup

def scrape_cnbc_news():
    # URL of the webpage with news details
    url = "https://www.cnbc.com/world/?region=world"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all news entries
        news_entries = soup.find_all('div', class_='Card-titleContainer')

        # Initialize lists to store news details
        headings = []
        dates = []
        news_links = []

        # Extract data for each news entry
        for entry in news_entries:
            # Heading
            heading= entry.find('a', class_='Card-titleLink').text.strip()
            headings.append(heading)

            # Date
            date = entry.find('span', class_='Card-time').text.strip()
            dates.append(date)

            # News link
            news_link = "https://www.cnbc.com" + entry.find('a', class_='Card-titleLink')['href']
            news_links.append(news_link)

        # Return the lists of news details
        return headings, dates, news_links
    else:
        print("Failed to fetch the webpage. Status code:", response.status_code)
        return None, None, None

# Call the function to scrape news details
headings, dates, news_links = scrape_cnbc_news()

# Print the details
for i in range(len(headings)):
    print("Heading:", headings[i])
    print("Date:", dates[i])
    print("News Link:", news_links[i])
    print()


AttributeError: 'NoneType' object has no attribute 'text'

# 8)	Please visit https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-     articles/ and scrap-          a) Paper title b)	date c)	Author 


In [21]:
import requests
from bs4 import BeautifulSoup

def scrape_most_downloaded_articles():
    # URL of the webpage with article details
    url = "https://www.keaipublishing.com/en/journals/artificial-intelligence-in-agriculture/most-downloaded-articles/"

    # Send a GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the webpage
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find all article entries
        article_entries = soup.find_all('div', class_='list-item-content')

        # Initialize lists to store article details
        paper_titles = []
        dates = []
        authors = []

        # Extract data for each article entry
        for entry in article_entries:
            # Paper title
            paper_title = entry.find('a', class_='title').text.strip()
            paper_titles.append(paper_title)

            # Date
            date = entry.find('span', class_='date').text.strip()
            dates.append(date)

            # Author
            author = entry.find('span', class_='author').text.strip()
            authors.append(author)

        # Return the lists of article details
        return paper_titles, dates, authors
    else:
        print("Failed to fetch the webpage. Status code:", response.status_code)
        return None, None, None

# Call the function to scrape article details
paper_titles, dates, authors = scrape_most_downloaded_articles()

# Print the details
for i in range(len(paper_titles)):
    print("Paper Title:", paper_titles[i])
    print("Date:", dates[i])
    print("Author:", authors[i])
    print()
