From effd66f8f6706d658bebad34f349baaa016a6fd0 Mon Sep 17 00:00:00 2001
From: Charul00
Date: Thu, 3 Oct 2024 11:46:02 +0530
Subject: [PATCH 1/2] Web Scraper Script Added

---
Review changes: request timeout and HTTP status check, UTF-8 output,
main() guard, closed README code fence. The index hashes below are
those of the original submission.

 Web Scraper/README.md      |  9 +++++++
 Web Scraper/Web_Scraper.py | 54 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 Web Scraper/README.md
 create mode 100644 Web Scraper/Web_Scraper.py

diff --git a/Web Scraper/README.md b/Web Scraper/README.md
new file mode 100644
index 00000000..5c796460
--- /dev/null
+++ b/Web Scraper/README.md
@@ -0,0 +1,9 @@
+In this script, we use the `requests` library to send a GET request to the Python.org blogs page. We then use the `BeautifulSoup` library to parse the HTML content of the page.
+
+We find all the blog titles on the page by searching for `h2` elements with the class `blog-title`. We then print each title found and save them to a file named `blog_titles.txt`.
+
+To run this script, first install the required libraries:
+
+```bash
+pip install requests beautifulsoup4
+```
diff --git a/Web Scraper/Web_Scraper.py b/Web Scraper/Web_Scraper.py
new file mode 100644
index 00000000..f1f0d62b
--- /dev/null
+++ b/Web Scraper/Web_Scraper.py
@@ -0,0 +1,54 @@
+"""Scrape blog titles from Python.org and save them to a text file."""
+
+import requests
+from bs4 import BeautifulSoup
+
+# URL to scrape data from
+URL = "https://www.python.org/blogs/"
+
+# File the scraped titles are written to
+OUTPUT_FILE = "blog_titles.txt"
+
+# Seconds to wait for the server; requests has no timeout by default
+TIMEOUT = 10
+
+
+def fetch_titles(url=URL, timeout=TIMEOUT):
+    """Return the text of every ``h2.blog-title`` element found at *url*.
+
+    Raises ``requests.RequestException`` if the request fails, times out,
+    or the server responds with an HTTP error status.
+    """
+    response = requests.get(url, timeout=timeout)
+    # Fail on 4xx/5xx instead of silently parsing an error page
+    response.raise_for_status()
+
+    soup = BeautifulSoup(response.content, "html.parser")
+    return [
+        heading.get_text(strip=True)
+        for heading in soup.find_all("h2", class_="blog-title")
+    ]
+
+
+def save_titles(titles, path=OUTPUT_FILE):
+    """Write *titles* to *path*, one per line, encoded as UTF-8."""
+    # Explicit encoding: the platform default may be unable to encode
+    # every character that appears in a title
+    with open(path, "w", encoding="utf-8") as file:
+        file.writelines(f"{title}\n" for title in titles)
+
+
+def main():
+    """Fetch the blog titles, print them numbered, and save them to a file."""
+    titles = fetch_titles()
+
+    print("Python.org Blog Titles:\n")
+    for i, title in enumerate(titles, start=1):
+        print(f"{i}. {title}")
+
+    save_titles(titles)
+    print(f"\nBlog titles saved to '{OUTPUT_FILE}'.")
+
+
+if __name__ == "__main__":
+    main()

From d9ecdb12482b3cabf8ef3be5038cd143161b113a Mon Sep 17 00:00:00 2001
From: Charul00
Date: Thu, 3 Oct 2024 20:10:15 +0530
Subject: [PATCH 2/2] Update README.md to add Web Scraper script

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index b5090ad9..70b49df9 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,7 @@ More information on contributing and the general code of conduct for discussion
 | Weather GUI | [Weather GUI](https://github.com/DhanushNehru/Python-Scripts/tree/master/Weather%20GUI) | Displays information on the weather. |
 | Website Blocker | [Website Blocker](https://github.com/DhanushNehru/Python-Scripts/tree/master/Website%20Blocker) | Downloads the website and loads it on your homepage in your local IP. |
 | Website Cloner | [Website Cloner](https://github.com/DhanushNehru/Python-Scripts/tree/master/Website%20Cloner) | Clones any website and opens the site in your local IP. |
+| Web Scraper | [Web Scraper](https://github.com/DhanushNehru/Python-Scripts/tree/master/Web%20Scraper) | A Python script that scrapes blog titles from Python.org and saves them to a file. |
 | Weight Converter | [Weight Converter](https://github.com/WatashiwaSid/Python-Scripts/tree/master/Weight%20Converter) | Simple GUI script to convert weight in different measurement units. |
 | Wikipedia Data Extractor | [Wikipedia Data Extractor](https://github.com/DhanushNehru/Python-Scripts/tree/master/Wikipedia%20Data%20Extractor) | A simple Wikipedia data extractor script to get output in your IDE. |
 | Word to PDF | [Word to PDF](https://github.com/DhanushNehru/Python-Scripts/tree/master/Word%20to%20PDF%20converter) | A Python script to convert an MS Word file to a PDF file. |