**A function to extract topics and their descriptions from GitHub**

---



In [10]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_github_topics(filter_keyword=None, output_file=None, timeout=10):
    """Scrape topic titles and descriptions from https://github.com/topics.

    The page is downloaded, parsed with BeautifulSoup, optionally filtered by
    a case-insensitive keyword, printed, and written to a CSV file.

    Parameters
    ----------
    filter_keyword : str, optional
        Keep only topics whose title or description contains this text
        (case-insensitive). ``None`` or ``""`` keeps every topic.
    output_file : str, optional
        Path of the CSV to write. Defaults to ``"github_topics_filtered.csv"``
        when a filter is given and ``"github_topics.csv"`` otherwise. The
        default filtered name does not include the keyword, so consecutive
        filtered runs overwrite each other unless a distinct path is passed.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 10).

    Returns
    -------
    pandas.DataFrame or None
        A frame with ``Title`` and ``Description`` columns, or ``None`` when
        the page could not be retrieved. In a notebook, end the call with
        ``;`` to avoid echoing the frame a second time (it is also printed).
    """
    # URL of the page to scrape
    url = "https://github.com/topics"

    # Without a timeout requests waits indefinitely on a stalled connection.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the page. Error: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return None

    print(f"Status code: {response.status_code}")

    # Parsing the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # A multi-class string matches only that exact class attribute, so these
    # selectors stop matching if the page's markup changes.
    title_tags = soup.find_all('p', class_='f3 lh-condensed mb-0 mt-1 Link--primary')
    description_tags = soup.find_all('p', class_='f5 color-fg-muted mb-0 mt-1')

    # Titles and descriptions are collected independently and paired by
    # position; a count mismatch means some pairs may be shifted.
    if len(title_tags) != len(description_tags):
        print(
            f"Warning: found {len(title_tags)} titles but "
            f"{len(description_tags)} descriptions; rows are paired by "
            "position and may be misaligned."
        )

    # zip() stops at the shorter list, so both columns end up the same length.
    rows = [
        (title_tag.text.strip(), description_tag.text.strip())
        for title_tag, description_tag in zip(title_tags, description_tags)
    ]

    # If filter_keyword is provided, keep only the matching rows
    if filter_keyword:
        needle = filter_keyword.lower()
        rows = [
            (title, description)
            for title, description in rows
            if needle in title.lower() or needle in description.lower()
        ]

    # Always plain lists, whether or not a filter was applied
    topic_titles = [title for title, _ in rows]
    topic_descriptions = [description for _, description in rows]

    # Verifying extraction
    print(f"Extracted {len(topic_titles)} topics:", topic_titles)
    print(f"Extracted {len(topic_descriptions)} descriptions:", topic_descriptions)

    df = pd.DataFrame({
        "Title": topic_titles,
        "Description": topic_descriptions
    })

    # Saving to CSV
    if output_file is None:
        output_file = "github_topics_filtered.csv" if filter_keyword else "github_topics.csv"
    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")

    print(df)
    return df


# Scrape every topic on the page (no filter). The trailing ';' stops the
# notebook from echoing whatever the call returns.
scrape_github_topics(filter_keyword=None);


Status code: 200
Extracted 30 topics: ['3D', 'Ajax', 'Algorithm', 'Amp', 'Android', 'Angular', 'Ansible', 'API', 'Arduino', 'ASP.NET', 'Awesome Lists', 'Amazon Web Services', 'Azure', 'Babel', 'Bash', 'Bitcoin', 'Bootstrap', 'Bot', 'C', 'Chrome', 'Chrome extension', 'Command-line interface', 'Clojure', 'Code quality', 'Code review', 'Compiler', 'Continuous integration', 'C++', 'Cryptocurrency', 'Crystal']
Extracted 30 descriptions: ['3D refers to the use of three-dimensional graphics, modeling, and animation in various industries.', 'Ajax is a technique for creating interactive web applications.', 'Algorithms are self-contained sequences that carry out a variety of tasks.', 'Amp is a non-blocking concurrency library for PHP.', 'Android is an operating system built by Google designed for mobile devices.', 'Angular is an open source web application platform.', 'Ansible is a simple and powerful automation engine.', 'An API (Application Programming Interface) is a collection of protocols a

**Running the function with two filters: 'Cryptocurrency' and '3D'**

---



In [9]:
# Run the scraper once per filter keyword.
# Note: both runs save to github_topics_filtered.csv, so the second run
# overwrites the first run's file.
for keyword in ('Cryptocurrency', '3D'):
    scrape_github_topics(keyword)

Status code: 200
Extracted 2 topics: ('Bitcoin', 'Cryptocurrency')
Extracted 2 descriptions: ('Bitcoin is a cryptocurrency developed by Satoshi Nakamoto.', 'A cryptocurrency is a digital currency that uses cryptography.')
Data saved to github_topics_filtered.csv
            Title                                        Description
0         Bitcoin  Bitcoin is a cryptocurrency developed by Satos...
1  Cryptocurrency  A cryptocurrency is a digital currency that us...
Status code: 200
Extracted 1 topics: ('3D',)
Extracted 1 descriptions: ('3D refers to the use of three-dimensional graphics, modeling, and animation in various industries.',)
Data saved to github_topics_filtered.csv
  Title                                        Description
0    3D  3D refers to the use of three-dimensional grap...
