In [3]:
import requests
import json
import datetime
import openpyxl
from bs4 import BeautifulSoup

# List of search terms
search_terms = [
    "Algorithmic",
    "Fairness",
    "Bias",
    "Discrimination"
    "Ethics",
    "Responsible",
    "Ethics",
    "Fair"
]

# Only Stack Overflow site
sites = [
    "stackoverflow"
]

# Function to search StackExchange for a given keyword on a specific site
def search_stackexchange(keyword, site):
    url = "https://api.stackexchange.com/2.3/search/advanced"
    params = {
        'order': 'desc',
        'sort': 'relevance',
        'q': keyword,
        'site': site,
        'filter': 'withbody'
    }
    
    response = requests.get(url, params=params)
    if response.status_code == 200:
        try:
            return response.json()
        except json.JSONDecodeError:
            print(f"Error: Unable to decode JSON for keyword '{keyword}' on site '{site}'")
            return {}
    else:
        print(f"Error: Received status code {response.status_code} for keyword '{keyword}' on site '{site}'")
        return {}

# Function to filter results from 2017 onwards
def filter_by_date(items):
    filtered_items = []
    for item in items:
        creation_date = datetime.datetime.fromtimestamp(item['creation_date'])
        if creation_date.year >= 2017:
            filtered_items.append(item)
    return filtered_items

# Function to convert HTML to readable text
def html_to_text(html_content):
    soup = BeautifulSoup(html_content, "html.parser")
    return soup.get_text()

def main():
    results = []
    for term in search_terms:
        for site in sites:
            print(f"Searching for '{term}' on site '{site}'...")
            result = search_stackexchange(term, site)
            if result:
                filtered_items = filter_by_date(result.get('items', []))
                for item in filtered_items:
                    question_id = item['question_id']
                    question_url = item['link']
                    original_question = item['title']
                    full_question = html_to_text(item['body'])
                    creation_date = datetime.datetime.fromtimestamp(item['creation_date']).strftime('%Y-%m-%d %H:%M:%S')
                    number_of_views = item['view_count']
                    tags = ', '.join(item['tags'])
                    topic = site
                    is_answered = "Yes" if item['is_answered'] else "No"
                    
                    if 'answers' in item:
                        for answer in item['answers']:
                            result_entry = {
                                "Post ID": question_id,
                                "PostURL": question_url,
                                "Original Question": original_question,
                                "Full Question": full_question,
                                "Number of Answers": item['answer_count'],
                                "Answer": html_to_text(answer['body']),
                                "Date": creation_date,
                                "Number of votes (on answer)": answer['score'],
                                "Number of views": number_of_views,
                                "Tags": tags,
                                "Topic": topic,
                                "Answered (Yes/No)": is_answered
                            }
                            results.append(result_entry)
                            # Print the results in the console
                            print(f"Post ID: {result_entry['Post ID']}")
                            print(f"Post URL: {result_entry['PostURL']}")
                            print(f"Original Question: {result_entry['Original Question']}")
                            print(f"Full Question: {result_entry['Full Question']}")
                            print(f"Number of Answers: {result_entry['Number of Answers']}")
                            print(f"Answer: {result_entry['Answer']}")
                            print(f"Date: {result_entry['Date']}")
                            print(f"Number of votes (on answer): {result_entry['Number of votes (on answer)']}")
                            print(f"Number of views: {result_entry['Number of views']}")
                            print(f"Tags: {result_entry['Tags']}")
                            print(f"Topic: {result_entry['Topic']}")
                            print(f"Answered (Yes/No): {result_entry['Answered (Yes/No)']}")
                            print("\n" + "-"*80 + "\n")
                    else:
                        result_entry = {
                            "Post ID": question_id,
                            "PostURL": question_url,
                            "Original Question": original_question,
                            "Full Question": full_question,
                            "Number of Answers": item['answer_count'],
                            "Answer": "N/A",
                            "Date": creation_date,
                            "Number of votes (on answer)": "N/A",
                            "Number of views": number_of_views,
                            "Tags": tags,
                            "Topic": topic,
                            "Answered (Yes/No)": is_answered
                        }
                        results.append(result_entry)
                        # Print the results in the console
                        print(f"Post ID: {result_entry['Post ID']}")
                        print(f"Post URL: {result_entry['PostURL']}")
                        print(f"Original Question: {result_entry['Original Question']}")
                        print(f"Full Question: {result_entry['Full Question']}")
                        print(f"Number of Answers: {result_entry['Number of Answers']}")
                        print(f"Answer: {result_entry['Answer']}")
                        print(f"Date: {result_entry['Date']}")
                        print(f"Number of votes (on answer): {result_entry['Number of votes (on answer)']}")
                        print(f"Number of views: {result_entry['Number of views']}")
                        print(f"Tags: {result_entry['Tags']}")
                        print(f"Topic: {result_entry['Topic']}")
                        print(f"Answered (Yes/No): {result_entry['Answered (Yes/No)']}")
                        print("\n" + "-"*80 + "\n")
                print(f"Found {len(filtered_items)} results for '{term}' on site '{site}'\n")
            else:
                print(f"No results found for '{term}' on site '{site}'\n")

    # Save results to an Excel file
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "StackOverflow Search Results"
    headers = ["Post ID", "PostURL", "Original Question", "Full Question", "Number of Answers", "Answer", "Date", "Number of votes (on answer)", "Number of views", "Tags", "Topic", "Answered (Yes/No)"]
    ws.append(headers)
    
    for result in results:
        ws.append([
            result["Post ID"],
            result["PostURL"],
            result["Original Question"],
            result["Full Question"],
            result["Number of Answers"],
            result["Answer"],
            result["Date"],
            result["Number of votes (on answer)"],
            result["Number of views"],
            result["Tags"],
            result["Topic"],
            result["Answered (Yes/No)"]
        ])

    wb.save("stackoverflow_search_results.xlsx")
    print("Results have been saved to 'stackoverflow_search_results.xlsx'.")

if __name__ == "__main__":
    main()


Searching for 'Algorithmic' on site 'stackoverflow'...
Found 0 results for 'Algorithmic' on site 'stackoverflow'

Searching for 'Fairness' on site 'stackoverflow'...
Post ID: 64841211
Post URL: https://stackoverflow.com/questions/64841211/mvar-fairness-guarantees
Original Question: MVar fairness guarantees?
Full Question: I'm building a thread-safe shared state with MVar and due to requirements I need some fairness guarantees (If two threads asked a state under MVar one after the other then as soon as the state is available the threads will take it in the order they asked for it).
I didn't find any note in the MVar documentation.
So in case of fairness guarantees is it required to build some sort of a wrapper of ReentrantLock(true) fair lock?

Number of Answers: 1
Answer: N/A
Date: 2020-11-15 06:31:31
Number of votes (on answer): N/A
Number of views: 72
Tags: scala, concurrency, functional-programming, scala-cats
Topic: stackoverflow
Answered (Yes/No): Yes

----------------------------

Post ID: 69604403
Post URL: https://stackoverflow.com/questions/69604403/what-is-vertical-bias-or-horizontal-bias-used-for-in-androids-constraintla
Original Question: What is &#39;Vertical Bias&#39; or &#39;Horizontal Bias&#39; used for in Android&#39;s &#39;ConstraintLayout&#39;?
Full Question: I am quite new to Android development and today I wondered what the 'Vertical Bias' respectively the 'Horizontal Bias' is used for in the 'ConstraintLayout'.

Number of Answers: 1
Answer: N/A
Date: 2021-10-17 14:36:24
Number of votes (on answer): N/A
Number of views: 24139
Tags: android, android-constraintlayout
Topic: stackoverflow
Answered (Yes/No): Yes

--------------------------------------------------------------------------------

Post ID: 67948404
Post URL: https://stackoverflow.com/questions/67948404/pytorch-is-it-able-to-make-a-convolution-module-without-bias-have-bias-again
Original Question: Pytorch: Is it able to make a convolution module without bias have bias again?
Full Question:

Post ID: 66072158
Post URL: https://stackoverflow.com/questions/66072158/cleaning-data-in-a-column-using-sql
Original Question: Cleaning data in a column using SQL
Full Question: I have a column titled "Keywords" that has the keywords associated with each article that is in my dataset. I wrote a query to group the articles according to their keyword so I can make a simple visualization showing which keyword is used the most. The issue is, some of the articles have a secondary keywords, and I need to write a query to filter out those secondary keywords so that just the main one remains. For instance, the "Keywords" column looks like this:
KEYWORDS

Policy/Ethics
Policy/Ethics
Policy/Ethics: Employment
Policy/Ethics
Policy/Ethics: Business

I need help writing a query that would keep the main keywords (Policy/Ethics), but get rid of the secondary ones. I think I use CASE for this, but I'm not sure or where to begin. Any help I could get would be greatly appreciated!

Number of Answers: 1

Post ID: 66072158
Post URL: https://stackoverflow.com/questions/66072158/cleaning-data-in-a-column-using-sql
Original Question: Cleaning data in a column using SQL
Full Question: I have a column titled "Keywords" that has the keywords associated with each article that is in my dataset. I wrote a query to group the articles according to their keyword so I can make a simple visualization showing which keyword is used the most. The issue is, some of the articles have a secondary keywords, and I need to write a query to filter out those secondary keywords so that just the main one remains. For instance, the "Keywords" column looks like this:
KEYWORDS

Policy/Ethics
Policy/Ethics
Policy/Ethics: Employment
Policy/Ethics
Policy/Ethics: Business

I need help writing a query that would keep the main keywords (Policy/Ethics), but get rid of the secondary ones. I think I use CASE for this, but I'm not sure or where to begin. Any help I could get would be greatly appreciated!

Number of Answers: 1