In [1]:
!pip install selenium



In [19]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time
from datetime import datetime
import requests
import os
import pandas as pd
import re

# ChromeDriver Path
# The default below only exists on the original author's machine; set the
# CHROMEDRIVER_PATH environment variable to point at a driver elsewhere.
chrome_driver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe",
)
chrome_options = Options()
# Turn off Blink's "AutomationControlled" feature for this browser session.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--headless")  # Keep this commented to see the browser in action

# Setting up the webdriver
if os.path.isfile(chrome_driver_path):
    service = Service(chrome_driver_path)
else:
    # No driver binary at the configured path: build the service without an
    # explicit path so Selenium resolves a driver itself (Selenium Manager,
    # available in Selenium 4.6+) instead of failing on a missing file.
    print(f"ChromeDriver not found at '{chrome_driver_path}'; letting Selenium locate a driver.")
    service = Service()
driver = webdriver.Chrome(service=service, options=chrome_options)

# List of AI companies
ai_companies = ['OpenAI', 'Google DeepMind', 'NVIDIA', 'Microsoft', 'IBM', 'Amazon Web Services', 'Facebook AI Research']

# User input for company names
user_input = input(f"Available AI Companies: {ai_companies}\nEnter company names separated by commas (or type 'all' to search all): ")
if user_input.strip().lower() == 'all':
    companies_to_search = list(ai_companies)
else:
    # Match case-insensitively so "openai" selects "OpenAI", and tell the user
    # about names that are not in the list instead of dropping them silently.
    known_companies = {name.lower(): name for name in ai_companies}
    companies_to_search = []
    for raw_name in user_input.split(','):
        requested = raw_name.strip()
        if not requested:
            continue
        canonical = known_companies.get(requested.lower())
        if canonical is None:
            print(f"Ignoring unknown company: {requested!r}")
        elif canonical not in companies_to_search:  # avoid searching a company twice
            companies_to_search.append(canonical)

if not companies_to_search:
    print("No valid company names were entered; nothing will be searched.")

# User input for number of links to process
# Re-prompt until a non-negative whole number is entered rather than crashing
# with ValueError on input such as "three" or an empty line.
while True:
    raw_link_count = input("Enter the number of links to process: ")
    try:
        num_links_to_process = int(raw_link_count)
    except ValueError:
        print("Please enter a whole number.")
        continue
    if num_links_to_process >= 0:
        break
    print("Please enter a number that is zero or greater.")

scrape_linkedin = input("Do you want to scrape LinkedIn for AI company news? (yes/no): ").strip().lower() == 'yes'

if scrape_linkedin:
    # getpass reads the password without echoing it, so it does not end up on
    # screen or in the saved output the way a plain input() prompt does.
    from getpass import getpass

    linkedin_username = input("Enter your LinkedIn username: ").strip()
    linkedin_password = getpass("Enter your LinkedIn password: ")

    def linkedin_login(username, password):
        """Open the LinkedIn login page in the shared `driver` and submit the credentials.

        Args:
            username: Value typed into the form field with id "username".
            password: Value typed into the form field with id "password".

        The fixed sleeps give the page time to load before the fields are
        looked up and after the form is submitted; the function does not check
        whether the login actually succeeded.
        """
        linkedin_login_url = "https://www.linkedin.com/login"
        driver.get(linkedin_login_url)
        time.sleep(3)

        # Locate the username and password fields and enter the credentials
        username_input = driver.find_element(By.ID, "username")
        password_input = driver.find_element(By.ID, "password")

        username_input.send_keys(username)
        password_input.send_keys(password)

        # Click the login button
        login_button = driver.find_element(By.XPATH, "//button[@type='submit']")
        login_button.click()

        time.sleep(5)

    linkedin_login(linkedin_username, linkedin_password)

def save_to_notepad(data, file_name="ai_news_results.txt"):
    """Append `data` followed by a newline to the UTF-8 text file `file_name`.

    The file is created if it does not exist; existing content is kept.
    """
    record = data + "\n"
    with open(file_name, mode='a', encoding='utf-8') as results_file:
        results_file.write(record)

def extract_article_info(url, min_words=300, min_paragraph_chars=50, timeout=15):
    """Download a page and return its title and main paragraph text.

    Args:
        url: Address of the page to fetch.
        min_words: Minimum number of whitespace-separated words the combined
            paragraph text must contain for the page to count as an article.
        min_paragraph_chars: Paragraphs whose text is not longer than this many
            characters are dropped before the text is combined.
        timeout: Seconds to wait for the server before giving up, so a stalled
            host cannot block the whole run.

    Returns:
        `(title, text_content)` for a substantial page, or `(None, None)` when
        the server answered with an error status or the page has too little
        paragraph text.

    Raises:
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    if not response.ok:
        # Error pages (404, 403, ...) are not articles; report "nothing usable".
        return None, None

    soup = BeautifulSoup(response.text, 'lxml')

    # get_text() still yields the text when <title> contains nested markup,
    # where `.string` would be None and the caller would discard the page.
    title_text = soup.title.get_text(strip=True) if soup.title else ''
    title = title_text or 'No title found'

    # Combine text content and filter short or irrelevant content
    paragraph_texts = (p.get_text() for p in soup.find_all('p'))
    text_content = "\n".join(text for text in paragraph_texts if len(text) > min_paragraph_chars)

    if len(text_content.split()) < min_words:  # Not substantial enough to keep
        return None, None

    return title, text_content

def extract_links_and_info(search_query, max_links=None):
    """Page through Google News results for `search_query` and save article text.

    Each result page is loaded in the shared `driver`. Every unseen https link
    whose URL contains 'news', 'blog' or 'article' is passed to
    `extract_article_info`, and each article that comes back with content is
    appended to the results file via `save_to_notepad`.

    Args:
        search_query: Text to search for.
        max_links: Number of articles to save before stopping. Defaults to the
            module-level `num_links_to_process`.

    Paging stops once enough articles were saved, or when a result page yields
    no link that has not been seen before (so the loop cannot run forever once
    the results are exhausted).
    """
    from urllib.parse import quote_plus, unquote

    if max_links is None:
        max_links = num_links_to_process

    # quote_plus encodes spaces as '+' and also escapes characters such as '&'
    # that would otherwise break the query string.
    search_url = f"https://www.google.com/search?q={quote_plus(search_query)}&tbm=nws"
    processed_count = 0
    unique_links = set()  # To keep track of processed links
    page_number = 0

    while processed_count < max_links:
        driver.get(f"{search_url}&start={page_number * 10}")  # Go to the next page if necessary
        time.sleep(2)

        # Extract the news links and process them
        soup = BeautifulSoup(driver.page_source, 'lxml')
        new_links_on_page = 0

        for link in soup.find_all('a', href=True):
            href = link['href']

            # Unwrap Google redirect links ("/url?q=<target>&...") first; they
            # start with '/', so they must be handled before relative links
            # are discarded.
            if 'url?q=' in href:
                href = unquote(href.split('url?q=', 1)[1].split('&', 1)[0])

            # Skip anything that is not an absolute https URL (e.g. Google internal links)
            if not href.startswith('https://'):
                continue

            # Skip links that are not from news domains or look like product pages
            if not any(keyword in href for keyword in ('news', 'blog', 'article')):
                continue

            # Ensure the link hasn't been processed
            if href in unique_links:
                continue
            unique_links.add(href)
            new_links_on_page += 1

            try:
                title, content = extract_article_info(href)
            except Exception as e:
                # Best effort: one unreachable or malformed page must not stop the run.
                print(f"Error processing link: {e}")
                continue

            # If content is None, skip this link
            if title is None or content is None:
                continue

            data_to_save = f"Link: {href}\nTitle: {title}\nText Content:\n{content}\n{'='*80}\n"
            save_to_notepad(data_to_save)
            print(f"Processed: {title}")
            processed_count += 1

            if processed_count >= max_links:
                break

        if new_links_on_page == 0:
            # Nothing new on this page: the results are exhausted.
            print(f"No more results for '{search_query}'; processed {processed_count} of {max_links} links.")
            break

        page_number += 1  # Increment to the next page

# The user's yes/no answer is stored in a module-level name that is also called
# `scrape_linkedin`; the `def` below rebinds that name to the function, after
# which any `if scrape_linkedin:` check is always true. Capture the answer
# before it is overwritten so the function can honour a "no". When no boolean
# is present (e.g. the function is used on its own) scraping stays enabled.
_previous_scrape_linkedin = globals().get("scrape_linkedin")
_linkedin_scraping_enabled = (
    _previous_scrape_linkedin if isinstance(_previous_scrape_linkedin, bool) else True
)


def scrape_linkedin(company_name):
    """Collect LinkedIn content-search posts for a company into an Excel sheet.

    Loads the LinkedIn content search for `company_name` in the shared
    `driver`, reads every `div.occludable-update` element from the page source
    and stores its whitespace-normalised text plus the first link found in it.
    Results are written to 'linkedin_results.xlsx', one sheet per call; when
    the workbook already exists a new sheet is added to it.

    Args:
        company_name: Company to search for; also used for the sheet name.

    Returns:
        None. Does nothing when the user declined LinkedIn scraping.
    """
    if not _linkedin_scraping_enabled:
        return

    from urllib.parse import quote

    # Percent-encode the whole name (spaces, '&', '/', ...) for the query string.
    linkedin_search_url = f"https://www.linkedin.com/search/results/content/?keywords={quote(company_name, safe='')}&origin=GLOBAL_SEARCH_HEADER"
    driver.get(linkedin_search_url)
    time.sleep(3)

    # Extract LinkedIn posts
    soup = BeautifulSoup(driver.page_source, 'lxml')
    posts = soup.find_all('div', class_='occludable-update')

    linkedin_results = []
    for post in posts:
        try:
            post_content = post.get_text(separator="\n").strip()

            # Clean up the content
            post_content = re.sub(r'\s+', ' ', post_content)

            # Extract the post link
            post_link_tag = post.find('a', href=True)
            post_link = post_link_tag['href'] if post_link_tag else "No link found"

            if post_content:
                linkedin_results.append({
                    'Company': company_name,
                    'Content': post_content,
                    'Post Link': post_link
                })
        except Exception as e:
            # Best effort: a single malformed post must not abort the others.
            print(f"Error extracting LinkedIn post: {e}")

    if not linkedin_results:
        print(f"No relevant LinkedIn posts found for {company_name}")
        return

    df = pd.DataFrame(linkedin_results)
    excel_file_name = "linkedin_results.xlsx"

    # Excel sheet names may not contain []:*?/\ and are limited to 31 characters.
    sheet_name = re.sub(r'[\[\]:*?/\\]', '_', company_name)[:31] or "Sheet1"

    if os.path.exists(excel_file_name):
        # Append to the existing workbook; a name clash gets a new sheet.
        writer_options = {'mode': 'a', 'if_sheet_exists': 'new'}
    else:
        writer_options = {'mode': 'w'}

    with pd.ExcelWriter(excel_file_name, **writer_options) as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"LinkedIn scraping complete for {company_name}. Results saved in '{excel_file_name}'.")



# Start the search and extraction process for each company
try:
    for company in companies_to_search:
        print(f"Searching for: {company}")

        # NOTE: by this point `scrape_linkedin` is bound to the function defined
        # above (its `def` rebinds the name that earlier held the yes/no
        # answer), so this check is always true.
        if scrape_linkedin:
            scrape_linkedin(company)

        search_query = f"{company} news"
        save_to_notepad(f"Search Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\nQuery: {search_query}\n{'='*80}")
        extract_links_and_info(search_query)
finally:
    # Close the browser even when a search fails part-way through, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

# The LinkedIn results are written to an .xlsx workbook, not a .csv file.
print("Results saved in 'ai_news_results.txt' and 'linkedin_results.xlsx' if applicable.")


Available AI Companies: ['OpenAI', 'Google DeepMind', 'NVIDIA', 'Microsoft', 'IBM', 'Amazon Web Services', 'Facebook AI Research']
Enter company names separated by commas (or type 'all' to search all):  OpenAI
Enter the number of links to process:  3
Do you want to scrape LinkedIn for AI company news? (yes/no):  yes
Enter your LinkedIn username:  <redacted>
Enter your LinkedIn password:  <redacted>


Searching for: OpenAI
LinkedIn scraping complete for OpenAI. Results saved in 'linkedin_results.xlsx'.
Processed: Several Top News Sites Shun OpenAI's SearchGPT Search Engine - Business Insider
Processed: OpenAI Blocks Iranian Influence Operation Using ChatGPT for U.S. Election Propaganda
Processed: You wanted to try OpenAI's SearchGPT? It's time to look for AI alternatives | ZDNET
Results saved in 'ai_news_results.txt' and 'linkedin_results.csv' if applicable.


In [None]:
pip install sounddevice numpy scipy SpeechRecognition pydub


In [51]:
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav
import speech_recognition as sr
import os
import threading
from datetime import datetime

class AudioRecorder:
    """Record audio from the default input device on a background thread.

    Call `record()` to start, `stop()` to finish, then `save_to_wav()` to write
    the captured samples to disk.
    """

    def __init__(self, fs=44100, channels=1, dtype='int16'):
        """Store the stream settings; nothing is opened until `record()`.

        Args:
            fs: Sample rate handed to the input stream and the WAV writer.
            channels: Number of input channels requested from the stream.
            dtype: Sample data type requested from the stream.
        """
        self.fs = fs
        self.channels = channels
        self.dtype = dtype
        self.recording = False  # True while chunks should be collected
        self.audio_data = []  # one array per callback invocation
        self.audio_thread = None  # thread that keeps the input stream open

    def record(self):
        """Start a new recording, discarding previously captured audio.

        Does nothing (apart from printing a notice) when a recording thread is
        still running, so a second call cannot open a second stream that feeds
        the same buffer and orphans the first thread.
        """
        if self.audio_thread is not None and self.audio_thread.is_alive():
            print("Recording is already in progress.")
            return
        self.recording = True
        self.audio_data = []
        self.audio_thread = threading.Thread(target=self._record_audio)
        self.audio_thread.start()

    def _record_audio(self):
        """Keep the input stream open until `recording` is switched off."""
        try:
            with sd.InputStream(samplerate=self.fs, channels=self.channels, dtype=self.dtype, callback=self._audio_callback):
                while self.recording:
                    sd.sleep(100)  # Sleep for 100 ms to keep recording
        finally:
            # Also reached when the stream could not be opened; leave the flag
            # consistent with the fact that nothing is being recorded.
            self.recording = False

    def _audio_callback(self, indata, frames, time, status):
        """Stream callback: report any status and store a copy of the chunk."""
        if status:
            print(status, flush=True)
        if self.recording:
            # Copy the chunk so the stored data is independent of `indata`.
            self.audio_data.append(indata.copy())

    def stop(self):
        """Stop recording and wait for the recording thread to finish.

        Safe to call when no recording was started or it was already stopped.
        """
        self.recording = False
        if self.audio_thread:
            self.audio_thread.join()
            self.audio_thread = None

    def save_to_wav(self, filename):
        """Write all captured chunks to `filename` as one WAV file.

        Prints a notice and writes nothing when no audio was captured.
        """
        if self.audio_data:
            audio_data = np.concatenate(self.audio_data, axis=0)
            wav.write(filename, self.fs, audio_data)
        else:
            print("No audio data to save.")

def generate_unique_filename(prefix="recording", extension=".wav"):
    """Build a file name of the form `<prefix>_<YYYYMMDD>_<HHMMSS><extension>`.

    The date and time come from the local clock at the moment of the call.
    """
    stamp = format(datetime.now(), "%Y%m%d_%H%M%S")
    return "{}_{}{}".format(prefix, stamp, extension)

def record_and_transcribe(duration=None, output_file="meeting_minutes.txt", language="ar-SA"):
    """Record from the microphone, save a WAV file and write its transcript.

    Args:
        duration: Seconds to record. When None, recording continues until the
            user presses Enter.
        output_file: Text file that receives the transcript (via `save_to_file`).
        language: Language code passed to the Google speech recogniser.

    The recording is always stopped before the function returns, including
    when the wait is interrupted. If no audio was captured, transcription is
    skipped. Recognition failures are reported on stdout rather than raised.
    """
    recognizer = sr.Recognizer()
    recorder = AudioRecorder()

    try:
        if duration is None:
            print("Recording... Press Enter to stop.")
            recorder.record()
            input("Press Enter to stop recording...")
        else:
            print(f"Recording for {duration} seconds...")
            recorder.record()
            sd.sleep(int(duration * 1000))  # sd.sleep takes milliseconds
        recorder.stop()

        audio_filename = generate_unique_filename(prefix="recording", extension=".wav")
        recorder.save_to_wav(audio_filename)

        if not os.path.exists(audio_filename):
            # save_to_wav writes nothing (and says so) when no audio was
            # captured; there is no file to transcribe in that case.
            return

        print(f"Audio file saved as '{audio_filename}'")

        # Convert WAV file to audio file for speech_recognition
        with sr.AudioFile(audio_filename) as source:
            audio_data = recognizer.record(source)

        print("Transcribing...")
        try:
            text = recognizer.recognize_google(audio_data, language=language)
        except sr.UnknownValueError:
            print("Speech Recognition could not understand the audio")
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
        else:
            save_to_file(text, output_file)
            print(f"Transcription completed. Results saved in '{output_file}'")

    finally:
        # Make sure the background recording thread ends even if the wait
        # above was interrupted (e.g. Ctrl+C); stopping twice is harmless.
        recorder.stop()
        print("Exiting...")

def save_to_file(text, file_name):
    """Write `text` to `file_name` as UTF-8, replacing any existing content.

    Prints a confirmation naming the file once the write has finished.
    """
    with open(file_name, mode='w', encoding='utf-8') as minutes_file:
        minutes_file.write(text)
    confirmation = "Minutes saved to '{}'".format(file_name)
    print(confirmation)

def main():
    """Run one record-and-transcribe session with the script's default settings."""
    session_options = {
        "duration": None,
        "output_file": "meeting_minutes.txt",
        "language": "ar-SA",
    }
    record_and_transcribe(**session_options)


# Only start a session when executed directly, not when imported.
if __name__ == "__main__":
    main()


Recording... Press Enter to stop.


Press Enter to stop recording... l


Audio file saved as 'recording_20240820_140210.wav'
Transcribing...
Minutes saved to 'meeting_minutes.txt'
Transcription completed. Results saved in 'meeting_minutes.txt'
Exiting...
