In [1]:
# import libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from tqdm import tqdm
import mysql.connector

def create_twitter_profiles_table(cursor):
    """Recreate the ``twitter_profiles`` table, discarding any existing one.

    Two statements are executed through ``cursor.execute`` in order: a
    ``DROP TABLE IF EXISTS`` followed by a ``CREATE TABLE``. The new table
    has an auto-increment ``id`` primary key, a ``TEXT`` column ``bio`` and
    ``VARCHAR(255)`` columns ``profile``, ``following_count``,
    ``followers_count``, ``location`` and ``website``.

    Args:
        cursor: Object exposing ``execute(sql)``; the caller in this file
            passes a ``mysql.connector`` cursor.
    """
    # Order matters: the old table has to be gone before it can be re-created.
    ddl_statements = (
        "DROP TABLE IF EXISTS twitter_profiles",
        '''  
    CREATE TABLE twitter_profiles (
    id INT AUTO_INCREMENT PRIMARY KEY,
    profile VARCHAR(255),
    bio TEXT,
    following_count VARCHAR(255),
    followers_count VARCHAR(255),
    location VARCHAR(255),
    website VARCHAR(255)
)
    ''',
    )
    for statement in ddl_statements:
        cursor.execute(statement)

def _wait_for_text(driver, xpath, default, timeout=10):
    """Return the text of the element at *xpath*, or *default* if the wait times out."""
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
    except TimeoutException:
        return default
    return element.text


def _scrape_profile(driver, profile):
    """Load one profile page and return its row tuple, or ``None`` on page timeout.

    The returned tuple is ordered to match the INSERT statement used by the
    caller: ``(profile, bio, following_count, followers_count, location,
    website)``.
    """
    driver.get(f'https://twitter.com/{profile}')

    # The bio element doubles as the "page has loaded" signal.
    # NOTE(review): a profile whose page never renders a UserDescription
    # element is therefore skipped entirely rather than stored with a
    # placeholder bio -- confirm this is intended.
    try:
        bio_element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//div[@data-testid="UserDescription"]'))
        )
    except TimeoutException:
        print(f"Timed out waiting for {profile} page to load.")
        return None

    # The element was just located above, so a second wait for it is redundant.
    bio = bio_element.text

    # NOTE(review): the two count locators are purely positional (1st and 3rd
    # matching link span); verify they still point at following/followers.
    following_count = _wait_for_text(driver, '(//a[@role="link"]/span/span)[1]', '0')
    followers_count = _wait_for_text(driver, '(//a[@role="link"]/span/span)[3]', '0')
    location = _wait_for_text(
        driver, '//span[@data-testid="UserLocation"]/span/span', 'Not available'
    )
    website = _wait_for_text(driver, '//a[@data-testid="UserUrl"]/span', 'Not available')

    return (profile, bio, following_count, followers_count, location, website)


def _store_profiles(driver, connection, profiles):
    """Recreate the table, then scrape and insert every profile, committing per row."""
    insert_sql = (
        "INSERT INTO twitter_profiles "
        "(profile, bio, following_count, followers_count, location, website) "
        "VALUES (%s, %s, %s, %s, %s, %s)"
    )
    with connection.cursor() as cursor:
        # Drops any existing 'twitter_profiles' table before creating it anew.
        create_twitter_profiles_table(cursor)

        for profile in tqdm(profiles):
            row = _scrape_profile(driver, profile)
            if row is None:
                continue
            cursor.execute(insert_sql, row)
            # Commit after each row so earlier results survive a later failure.
            connection.commit()


def scrape_twitter_profiles(profiles):
    """Scrape the given Twitter profiles with Chrome and store them in MySQL.

    For each handle the profile page is opened, the bio, following count,
    followers count, location and website are read (falling back to ``'0'``
    for the counts and ``'Not available'`` for location/website when the
    element does not appear within 10 seconds), and one row is inserted into
    the ``twitter_profiles`` table. The table is dropped and recreated at the
    start of every call. Profiles whose page does not show a bio element
    within 20 seconds are reported on stdout and skipped.

    Args:
        profiles: Iterable of Twitter handles (without the leading ``@``).

    Returns:
        None. Progress and errors are printed to stdout.

    Raises:
        Whatever ``webdriver.Chrome()``, ``maximize_window()`` or
        ``mysql.connector.connect()`` raise during setup. Errors raised once
        scraping has started are caught and printed instead.
    """
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()

        # Connecting inside the driver's try/finally guarantees the browser is
        # shut down even when the database is unreachable.
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        connection = mysql.connector.connect(
            host='localhost',
            user='root',
            password='password',
            database='twitterprofiles',
            charset='utf8mb4',
        )
        try:
            _store_profiles(driver, connection, profiles)
            print("Data is stored in MySQL database.")
        except Exception as e:
            # Top-level boundary: report the failure; rows already committed
            # remain in the database.
            print(f"An error occurred: {e}")
        finally:
            connection.close()
    finally:
        driver.quit()

# List of Twitter profiles to scrape
twitter_profiles = ['GTNUK1', 'whatsapp', 'aacb_CBPTrade', 'aacbdotcom', 'AAWindowPRODUCT',
                    'aandb_kia', 'ABHomeInc', 'Abrepro', 'endangeredprani', 'ACChristofiLtd',
                    'aeclothing1', 'mufaddal_vohra', 'AETechnologies1', 'wix', 'AGInsuranceLLC']

# Run the scrape only when executed directly (a script or a notebook cell).
# Merely importing this module must not open a browser or drop and recreate
# the database table.
if __name__ == "__main__":
    scrape_twitter_profiles(twitter_profiles)


 87%|███████████████████████████████████████████████████████████████████████           | 13/15 [01:35<00:22, 11.36s/it]

Timed out waiting for AETechnologies1 page to load.


100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [01:41<00:00,  6.73s/it]


Data is stored in MySQL database.
