In [25]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
from datetime import datetime

# ChromeDriver Path
chrome_driver_path = r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--headless")  # Uncomment to run headless

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open Gmail
driver.get('https://mail.google.com')

# Wait for the login page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'identifierId')))

# Login
driver.find_element(By.ID, 'identifierId').send_keys('waleed.uyi.bot@gmail.com')
driver.find_element(By.ID, 'identifierNext').click()

# Wait for the password page to load and enter the password
try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'Passwd')))
    password_input = driver.find_element(By.NAME, 'Passwd')
    password_input.send_keys('UYI@waleedBOT')
    driver.find_element(By.ID, 'passwordNext').click()
except TimeoutException:
    print("Password input field was not found. Please check the locator or wait time.")
    driver.quit()
    exit()

# Wait for Gmail to load
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[role="main"]')))

# Function to navigate to a label
def go_to_label(label_name):
    try:
        label_xpath = f'//a[contains(text(), "{label_name}")]'
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, label_xpath))).click()
        time.sleep(3)  # Adjust as necessary
    except TimeoutException:
        print(f"Error: Label '{label_name}' not found or took too long to load.")
    except Exception as e:
        print(f"Error navigating to label '{label_name}': {e}")

# Function to parse the date
def parse_email_date(date_str):
    try:
        date_str = date_str.replace('\u202f', ' ')
        date_obj = datetime.strptime(date_str, '%a, %b %d, %Y, %I:%M %p')
        return date_obj.strftime('%Y-%m-%d %H:%M:%S')  # Convert to a more standard format
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return date_str  # Return the original string if parsing fails

# Updated function to extract all emails from the current label
def extract_all_emails():
    emails = []
    try:
        email_elements = driver.find_elements(By.CLASS_NAME, 'zA')

        if not email_elements:
            print("No emails found in this label.")
            return emails

        for email in email_elements:
            try:
                subject = email.find_element(By.CLASS_NAME, 'bog').text
                sender = email.find_element(By.CSS_SELECTOR, '.yX .zF, .yX .yP').get_attribute('email')
                snippet = email.find_element(By.CLASS_NAME, 'y2').text
                date_element = email.find_element(By.CSS_SELECTOR, '.xW.xY span')
                date = date_element.get_attribute('title')

                # Parsing the date string to a proper datetime object
                try:
                    email_date = datetime.strptime(date, '%b %d, %Y, %I:%M %p')
                except ValueError:
                    try:
                        email_date = datetime.strptime(date, '%b %d, %Y')
                    except ValueError:
                        email_date = date  # In case the format doesn't match expected patterns

                # Drop record if both subject and snippet are empty
                if subject.strip() == "" and snippet.strip() == "":
                    continue

                emails.append([subject, sender, snippet, email_date])
            except NoSuchElementException as e:
                print(f"Error extracting email details: {e}")
                continue

    except Exception as e:
        print(f"Error while extracting emails: {e}")

    return emails

# Function to retrieve emails from a specific label or all emails
def get_emails(label_name=None, label_names=None):
    all_emails = []

    if label_name and label_name.lower() != "all":
        # If a specific label is requested
        df_label = get_emails_from_label(label_name)
        if df_label is not None:
            df_label['Label'] = label_name
            return df_label

    elif label_names or (label_name and label_name.lower() == "all"):
        # If all labels are requested
        for label in label_names:
            df_label = get_emails_from_label(label)
            if df_label is not None:
                df_label['Label'] = label
                all_emails.append(df_label)

        if all_emails:
            return pd.concat(all_emails, ignore_index=True)
        else:
            print("No emails found in the selected labels.")
            return pd.DataFrame(columns=['Subject', 'Sender', 'Snippet', 'Date', 'Label'])

# Function to retrieve emails from a specific label
def get_emails_from_label(label_name):
    go_to_label(label_name)
    emails = extract_all_emails()
    if emails:
        return pd.DataFrame(emails, columns=['Subject', 'Sender', 'Snippet', 'Date'])
    else:
        return None
        
def move_email_to_label(driver, label_name):
    try:
        # Wait for the email checkbox to be clickable and select it
        email_checkbox = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.oZ-jc'))
        )
        
        # Click the checkbox to select the email
        is_checked = email_checkbox.get_attribute('aria-checked')
        if is_checked == 'false':
            email_checkbox.click()
            WebDriverWait(driver, 5).until(
                lambda d: email_checkbox.get_attribute('aria-checked') == 'true'
            )
        
        # Ensure the "Move to" button is visible and clickable
        move_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.T-I.J-J5-Ji.ns.T-I-ax7.T-I-Js-Gs.mA'))
        )
        move_button.click()

        # Wait for the label menu to appear
        label_menu = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.J-M.jQjAxd[role="menu"]'))
        )
        
        # Ensure the label element is visible and clickable
        label_xpath = f'//div[@class="J-N-Jz" and text()="{label_name}"]'
        label_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, label_xpath))
        )
        label_element.click()
        
        print(f"Successfully moved email to label '{label_name}'")
        
    except Exception as e:
        print(f"Error while moving email to label '{label_name}': {e}")




def categorize_and_move_unread_emails():
    try:
        go_to_label('Inbox')  # Go to the inbox to retrieve unread emails
        email_elements = driver.find_elements(By.CSS_SELECTOR, 'tr.zA.zE')  # Unread emails have the class 'zE'

        for email in email_elements:
            try:
                # Extract the sender's name from the email details
                email_details_element = email.find_element(By.CSS_SELECTOR, 'span.y2')
                email_details = email_details_element.text
                
                # Find the sender's name in the email details
                if "From:" in email_details:
                    sender_name = email_details.split("From:")[1].split(" <")[0].strip()
                else:
                    sender_name = "Unknown"

                print(f"Sender: {sender_name}")  # Debug: Print sender name

                # Click the checkbox to select the email
                checkbox = email.find_element(By.CSS_SELECTOR, 'div.oZ-jc')
                if checkbox.get_attribute('aria-checked') == 'false':
                    checkbox.click()

                # Determine the label based on the sender's name
                label_name = 'Careem' if 'Careem' in sender_name else 'Other'
                print(f"Labeling as: {label_name}")  # Debug: Print label name

                # Move the email to the determined label
                move_email_to_label(driver, label_name)
            except Exception as e:
                print(f"Error processing email: {e}")

    except Exception as e:
        print(f"Error categorizing and moving unread emails: {e}")


# Example usage:
categorize_and_move_unread_emails()

# Prompt user for the label name
label_names = ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools','Other']

user_label_input = input(f"Labels available: {label_names}\nEnter the label you want to retrieve emails from (type 'all' to retrieve from all labels): ")

# Retrieve emails based on user input
df_emails = get_emails(label_name=user_label_input, label_names=label_names)

# Save the retrieved emails to a CSV file
if df_emails is not None and not df_emails.empty:
    if user_label_input.lower() == 'all':
        df_emails.to_csv('all_emails_new.csv', index=False)
    else:
        df_emails.to_csv(f'{user_label_input}_emails.csv', index=False)
    print(f"Emails saved to {user_label_input.lower()}_emails.csv")
else:
    print("No emails were retrieved, so no CSV file was created.")

# Close the driver
driver.quit()


Sender: Unknown
Labeling as: Other
Successfully moved email to label 'Other'


Labels available: ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools', 'Other']
Enter the label you want to retrieve emails from (type 'all' to retrieve from all labels):  Other


Emails saved to other_emails.csv


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
from datetime import datetime

# ChromeDriver Path
chrome_driver_path = r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--headless")  # Uncomment to run headless

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open Gmail
driver.get('https://mail.google.com')

# Wait for the login page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'identifierId')))

# Login
driver.find_element(By.ID, 'identifierId').send_keys('waleed.uyi.bot@gmail.com')
driver.find_element(By.ID, 'identifierNext').click()

# Wait for the password page to load and enter the password
try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'Passwd')))
    password_input = driver.find_element(By.NAME, 'Passwd')
    password_input.send_keys('UYI@waleedBOT')
    driver.find_element(By.ID, 'passwordNext').click()
except TimeoutException:
    print("Password input field was not found. Please check the locator or wait time.")
    driver.quit()
    exit()

# Wait for Gmail to load
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[role="main"]')))

# Function to navigate to a label
def go_to_label(label_name):
    try:
        label_xpath = f'//a[contains(text(), "{label_name}")]'
        WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, label_xpath))).click()
        time.sleep(3)  # Adjust as necessary
    except TimeoutException:
        print(f"Error: Label '{label_name}' not found or took too long to load.")
    except Exception as e:
        print(f"Error navigating to label '{label_name}': {e}")

# Function to parse the date
def parse_email_date(date_str):
    try:
        date_str = date_str.replace('\u202f', ' ')
        date_obj = datetime.strptime(date_str, '%a, %b %d, %Y, %I:%M %p')
        return date_obj.strftime('%Y-%m-%d %H:%M:%S')  # Convert to a more standard format
    except ValueError as e:
        print(f"Error parsing date '{date_str}': {e}")
        return date_str  # Return the original string if parsing fails
from PIL import Image
from io import BytesIO
import openpyxl
from openpyxl.drawing.image import Image as ExcelImage
import os

def extract_all_emails():
    emails = []
    screenshot_folder = "email_screenshots"
    os.makedirs(screenshot_folder, exist_ok=True)

    try:
        email_elements = driver.find_elements(By.CLASS_NAME, 'zA')

        if not email_elements:
            print("No emails found in this label.")
            return emails

        for idx, email in enumerate(email_elements):
            try:
                subject = email.find_element(By.CLASS_NAME, 'bog').text
                sender = email.find_element(By.CSS_SELECTOR, '.yX .zF, .yX .yP').get_attribute('email')
                snippet = email.find_element(By.CLASS_NAME, 'y2').text
                date_element = email.find_element(By.CSS_SELECTOR, '.xW.xY span')
                date = date_element.get_attribute('title')

                # Click the email to view the full content
                email.click()

                # Take a screenshot of the email body
                screenshot_filename = os.path.join(screenshot_folder, f'email_{idx + 1}.png')
                driver.save_screenshot(screenshot_filename)

                # Drop record if both subject and snippet are empty
                if subject.strip() == "" and snippet.strip() == "":
                    continue

                emails.append([subject, sender, snippet, date, screenshot_filename])

                # Return to the label after extracting the email content
                driver.back()
                WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'zA')))
                
            except NoSuchElementException as e:
                print(f"Error extracting email details: {e}")
                continue

    except Exception as e:
        print(f"Error while extracting emails: {e}")

    return emails



# Function to retrieve emails from a specific label or all emails
def get_emails(label_name=None, label_names=None):
    all_emails = []

    if label_name and label_name.lower() != "all":
        # If a specific label is requested
        df_label = get_emails_from_label(label_name)
        if df_label is not None:
            df_label['Label'] = label_name
            return df_label

    elif label_names or (label_name and label_name.lower() == "all"):
        # If all labels are requested
        for label in label_names:
            df_label = get_emails_from_label(label)
            if df_label is not None:
                df_label['Label'] = label
                all_emails.append(df_label)

        if all_emails:
            return pd.concat(all_emails, ignore_index=True)
        else:
            print("No emails found in the selected labels.")
            return pd.DataFrame(columns=['Subject', 'Sender', 'Snippet', 'Date', 'Label', 'Context'])

# Function to retrieve emails from a specific label
def get_emails_from_label(label_name):
    go_to_label(label_name)
    emails = extract_all_emails()
    if emails:
        return pd.DataFrame(emails, columns=['Subject', 'Sender', 'Snippet', 'Date', 'Context'])
    else:
        return None
        
def move_email_to_label(driver, label_name):
    try:
        # Wait for the email checkbox to be clickable and select it
        email_checkbox = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.oZ-jc'))
        )
        
        # Click the checkbox to select the email
        is_checked = email_checkbox.get_attribute('aria-checked')
        if is_checked == 'false':
            email_checkbox.click()
            WebDriverWait(driver, 5).until(
                lambda d: email_checkbox.get_attribute('aria-checked') == 'true'
            )
        
        # Ensure the "Move to" button is visible and clickable
        move_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.T-I.J-J5-Ji.ns.T-I-ax7.T-I-Js-Gs.mA'))
        )
        move_button.click()

        # Wait for the label menu to appear
        label_menu = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.J-M.jQjAxd[role="menu"]'))
        )
        
        # Ensure the label element is visible and clickable
        label_xpath = f'//div[@class="J-N-Jz" and text()="{label_name}"]'
        label_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, label_xpath))
        )
        label_element.click()
        
        print(f"Successfully moved email to label '{label_name}'")
        
    except Exception as e:
        print(f"Error while moving email to label '{label_name}': {e}")

def categorize_and_move_unread_emails():
    try:
        go_to_label('Inbox')  # Go to the inbox to retrieve unread emails
        email_elements = driver.find_elements(By.CSS_SELECTOR, 'tr.zA.zE')  # Unread emails have the class 'zE'

        for email in email_elements:
            try:
                # Extract the sender's name from the email details
                email_details_element = email.find_element(By.CSS_SELECTOR, 'span.y2')
                email_details = email_details_element.text
                
                # Find the sender's name in the email details
                if "From:" in email_details:
                    sender_name = email_details.split("From:")[1].split(" <")[0].strip()
                else:
                    sender_name = "Unknown"

                print(f"Sender: {sender_name}")  # Debug: Print sender name

                # Click the checkbox to select the email
                checkbox = email.find_element(By.CSS_SELECTOR, 'div.oZ-jc')
                if checkbox.get_attribute('aria-checked') == 'false':
                    checkbox.click()

                # Determine the label based on the sender's name
                label_name = 'Careem' if 'Careem' in sender_name else 'Other'
                print(f"Labeling as: {label_name}")  # Debug: Print label name

                # Move the email to the corresponding label
                move_email_to_label(driver, label_name)

            except Exception as e:
                print(f"Error processing email: {e}")
                continue

    except Exception as e:
        print(f"Error while categorizing and moving emails: {e}")

# Example usage:
categorize_and_move_unread_emails()

# Prompt user for the label name
label_names = ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools','Other']

user_label_input = input(f"Labels available: {label_names}\nEnter the label you want to retrieve emails from (type 'all' to retrieve from all labels): ")

# Retrieve emails based on user input
df_emails = get_emails(label_name=user_label_input, label_names=label_names)




import openpyxl
from openpyxl.drawing.image import Image as ExcelImage
def save_emails_to_excel(df_emails, output_filename):
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Emails"

    # Write the header
    for col_num, column_title in enumerate(df_emails.columns[:-1], 1):  # Skip the 'Context' column
        ws.cell(row=1, column=col_num, value=column_title)

    # Write the email data and add images
    for idx, row in df_emails.iterrows():
        for col_num, value in enumerate(row[:-1], 1):  # Skip the 'Context' column
            ws.cell(row=idx + 2, column=col_num, value=value)

        # Insert the image into the 'Context' column
        screenshot_filename = row['Context']
        if screenshot_filename and os.path.exists(screenshot_filename):
            img = ExcelImage(screenshot_filename)
            img.anchor = f'F{idx + 2}'  # Adjust to place image in the correct cell
            ws.add_image(img)

    # Adjust column widths
    for col in ws.columns:
        max_length = 0
        column = col[0].column_letter  # Get the column name
        for cell in col:
            try:
                if len(str(cell.value)) > max_length:
                    max_length = len(cell.value)
            except:
                pass
        adjusted_width = (max_length + 2)
        ws.column_dimensions[column].width = adjusted_width

    # Save the workbook
    wb.save(output_filename)

# Example usage:
output_filename = f'{user_label_input}_emails.xlsx' if user_label_input.lower() != 'all' else 'all_emails_new.xlsx'
if df_emails is not None and not df_emails.empty:
    save_emails_to_excel(df_emails, output_filename)
    print(f"Emails saved to {output_filename}")
else:
    print("No emails were retrieved, so no Excel file was created.")

# Close the driver
driver.quit()
# works but half of the ss


Sender: Unknown
Labeling as: Other
Successfully moved email to label 'Other'
Error processing email: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=128.0.6613.120); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7ABC09632+30946]
	(No symbol) [0x00007FF7ABBBE3C9]
	(No symbol) [0x00007FF7ABAB6FDA]
	(No symbol) [0x00007FF7ABABDA57]
	(No symbol) [0x00007FF7ABABFD31]
	(No symbol) [0x00007FF7ABABFDD0]
	(No symbol) [0x00007FF7ABB07F69]
	(No symbol) [0x00007FF7ABB0850C]
	(No symbol) [0x00007FF7ABAFBCDC]
	(No symbol) [0x00007FF7ABB2CAAF]
	(No symbol) [0x00007FF7ABAFBBB6]
	(No symbol) [0x00007FF7ABB2CC80]
	(No symbol) [0x00007FF7ABB4B041]
	(No symbol) [0x00007FF7ABB2C813]
	(No symbol) [0x00007FF7ABAFA6E5]
	(No symbol) [0x00007FF7ABAFB021]
	GetHandleVerifier [0x00007FF7ABD3F83D+1301229]
	GetHandle

Labels available: ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools', 'Other']
Enter the label you want to retrieve emails from (type 'all' to retrieve from all labels):  Research


Error while extracting emails: Message: element not interactable
  (Session info: chrome=128.0.6613.120)
Stacktrace:
	GetHandleVerifier [0x00007FF7ABC09632+30946]
	(No symbol) [0x00007FF7ABBBE3C9]
	(No symbol) [0x00007FF7ABAB6E09]
	(No symbol) [0x00007FF7ABB09712]
	(No symbol) [0x00007FF7ABAFC271]
	(No symbol) [0x00007FF7ABB2CA6A]
	(No symbol) [0x00007FF7ABAFBBB6]
	(No symbol) [0x00007FF7ABB2CC80]
	(No symbol) [0x00007FF7ABB4B041]
	(No symbol) [0x00007FF7ABB2C813]
	(No symbol) [0x00007FF7ABAFA6E5]
	(No symbol) [0x00007FF7ABAFB021]
	GetHandleVerifier [0x00007FF7ABD3F83D+1301229]
	GetHandleVerifier [0x00007FF7ABD4BDB7+1351783]
	GetHandleVerifier [0x00007FF7ABD42A03+1313971]
	GetHandleVerifier [0x00007FF7ABC3DD06+245686]
	(No symbol) [0x00007FF7ABBC758F]
	(No symbol) [0x00007FF7ABBC3804]
	(No symbol) [0x00007FF7ABBC3992]
	(No symbol) [0x00007FF7ABBBA3EF]
	BaseThreadInitThunk [0x00007FFE8E5D257D+29]
	RtlUserThreadStart [0x00007FFE9038AF28+40]

No emails were retrieved, so no Excel file was

In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
from datetime import datetime
import requests

# ChromeDriver Path
chrome_driver_path = r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--headless")  # Uncomment to run headless

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open Gmail
driver.get('https://mail.google.com')

# Wait for the login page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'identifierId')))

# Login
driver.find_element(By.ID, 'identifierId').send_keys('waleed.uyi.bot@gmail.com')
driver.find_element(By.ID, 'identifierNext').click()

# Wait for the password page to load and enter the password
try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'Passwd')))
    password_input = driver.find_element(By.NAME, 'Passwd')
    password_input.send_keys('UYI@waleedBOT')
    driver.find_element(By.ID, 'passwordNext').click()
except TimeoutException:
    print("Password input field was not found. Please check the locator or wait time.")
    driver.quit()
    exit()

# Wait for Gmail to load
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[role="main"]')))

def click_print_button_and_save():
    try:
        # Wait for the print button to be clickable and then click it
        print_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'span.pYTkkf-JX-ank-Rtc0Jf span.bzc-ank'))
        )
        print_button.click()
        print("Print button clicked successfully.")
        
        # Wait for the Save button in the print dialog and click it
        save_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'cr-button.action-button'))
        )
        save_button.click()
        print("Save button clicked successfully.")
        
        # Wait for the file dialog to open (this step might require additional handling depending on OS)
        # Here, we're assuming the download is handled automatically by the browser settings.

        time.sleep(5)  # Adjust this sleep time depending on the time it takes for the file to download
        print("File should be saved.")
        
    except TimeoutException:
        print("Error: Print or Save button not found or took too long to load.")
    except NoSuchElementException:
        print("Error: Print or Save button not found in the DOM.")
    except Exception as e:
        print(f"Error clicking print or save button: {e}")


import requests

def convert_pdf_to_text(pdf_path):
    url = "https://api.pdfrest.com/word"
    
    # Prepare the payload
    files = {
        'file': open(pdf_path, 'rb')
    }

    headers = {
        'Api-Key': '34bf43a6-1bce-4dfe-878f-b785b3e0bc92'
    }

    try:
        # Make the request to the API
        response = requests.post(url, headers=headers, files=files)
        response.raise_for_status()  # Raise an error for bad responses
        
        # Assuming the API returns the text directly or within a JSON structure
        response_data = response.json()  # Adjust if the response is not JSON

        # Extract text from the response (depends on API response structure)
        if 'text' in response_data:
            return response_data['text']
        else:
            print("Text extraction failed: No 'text' field in the API response.")
            return None
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    
    return None


def extract_all_emails():
    emails = []
    try:
        email_elements = driver.find_elements(By.CLASS_NAME, 'zA')

        if not email_elements:
            print("No emails found in this label.")
            return emails

        for email in email_elements:
            try:
                subject = email.find_element(By.CLASS_NAME, 'bog').text
                sender = email.find_element(By.CSS_SELECTOR, '.yX .zF, .yX .yP').get_attribute('email')
                snippet = email.find_element(By.CLASS_NAME, 'y2').text
                date_element = email.find_element(By.CSS_SELECTOR, '.xW.xY span')
                date = date_element.get_attribute('title')

                # Click on the email to open it
                email.click()
                time.sleep(2)  # Adjust sleep as necessary

                # Click the print button and then save the email as PDF
                click_print_button_and_save()

                # Optional: Handle the downloaded PDF file path
                pdf_path = f"C:/path/to/downloads/{subject}.pdf"  # Adjust the path accordingly
                
                # Convert the PDF to text using your API
                pdf_text = convert_pdf_to_text(pdf_path) if pdf_path else "Failed to convert PDF"
                
                # Parse the date string to a proper datetime object
                try:
                    email_date = datetime.strptime(date, '%b %d, %Y, %I:%M %p')
                except ValueError:
                    try:
                        email_date = datetime.strptime(date, '%b %d, %Y')
                    except ValueError:
                        email_date = date  # In case the format doesn't match expected patterns

                emails.append([subject, sender, snippet, email_date, pdf_text])
            except NoSuchElementException as e:
                print(f"Error extracting email details: {e}")
                continue

    except Exception as e:
        print(f"Error while extracting emails: {e}")

    return emails

# Function to retrieve emails from a specific label or all emails
def get_emails(label_name=None, label_names=None):
    all_emails = []

    if label_name and label_name.lower() != "all":
        # If a specific label is requested
        df_label = get_emails_from_label(label_name)
        if df_label is not None:
            df_label['Label'] = label_name
            return df_label

    elif label_names or (label_name and label_name.lower() == "all"):
        # If all labels are requested
        for label in label_names:
            df_label = get_emails_from_label(label)
            if df_label is not None:
                df_label['Label'] = label
                all_emails.append(df_label)

        if all_emails:
            return pd.concat(all_emails, ignore_index=True)
        else:
            print("No emails found in the selected labels.")
            return pd.DataFrame(columns=['Subject', 'Sender', 'Snippet', 'Date', 'Label', 'Body'])

# Function to retrieve emails from a specific label
def get_emails_from_label(label_name):
    go_to_label(label_name)
    emails = extract_all_emails()
    if emails:
        return pd.DataFrame(emails, columns=['Subject', 'Sender', 'Snippet', 'Date', 'Body'])
    else:
        return None

       
def move_email_to_label(driver, label_name):
    try:
        # Wait for the email checkbox to be clickable and select it
        email_checkbox = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.oZ-jc'))
        )
        
        # Click the checkbox to select the email
        is_checked = email_checkbox.get_attribute('aria-checked')
        if is_checked == 'false':
            email_checkbox.click()
            WebDriverWait(driver, 5).until(
                lambda d: email_checkbox.get_attribute('aria-checked') == 'true'
            )
        
        # Ensure the "Move to" button is visible and clickable
        move_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.T-I.J-J5-Ji.ns.T-I-ax7.T-I-Js-Gs.mA'))
        )
        move_button.click()

        # Wait for the label menu to appear
        label_menu = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.J-M.jQjAxd[role="menu"]'))
        )
        
        # Ensure the label element is visible and clickable
        label_xpath = f'//div[@class="J-N-Jz" and text()="{label_name}"]'
        label_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, label_xpath))
        )
        label_element.click()
        
        print(f"Successfully moved email to label '{label_name}'")
        
    except Exception as e:
        print(f"Error while moving email to label '{label_name}': {e}")




def categorize_and_move_unread_emails():
    try:
        go_to_label('Inbox')  # Go to the inbox to retrieve unread emails
        email_elements = driver.find_elements(By.CSS_SELECTOR, 'tr.zA.zE')  # Unread emails have the class 'zE'

        for email in email_elements:
            try:
                # Extract the sender's name from the email details
                email_details_element = email.find_element(By.CSS_SELECTOR, 'span.y2')
                email_details = email_details_element.text
                
                # Find the sender's name in the email details
                if "From:" in email_details:
                    sender_name = email_details.split("From:")[1].split(" <")[0].strip()
                else:
                    sender_name = "Unknown"

                print(f"Sender: {sender_name}")  # Debug: Print sender name

                # Click the checkbox to select the email
                checkbox = email.find_element(By.CSS_SELECTOR, 'div.oZ-jc')
                if checkbox.get_attribute('aria-checked') == 'false':
                    checkbox.click()

                # Determine the label based on the sender's name
                label_name = 'Careem' if 'Careem' in sender_name else 'Other'
                print(f"Labeling as: {label_name}")  # Debug: Print label name

                # Move the email to the determined label
                move_email_to_label(driver, label_name)
            except Exception as e:
                print(f"Error processing email: {e}")

    except Exception as e:
        print(f"Error categorizing and moving unread emails: {e}")


# Example usage:
# Example usage:
categorize_and_move_unread_emails()

# Prompt user for the label name
label_names = ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools','Other']

user_label_input = input(f"Labels available: {label_names}\nEnter the label you want to retrieve emails from (type 'all' to retrieve from all labels): ")

# Retrieve emails based on user input
df_emails = get_emails(label_name=user_label_input, label_names=label_names)

# Save the retrieved emails to a CSV file
if df_emails is not None and not df_emails.empty:
    if user_label_input.lower() == 'all':
        df_emails.to_csv('all_emails_new.csv', index=False)
    else:
        df_emails.to_csv(f'{user_label_input}_emails.csv', index=False)
    print(f"Emails saved to {user_label_input.lower()}_emails.csv")
else:
    print("No emails were retrieved, so no CSV file was created.")

# Close the driver
driver.quit()


Sender: Unknown
Labeling as: Other
Successfully moved email to label 'Other'
Error processing email: Message: stale element reference: stale element not found in the current frame
  (Session info: chrome=127.0.6533.120); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#stale-element-reference-exception
Stacktrace:
	GetHandleVerifier [0x00007FF7C2CF9632+30946]
	(No symbol) [0x00007FF7C2CAE3C9]
	(No symbol) [0x00007FF7C2BA6FDA]
	(No symbol) [0x00007FF7C2BADA57]
	(No symbol) [0x00007FF7C2BAFD31]
	(No symbol) [0x00007FF7C2BAFDD0]
	(No symbol) [0x00007FF7C2BF7F69]
	(No symbol) [0x00007FF7C2BF850C]
	(No symbol) [0x00007FF7C2BEBCDC]
	(No symbol) [0x00007FF7C2C1CAAF]
	(No symbol) [0x00007FF7C2BEBBB6]
	(No symbol) [0x00007FF7C2C1CC80]
	(No symbol) [0x00007FF7C2C3B041]
	(No symbol) [0x00007FF7C2C1C813]
	(No symbol) [0x00007FF7C2BEA6E5]
	(No symbol) [0x00007FF7C2BEB021]
	GetHandleVerifier [0x00007FF7C2E2F83D+1301229]
	GetHandle

Labels available: ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools', 'Other']
Enter the label you want to retrieve emails from (type 'all' to retrieve from all labels):  Clients


Error while extracting emails: Message: element not interactable
  (Session info: chrome=127.0.6533.120)
Stacktrace:
	GetHandleVerifier [0x00007FF7C2CF9632+30946]
	(No symbol) [0x00007FF7C2CAE3C9]
	(No symbol) [0x00007FF7C2BA6E09]
	(No symbol) [0x00007FF7C2BF9712]
	(No symbol) [0x00007FF7C2BEC271]
	(No symbol) [0x00007FF7C2C1CA6A]
	(No symbol) [0x00007FF7C2BEBBB6]
	(No symbol) [0x00007FF7C2C1CC80]
	(No symbol) [0x00007FF7C2C3B041]
	(No symbol) [0x00007FF7C2C1C813]
	(No symbol) [0x00007FF7C2BEA6E5]
	(No symbol) [0x00007FF7C2BEB021]
	GetHandleVerifier [0x00007FF7C2E2F83D+1301229]
	GetHandleVerifier [0x00007FF7C2E3BDB7+1351783]
	GetHandleVerifier [0x00007FF7C2E32A03+1313971]
	GetHandleVerifier [0x00007FF7C2D2DD06+245686]
	(No symbol) [0x00007FF7C2CB758F]
	(No symbol) [0x00007FF7C2CB3804]
	(No symbol) [0x00007FF7C2CB3992]
	(No symbol) [0x00007FF7C2CAA3EF]
	BaseThreadInitThunk [0x00007FFE8E5D257D+29]
	RtlUserThreadStart [0x00007FFE9038AF28+40]

No emails were retrieved, so no CSV file was c

In [23]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
from datetime import datetime
import requests
download_dir = r"C:\Users\Syndictech\Downloads\EmailsPDF"  # Change this to your desired directory
# ChromeDriver Path
chrome_driver_path = r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
# Setting up Chrome options for PDF printing
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
prefs = {
    "printing.print_preview_sticky_settings.appState": '{"version":2,"isGcpPromoDismissed":true,"selectedDestinationId":"Save as PDF","destinationSuggestions":[{"id":"Save as PDF","name":"Save as PDF","isRecent":true}],"recentDestinations":[{"id":"Save as PDF","name":"Save as PDF","isRecent":true}],"mediaSize":{"height_microns":297000,"name":"ISO_A4","width_microns":210000},"mediaSizeValue":{"height_microns":297000,"name":"ISO_A4","width_microns":210000}}',
    "savefile.default_directory": download_dir,
    "printing.default_destination_selection_rules": {
        "kind": "local",
        "namePattern": "Save as PDF"
    },
    "profile.default_content_setting_values.automatic_downloads": 1,
    "profile.managed_default_content_settings.popups": 1,
    "profile.default_content_setting_values.popups": 0,
}
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument("--kiosk-printing")  # Automatically print without the print dialog

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)
# chrome_options.add_argument("--headless")  # Uncomment to run headless

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open Gmail
driver.get('https://mail.google.com')

# Wait for the login page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'identifierId')))

# Login
driver.find_element(By.ID, 'identifierId').send_keys('waleed.uyi.bot@gmail.com')
driver.find_element(By.ID, 'identifierNext').click()

# Wait for the password page to load and enter the password
try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'Passwd')))
    password_input = driver.find_element(By.NAME, 'Passwd')
    password_input.send_keys('UYI@waleedBOT')
    driver.find_element(By.ID, 'passwordNext').click()
except TimeoutException:
    print("Password input field was not found. Please check the locator or wait time.")
    driver.quit()
    exit()

# Wait for Gmail to load
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[role="main"]')))

def click_print_button_and_save():
    try:
        # Wait for the print button to be clickable and then click it
        print_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'span.pYTkkf-JX-ank-Rtc0Jf span.bzc-ank'))
        )
        print_button.click()
        print("Print button clicked successfully.")
        
        # Wait for the Save button in the print dialog and click it
        save_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'cr-button.action-button'))
        )
        save_button.click()
        print("Save button clicked successfully.")
        
        # Wait for the file dialog to open (this step might require additional handling depending on OS)
        # Here, we're assuming the download is handled automatically by the browser settings.

        time.sleep(10)  # Adjust this sleep time depending on the time it takes for the file to download
        print("File should be saved.")
        
    except TimeoutException:
        print("Error: Print or Save button not found or took too long to load.")
    except NoSuchElementException:
        print("Error: Print or Save button not found in the DOM.")
    except Exception as e:
        print(f"Error clicking print or save button: {e}")

def convert_pdf_to_text(pdf_path):
    url = "https://api.pdfrest.com/word"
    
    # Prepare the payload
    files = {
        'file': open(pdf_path, 'rb')
    }

    headers = {
        'Api-Key': '34bf43a6-1bce-4dfe-878f-b785b3e0bc92'
    }

    try:
        # Make the request to the API
        response = requests.post(url, headers=headers, files=files)
        response.raise_for_status()  # Raise an error for bad responses
        
        # Assuming the API returns the text directly or within a JSON structure
        response_data = response.json()  # Adjust if the response is not JSON

        # Extract text from the response (depends on API response structure)
        if 'text' in response_data:
            return response_data['text']
        else:
            print("Text extraction failed: No 'text' field in the API response.")
            return None
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"An error occurred: {err}")
    
    return None

def extract_all_emails():
    emails = []
    try:
        email_elements = driver.find_elements(By.CSS_SELECTOR, 'tr.zA')

        if not email_elements:
            print("No emails found in this label.")
            return emails

        for email in email_elements:
            try:
                subject = email.find_element(By.CSS_SELECTOR, '.bog').text
                sender = email.find_element(By.CSS_SELECTOR, '.yX .zF, .yX .yP').get_attribute('email')
                snippet = email.find_element(By.CSS_SELECTOR, '.y2').text
                date_element = email.find_element(By.CSS_SELECTOR, '.xW.xY span')
                date = date_element.get_attribute('title')

                # Click on the email to open it
                email.click()
                time.sleep(2)  # Adjust sleep as necessary

                # Click the print button and then save the email as PDF
                click_print_button_and_save()

                # Optional: Handle the downloaded PDF file path
                pdf_path = f"C:/path/to/downloads/{subject}.pdf"  # Adjust the path accordingly
                
                # Convert the PDF to text using your API
                pdf_text = convert_pdf_to_text(pdf_path) if pdf_path else "Failed to convert PDF"
                
                # Parse the date string to a proper datetime object
                try:
                    email_date = datetime.strptime(date, '%b %d, %Y, %I:%M %p')
                except ValueError:
                    try:
                        email_date = datetime.strptime(date, '%b %d, %Y')
                    except ValueError:
                        email_date = date  # In case the format doesn't match expected patterns

                emails.append([subject, sender, snippet, email_date, pdf_text])
            except NoSuchElementException as e:
                print(f"Error extracting email details: {e}")
                continue

    except Exception as e:
        print(f"Error while extracting emails: {e}")

    return emails



def get_emails(label_name=None, label_names=None):
    all_emails = []

    if label_name and label_name.lower() != "all":
        # If a specific label is requested
        df_label = get_emails_from_label(label_name)
        if df_label is not None:
            df_label['Label'] = label_name
            return df_label

    elif label_names or (label_name and label_name.lower() == "all"):
        # If all labels are requested
        for label in label_names:
            df_label = get_emails_from_label(label)
            if df_label is not None:
                df_label['Label'] = label
                all_emails.append(df_label)

        if all_emails:
            return pd.concat(all_emails, ignore_index=True)
        else:
            print("No emails found in the selected labels.")
            return pd.DataFrame(columns=['Subject', 'Sender', 'Snippet', 'Date', 'Label', 'Body'])

def get_emails_from_label(label_name):
    go_to_label(label_name)
    emails = extract_all_emails()
    if emails:
        return pd.DataFrame(emails, columns=['Subject', 'Sender', 'Snippet', 'Date', 'Body'])
    else:
        return None

def move_email_to_label(driver, label_name):
    try:
        # Wait for the email checkbox to be clickable and select it
        email_checkbox = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.oZ-jc'))
        )
        
        # Click the checkbox to select the email
        is_checked = email_checkbox.get_attribute('aria-checked')
        if is_checked == 'false':
            email_checkbox.click()
            WebDriverWait(driver, 5).until(
                lambda d: email_checkbox.get_attribute('aria-checked') == 'true'
            )
        
        # Ensure the "Move to" button is visible and clickable
        move_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div.T-I.J-J5-Ji.ns.T-I-ax7.T-I-Js-Gs.mA'))
        )
        move_button.click()

        # Wait for the label menu to appear
        label_menu = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.J-M.jQjAxd[role="menu"]'))
        )
        
        # Ensure the label element is visible and clickable
        label_xpath = f'//div[@class="J-N-Jz" and text()="{label_name}"]'
        label_element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.XPATH, label_xpath))
        )
        label_element.click()
        
        print(f"Successfully moved email to label '{label_name}'")
        
    except Exception as e:
        print(f"Error while moving email to label '{label_name}': {e}")

def categorize_and_move_unread_emails():
    try:
        go_to_label('Inbox')  # Go to the inbox to retrieve unread emails
        email_elements = driver.find_elements(By.CSS_SELECTOR, 'tr.zA.zE')  # Unread emails have the class 'zE'

        for email in email_elements:
            try:
                # Extract the sender's name from the email details
                email_details_element = email.find_element(By.CSS_SELECTOR, 'span.y2')
                email_details = email_details_element.text
                
                # Find the sender's name in the email details
                if "From:" in email_details:
                    sender_name = email_details.split("From:")[1].split(" <")[0].strip()
                else:
                    sender_name = "Unknown"

                print(f"Sender: {sender_name}")  # Debug: Print sender name

                # Click the checkbox to select the email
                checkbox = email.find_element(By.CSS_SELECTOR, 'div.oZ-jc')
                if checkbox.get_attribute('aria-checked') == 'false':
                    checkbox.click()

                # Determine the label based on the sender's name
                label_name = 'Careem' if 'Careem' in sender_name else 'Other'
                print(f"Labeling as: {label_name}")  # Debug: Print label name

                # Move the email to the determined label
                move_email_to_label(driver, label_name)
            except Exception as e:
                print(f"Error processing email: {e}")

    except Exception as e:
        print(f"Error categorizing and moving unread emails: {e}")

# Example usage:
categorize_and_move_unread_emails()

# Prompt user for the label name
label_names = ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools', 'Other']

user_label_input = input(f"Labels available: {label_names}\nEnter the label you want to retrieve emails from (type 'all' to retrieve from all labels): ")

# Retrieve emails based on user input
df_emails = get_emails(label_name=user_label_input, label_names=label_names)

# Save the retrieved emails to a CSV file
if df_emails is not None and not df_emails.empty:
    if user_label_input.lower() == 'all':
        df_emails.to_csv('all_emails_new.csv', index=False)
    else:
        df_emails.to_csv(f'{user_label_input}_emails.csv', index=False)
    print(f"Emails saved to {user_label_input.lower()}_emails.csv")
else:
    print("No emails were retrieved, so no CSV file was created.")

# Close the driver
driver.quit()


Error switching to label 'Inbox': Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7C2CF9632+30946]
	(No symbol) [0x00007FF7C2CAE3C9]
	(No symbol) [0x00007FF7C2BA6FDA]
	(No symbol) [0x00007FF7C2BF822C]
	(No symbol) [0x00007FF7C2BF850C]
	(No symbol) [0x00007FF7C2C3DCB7]
	(No symbol) [0x00007FF7C2C1CAAF]
	(No symbol) [0x00007FF7C2C3B041]
	(No symbol) [0x00007FF7C2C1C813]
	(No symbol) [0x00007FF7C2BEA6E5]
	(No symbol) [0x00007FF7C2BEB021]
	GetHandleVerifier [0x00007FF7C2E2F83D+1301229]
	GetHandleVerifier [0x00007FF7C2E3BDB7+1351783]
	GetHandleVerifier [0x00007FF7C2E32A03+1313971]
	GetHandleVerifier [0x00007FF7C2D2DD06+245686]
	(No symbol) [0x00007FF7C2CB758F]
	(No symbol) [0x00007FF7C2CB3804]
	(No symbol) [0x00007FF7C2CB3992]
	(No symbol) [0x00007FF7C2CAA3EF]
	BaseThreadInitThunk [0x00007FFE8E5D257D+29]
	RtlUserThreadStart [0x00007FFE9038AF28+40]

Sender: Unknown
Labeling as: Other
Error while moving email to label 'Other': Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7C2CF9632+30

Labels available: ['Clients', 'Partnerships', 'Research', 'Product Development', 'Marketing', 'Finance', 'Careem', 'AI Tools', 'Other']
Enter the label you want to retrieve emails from (type 'all' to retrieve from all labels):  Research


Error switching to label 'Research': Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF7C2CF9632+30946]
	(No symbol) [0x00007FF7C2CAE3C9]
	(No symbol) [0x00007FF7C2BA6FDA]
	(No symbol) [0x00007FF7C2BF822C]
	(No symbol) [0x00007FF7C2BF850C]
	(No symbol) [0x00007FF7C2C3DCB7]
	(No symbol) [0x00007FF7C2C1CAAF]
	(No symbol) [0x00007FF7C2C3B041]
	(No symbol) [0x00007FF7C2C1C813]
	(No symbol) [0x00007FF7C2BEA6E5]
	(No symbol) [0x00007FF7C2BEB021]
	GetHandleVerifier [0x00007FF7C2E2F83D+1301229]
	GetHandleVerifier [0x00007FF7C2E3BDB7+1351783]
	GetHandleVerifier [0x00007FF7C2E32A03+1313971]
	GetHandleVerifier [0x00007FF7C2D2DD06+245686]
	(No symbol) [0x00007FF7C2CB758F]
	(No symbol) [0x00007FF7C2CB3804]
	(No symbol) [0x00007FF7C2CB3992]
	(No symbol) [0x00007FF7C2CAA3EF]
	BaseThreadInitThunk [0x00007FFE8E5D257D+29]
	RtlUserThreadStart [0x00007FFE9038AF28+40]

Error saving email as PDF: Message: element click intercepted: Element <span class="bzc-ank" aria-hidden="true">...</span> is not clickabl

In [None]:
def press_print():
    

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException, ElementClickInterceptedException
import pandas as pd
import time
from datetime import datetime
import pyautogui

# ChromeDriver Path
chrome_driver_path = r"C:\Users\Syndictech\Downloads\chromedriver-win64\chromedriver-win64\chromedriver.exe"
chrome_options = Options()
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
# chrome_options.add_argument("--headless")  # Uncomment to run headless

# Setting up the webdriver
service = Service(chrome_driver_path)
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open Gmail
driver.get('https://mail.google.com')

# Wait for the login page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'identifierId')))

# Login
driver.find_element(By.ID, 'identifierId').send_keys('waleed.uyi.bot@gmail.com')
driver.find_element(By.ID, 'identifierNext').click()

# Wait for the password page to load and enter the password
try:
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'Passwd')))
    
    password_input = driver.find_element(By.NAME, 'Passwd')
    password_input.send_keys('UYI@waleedBOT')
    
    driver.find_element(By.ID, 'passwordNext').click()
    time.sleep(5)
    driver.get('https://mail.google.com/mail/u/0/#inbox/FMfcgzQVzXZqMZwfrwdxpQrjNpKhVcQV')
    
    element = driver.find_element(By.CLASS_NAME,"pYTkkf-JX-ano")
    element.click()
    
    

    # Send the ENTER key press
    element.send_keys(Keys.ENTER)
    pyautogui.press('enter')   
    pyautogui.press('enter')  
    
except TimeoutException:
    print("Password input field was not found. Please check the locator or wait time.")
    driver.quit()
    exit()

# Wait for Gmail to load
WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div[role="main"]')))

NameError: name 'Keys' is not defined

In [3]:
!pip install pyautogui

Collecting pyautogui
  Downloading PyAutoGUI-0.9.54.tar.gz (61 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pymsgbox (from pyautogui)
  Downloading PyMsgBox-1.0.9.tar.gz (18 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting pytweening>=1.0.4 (from pyautogui)
  Downloading pytweening-1.2.0.tar.gz (171 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'don

In [10]:
!pip install wkhtmltopdf

Collecting wkhtmltopdf
  Downloading wkhtmltopdf-0.2.tar.gz (9.7 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: wkhtmltopdf
  Building wheel for wkhtmltopdf (setup.py): started
  Building wheel for wkhtmltopdf (setup.py): finished with status 'done'
  Created wheel for wkhtmltopdf: filename=wkhtmltopdf-0.2-py3-none-any.whl size=11149 sha256=870550eeea09e7bdf0f08280847cce5b984fad4df407112ae8398e903a146f00
  Stored in directory: c:\users\syndictech\appdata\local\pip\cache\wheels\61\e8\ba\34bacc3d874358dc176fd67cc97eeaa9a7fb54c55479258fb8
Successfully built wkhtmltopdf
Installing collected packages: wkhtmltopdf
Successfully installed wkhtmltopdf-0.2


In [17]:
import email
import pdfkit

def convert_eml_to_pdf(eml_file, output_pdf):
    # Open and parse the .eml file
    with open(eml_file, 'r') as f:
        msg = email.message_from_file(f)
    
    # Extract email details
    email_subject = msg['Subject']
    email_from = msg['From']
    email_to = msg['To']
    email_date = msg['Date']

    # Extract the body of the email
    email_body = ""
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                email_body += part.get_payload(decode=True).decode()
    else:
        email_body = msg.get_payload(decode=True).decode()

    # Format HTML content for the PDF
    html_content = f"""
    <html>
    <head>
        <style>
            body {{
                font-family: Arial, sans-serif;
                margin: 20px;
            }}
            h1, h2, h3 {{
                color: #333;
            }}
            p {{
                font-size: 14px;
            }}
        </style>
    </head>
    <body>
        <h2>Email from {email_from}</h2>
        <h3>To: {email_to}</h3>
        <h4>Subject: {email_subject}</h4>
        <h4>Date: {email_date}</h4>
        <hr>
        <p>{email_body}</p>
    </body>
    </html>
    """

    # Convert HTML to PDF
    pdfkit.from_string(html_content, output_pdf)
    print(f"PDF saved as {output_pdf}")

# Convert the uploaded .eml file to .pdf
convert_eml_to_pdf(r"C:\Users\Syndictech\Downloads\Understanding Loans_ Your go-to guide 🧐.eml", r'output.pdf')


OSError: No wkhtmltopdf executable found: "b''"
If this file exists please check that this process can read it or you can pass path to it manually in method call, check README. Otherwise please install wkhtmltopdf - https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf

In [13]:
!pip install pdfkit

Collecting pdfkit
  Downloading pdfkit-1.0.0-py3-none-any.whl.metadata (9.3 kB)
Downloading pdfkit-1.0.0-py3-none-any.whl (12 kB)
Installing collected packages: pdfkit
Successfully installed pdfkit-1.0.0
