In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Configure Chrome before starting it. To find your own Chrome profile
# folder on Windows, press Win + R and open
# %LOCALAPPDATA%\Google\Chrome\User Data
# NOTE(review): the --user-data-dir value below already ends in "Profile 2"
# and --profile-directory names "Profile 2" again - confirm this nested
# location is the profile you intend to use.
chrome_options = Options()
for chrome_flag in (
    "--user-data-dir=C:\\Users\\HP\\AppData\\Local\\Google\\Chrome\\User Data\\Profile 2",
    "--profile-directory=Profile 2",
):
    chrome_options.add_argument(chrome_flag)

# Initialize WebDriver: webdriver_manager supplies the chromedriver path.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open WhatsApp Web and block until the user confirms the login by hand.
driver.get("https://web.whatsapp.com/")
print("✅ Scan QR code and press Enter once logged in.")
input()

# Search for the group
# Surrounding whitespace is removed so that an accidental leading/trailing
# space does not defeat the exact title comparison further down.
group_name = input("Enter group name").strip()
search_box = WebDriverWait(driver, 20).until(
    EC.presence_of_element_located((By.XPATH, "//p[contains(@class, 'selectable-text copyable-text')]"))
)
search_box.click()
search_box.send_keys(group_name)

# Wait for search results to appear
time.sleep(3)  # Allow UI rendering
search_results = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.XPATH, "//span[@title]"))
)

# Read every title exactly once (each read is a browser round trip) and
# tolerate a missing attribute by treating it as an empty title.
titled_results = [
    (result, (result.get_attribute("title") or "").strip())
    for result in search_results
]

# Debugging: Print all found search results
found_groups = [title for _, title in titled_results if title]
print("🔍 Found groups:", found_groups)

# Locate the correct group and click it. A normal click is tried first and a
# JavaScript click is the fallback; if both fail for one element, the next
# element carrying the same title is tried.
group_clicked = False
for result, title in titled_results:
    if title != group_name:
        continue
    try:
        driver.execute_script("arguments[0].scrollIntoView();", result)  # Scroll into view
        time.sleep(1)  # Allow UI to adjust
        result.click()  # Normal Click
        print(f"✅ Clicked on group: {group_name}")
        group_clicked = True
    except Exception:  # not a bare except: Ctrl-C must still interrupt
        print("❌ Normal click failed, trying JavaScript click...")
        try:
            driver.execute_script("arguments[0].click();", result)  # JS Click
        except Exception as click_error:
            print(f"❌ JavaScript click failed as well: {click_error}")
        else:
            print(f"✅ Clicked via JavaScript: {group_name}")
            group_clicked = True
    if group_clicked:
        break

if not group_clicked:
    print("❌ Group not found or could not be clicked!")
    driver.quit()
    # exit() is an interactive helper; raising SystemExit stops execution
    # here regardless of how the code is being run.
    raise SystemExit

# Allow chat to fully open
time.sleep(2)

# Find all potential headers and force-click the first one via JavaScript.
# Failures here are reported but deliberately not fatal (best effort).
try:
    all_headers = driver.find_elements(By.XPATH, "//header//span[contains(@class, '_ao3e')]")
    print(f"🔍 Found {len(all_headers)} possible headers")

    if all_headers:
        header = all_headers[0]  # only one click is needed
        driver.execute_script("arguments[0].scrollIntoView();", header)
        time.sleep(1)  # Allow UI to adjust
        driver.execute_script("arguments[0].click();", header)
        print("✅ Forced header click via JavaScript!")

except Exception as e:
    print(f"❌ Failed to click group header: {e}")

# Wait for the contact list to be fully loaded. Only Exception is caught so
# that Ctrl-C during the wait interrupts the script instead of being reported
# as a missing contact list.
try:
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(@class, '_3j691')]"))  # Adjust if necessary
    )
    time.sleep(2)  # Allow UI to fully load
    print("✅ Contact list is visible!")
except Exception:
    print("❌ Contact list did not appear, check XPath or increase wait time!")

# Extract contacts, save them, and always close the browser afterwards -
# even when scraping, the CSV write or the final prompt raises.
try:
    # NOTE: this XPath is not scoped to a particular panel, so every span on
    # the page carrying this class is collected. Adjust based on HTML.
    contacts = driver.find_elements(By.XPATH, "//span[contains(@class, '_ao3e')]")

    # Read each element's text once, then drop the empty entries.
    contact_texts = (contact.text.strip() for contact in contacts)
    contact_list = [text for text in contact_texts if text]

    # Debugging: Print all extracted contacts
    print("🔍 Extracted contacts:", contact_list)

    # Save to CSV only if contacts are found
    if contact_list:
        df = pd.DataFrame({'Contacts': contact_list})
        df.to_csv('whatsapp_contacts.csv', index=False, encoding='utf-8')
        print("✅ Contacts saved to whatsapp_contacts.csv")
    else:
        print("❌ No contacts found. Ensure the group list is visible before extracting.")

    # Keep the browser open for debugging
    input("🔎 Press Enter to close the browser after checking the results...")
finally:
    driver.quit()


In [None]:
# Load the scraped contacts. header=None keeps every line of the file as a
# data row (including a header line, if the file has one), so no row is
# skipped by the extraction that follows.
df = pd.read_csv(
    "whatsapp_contacts.csv",
    header=None,
)

In [None]:
import re
# Built once instead of on every call. Adjust the pattern to your phone
# number format. The optional last group needs at least one digit, so a
# separator that merely follows a number is not captured as part of it.
_PHONE_PATTERN = re.compile(
    r"\+?\d{1,3}[-.\s]?\d{2,5}[-.\s]?\d{2,5}[-.\s]?\d{3,5}(?:[-.\s]?\d{1,5})?"
)


def extract_phone_numbers(text):
    """Return every phone-number-like substring found in *text*.

    Args:
        text: A single cell value; it is converted with ``str()`` before
            matching.

    Returns:
        ``None`` when *text* is missing (``pd.isna``), otherwise a list of the
        matched substrings in order of appearance (empty if nothing matches).
        Matches never end in a separator character.
    """
    if pd.isna(text):  # Skip NaN values
        return None
    return _PHONE_PATTERN.findall(str(text))

# Apply extraction function to every cell.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; pick whichever this pandas version provides. The check is
# made on the class so that a column named "map" cannot be mistaken for it.
apply_per_cell = df.map if hasattr(pd.DataFrame, "map") else df.applymap
phone_numbers = apply_per_cell(extract_phone_numbers).values.flatten()

# Flatten and remove duplicates. Only list results are expanded, which skips
# the None returned for missing cells and any other non-list value.
unique_numbers = sorted(
    {num for sublist in phone_numbers if isinstance(sublist, list) for num in sublist}
)

# Save to CSV
output_df = pd.DataFrame(unique_numbers, columns=["Phone Numbers"])
output_df.to_csv("extracted_numbers_test1.csv", index=False)

print(f"Extracted {len(unique_numbers)} unique phone numbers and saved to 'extracted_numbers_test1.csv'")