In [25]:
import smtplib
import dns.resolver
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed



In [27]:
def get_mx_records(domain):
    try:
        print(f"Resolving MX records for {domain}")
        answers = dns.resolver.resolve(domain, 'MX')
        mx_records = [str(rdata.exchange) for rdata in answers]
        print(f"MX records for {domain}: {mx_records}")
        return mx_records
    except Exception as e:
        print(f"Failed to get MX records for {domain}: {e}")
        return None


In [29]:
def verify_email(mx_records, email):
    try:
        for mx in mx_records:
            server = smtplib.SMTP()
            server.set_debuglevel(0)  # Disable debug output for speed
            server.connect(mx)
            server.helo(server.local_hostname)  # server.local_hostname(Get local server hostname)
            server.mail('test@example.com')
            code, message = server.rcpt(email)
            server.quit()
            if code == 250:
                return True
    except Exception as e:
        print(f"Failed to verify email {email}: {e}")
    return False


In [31]:
file_path = 'property_management_companies_unique_domains.xlsx'
df = pd.read_excel(file_path)

# Clean and prepare the domain list
domains = df['Domain'].drop_duplicates().str.strip()
emails_to_check = ["info@", "office@", "contact@", "support@", "admin@", "hello@", "sales@", "general@", "mail@", "service@"]


In [33]:
def check_emails_for_domain(domain):
    mx_records = get_mx_records(domain)
    if not mx_records:
        return None

    for prefix in emails_to_check:
        email = prefix + domain
        if verify_email(mx_records, email):
            return {'Domain': domain, 'Email': email}
    return None


In [35]:
results = []

# Use ThreadPoolExecutor to speed up the process
with ThreadPoolExecutor(max_workers=10) as executor:
    future_to_domain = {executor.submit(check_emails_for_domain, domain): domain for domain in domains}

    for future in as_completed(future_to_domain):
        domain = future_to_domain[future]
        try:
            result = future.result()
            if result:
                results.append(result)
        except Exception as e:
            print(f"Error checking domain {domain}: {e}")

# Debugging output to check the number of results
print(f"Number of valid emails found: {len(results)}")

# Save results to a new DataFrame and export to Excel
results_df = pd.DataFrame(results)
output_file = 'verified_emails.xlsx'
results_df.to_excel(output_file, index=False)
print(f"Results saved to '{output_file}'")


Resolving MX records for iccpropertymanagement.comResolving MX records for oxfordproperties.com

Resolving MX records for primefmi.ca
Resolving MX records for soundcloud.com
Resolving MX records for markhampropertymanagement.com
Resolving MX records for surimirfarsi.com
Resolving MX records for parfaitproperty.com
Resolving MX records for coldwellbanker.com
Resolving MX records for rentfaster.ca
Resolving MX records for caprent.com
MX records for oxfordproperties.com: ['mxa-002a0f01.gslb.pphosted.com.', 'mxb-002a0f01.gslb.pphosted.com.']
MX records for soundcloud.com: ['alt2.aspmx.l.google.com.', 'aspmx2.googlemail.com.', 'aspmx3.googlemail.com.', 'aspmx.l.google.com.', 'alt1.aspmx.l.google.com.']
MX records for iccpropertymanagement.com: ['iccpropertymanagement-com.mail.protection.outlook.com.']
MX records for rentfaster.ca: ['aspmx.l.google.com.', 'alt3.aspmx.l.google.com.', 'alt4.aspmx.l.google.com.', 'feedback-smtp.ca-central-1.amazonses.com.', 'alt1.aspmx.l.google.com.', 'alt2.asp