In [1]:
import re
import pandas as pd


In [2]:
# Load the whole email into memory as one string; the later cells run
# their regex searches against this text.
with open('email.txt', mode='r') as email_file:
    email_content = email_file.read()



In [3]:
# Regex patterns used to pull contact details out of the email text.
#
# The name and position patterns separate the two name words with [ \t]
# rather than \s. \s also matches "\n", so the previous form could treat two
# consecutive one-word lines (e.g. "Thanks" / "Jane") as a single "full name"
# and then capture the wrong line as the position. [ \t] keeps the name on
# one line.
name_pattern = r'(?m)^[A-Z][a-z]+[ \t][A-Z][a-z]+$'  # Two capitalized words alone on one line
position_pattern = r'(?m)^[A-Z][a-z]+[ \t][A-Z][a-z]+\n(.+)$'  # Group 1: the line right after the name line
workplace_pattern = r'(?m)^(.*Training, Inc\.)$'  # Group 1: a whole line ending with "Training, Inc."
email_pattern = r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})'  # An email address anywhere in the text
phone_pattern = r'P:\s*\((\d{3})\)\s*(\d{3})-(\d{4})'  # Groups 1-3: the digit groups of "P: (ddd) ddd-dddd"

# Search the email text once per field. re.search returns the first match,
# or None when the pattern is absent; the None case is handled where the
# values are assigned.
full_name_match = re.search(name_pattern, email_content)
position_match = re.search(position_pattern, email_content)
workplace_match = re.search(workplace_pattern, email_content)
email_match = re.search(email_pattern, email_content)
phone_match = re.search(phone_pattern, email_content)


In [4]:
# Turn each regex match into a plain string. Any field whose search found
# nothing gets the "N/A" placeholder instead of raising on a None match.
if full_name_match:
    full_name = full_name_match.group(0).strip()
else:
    full_name = "N/A"

# group(1) is the line after the name; group(0) would include the name too.
if position_match:
    position = position_match.group(1).strip()
else:
    position = "N/A"

if workplace_match:
    workplace = workplace_match.group(1).strip()
else:
    workplace = "N/A"

if email_match:
    email = email_match.group(0).strip()
else:
    email = "N/A"

# Rebuild the phone number in a fixed "(ddd) ddd-dddd" layout from its
# three captured digit groups.
if phone_match:
    phone_number = f"({phone_match.group(1)}) {phone_match.group(2)}-{phone_match.group(3)}"
else:
    phone_number = "N/A"

# The webpage URL is derived from the part of the email after the "@".
if email == "N/A":
    domain = "N/A"
else:
    domain = email.split('@')[1]

if domain == "N/A":
    webpage = "N/A"
else:
    webpage = f"http://{domain}"


In [5]:
# Sanity check: show every extracted field, one per line, before the values
# are placed into the DataFrame. The last item ends with "\n" so a blank
# line follows the listing.
print(
    "Extracted Information:",
    f"Full Name: {full_name}",
    f"Position: {position}",
    f"Work Place: {workplace}",
    f"Email: {email}",
    f"Phone: {phone_number}",
    f"Webpage: {webpage}\n",
    sep="\n",
)


Extracted Information:
Full Name: Jane Doe
Position: Senior Account Executive
Work Place: Microsoft Training, Inc.
Email: jane.doe@microsoft.com
Phone: (904) 777-3333
Webpage: http://microsoft.com



In [6]:
# Map each column name to a one-element list so the resulting DataFrame has
# a single row holding the extracted contact.
data = {
    column: [value]
    for column, value in (
        ('Full Name', full_name),
        ('Position', position),
        ('Work Place', workplace),
        ('Email', email),
        ('Phone', phone_number),
        ('Webpage', webpage),
    )
}

# Build the frame and discard any repeated rows in one step.
df = pd.DataFrame(data).drop_duplicates()


In [7]:
# Show the DataFrame under a heading, with blank lines around the heading.
print("\nFormatted DataFrame:\n", df, sep="\n")

# Ask the user to confirm the data; the answer is trimmed and lower-cased so
# the later comparison ignores surrounding whitespace and letter case.
confirm = input("\nIs this information correct? (yes/no): ")
confirm = confirm.strip().lower()



Formatted DataFrame:

  Full Name                  Position                Work Place  \
0  Jane Doe  Senior Account Executive  Microsoft Training, Inc.   

                    Email           Phone               Webpage  
0  jane.doe@microsoft.com  (904) 777-3333  http://microsoft.com  

Is this information correct? (yes/no): yes


In [8]:

# Only an exact 'yes' answer triggers the save; anything else cancels.
if confirm != 'yes':
    print("Operation cancelled. No changes were made.")
else:
    # Write the contact row to CSV without the DataFrame index column.
    df.to_csv('contacts.csv', index=False)
    print("Contact information saved to 'contacts.csv'.")


Contact information saved to 'contacts.csv'.
