In [1]:

!pip install google-generativeai selenium webdriver-manager beautifulsoup4 pandas pymupdf requests lxml openpyxl tqdm python-dotenv numpy



In [2]:
import google.generativeai as genai
import json
import os
# Read the Gemini API key from the GEMINI_API_KEY environment variable.
api_key = os.getenv("GEMINI_API_KEY")

if not api_key:
    # Best-effort: only report the problem; genai is left unconfigured.
    print("Error: GEMINI_API_KEY environment variable not found.")
    # You might want to raise an exception or exit the script here
    # raise ValueError("GEMINI_API_KEY environment variable not set.")
else:
    genai.configure(api_key=api_key)
    print("Gemini API key configured successfully from environment variable.")


Gemini API key configured successfully from environment variable.


In [3]:
def ask_gemini(prompt, model="gemini-2.5-flash", temperature=0):
    """
    Send a single prompt to a Gemini model and return the text of its reply.

    Args:
        prompt: Content handed to ``generate_content``.
        model: Name of the Gemini model to instantiate (default "gemini-2.5-flash").
        temperature: Value placed in the generation config's ``temperature``.

    Returns:
        The ``text`` attribute of the response object.
    """
    # Keep the model object in its own name instead of rebinding the parameter.
    gemini_model = genai.GenerativeModel(model_name=model)
    generation_config = genai.types.GenerationConfig(temperature=temperature)
    reply = gemini_model.generate_content(prompt, generation_config=generation_config)
    return reply.text


In [21]:
import csv 
import time 

import json

def generate_field_answers(fields, resume_json):
    """
    Ask Gemini for an answer to every extracted form field.

    Args:
        fields: Iterable of field dicts. The keys read are "field_name",
            "field_type", "value" and "options"; missing keys are tolerated.
        resume_json: JSON-serialisable resume data embedded in each prompt.

    Returns:
        A list of shallow copies of the input dicts, each with an added
        "generated_answer" key holding the stripped model reply, or "" when
        the Gemini call raised.
    """
    # The resume is identical for every field, so serialise it only once.
    resume_text = json.dumps(resume_json)

    answers = []
    for field in fields:
        question = field.get("field_name", "")
        field_type = field.get("field_type")
        current_value = field.get("value")
        # str() guards against non-string entries (e.g. None) breaking the join.
        options_text = "; ".join(str(opt) for opt in (field.get("options") or []))

        prompt = f"""
You are an expert LinkedIn Easy Apply assistant that auto-fills form fields intelligently.

🎯 **Your goal**: Provide the most accurate and recruiter-friendly answer for each field using the candidate’s resume data.

🧠 **Input Context**
- Resume JSON: {resume_text}
- Field question: {question}
- Field type: {field_type}
- Current value: {current_value}
- Options (if any): {options_text}

---

### 🧩 **Core Principles**
1️⃣ Favor the candidate by highlighting skills, experience, and achievements.  
2️⃣ Favor the recruiter by ensuring the response looks relevant, confident, and professional.  
3️⃣ Always produce an answer that can be directly inserted into a form field — no commentary, no extra text.

---

### 🧠 **Intelligent Behavior Rules**

#### 📊 **1. Numerical / Experience-based questions**
- If the question asks:
  - “How many years of experience”, “Experience in”, or similar → output ONLY a number.
  - Example: "Experience in Gen-AI?" → `2`
  - Example: "How many years of experience in Python?" → `3`
- Round partial years to the nearest whole number:
  - 2 years 3 months → 2
  - 2 years 8 months → 3
- Don’t include words like “years”, “yrs”, or “months” — just the number.

#### 💰 **2. Salary or compensation**
- If question involves salary, CTC, or pay expectations → return a **range** like:
  - `100000 - 200000`
- Use realistic and market-aligned numbers based on resume experience.

#### 💬 **3. Text / Paragraph fields**
- If question expects a descriptive answer (like “Why should we hire you?” or “Tell us about yourself”):
  - Write 1–3 sentences that sound professional and natural.
  - Use resume highlights (skills, projects, experience) to make it personal.
  - Example: "I'm a software engineer with strong expertise in Gen-AI, automation, and large-scale system design."

#### 🎯 **4. Select / Multi-select / Radio / Checkbox**
- Select the **most relevant option(s)** aligned with the candidate’s skills, job role, or experience.
- Return **only** the selected option(s) text, not an explanation.

#### 🧾 **5. Missing or unclear info**
- If resume doesn’t explicitly provide the answer:
  - Infer a realistic, professional value.
  - Avoid placeholders like “N/A”, “sample”, “not applicable”.

#### ⚡ **6. Strict output format**
- Return only the final value — no explanations, quotes, or formatting.
- The answer must be:
  - a number → for numeric questions
  - a text phrase → for open-ended questions
  - a valid option → for dropdowns/radio/multi-select
  - a salary range → for pay-related fields

---

### 🧩 **Output Expectation**
Return ONLY the answer text (no markdown, no commentary).
"""

        try:
            answer = ask_gemini(prompt)
        except Exception as e:
            # Best-effort: a failed call leaves this field unanswered instead of
            # aborting the whole batch.
            print(f"❌ Error generating answer for field '{question}': {e}")
            answer = ""

        field_copy = field.copy()
        # `or ""` keeps .strip() safe if the helper ever returns None.
        field_copy["generated_answer"] = (answer or "").strip()
        answers.append(field_copy)
        time.sleep(0.5)  # rate-limit Gemini calls lightly
    return answers


# -----------------------------
# Save generated answers CSV
# -----------------------------
def save_answers_to_csv(fields_with_answers, filename="easy_apply_answers.csv"):
    """
    Append answered fields to a CSV file, writing the header when needed.

    Args:
        fields_with_answers: Iterable of field dicts. Only the columns
            "field_name", "field_type", "value", "options" and
            "generated_answer" are written; any other key (such as the live
            "element" handle) is ignored and missing keys become empty cells.
        filename: Path of the CSV file to append to.

    The input dicts are not modified. "options" is flattened to a
    "; "-separated string.
    """
    columns = ["field_name", "field_type", "value", "options", "generated_answer"]
    # A file that exists but is still empty needs the header just like a new one.
    needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0

    with open(filename, "a", newline="", encoding="utf-8") as f:
        # extrasaction="ignore" drops non-column keys instead of raising ValueError.
        writer = csv.DictWriter(f, fieldnames=columns, extrasaction="ignore")
        if needs_header:
            writer.writeheader()
        for row in fields_with_answers:
            row_copy = dict(row)
            row_copy["options"] = "; ".join(str(opt) for opt in (row_copy.get("options") or []))
            writer.writerow(row_copy)
    print(f"✅ Answers appended to {filename}")


In [22]:
import os
import time
import csv
import pickle
import json
import requests
import tempfile
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

# -----------------------------
# Configuration
# -----------------------------
# Credentials are read from the environment; either may be None when unset.
LINKEDIN_EMAIL = os.environ.get("LINKEDIN_EMAIL")
LINKEDIN_PASSWORD = os.environ.get("LINKEDIN_PASSWORD")
COOKIE_FILE = "linkedin_cookies.pkl"  # pickled browser cookies reused across runs
RESUME_DRIVE_URL = "https://drive.google.com/file/d/1QgFWJDJS84TmvyRJeapjRUEtcEn_6QL9/view?usp=sharing"
CSV_PATH = "csv/linkedin_jobs.csv"  # job list consumed by the main loop

# -----------------------------
# Launch Chrome
# -----------------------------
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=options)
wait = WebDriverWait(driver, 10)  # shared explicit wait with a 10 s timeout

# -----------------------------
# Login functions
# -----------------------------
def try_login_with_cookies():
    """
    Try to restore a LinkedIn session from the cookies pickled in COOKIE_FILE.

    Returns:
        True when the profile-photo element appears after the cookies are
        applied. False when the cookie file is absent, cannot be read or
        applied, or the element does not appear within the shared wait.
    """
    if not os.path.exists(COOKIE_FILE):
        return False

    # Cookies can only be added for the domain that is currently loaded.
    driver.get("https://www.linkedin.com")
    try:
        # SECURITY NOTE: pickle.load can execute arbitrary code. Only load a
        # cookie file that this notebook wrote itself.
        with open(COOKIE_FILE, "rb") as f:
            cookies = pickle.load(f)
        for cookie in cookies:
            driver.add_cookie(cookie)
    except Exception as exc:
        # A corrupt or stale cookie file should fall back to credential login
        # rather than crash the notebook.
        print(f"❌ Could not restore cookies from {COOKIE_FILE}: {exc}")
        return False

    driver.refresh()
    time.sleep(3)
    try:
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "img.global-nav__me-photo")))
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        print("❌ Cookies expired or invalid")
        return False

    print("✅ Logged in using cookies")
    return True

def login_with_credentials():
    """
    Log in through the LinkedIn login form and save the session cookies.

    Uses LINKEDIN_EMAIL and LINKEDIN_PASSWORD, waits for the profile-photo
    element, then pickles the driver's cookies to COOKIE_FILE.

    Raises:
        ValueError: if either credential is unset or empty.
    """
    # Fail fast with a clear message instead of passing None to send_keys.
    if not LINKEDIN_EMAIL or not LINKEDIN_PASSWORD:
        raise ValueError(
            "LINKEDIN_EMAIL and LINKEDIN_PASSWORD environment variables must be set to log in."
        )

    driver.get("https://www.linkedin.com/login")
    username_input = wait.until(EC.presence_of_element_located((By.ID, "username")))
    username_input.send_keys(LINKEDIN_EMAIL)
    password_input = driver.find_element(By.ID, "password")
    password_input.send_keys(LINKEDIN_PASSWORD)
    password_input.send_keys(Keys.RETURN)

    # The profile photo in the nav bar is used as the "logged in" signal.
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "img.global-nav__me-photo")))
    with open(COOKIE_FILE, "wb") as f:
        pickle.dump(driver.get_cookies(), f)
    print("✅ Logged in and cookies saved")

# -----------------------------
# Easy Apply click & fallback
# -----------------------------
def click_easy_apply():
    """
    Click the Easy Apply button on the currently loaded job page.

    Returns:
        "Clicked Easy Apply" when the button was clicked, otherwise
        "Already applied". The latter sentinel is also returned when the
        button is missing or clicking failed, because the main loop skips a
        job by comparing against exactly that string.
    """
    try:
        easy_apply_button = driver.find_element(By.CSS_SELECTOR, "button.jobs-apply-button")
        if "applied" in easy_apply_button.text.strip().lower():
            print("ℹ️ Already applied. Skipping this job.")
            return "Already applied"
        driver.execute_script("arguments[0].scrollIntoView(true);", easy_apply_button)
        time.sleep(1)
        # Click through JavaScript rather than WebElement.click().
        driver.execute_script("arguments[0].click();", easy_apply_button)
        print("✅ Easy Apply clicked!")
        return "Clicked Easy Apply"
    except NoSuchElementException:
        print("ℹ️ Easy Apply button not found or job not available. Skipping.")
        return "Already applied"
    except Exception as exc:
        # Report unexpected failures separately so they are not mistaken for a
        # missing button; still skip the job as before.
        print(f"ℹ️ Could not click Easy Apply ({type(exc).__name__}: {exc}). Skipping.")
        return "Already applied"

# -----------------------------
# Resume download
# -----------------------------
import os
import requests
import tempfile

def _drive_file_id(drive_url):
    """Return the file ID embedded in a Google Drive URL, or raise ValueError."""
    import re

    # Accept both ".../d/<id>/..." and "...?id=<id>" URL shapes.
    match = re.search(r"/d/([^/?#]+)", drive_url) or re.search(r"[?&]id=([^&#]+)", drive_url)
    if not match:
        raise ValueError(f"Could not find a Google Drive file ID in URL: {drive_url!r}")
    return match.group(1)


def _filename_from_content_disposition(header_value, fallback):
    """Return a bare filename parsed from a Content-Disposition value, else fallback."""
    import re
    from urllib.parse import unquote

    # Prefer the extended form: filename*=<charset>'<lang>'<percent-encoded name>
    extended = re.search(r"filename\*\s*=\s*[^']*'[^']*'([^;]+)", header_value, re.IGNORECASE)
    if extended:
        name = unquote(extended.group(1).strip().strip('"'))
    else:
        plain = re.search(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', header_value, re.IGNORECASE)
        name = (plain.group(1) or plain.group(2) or "").strip() if plain else ""
    # Keep only the final path component so the header cannot steer the write
    # outside the temp directory.
    name = os.path.basename(name.replace("\\", "/"))
    return name or fallback


def download_resume_from_drive(drive_url):
    """
    Download a Google Drive file to the system temp directory.

    Args:
        drive_url: Drive URL containing the file ID ("/d/<id>" or "?id=<id>").

    Returns:
        Path of the saved file. The name comes from the Content-Disposition
        header when present (otherwise "resume_<id>.pdf"), with "-drive"
        inserted before the extension.

    Raises:
        ValueError: if no file ID can be found in drive_url.
        Exception: if the HTTP status code is not 200.
    """
    file_id = _drive_file_id(drive_url)
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"

    # The timeout stops a stalled connection from hanging the notebook; the
    # context manager releases the streamed connection when done.
    with requests.get(download_url, stream=True, timeout=30) as resp:
        if resp.status_code != 200:
            raise Exception(f"Failed to download resume, status code {resp.status_code}")

        original_name = _filename_from_content_disposition(
            resp.headers.get("content-disposition", ""),
            f"resume_{file_id}.pdf",
        )

        # Append '-drive' before the extension.
        base, ext = os.path.splitext(original_name)
        temp_path = os.path.join(tempfile.gettempdir(), f"{base}-drive{ext}")

        with open(temp_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

    return temp_path


# -----------------------------
# Extract Easy Apply fields
# -----------------------------
def _label_text(element, label_xpath):
    """Return the stripped text of the label found via label_xpath relative to element."""
    return element.find_element(By.XPATH, label_xpath).text.strip()


def extract_easy_apply_fields():
    """
    Collect the input, textarea and select fields of the Easy Apply modal.

    Returns:
        A list of dicts with keys "field_name" (label text), "field_type",
        "value", "options" and "element" (the live WebElement). The list is
        empty when the modal does not become visible within the shared wait.

    Elements whose label cannot be located through the expected ancestor
    container are skipped. find_element raises when nothing matches, so the
    original aria-label/id fallback could never run and has been removed.
    NOTE(review): confirm that skipping unlabeled inputs is intended; the main
    loop's resume-upload branch relies on steps with no extracted fields.
    """
    text_label_xpath = "ancestor::div[@data-test-single-line-text-form-component]//label"
    select_label_xpath = "ancestor::div[@data-test-form-element]//label"

    fields = []
    try:
        form_container = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div.jobs-easy-apply-modal"))
        )

        # INPUTS
        for inp in form_container.find_elements(By.TAG_NAME, "input"):
            try:
                question_text = _label_text(inp, text_label_xpath)
                field_type = inp.get_attribute("type")
                if field_type in ["checkbox", "radio", "file"]:
                    value = inp.is_selected()
                else:
                    value = inp.get_attribute("value")

                options = []
                if field_type in ["radio", "checkbox"]:
                    # Options are the aria-labels of all inputs sharing this name.
                    name_attr = inp.get_attribute("name")
                    group = form_container.find_elements(By.NAME, name_attr)
                    options = [el.get_attribute("aria-label") for el in group if el.get_attribute("aria-label")]

                fields.append({
                    "field_name": question_text,
                    "field_type": field_type,
                    "value": value,
                    "options": options,
                    "element": inp
                })
            except Exception:
                # Skip this element only; `Exception` keeps Ctrl-C working.
                continue

        # TEXTAREAS
        for ta in form_container.find_elements(By.TAG_NAME, "textarea"):
            try:
                fields.append({
                    "field_name": _label_text(ta, text_label_xpath),
                    "field_type": "textarea",
                    "value": ta.get_attribute("value"),
                    "options": [],
                    "element": ta
                })
            except Exception:
                continue

        # SELECTS
        for sel in form_container.find_elements(By.TAG_NAME, "select"):
            try:
                question_text = _label_text(sel, select_label_xpath)
                options = [opt.text for opt in sel.find_elements(By.TAG_NAME, "option")]
                fields.append({
                    "field_name": question_text,
                    "field_type": "select",
                    "value": sel.get_attribute("value"),
                    "options": options,
                    "element": sel
                })
            except Exception:
                continue
    except Exception:
        # Best-effort: on any failure, return whatever was collected so far
        # (possibly nothing).
        pass

    return fields

# -----------------------------
# Fill fields
# -----------------------------
def _xpath_literal(text):
    """Quote text as an XPath 1.0 string literal, using concat() if it holds both quote kinds."""
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    pieces = [f"'{part}'" for part in text.split("'")]
    return "concat(" + ", \"'\", ".join(pieces) + ")"


def fill_easy_apply_fields(fields):
    """
    Write each field's "generated_answer" into its form element.

    Args:
        fields: Field dicts carrying "element", "field_type" and
            "generated_answer".

    Text-like fields get their value set through JavaScript followed by
    'input' and 'change' events. Select fields are chosen by visible text,
    falling back to clicking a matching <li>. Other field types are left
    untouched. A failure on one field is skipped so the rest are still filled.
    """
    for field in fields:
        try:
            el = field.get("element")
            if not el:
                continue
            generated = (field.get("generated_answer") or "").strip()
            f_type = field["field_type"]

            if f_type in ["text", "textarea"] or el.get_attribute("contenteditable") == "true":
                if generated:
                    driver.execute_script("""
                        arguments[0].focus();
                        arguments[0].value = arguments[1];
                        arguments[0].dispatchEvent(new Event('input', { bubbles: true }));
                        arguments[0].dispatchEvent(new Event('change', { bubbles: true }));
                    """, el, generated)
                    time.sleep(0.2)

            elif f_type == "select" and generated:
                try:
                    Select(el).select_by_visible_text(generated)
                except Exception:
                    # Fallback: open the control and click the matching list item.
                    el.click()
                    time.sleep(0.3)
                    # Quote the answer safely: an apostrophe in it would
                    # otherwise produce an invalid XPath expression.
                    option = el.find_element(
                        By.XPATH, f".//li[normalize-space(text())={_xpath_literal(generated)}]"
                    )
                    option.click()
                    time.sleep(0.2)
        except Exception:
            # Best-effort per field; `Exception` keeps Ctrl-C working.
            continue

# -----------------------------
# MAIN
# -----------------------------
# Reuse a saved session when possible; otherwise log in with credentials.
if not try_login_with_cookies():
    login_with_credentials()

# Explicit UTF-8 so the resume loads the same way on every platform.
with open("resumes/Yeswanth_Yerra_CV_structured.json", "r", encoding="utf-8") as f:
    resume_data = json.load(f)

# newline="" is what the csv module expects for files it parses.
with open(CSV_PATH, "r", encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f)
    # `or ""` tolerates rows where the apply_type cell is missing.
    jobs = [row for row in reader if (row.get("apply_type") or "").strip().lower() == "easy apply"]

# Upper bound on form steps per job, so a form that never advances (for
# example because a validation error keeps the same page open) cannot loop
# forever and keep issuing Gemini calls.
MAX_STEPS_PER_JOB = 20

try:
    for idx, job in enumerate(jobs, 1):
        print(f"\n🎯 Processing job {idx}/{len(jobs)}: {job['title']} at {job['company']} ({job['location']})")
        driver.get(job["apply_link"])
        time.sleep(3)

        apply_status = click_easy_apply()
        if apply_status == "Already applied":
            continue

        step = 1
        while True:
            if step > MAX_STEPS_PER_JOB:
                print(f"⚠️ Stopped after {MAX_STEPS_PER_JOB} steps without reaching the final review.")
                break

            print(f"➡️ Step {step}...")
            fields = extract_easy_apply_fields()

            if not fields:
                print("ℹ️ No fields detected. Checking for resume upload or next step...")

                # 🔹 Check for resume upload
                try:
                    upload_input = driver.find_element(By.CSS_SELECTOR, "input[type='file'].hidden")
                    if upload_input.is_displayed() or "hidden" in upload_input.get_attribute("class"):
                        print("📎 Found resume upload input, uploading resume...")
                        local_resume = download_resume_from_drive(RESUME_DRIVE_URL)
                        try:
                            upload_input.send_keys(local_resume)
                            time.sleep(2)
                        finally:
                            # Remove the temp copy even when the upload fails.
                            os.remove(local_resume)
                        print("✅ Resume uploaded successfully.")
                except NoSuchElementException:
                    # This step simply has no upload control.
                    pass
                except Exception as exc:
                    print(f"⚠️ Resume upload failed: {exc}")

                # 🔹 Try to go next
                try:
                    next_button = driver.find_element(
                        By.XPATH,
                        "//button[contains(., 'Next') or contains(., 'Continue') or contains(., 'Review')]"
                    )
                    if next_button and next_button.is_enabled():
                        driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                        time.sleep(0.5)
                        driver.execute_script("arguments[0].click();", next_button)
                        print("➡️ Proceeded to next step.")
                        time.sleep(2)
                        step += 1
                        continue
                except Exception:
                    print("⚠️ No next button found. Assuming final review.")
                    break

                # Next button exists but is disabled: stop stepping.
                break

            fields_with_answers = generate_field_answers(fields, resume_data)
            fill_easy_apply_fields(fields_with_answers)

            # Go next after filling
            try:
                next_button = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, "//button[contains(., 'Next') or contains(., 'Review') or contains(., 'Continue')]")
                    )
                )
                if "disabled" in next_button.get_attribute("class"):
                    break
                driver.execute_script("arguments[0].scrollIntoView(true);", next_button)
                time.sleep(0.5)
                driver.execute_script("arguments[0].click();", next_button)
                time.sleep(2)
                step += 1
            except Exception:
                break

        # Final submit
        try:
            submit_button = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//button[contains(., 'Submit') or contains(., 'Done')]")
                )
            )
            driver.execute_script("arguments[0].scrollIntoView(true);", submit_button)
            time.sleep(0.5)
            driver.execute_script("arguments[0].click();", submit_button)
            print("✅ Application submitted successfully!")
            time.sleep(2)
        except Exception:
            print("⚠️ Could not find Submit button, skipped.")
finally:
    # Always release the browser, even when a job raised part-way through.
    driver.quit()

print("🎉 All Easy Apply jobs processed!")


✅ Logged in using cookies

🎯 Processing job 1/22: Data Scientist at AB InBev GCC India (Bengaluru, Karnataka, India)
ℹ️ Easy Apply button not found or job not available. Skipping.

🎯 Processing job 2/22: Associate - Data Scientist-Data Science-Data Scientist at EXL (Bengaluru, Karnataka, India)
ℹ️ Easy Apply button not found or job not available. Skipping.

🎯 Processing job 3/22: Senior Data Scientist at Tata Communications Transformation Services (TCTS) (Pune, Maharashtra, India)
ℹ️ Easy Apply button not found or job not available. Skipping.

🎯 Processing job 4/22: Machine Learning Engineer at Persistent Systems (Pune, Maharashtra, India)
ℹ️ Easy Apply button not found or job not available. Skipping.

🎯 Processing job 5/22: AI/ML Engineer at Impetus (Bengaluru, Karnataka, India)
✅ Easy Apply clicked!
➡️ Step 1...
➡️ Step 2...
ℹ️ No fields detected. Checking for resume upload or next step...
📎 Found resume upload input, uploading resume...
✅ Resume uploaded successfully.
➡️ Proceeded t

InvalidSessionIdException: Message: invalid session id; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
#0 0x556a81a5dfba <unknown>
#1 0x556a814e2523 <unknown>
#2 0x556a8152596f <unknown>
#3 0x556a81559d36 <unknown>
#4 0x556a81554800 <unknown>
#5 0x556a81553952 <unknown>
#6 0x556a814aa7e5 <unknown>
#7 0x556a81a22b28 <unknown>
#8 0x556a81a2687f <unknown>
#9 0x556a81a0ac49 <unknown>
#10 0x556a81a27405 <unknown>
#11 0x556a819f04ff <unknown>
#12 0x556a814a8664 <unknown>
#13 0x7fcbcdc27083 __libc_start_main
