<a href="https://colab.research.google.com/github/ualahari/ai-resume-analyzer/blob/main/ngrok.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =========================================================================
# STEP 0: Install dependencies & Setup
# =========================================================================
# We install all required libraries for the project
!pip install flask pyngrok pandas pdfplumber scikit-learn spacy

# Download the SpaCy English model for NLP preprocessing
!python -m spacy download en_core_web_sm

# =========================================================================
# STEP 1: Imports and Initialization
# =========================================================================
import os
import pandas as pd
import pdfplumber  # For PDF extraction
import spacy       # For NLP preprocessing
from flask import Flask, request, render_template, send_file
from pyngrok import ngrok
from sklearn.feature_extraction.text import TfidfVectorizer  # For vectorization
from sklearn.metrics.pairwise import cosine_similarity       # For scoring

# Load the small English SpaCy pipeline once at start-up; preprocess_text()
# reuses this object for every document it processes.
nlp = spacy.load("en_core_web_sm")
# Directory in which uploaded resume PDFs are saved before text extraction.
UPLOAD_FOLDER = "resumes"
# Create the upload directory up front; exist_ok avoids an error on re-runs.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


# =========================================================================
# STEP 2: Core Functions (PDF Extraction & NLP Preprocessing)
# =========================================================================

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Each page that yields text contributes that text followed by one space;
    pages without extractable text are skipped. Any error raised while
    opening or parsing the file is printed and an empty string is returned.
    """
    try:
        with pdfplumber.open(pdf_path) as document:
            # Consume the generator inside the ``with`` so pages are read
            # while the PDF is still open.
            extracted = (page.extract_text() for page in document.pages)
            return "".join(chunk + " " for chunk in extracted if chunk)
    except Exception as e:
        print(f"Error reading PDF {pdf_path}: {e}")
        return ""

def preprocess_text(text):
    """
    Normalise *text* into a space-separated string of lemmas.

    The text is lower-cased and run through the module-level SpaCy pipeline
    ``nlp``. Stop words and tokens that are not purely alphabetic are dropped;
    the lemma of every remaining token is kept, in document order.
    """
    lemmas = []
    for token in nlp(text.lower()):
        if token.is_stop or not token.is_alpha:
            continue
        lemmas.append(token.lemma_)
    return " ".join(lemmas)


# =========================================================================
# STEP 3: Scoring & Ranking Algorithm
# =========================================================================

def rank_resumes(resume_texts, job_description):
    """
    Ranks resumes based on cosine similarity to the job description using TF-IDF.

    Args:
        resume_texts: List of raw resume strings.
        job_description: Raw job description string.

    Returns:
        A tuple ``(ranked_indices, ranked_scores)`` of NumPy arrays.
        ``ranked_indices`` holds positions into ``resume_texts`` ordered from
        best to worst match and ``ranked_scores`` holds the corresponding
        cosine similarities in the same order. Both arrays are empty when
        ``resume_texts`` is empty.
    """
    # Local import keeps this block self-contained; NumPy is already pulled in
    # by scikit-learn and pandas.
    import numpy as np

    # Nothing to rank: return empty results instead of failing in scikit-learn.
    if not resume_texts:
        return np.array([], dtype=int), np.array([], dtype=float)

    # Preprocess all texts
    preprocessed_resumes = [preprocess_text(text) for text in resume_texts]
    preprocessed_job = preprocess_text(job_description)

    # TF-IDF vectorization: the job description is row 0, resumes follow.
    corpus = [preprocessed_job] + preprocessed_resumes
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(corpus)
    except ValueError:
        # TfidfVectorizer raises ValueError ("empty vocabulary") when no
        # document keeps a single token after preprocessing. There is nothing
        # to compare in that case, so every resume gets a score of 0.
        scores = np.zeros(len(preprocessed_resumes))
    else:
        # Cosine similarity of every resume row against the job row.
        scores = cosine_similarity(tfidf_matrix[1:], tfidf_matrix[0]).flatten()

    # Stable sort on the negated scores: best match first, and resumes with
    # equal scores keep their upload order (deterministic output).
    ranked_indices = np.argsort(-scores, kind="stable")
    ranked_scores = scores[ranked_indices]

    return ranked_indices, ranked_scores


# =========================================================================
# STEP 4: Flask App Routes (The Web UI)
# =========================================================================
# Application object; the @app.route decorators below register their views on it.
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload form (GET) or rank the uploaded resumes (POST).

    On POST the job description and the uploaded PDF files are read from the
    request, each PDF is saved under ``UPLOAD_FOLDER`` and its text extracted,
    the resumes are ranked against the job description, and the ranking is
    both written to ``resume_ranking.csv`` and rendered as an HTML table.

    Returns:
        The rendered ``index.html`` template, carrying either an error
        message, the ranking table plus download link, or just the empty form.
    """
    if request.method == "POST":
        # Strip so that a whitespace-only description is treated as missing.
        job_description = request.form.get("job_description", "").strip()
        uploaded_files = request.files.getlist("resumes")

        # --- Validation ---
        if not job_description or not uploaded_files:
            return render_template("index.html", error="Please provide a job description and upload at least one PDF resume.")

        resume_texts = []
        resume_names = []

        # --- File Processing ---
        for file in uploaded_files:
            # The filename is client-controlled. Keep only its last path
            # component (treating backslashes as separators too) so a name
            # such as "../../app.pdf" cannot be written outside UPLOAD_FOLDER.
            safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
            # Accept the ".pdf" extension in any letter case (e.g. "CV.PDF").
            if not safe_name.lower().endswith(".pdf"):
                continue
            file_path = os.path.join(UPLOAD_FOLDER, safe_name)
            file.save(file_path)
            text = extract_text_from_pdf(file_path)
            # Files that yield no text (unreadable or empty PDFs) are ignored.
            if text:
                resume_texts.append(text)
                resume_names.append(safe_name)

        if not resume_texts:
            return render_template("index.html", error="No valid PDF text could be extracted. Please check your files.")

        # --- Ranking ---
        ranked_indices, ranked_scores = rank_resumes(resume_texts, job_description)

        # --- Report Generation ---
        df = pd.DataFrame({
            "Rank": range(1, len(ranked_indices) + 1),
            "Resume": [resume_names[i] for i in ranked_indices],
            # Convert score to percentage string, e.g., "87.52%"
            "Score (Percentage)": [f"{round(score * 100, 2)}%" for score in ranked_scores]
        })
        # The /download route serves this file.
        df.to_csv("resume_ranking.csv", index=False)

        # Pass results back to the UI
        return render_template("index.html", tables=[df.to_html(classes='data', index=False)], download_link="/download")

    # --- GET Request (Initial Page Load) ---
    return render_template("index.html", error=None)

@app.route("/download")
def download():
    """Provides the CSV download for HR.

    Returns:
        The ``resume_ranking.csv`` report as an attachment, or a plain-text
        404 response when no ranking has been generated yet.
    """
    # The report is written relative to the working directory, so resolve it
    # the same way here instead of relying on Flask's root-path lookup.
    csv_path = os.path.abspath("resume_ranking.csv")
    if not os.path.isfile(csv_path):
        # Avoid an unhandled FileNotFoundError (HTTP 500) before the first run.
        return "No ranking report is available yet. Please rank resumes first.", 404
    return send_file(csv_path, as_attachment=True)


# =========================================================================
# STEP 5: HTML Template
# =========================================================================
# We create the 'templates' directory and write the 'index.html' file
# Flask looks for templates in a "templates" directory, so create it and
# (re)write index.html every time this cell runs.
os.makedirs("templates", exist_ok=True)
# The template contains non-ASCII characters (the emoji in the heading) and
# Jinja reads template files as UTF-8, so pin the encoding instead of relying
# on the platform default.
with open("templates/index.html", "w", encoding="utf-8") as f:
    f.write("""
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>AI-Powered Resume Ranker</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            margin: 20px;
            background-color: #f4f4f9;
            color: #333;
        }
        .container { max-width: 800px; margin: 0 auto; padding: 20px; }
        h1 {
            color: #2c3e50;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
            text-align: center;
        }
        h2 { color: #34495e; margin-top: 30px; }
        form {
            background: #ffffff;
            padding: 25px;
            border-radius: 8px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.05);
        }
        label {
            display: block;
            margin-bottom: 8px;
            font-weight: bold;
        }
        textarea {
            width: 100%;
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 4px;
            box-sizing: border-box;
            margin-bottom: 15px;
            font-size: 14px;
        }
        input[type="file"] {
            display: block;
            margin-bottom: 15px;
        }
        input[type="submit"] {
            background-color: #3498db;
            color: white;
            padding: 12px 20px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
            transition: background-color 0.3s;
        }
        input[type="submit"]:hover { background-color: #2980b9; }
        .data {
            width: 100%;
            border-collapse: collapse;
            margin-top: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        }
        .data th, .data td {
            border: 1px solid #ddd;
            padding: 10px;
            text-align: left;
        }
        .data th { background-color: #ecf0f1; }
        .download-link {
            color: #3498db;
            text-decoration: none;
            font-weight: bold;
            margin-top: 15px;
            display: inline-block;
            font-size: 16px;
        }
        .download-link:hover { text-decoration: underline; }
        .error {
            color: #e74c3c;
            background: #fbecec;
            padding: 10px;
            border: 1px solid #e74c3c;
            border-radius: 4px;
            margin-bottom: 15px;
        }
    </style>
  </head>
  <body>
    <div class="container">
        <h1>AI-Powered Resume Ranker 🤖</h1>
        <form method="POST" enctype="multipart/form-data">
          {% if error %}
            <p class="error"><strong>Error:</strong> {{ error }}</p>
          {% endif %}

          <label for="job_description">Job Description:</label>
          <textarea id="job_description" name="job_description" rows="8"
                    placeholder="Paste the detailed job description here..."></textarea>

          <label for="resumes">Upload Resumes (PDF):</label>
          <input id="resumes" type="file" name="resumes" multiple accept=".pdf">

          <input type="submit" value="Rank Resumes">
        </form>

        {% if tables %}
            <h2>Ranking Results</h2>
            {% for table in tables %}
                {{ table|safe }}
            {% endfor %}
            <a href="{{ download_link }}" class="download-link">Download Ranking as CSV</a>
        {% endif %}
    </div>
  </body>
</html>
""")


# =========================================================================
# STEP 6: Start ngrok tunnel and Run Flask App
# =========================================================================

# 🚨 CRITICAL: Provide your ngrok auth token through the NGROK_AUTH_TOKEN
# environment variable before running this cell (for example by setting
# os.environ["NGROK_AUTH_TOKEN"] from a private secret store).
# Get your token from: https://dashboard.ngrok.com/get-started/your-authtoken
# Never paste a real token into the notebook source: notebooks are shared and
# committed, which leaks the credential.
_NGROK_TOKEN_PLACEHOLDER = "YOUR_NGROK_AUTH_TOKEN_HERE"
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", _NGROK_TOKEN_PLACEHOLDER).strip()

try:
    # 1. Set auth token (refuse to continue when none was supplied)
    if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN == _NGROK_TOKEN_PLACEHOLDER:
        raise ValueError("Please set the NGROK_AUTH_TOKEN environment variable to your actual ngrok token.")

    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # 2. Terminate any existing tunnels (to prevent the "already online" error)
    ngrok.kill()

    # 3. Start the new tunnel
    # We set hostname="" to force a new, random URL every time.
    public_url = ngrok.connect(addr=5000, hostname="").public_url
    print(f"🎉 Your Flask app is live! Open this URL in your browser:\n{public_url}")

    # 4. Run the Flask app (blocks until the server is stopped)
    app.run(port=5000, use_reloader=False)

except ValueError as e:
    print(f"\n❌ SETUP ERROR: {e}")
    print("Please follow the instructions in Step 6 to set your NGROK_AUTH_TOKEN.")
except Exception as e:
    print("\n❌ RUNTIME ERROR: Failed to start ngrok or Flask.")
    print("Please restart your Colab runtime (Runtime > Restart runtime) and run the cell again.")
    print(f"Details: {e}")

Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-5.0.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer_six-20250506-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━

 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [27/Oct/2025 08:38:54] "GET / HTTP/1.1" 200 -


In [None]:
# =========================================================================
# STEP 0: Install dependencies & Setup
# =========================================================================
# We install all required libraries for the project
!pip install flask pyngrok pandas pdfplumber scikit-learn spacy

# Download the SpaCy English model for NLP preprocessing
!python -m spacy download en_core_web_sm

# =========================================================================
# STEP 1: Imports and Initialization
# =========================================================================
import os
import pandas as pd
import pdfplumber  # For PDF extraction
import spacy       # For NLP preprocessing
from flask import Flask, request, render_template, send_file
from pyngrok import ngrok
from sklearn.feature_extraction.text import TfidfVectorizer  # For vectorization
from sklearn.metrics.pairwise import cosine_similarity       # For scoring

# Load the small English SpaCy pipeline once at start-up; preprocess_text()
# reuses this object for every document it processes.
nlp = spacy.load("en_core_web_sm")
# Directory in which uploaded resume PDFs are saved before text extraction.
UPLOAD_FOLDER = "resumes"
# Create the upload directory up front; exist_ok avoids an error on re-runs.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)


# =========================================================================
# STEP 2: Core Functions (PDF Extraction & NLP Preprocessing)
# =========================================================================

def extract_text_from_pdf(pdf_path):
    """Collect the text of all pages of the PDF located at *pdf_path*.

    Every page with extractable text adds that text plus a trailing space to
    the result; empty pages add nothing. If opening or parsing the file
    fails, the error is printed and an empty string is returned.
    """
    try:
        with pdfplumber.open(pdf_path) as document:
            # Build the result while the PDF is still open.
            pieces = [
                content + " "
                for content in (page.extract_text() for page in document.pages)
                if content
            ]
            return "".join(pieces)
    except Exception as e:
        print(f"Error reading PDF {pdf_path}: {e}")
        return ""

def preprocess_text(text):
    """
    Reduce *text* to its lemmas, joined by single spaces.

    The lower-cased text is parsed with the module-level SpaCy pipeline
    ``nlp``; only alphabetic tokens that are not stop words are retained,
    each represented by its lemma.
    """
    parsed = nlp(text.lower())
    kept = filter(lambda token: token.is_alpha and not token.is_stop, parsed)
    return " ".join(token.lemma_ for token in kept)


# =========================================================================
# STEP 3: Scoring & Ranking Algorithm
# =========================================================================

def rank_resumes(resume_texts, job_description):
    """
    Ranks resumes based on cosine similarity to the job description using TF-IDF.

    Args:
        resume_texts: List of raw resume strings.
        job_description: Raw job description string.

    Returns:
        A tuple ``(ranked_indices, ranked_scores)`` of NumPy arrays.
        ``ranked_indices`` holds positions into ``resume_texts`` ordered from
        best to worst match and ``ranked_scores`` holds the corresponding
        cosine similarities in the same order. Both arrays are empty when
        ``resume_texts`` is empty.
    """
    # Local import keeps this block self-contained; NumPy is already pulled in
    # by scikit-learn and pandas.
    import numpy as np

    # Nothing to rank: return empty results instead of failing in scikit-learn.
    if not resume_texts:
        return np.array([], dtype=int), np.array([], dtype=float)

    # Preprocess all texts
    preprocessed_resumes = [preprocess_text(text) for text in resume_texts]
    preprocessed_job = preprocess_text(job_description)

    # TF-IDF vectorization: the job description is row 0, resumes follow.
    corpus = [preprocessed_job] + preprocessed_resumes
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(corpus)
    except ValueError:
        # TfidfVectorizer raises ValueError ("empty vocabulary") when no
        # document keeps a single token after preprocessing. There is nothing
        # to compare in that case, so every resume gets a score of 0.
        scores = np.zeros(len(preprocessed_resumes))
    else:
        # Cosine similarity of every resume row against the job row.
        scores = cosine_similarity(tfidf_matrix[1:], tfidf_matrix[0]).flatten()

    # Stable sort on the negated scores: best match first, and resumes with
    # equal scores keep their upload order (deterministic output).
    ranked_indices = np.argsort(-scores, kind="stable")
    ranked_scores = scores[ranked_indices]

    return ranked_indices, ranked_scores


# =========================================================================
# STEP 4: Flask App Routes (The Web UI)
# =========================================================================
# Application object; the @app.route decorators below register their views on it.
app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload form (GET) or rank the uploaded resumes (POST).

    On POST the job description and the uploaded PDF files are read from the
    request, each PDF is saved under ``UPLOAD_FOLDER`` and its text extracted,
    the resumes are ranked against the job description, and the ranking is
    both written to ``resume_ranking.csv`` and rendered as an HTML table.

    Returns:
        The rendered ``index.html`` template, carrying either an error
        message, the ranking table plus download link, or just the empty form.
    """
    if request.method == "POST":
        # Strip so that a whitespace-only description is treated as missing.
        job_description = request.form.get("job_description", "").strip()
        uploaded_files = request.files.getlist("resumes")

        # --- Validation ---
        if not job_description or not uploaded_files:
            return render_template("index.html", error="Please provide a job description and upload at least one PDF resume.")

        resume_texts = []
        resume_names = []

        # --- File Processing ---
        for file in uploaded_files:
            # The filename is client-controlled. Keep only its last path
            # component (treating backslashes as separators too) so a name
            # such as "../../app.pdf" cannot be written outside UPLOAD_FOLDER.
            safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
            # Accept the ".pdf" extension in any letter case (e.g. "CV.PDF").
            if not safe_name.lower().endswith(".pdf"):
                continue
            file_path = os.path.join(UPLOAD_FOLDER, safe_name)
            file.save(file_path)
            text = extract_text_from_pdf(file_path)
            # Files that yield no text (unreadable or empty PDFs) are ignored.
            if text:
                resume_texts.append(text)
                resume_names.append(safe_name)

        if not resume_texts:
            return render_template("index.html", error="No valid PDF text could be extracted. Please check your files.")

        # --- Ranking ---
        ranked_indices, ranked_scores = rank_resumes(resume_texts, job_description)

        # --- Report Generation (Converts 0.0-1.0 score to 0-100 percentage) ---
        df = pd.DataFrame({
            "Rank": range(1, len(ranked_indices) + 1),
            "Resume": [resume_names[i] for i in ranked_indices],
            # Convert score (e.g., 0.8752) to percentage string (e.g., "87.52%")
            "Score (Percentage)": [f"{round(score * 100, 2)}%" for score in ranked_scores]
        })
        # The /download route serves this file.
        df.to_csv("resume_ranking.csv", index=False)

        # Pass results back to the UI
        return render_template("index.html", tables=[df.to_html(classes='data', index=False)], download_link="/download")

    # --- GET Request (Initial Page Load) ---
    return render_template("index.html", error=None)

@app.route("/download")
def download():
    """Provides the CSV download for HR.

    Returns:
        The ``resume_ranking.csv`` report as an attachment, or a plain-text
        404 response when no ranking has been generated yet.
    """
    # The report is written relative to the working directory, so resolve it
    # the same way here instead of relying on Flask's root-path lookup.
    csv_path = os.path.abspath("resume_ranking.csv")
    if not os.path.isfile(csv_path):
        # Avoid an unhandled FileNotFoundError (HTTP 500) before the first run.
        return "No ranking report is available yet. Please rank resumes first.", 404
    return send_file(csv_path, as_attachment=True)


# =========================================================================
# STEP 5: HTML Template
# =========================================================================
# We create the 'templates' directory and write the 'index.html' file
# Flask looks for templates in a "templates" directory, so create it and
# (re)write index.html every time this cell runs.
os.makedirs("templates", exist_ok=True)
# The template contains non-ASCII characters (the emoji in the heading) and
# Jinja reads template files as UTF-8, so pin the encoding instead of relying
# on the platform default.
with open("templates/index.html", "w", encoding="utf-8") as f:
    f.write("""
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>AI-Powered Resume Ranker</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            margin: 20px;
            background-color: #f4f4f9;
            color: #333;
        }
        .container { max-width: 800px; margin: 0 auto; padding: 20px; }
        h1 {
            color: #2c3e50;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
            text-align: center;
        }
        h2 { color: #34495e; margin-top: 30px; }
        form {
            background: #ffffff;
            padding: 25px;
            border-radius: 8px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.05);
        }
        label {
            display: block;
            margin-bottom: 8px;
            font-weight: bold;
        }
        textarea {
            width: 100%;
            padding: 10px;
            border: 1px solid #ccc;
            border-radius: 4px;
            box-sizing: border-box;
            margin-bottom: 15px;
            font-size: 14px;
        }
        input[type="file"] {
            display: block;
            margin-bottom: 15px;
        }
        input[type="submit"] {
            background-color: #3498db;
            color: white;
            padding: 12px 20px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
            transition: background-color 0.3s;
        }
        input[type="submit"]:hover { background-color: #2980b9; }
        .data {
            width: 100%;
            border-collapse: collapse;
            margin-top: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
        }
        .data th, .data td {
            border: 1px solid #ddd;
            padding: 10px;
            text-align: left;
        }
        .data th { background-color: #ecf0f1; }
        .download-link {
            color: #3498db;
            text-decoration: none;
            font-weight: bold;
            margin-top: 15px;
            display: inline-block;
            font-size: 16px;
        }
        .download-link:hover { text-decoration: underline; }
        .error {
            color: #e74c3c;
            background: #fbecec;
            padding: 10px;
            border: 1px solid #e74c3c;
            border-radius: 4px;
            margin-bottom: 15px;
        }
    </style>
  </head>
  <body>
    <div class="container">
        <h1>AI-Powered Resume Ranker 🤖</h1>
        <form method="POST" enctype="multipart/form-data">
          {% if error %}
            <p class="error"><strong>Error:</strong> {{ error }}</p>
          {% endif %}

          <label for="job_description">Job Description:</label>
          <textarea id="job_description" name="job_description" rows="8"
                    placeholder="Paste the detailed job description here..."></textarea>

          <label for="resumes">Upload Resumes (PDF):</label>
          <input id="resumes" type="file" name="resumes" multiple accept=".pdf">

          <input type="submit" value="Rank Resumes">
        </form>

        {% if tables %}
            <h2>Ranking Results</h2>
            {% for table in tables %}
                {{ table|safe }}
            {% endfor %}
            <a href="{{ download_link }}" class="download-link">Download Ranking as CSV</a>
        {% endif %}
    </div>
  </body>
</html>
""")


# =========================================================================
# STEP 6: Start ngrok tunnel and Run Flask App
# =========================================================================

# 🚨 CRITICAL: Provide your ngrok auth token through the NGROK_AUTH_TOKEN
# environment variable before running this cell (for example by setting
# os.environ["NGROK_AUTH_TOKEN"] from a private secret store).
# Get your token from: https://dashboard.ngrok.com/get-started/your-authtoken
# Never paste a real token into the notebook source: notebooks are shared and
# committed, which leaks the credential.
_NGROK_TOKEN_PLACEHOLDER = "YOUR_NGROK_AUTH_TOKEN_HERE"
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", _NGROK_TOKEN_PLACEHOLDER).strip()

try:
    # 1. Set auth token (refuse to continue when none was supplied)
    if not NGROK_AUTH_TOKEN or NGROK_AUTH_TOKEN == _NGROK_TOKEN_PLACEHOLDER:
        raise ValueError("Please set the NGROK_AUTH_TOKEN environment variable to your actual ngrok token.")

    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # 2. Terminate any existing tunnels (to prevent the "already online" error)
    ngrok.kill()

    # 3. Start the new tunnel
    # We set hostname="" to force a new, random URL every time.
    public_url = ngrok.connect(addr=5000, hostname="").public_url
    print(f"🎉 Your Flask app is live! Open this URL in your browser:\n{public_url}")

    # 4. Run the Flask app (blocks until the server is stopped)
    app.run(port=5000, use_reloader=False)

except ValueError as e:
    print(f"\n❌ SETUP ERROR: {e}")
    print("Please follow the instructions in Step 6 to set your NGROK_AUTH_TOKEN.")
except Exception as e:
    print("\n❌ RUNTIME ERROR: Failed to start ngrok or Flask.")
    print("Please restart your Colab runtime (Runtime > Restart runtime) and run the cell again.")
    print(f"Details: {e}")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m109.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
🎉 Your Flask app is live! Open this URL in your browser:
https://peritrichate-unfabling-addie.ngrok-free.dev
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [27/Oct/2025 08:41:39] "GET / HTTP/1.1" 200 -
