In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by
# path from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install word2number pyngrok streamlit --quiet
# Install spaCy transformer pipeline and model
!pip install spacy-transformers
!python -m spacy download en_core_web_trf

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m72.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m94.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for word2number (setup.py) ... [?25l[?25hdone
Collecting spacy-transformers
  Downloading spacy_transformers-1.3.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Collecting transformers<4.50.0,>=3.4.0 (from spacy-transformers)
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Colle

In [None]:
%%writefile app.py
import html
import json
import re

import spacy
import streamlit as st
from nltk.stem import PorterStemmer
from sentence_transformers import SentenceTransformer, util
from word2number import w2n

# --- Load Models and Data ---
# Streamlit re-executes this whole script on every widget interaction, so the
# heavyweight models are loaded through st.cache_resource and reused across
# reruns instead of being reloaded each time.

# Single root for everything stored on Drive. The original mixed
# "/content/drive/My Drive" and "/content/drive/MyDrive" for the same folder.
RESUME_MATCHER_DIR = "/content/drive/MyDrive/ResumeMatcher"


@st.cache_resource
def _load_spacy_pipeline(name_or_path):
    """Load a spaCy pipeline (package name or directory), cached per argument."""
    return spacy.load(name_or_path)


@st.cache_resource
def _load_sentence_encoder(model_name):
    """Load a SentenceTransformer model, cached per model name."""
    return SentenceTransformer(model_name)


# Custom pipeline used by predict_skills to tag SKILL entities.
nlp = _load_spacy_pipeline(f"{RESUME_MATCHER_DIR}/models/my_spacy_model")
# Sentence encoder used by compute_similarity.
model = _load_sentence_encoder('all-MiniLM-L6-v2')

# The resume extract is re-read on every rerun so edits to the file are picked up.
with open(f"{RESUME_MATCHER_DIR}/resume_extract.json", encoding="utf-8") as f:
    cv = json.load(f)

# General-purpose pipeline used by extract_years_experience.
nlp_ner = _load_spacy_pipeline("en_core_web_trf")

# --- CSS Styling ---
# Inject a <style> block into the page. unsafe_allow_html=True is required,
# otherwise st.markdown would not render the raw HTML. The .result-container
# class defined here is the one used for each entry in the results list.
st.markdown("""
    <style>
    body { background-color: #F9F9F9; }
    .main { background-color: #FFFFFF; padding: 30px; border-radius: 10px; }
    h1 { color: #333366; }
    .stTextInput input { padding: 10px; border: 1px solid #CCC; border-radius: 5px; }
    .stButton button { background-color: #0066cc; color: white; border-radius: 5px; padding: 10px 20px; }
    .result-container { background-color: #FAFAFA; padding: 15px; border-radius: 8px; margin-bottom: 15px; box-shadow: 0px 1px 3px rgba(0,0,0,0.1); }
    </style>
""", unsafe_allow_html=True)

# --- Utility Functions ---
def predict_skills(text):
    """Return the distinct SKILL entities that the ``nlp`` pipeline finds in ``text``.

    Each entity's text has the characters ``.,:;/()`` trimmed from both ends
    before de-duplication. The result is a list built from a set, so its
    order is whatever set iteration yields.
    """
    parsed = nlp(text)
    unique_skills = set()
    for entity in parsed.ents:
        if entity.label_ != "SKILL":
            continue
        unique_skills.add(entity.text.strip(".,:;/()"))
    return list(unique_skills)

# A number is either digits with an optional decimal part, or a single word
# that may be an English number word ("five"); words are validated later.
_EXP_NUMBER = r"(\d+(?:\.\d+)?|[a-z]+)"
_EXP_UNIT = r"(years?|months?)\b"
# "3-5 years", "3 to 5 years": the two ends must be joined by a dash or "to".
_EXP_RANGE_RE = re.compile(
    r"\b" + _EXP_NUMBER + r"\s*(?:[-\u2013]|\bto\b)\s*" + _EXP_NUMBER + r"\s*" + _EXP_UNIT
)
# "5+ years"
_EXP_PLUS_RE = re.compile(r"\b" + _EXP_NUMBER + r"\s*\+\s*" + _EXP_UNIT)
# "2 years", "6 months"
_EXP_AMOUNT_RE = re.compile(r"\b" + _EXP_NUMBER + r"\s*" + _EXP_UNIT)
# Any standalone number or word, for input that carries no unit.
_EXP_BARE_RE = re.compile(r"\b(?:\d+(?:\.\d+)?|[a-z]+)\b")


def _parse_experience_number(token):
    """Return ``token`` as a float, or None when it is not a number.

    Digit strings (with optional decimals) are converted directly; anything
    else is handed to ``w2n.word_to_num`` and treated as "not a number" when
    that raises ValueError.
    """
    if re.fullmatch(r"\d+(?:\.\d+)?", token):
        return float(token)
    try:
        return float(w2n.word_to_num(token))
    except ValueError:
        return None


def _experience_in_years(amount, unit):
    """Convert ``amount`` of ``unit`` ("year(s)" or "month(s)") to years."""
    return amount / 12 if unit.startswith("month") else amount


def normalize_experience(exp_str):
    """Convert a free-text experience requirement into a number of years.

    The input is stringified and lower-cased, then interpreted by the first
    rule that yields a value:

    1. Range ("3-5 years", "3 to 5 years", "6-12 months"): the midpoint.
    2. Open-ended ("5+ years"): the stated number.
    3. One or more amounts with units ("at least 5 years",
       "2 years 6 months", "2.5 years"): the sum of all amounts.
    4. A bare number or number word with no unit ("7", "five"): taken as years.

    Words that are not numbers are skipped rather than raising. Hyphenated
    word numbers such as "twenty-five years" still match the range rule.

    Args:
        exp_str: Any value; it is converted with ``str()``.

    Returns:
        The experience in years as a float, or 0 when no number is found.
    """
    text = str(exp_str).lower()

    for low_token, high_token, unit in _EXP_RANGE_RE.findall(text):
        low = _parse_experience_number(low_token)
        high = _parse_experience_number(high_token)
        # Skip pseudo-ranges such as "up to 5 years" where one end is not a number.
        if low is not None and high is not None:
            return _experience_in_years((low + high) / 2, unit)

    for token, unit in _EXP_PLUS_RE.findall(text):
        value = _parse_experience_number(token)
        if value is not None:
            return _experience_in_years(value, unit)

    total_years = 0.0
    for token, unit in _EXP_AMOUNT_RE.findall(text):
        value = _parse_experience_number(token)
        if value is not None:
            total_years += _experience_in_years(value, unit)
    if total_years > 0:
        return total_years

    # No unit anywhere: fall back to the first thing that parses as a number.
    for token in _EXP_BARE_RE.findall(text):
        value = _parse_experience_number(token)
        if value is not None:
            return value
    return 0


def extract_years_experience(text):
    """Return the first DATE, CARDINAL or QUANTITY entity found in ``text``.

    The text is run through the ``nlp_ner`` pipeline; the first entity with
    one of those labels is returned with surrounding whitespace removed.
    An empty string is returned when there is no such entity.
    """
    wanted_labels = {"DATE", "CARDINAL", "QUANTITY"}
    parsed = nlp_ner(text)
    first_hit = next(
        (entity.text for entity in parsed.ents if entity.label_ in wanted_labels),
        None,
    )
    if first_hit is None:
        return ""
    return first_hit.strip()

def calculate_match(resume_data, job_data):
    """Score a resume against a job's required skills and experience.

    Args:
        resume_data: Mapping with ``"skills"`` (iterable of strings) and
            ``"years_experience"`` (a number).
        job_data: Mapping with ``"required_skills"`` (comma-separated string)
            and ``"required_experience"`` (free text understood by
            ``normalize_experience``).

    Returns:
        A score from 0 to 100, rounded to one decimal: 70% is the fraction of
        required skills matched, 30% is resume experience relative to the
        requirement (capped at 1).
    """
    stemmer = PorterStemmer()
    resume_skills = [skill.lower().strip() for skill in resume_data["skills"]]
    resume_stems = {stemmer.stem(word) for skill in resume_skills for word in skill.split()}

    # Drop blank entries: "".split(",") yields [""] and a trailing comma yields
    # an empty item. Either would count as an unmatched required skill and
    # drag the score down.
    job_skills = [
        skill
        for skill in (raw.lower().strip() for raw in job_data["required_skills"].split(","))
        if skill
    ]

    matched = 0
    for job_skill in job_skills:
        # A required skill matches on exact text, or when any of its words
        # shares a stem with any word of any resume skill.
        if job_skill in resume_skills or any(
            stemmer.stem(word) in resume_stems for word in job_skill.split()
        ):
            matched += 1
    skill_score = matched / len(job_skills) if job_skills else 0

    resume_exp = resume_data["years_experience"]
    job_exp = normalize_experience(job_data["required_experience"])
    # No parsable requirement (job_exp == 0) contributes nothing to the score.
    exp_score = min(resume_exp / job_exp, 1) if job_exp > 0 else 0
    return round((skill_score * 0.7 + exp_score * 0.3) * 100, 1)

def compute_similarity(cv_text, job_text):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded with the module-level ``model`` (CV first, then
    job), compared with ``util.cos_sim``, and the result is unwrapped with
    ``.item()``.
    """
    cv_embedding, job_embedding = [
        model.encode(document, convert_to_tensor=True)
        for document in (cv_text, job_text)
    ]
    similarity = util.cos_sim(cv_embedding, job_embedding)
    return similarity.item()

# --- Sidebar ---
# Collect the job description, then pre-fill two editable fields with the
# skills and experience predicted from it (left empty until text is entered).
st.sidebar.title("🔧 Job Configuration")
job_description = st.sidebar.text_area("Paste Job Description Here", height=150)

if job_description:
    predicted_skills = predict_skills(job_description)
    predicted_exp = extract_years_experience(job_description)
else:
    predicted_skills = []
    predicted_exp = ""

skills_prefill = ", ".join(predicted_skills)
required_skills_input = st.sidebar.text_input("Required Skills ", value=skills_prefill)
required_experience_input = st.sidebar.text_input("Required Experience", value=predicted_exp)

# --- Main ---
def _render_result_card(score, file_path, details):
    """Render one matching result as a single styled HTML card.

    The card is emitted in ONE st.markdown call: Streamlit renders each call
    as its own element, so an opening <div> in one call and a closing </div>
    in another never wrap the content written in between. All values are
    HTML-escaped because they come from parsed resumes. Missing fields are
    shown as "None" instead of raising.
    """
    def esc(value):
        return html.escape(str(value))

    card = (
        "<div class='result-container'>"
        f"<b>📂 File:</b> <code>{esc(file_path)}</code><br>"
        f"<b>👤 Name:</b> {esc(details.get('name'))}<br>"
        f"<b>📧 Email:</b> {esc(details.get('email'))}<br>"
        f"<b>📱 Phone:</b> {esc(details.get('phone'))}<br>"
        f"<b>✅ Score:</b> <code>{esc(score)}</code>"
        "</div>"
    )
    st.markdown(card, unsafe_allow_html=True)


st.title("📄 Resume Matcher")
st.markdown("Upload a job description and compare it to multiple resumes for **skills** and **experience** alignment.")

if st.button("🚀 Run Matching"):
    job = {
        "job_text": job_description,
        "required_skills": required_skills_input,
        "required_experience": required_experience_input
    }

    results = []
    for file_path, details in cv.items():
        try:
            # Rule-based skills/experience score, already on a 0-100 scale.
            match_score = calculate_match(details, job)
            # Embedding cosine similarity; multiplied by 100 below so both
            # components share a scale before the 75/25 weighting.
            sim_score = compute_similarity(details["cv_text"], job["job_text"])
            final_score = round(match_score * 0.75 + (sim_score * 0.25 * 100), 2)
            results.append((final_score, file_path, details))
        except Exception as e:
            # One malformed resume entry should not abort the whole run.
            st.error(f"❌ Error processing {file_path}: {e}")
    # Best match first.
    results.sort(key=lambda item: item[0], reverse=True)

    st.subheader("📊 Matching Results")
    st.write(f"Total CVs Processed: **{len(results)}**")

    for score, file_path, details in results:
        _render_result_card(score, file_path, details)


Overwriting app.py


In [None]:
# Configure ngrok and launch Streamlit (run in a separate cell)

import os
from getpass import getpass

from pyngrok import ngrok

# Never store the authtoken in the notebook. Read it from the NGROK_AUTHTOKEN
# environment variable, or prompt for it without echoing. Any token that was
# previously saved in this notebook should be revoked in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

# Kill any existing tunnels
ngrok.kill()

# Open a tunnel on port 8501 (default for Streamlit)
public_url = ngrok.connect(8501)
print("Streamlit app available at:", public_url)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Streamlit app available at: NgrokTunnel: "https://0d71-34-59-117-57.ngrok-free.app" -> "http://localhost:8501"


In [None]:
# Start the Streamlit server for app.py in the background (trailing `&`).
# `&>/dev/null` discards both stdout and stderr, so any startup error raised
# by app.py will not be visible in this cell.
!streamlit run app.py &>/dev/null&

