In [None]:
!pip install pdfplumber pytesseract Pillow tensorflow nltk pandas numpy scikit-learn
!sudo apt install -y tesseract-ocr

In [None]:
# Install the Tesseract OCR system package (the first cell requests the same
# package via sudo, so this is a repeat of that install).
!apt install tesseract-ocr -y

In [None]:
# Mount Google Drive inside the Colab filesystem; the dataset is later read
# from the "My Drive" folder below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os

# Hide all CUDA devices from this process. This is set before TensorFlow is
# imported in a later cell.
os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})

In [None]:
!ls /content/drive/My\ Drive/

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import pdfplumber
import pytesseract
from PIL import Image
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, SpatialDropout1D, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# NLTK Download


# Fetch the NLTK stop-word corpus that preprocess_text() reads.
nltk.download('stopwords')

# Load Dataset


DATASET_CSV = '/content/drive/My Drive/job_descriptions Short Form.csv'

print("Loading dataset...")
data = pd.read_csv(DATASET_CSV)
print("Dataset loaded successfully!")

# Function to extract text from PDF


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: Anything accepted by ``pdfplumber.open`` (a path or an open
            binary file object).

    Returns:
        The concatenated page texts, one page per line group, with leading and
        trailing whitespace stripped. Pages that yield no text contribute an
        empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may yield None/"" for a page without a text layer
        # (e.g. a scanned page); `or ""` keeps the concatenation from raising
        # TypeError on such pages.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts).strip()

# Function to extract text from images


def extract_text_from_image(image_file):
    """OCR an image with Tesseract and return the recognised text, stripped.

    Args:
        image_file: Anything accepted by ``PIL.Image.open``.
    """
    return pytesseract.image_to_string(Image.open(image_file)).strip()

# Text Preprocessing Function


# Built on first use and then shared by every preprocess_text() call, so the
# stop-word list is not re-read and the stemmer not re-created for each value.
_STOP_WORDS = None
_STEMMER = None


def preprocess_text(text):
    """Normalise one text value for tokenization.

    The value is lower-cased, stripped of every character that is not an ASCII
    letter, digit or whitespace, split on whitespace, filtered against the
    NLTK English stop-word list and Porter-stemmed.

    Args:
        text: The raw value. Missing values (None/NaN) produce an empty
            string; other non-string values are converted with ``str()``
            instead of failing on ``.lower()``.

    Returns:
        The remaining stems joined by single spaces.
    """
    global _STOP_WORDS, _STEMMER
    if pd.isna(text):
        return ""
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
        _STEMMER = PorterStemmer()
    text = str(text).lower()
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    return ' '.join(
        _STEMMER.stem(word) for word in text.split() if word not in _STOP_WORDS
    )

# Apply Preprocessing


# Cleaned column -> raw dataset column it is derived from. 'skills' maps onto
# itself, so that column is overwritten with its cleaned form.
CLEANED_FROM_RAW = {
    'job_description': 'Job Description',
    'skills': 'skills',
    'experience': 'Experience',
    'qualifications': 'Qualifications',
    'work_type': 'Work Type',
    'preference': 'Preference',
    'country': 'Country',
}
for cleaned_col, raw_col in CLEANED_FROM_RAW.items():
    data[cleaned_col] = data[raw_col].apply(preprocess_text)

# Join the cleaned columns, in the order above, into one space-separated string per row.
cleaned_cols = list(CLEANED_FROM_RAW)
text_features = data[cleaned_cols[0]]
for cleaned_col in cleaned_cols[1:]:
    text_features = text_features + ' ' + data[cleaned_col]
data['text_features'] = text_features

# Tokenization


# Fit the word index on the combined text column and turn every row into a
# fixed-length integer sequence.
corpus = data['text_features']

tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
vocab_size = 1 + len(tokenizer.word_index)

# Sequences are padded at the end ('post') to 200 tokens.
X = pad_sequences(tokenizer.texts_to_sequences(corpus), maxlen=200, padding='post')

# Encode Job Titles


# Integer-encode the job titles that serve as classification targets.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data['Job Title'])

# Compute Class Weights


# 'balanced' weights, keyed by position in np.unique(y).
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weight_dict = dict(enumerate(class_weights))

# Train-Test Split


# 80/20 split with a fixed seed.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Build Model


# Bidirectional-LSTM classifier over the padded token sequences, with one
# softmax output unit per encoded job title.
model = Sequential([
    # `input_length` is deprecated in Keras 3 (it only triggers a warning), so
    # it is no longer passed; the sequence length comes from the padded input.
    Embedding(input_dim=vocab_size, output_dim=100),
    SpatialDropout1D(0.2),
    Bidirectional(LSTM(128, dropout=0.2, recurrent_dropout=0.2)),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(np.unique(y)), activation='softmax')
])

# Compile Model


# Sparse loss: the targets in y are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train Model


# NOTE(review): the held-out test split doubles as validation data here, so the
# reported val_* metrics are not an independent test estimate.
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), class_weight=class_weight_dict, verbose=2)

# Save Matching Resumes


def save_matching_resumes(resume_text, file_name):
    """Append one resume's text to a file, followed by a ``---`` separator line.

    Args:
        resume_text: The text to append.
        file_name: Path of the file to append to; it is created if missing.
    """
    # Explicit UTF-8: extracted resume text can contain non-ASCII characters,
    # and the platform default encoding is not guaranteed to encode them.
    with open(file_name, 'a', encoding='utf-8') as file:
        file.write(resume_text + '\n---\n')

# Load and process multiple resumes


# Two separate, initially empty lists: resume file names and their extracted text.
resume_files, processed_resumes = [], []

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Loading dataset...
Dataset loaded successfully!
Epoch 1/10




634/634 - 921s - 1s/step - accuracy: 0.3909 - loss: 2.3909 - val_accuracy: 0.9756 - val_loss: 0.2181
Epoch 2/10
634/634 - 918s - 1s/step - accuracy: 0.8981 - loss: 0.3583 - val_accuracy: 1.0000 - val_loss: 0.0097
Epoch 3/10
634/634 - 917s - 1s/step - accuracy: 0.9713 - loss: 0.1165 - val_accuracy: 1.0000 - val_loss: 0.0016
Epoch 4/10
634/634 - 914s - 1s/step - accuracy: 0.9850 - loss: 0.0642 - val_accuracy: 1.0000 - val_loss: 8.3236e-04
Epoch 5/10
634/634 - 912s - 1s/step - accuracy: 0.9895 - loss: 0.0449 - val_accuracy: 1.0000 - val_loss: 5.2622e-04
Epoch 6/10
634/634 - 911s - 1s/step - accuracy: 0.9917 - loss: 0.0312 - val_accuracy: 1.0000 - val_loss: 1.2154e-04
Epoch 7/10
634/634 - 921s - 1s/step - accuracy: 0.9934 - loss: 0.0267 - val_accuracy: 1.0000 - val_loss: 9.8550e-05
Epoch 8/10
634/634 - 933s - 1s/step - accuracy: 0.9946 - loss: 0.0214 - val_accuracy: 1.0000 - val_loss: 5.7908e-05
Epoch 9/10
634/634 - 913s - 1s/step - accuracy: 0.9931 - loss: 0.0256 - val_accuracy: 1.0000 - 

In [None]:
!pip install streamlit pyngrok pandas numpy tensorflow scikit-learn nltk

In [None]:
import os

# Keep CUDA devices hidden from this process (same setting as applied earlier
# in the notebook).
os.environ.update(CUDA_VISIBLE_DEVICES="-1")

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Supply your own ngrok authentication token. It must never be hard-coded in
# the notebook: it is read from the NGROK_AUTHTOKEN environment variable, or
# prompted for without echo when that variable is unset.
# NOTE: a token that was ever committed in this cell should be treated as
# leaked and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

In [None]:
# Upgrade Streamlit to the newest release pip can find.
!pip install --upgrade streamlit

In [None]:
%%writefile UsmanResume.py

import streamlit as st
from PIL import Image
import pdfplumber
import pytesseract
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Page Configuration


st.set_page_config(layout='wide', page_title='AI Resume Screening')

# Custom CSS for Styling


# Style rules for the page body, the header banner, result cards and buttons.
PAGE_CSS = '''<style>
body {background-color: #f5f5f5;}
.main {padding: 2rem 5rem;}
.header {font-size: 2.5rem; color: #333333; font-weight: 700; text-align: center; margin-bottom: 2rem;}
.card {background: white; border-radius: 15px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); padding: 2rem; margin-bottom: 2rem;}
.card h3 {color: #2c3e50;}
.button {background-color: #3498db; color: white; border: none; padding: 0.5rem 1rem; border-radius: 5px; cursor: pointer;}
.button:hover {background-color: #2980b9;}
</style>'''
st.markdown(PAGE_CSS, unsafe_allow_html=True)

# Load Dataset


@st.cache_data
def load_data():
    """Read the job-description CSV from the mounted Drive folder.

    Decorated with ``st.cache_data``.

    Returns:
        The dataset as a pandas DataFrame.
    """
    return pd.read_csv('/content/drive/My Drive/job_descriptions Short Form.csv')

# Sidebar Navigation


sidebar = st.sidebar

sidebar.title("Usman-AI Resume Screening System")
sidebar.markdown("---")

data = load_data()

# Reference listings: every distinct job title, plus every distinct
# comma-separated skill and qualification found in the dataset.
all_jobs = data['Job Title'].unique()
all_skills = data['skills'].str.split(',').explode().unique()
all_qualifications = data['Qualifications'].str.split(',').explode().unique()

for heading, values in (
    ("All Job Titles", all_jobs),
    ("All Skills", all_skills),
    ("All Qualifications", all_qualifications),
):
    sidebar.header(heading)
    sidebar.write(values)
    sidebar.markdown("---")

# Job Criteria Input


sidebar.header("Job Criteria")
job_title = sidebar.text_input("Job Title", value="All Jobs")
skills_required = sidebar.text_area("Required Skills", value="All Skills")
experience_required = sidebar.text_input("Minimum Experience", value="2 Years")
qualifications_required = sidebar.text_area("Qualifications", value="All Qualifications")
sidebar.markdown("---")

# File Upload


sidebar.header("Upload Resumes")
uploaded_files = sidebar.file_uploader(
    "Upload resumes (PDF/Images)",
    type=["pdf", "jpg", "png"],
    accept_multiple_files=True,
)
sidebar.markdown("---")

# Main Content


st.markdown("<div class='header'>Usman-AI-Powered Resume Screening System</div>", unsafe_allow_html=True)
st.markdown("Automatically screen resumes based on job criteria!")
st.markdown("---")

# Extract Text Functions


def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_file: Anything accepted by ``pdfplumber.open``; here, an uploaded
            file object from the Streamlit file uploader.

    Returns:
        The concatenated page texts with leading and trailing whitespace
        stripped. Pages that yield no text contribute an empty string.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may yield None/"" for a page without a text layer
        # (e.g. a scanned page); `or ""` keeps the concatenation from raising
        # TypeError and aborting the whole app run.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts).strip()

def extract_text_from_image(image_file):
    """Run Tesseract OCR on an image and return the recognised text, stripped.

    Args:
        image_file: Anything accepted by ``PIL.Image.open``.
    """
    ocr_output = pytesseract.image_to_string(Image.open(image_file))
    return ocr_output.strip()

# Process Resumes


# Extracted text and file name of every uploaded resume, kept index-aligned.
processed_resumes = []
resume_files = []

# File extension -> extractor; files with any other extension are skipped.
TEXT_EXTRACTORS = {
    "pdf": extract_text_from_pdf,
    "jpg": extract_text_from_image,
    "png": extract_text_from_image,
}

for uploaded_file in uploaded_files or []:
    file_extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
    extractor = TEXT_EXTRACTORS.get(file_extension)
    if extractor is None:
        continue
    processed_resumes.append(extractor(uploaded_file))
    resume_files.append(uploaded_file.name)

# Matching resumes with job criteria


import html  # stdlib; escapes untrusted file names before they are rendered as HTML

# Minimum TF-IDF cosine similarity (in percent) between the job criteria and a
# resume for the resume to be listed as eligible.
SIMILARITY_THRESHOLD_PERCENT = 70

if st.sidebar.button("Process Resumes"):
    if not processed_resumes:
        # Previously a click without any uploaded resume did nothing at all.
        st.warning("Upload at least one resume before processing.")
    else:
        job_criteria = f"{job_title} {skills_required} {experience_required} {qualifications_required}"
        vectorizer = TfidfVectorizer()
        # Row 0 is the job criteria; rows 1..n are the resumes, in upload order.
        all_texts = [job_criteria] + processed_resumes
        tfidf_matrix = vectorizer.fit_transform(all_texts)

        # One similarity per resume, computed in a single call and scaled to percent.
        similarities = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1:])[0] * 100
        eligible_resumes = [
            file_name
            for file_name, similarity in zip(resume_files, similarities)
            if similarity >= SIMILARITY_THRESHOLD_PERCENT
        ]

        # Display Results

        if eligible_resumes:
            # File names come from the uploader (untrusted input), so they are
            # escaped before being placed into raw HTML.
            card_body = "".join(f"<p>{html.escape(file_name)}</p>" for file_name in eligible_resumes)
        else:
            card_body = "<p>No resumes matched the criteria.</p>"

        # The card is emitted in ONE st.markdown call: each call is rendered as
        # its own element, so a <div> opened in one call cannot wrap content
        # written by later calls.
        st.markdown(
            f"<div class='card'><h3>Eligible Resumes</h3>{card_body}</div>",
            unsafe_allow_html=True,
        )

In [None]:
# List the current working directory (e.g. to check that UsmanResume.py was written).
!ls


In [None]:
# Print the response of loca.lt's "mytunnelpassword" endpoint.
# NOTE(review): presumably the password the localtunnel landing page asks for — confirm.
!curl https://loca.lt/mytunnelpassword


In [None]:
from tensorflow.keras.models import Sequential

# Save the classifier that was built and fitted in the training cell.
# This cell used to rebind `model` to an empty Sequential() placeholder (its
# comment asked for the real model to be defined here), which discarded the
# trained network and wrote an empty model to disk.
model.save("job_description_model.keras")

In [None]:
# Start the Streamlit app in the background (`&`) and open a localtunnel
# tunnel to port 8501.
!streamlit run UsmanResume.py & npx localtunnel --port 8501