In [1]:
!pip install pdfplumber langdetect sentence-transformers transformers torch numpy accelerate


Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 42.8/42.8 kB 1.4 MB/s eta 0:00:00
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 981.5/981.5 kB 15.3 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-

In [2]:
# ============================
# 1. Gerekli Kütüphaneler
# ============================
import pdfplumber
from langdetect import detect
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import torch
import numpy as np

# ============================
# 2. Anahtar Kelimeler ve Ağırlıklar
# ============================
# One row per scored CV section: (section key, weight in points, heading keywords).
# Keywords list Turkish and English spellings of the section heading; the weights
# add up to 100, so the total score reads as "out of 100".
_SECTION_TABLE = (
    ("iş_deneyimi", 25, ["İş Deneyimi", "Work Experience", "Career History", "Experience"]),
    ("egitim", 15, ["Eğitim", "Education", "Academic Background", "University"]),
    ("teknik_beceriler", 15, ["Teknik Beceriler", "Technical Skills", "Skills"]),
    ("ozet", 10, ["Özet", "Hakkımda", "Summary", "Profile", "Objective"]),
    ("liderlik", 10, ["Liderlik", "Leadership", "Organization Experience"]),
    ("sertifikalar", 10, ["Sertifikalar", "Certificates", "Certifications", "Courses"]),
    ("iletisim", 5, ["İletişim", "Contact", "Phone", "Email"]),
    ("portfolyo", 5, ["Portfolyo", "Portfolio", "GitHub", "Website"]),
    ("diller", 3, ["Diller", "Languages"]),
    ("referanslar", 2, ["Referanslar", "References"]),
)

# Section key -> heading keywords searched for in the CV text.
criteria_labels = {key: keywords for key, _weight, keywords in _SECTION_TABLE}

# Section key -> maximum number of points the section can contribute.
criteria_weights = {key: weight for key, weight, _keywords in _SECTION_TABLE}

# ============================
# 3. Modellerin Yüklenmesi
# ============================
# Sentence-embedding model; analyze_cv uses it for the cosine-similarity check
# between the CV text and each section's keywords.
# NOTE(review): the original comment described this checkpoint as "fast and
# multilingual". The all-MiniLM-L6-v2 model card appears to list English training
# data only — confirm, and consider a multilingual checkpoint if Turkish CVs must
# be scored semantically.
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')  # fast model (per the original note)
# Zero-shot classifier; analyze_cv falls back to it when the similarity score is low.
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")

# ============================
# 4. PDF Metin Çıkarma
# ============================
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in the PDF at ``file_path``.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped; the text of each remaining page is followed by a
    newline. The result is an empty string when no page produced text.
    """
    with pdfplumber.open(file_path) as document:
        # Extract while the file is still open; the strings outlive the handle.
        page_texts = [page.extract_text() for page in document.pages]
    return "".join(f"{chunk}\n" for chunk in page_texts if chunk)

# ============================
# 5. Hibrit Analiz Fonksiyonu
# ============================
def _fold_case(value):
    """Lower-case ``value`` so Turkish and English spellings compare equal.

    ``str.lower()`` maps "İ" (U+0130) to "i" followed by U+0307 (combining dot
    above) and maps "I" to "i" rather than the Turkish dotless "ı". Dropping the
    combining dot and folding "ı" to "i" makes "İŞ DENEYİMİ", "İş Deneyimi" and
    "iş deneyimi" normalise to the same string.
    """
    return value.lower().replace("\u0307", "").replace("\u0131", "i")


def analyze_cv(text):
    """Score CV text against every section listed in ``criteria_labels``.

    Each section receives a score in [0, 1] from up to three checks, tried in
    order until one is decisive:

    1. Keyword check: any of the section's keywords occurs in the text
       (case-insensitive, Turkish-aware) -> score 1.0.
    2. Semantic similarity: cosine similarity between the embedding of the
       whole text and the embedding of the section's keywords joined by spaces;
       used only when it exceeds 0.4.
    3. Zero-shot classification of the first 512 characters, used only when
       the score is still below 0.4.

    The section score is multiplied by ``criteria_weights[section]`` and summed.

    Args:
        text: Plain text of the CV. ``None`` or blank text is treated as an
            empty CV instead of raising.

    Returns:
        dict with keys ``"language"`` (code returned by ``detect``, or
        ``"unknown"`` when detection is not possible), ``"total_score"``
        (weighted sum rounded to 2 decimals), ``"strengths"`` and
        ``"weaknesses"`` (lists of Turkish report lines).
    """
    import warnings  # standard library; local import keeps this block self-contained

    similarity_floor = 0.4  # below this a section is reported as missing
    strong_floor = 0.6      # at or above this a section is reported as a strength

    text = text or ""
    if not text.strip():
        # Nothing to analyse (e.g. a PDF from which no text could be extracted):
        # report every section as missing rather than failing in detect().
        return {
            "language": "unknown",
            "total_score": 0.0,
            "strengths": [],
            "weaknesses": [
                f"{section.replace('_', ' ').title()} bölümü eksik."
                for section in criteria_labels
            ],
        }

    try:
        lang = detect(text)
    except Exception:
        # NOTE(review): detect() can raise on text it cannot classify (presumably
        # langdetect's LangDetectException, which this file does not import);
        # the language is informational only, so do not abort the scoring.
        lang = "unknown"

    folded_text = _fold_case(text)
    text_embedding = None  # computed lazily and only once; it does not depend on the section
    total_score = 0
    strengths, weaknesses = [], []

    for section, labels in criteria_labels.items():
        max_score = 0

        # --- 5.1 Keyword check ---
        if any(_fold_case(kw) in folded_text for kw in labels):
            max_score = 1.0

        # --- 5.2 Semantic similarity ---
        if max_score < 1.0:
            if text_embedding is None:
                text_embedding = semantic_model.encode(text, convert_to_tensor=True)
            label_embedding = semantic_model.encode(" ".join(labels), convert_to_tensor=True)
            sim = float(util.cos_sim(text_embedding, label_embedding))
            if sim > similarity_floor:
                max_score = max(max_score, sim)

        # --- 5.3 Zero-shot check ---
        if max_score < similarity_floor:
            try:
                result = classifier(text[:512], candidate_labels=labels, multi_label=True)
                zs_score = max(result["scores"]) if "scores" in result else 0
                max_score = max(max_score, zs_score)
            except Exception as exc:
                # Best effort: keep scoring the remaining sections, but make the
                # failure visible instead of silently swallowing it.
                warnings.warn(
                    f"Zero-shot check failed for section '{section}': {exc!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )

        # --- 5.4 Scoring ---
        section_score = round(criteria_weights[section] * max_score, 2)
        total_score += section_score

        title = section.replace('_', ' ').title()
        if max_score >= strong_floor:
            strengths.append(f"{title} bölümü iyi (benzerlik: {max_score:.2f}).")
        elif max_score >= similarity_floor:
            weaknesses.append(f"{title} bölümü zayıf (benzerlik: {max_score:.2f}).")
        else:
            weaknesses.append(f"{title} bölümü eksik.")

    total_score = round(total_score, 2)

    return {
        "language": lang,
        "total_score": total_score,
        "strengths": strengths,
        "weaknesses": weaknesses
    }

# ============================
# 6. Manuel PDF Yükleme
# ============================
def manual_pdf_upload():
    """Ask the user for a PDF and return its path.

    When ``google.colab`` can be imported, the Colab upload dialog is shown and
    the first key of the mapping returned by ``files.upload()`` is returned as
    the path. Otherwise the user is prompted to type a path; surrounding
    whitespace and quote characters are removed from the typed value.

    Returns:
        str: name/path of the PDF to analyse.

    Raises:
        ValueError: if the Colab upload dialog returns no file (for example
            because it was dismissed), instead of an opaque ``IndexError``.
    """
    try:
        from google.colab import files
    except ImportError:
        # Not running in Colab: fall back to a typed path. Only the import is
        # guarded so that an ImportError raised during an actual upload is not
        # mistaken for "Colab is unavailable".
        return input("PDF dosyasının tam yolunu girin: ").strip().strip("\"'")

    print("Lütfen analiz etmek istediğiniz PDF dosyasını seçin:")
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("Hiçbir PDF dosyası yüklenmedi.")
    pdf_path = next(iter(uploaded))
    print(f"'{pdf_path}' başarıyla yüklendi.")
    return pdf_path

# ============================
# 7. Ana Program
# ============================
# Per the original author's note: this part does not run automatically in Google
# Colab; execute it manually as a cell.
pdf_path = manual_pdf_upload()
cv_text = extract_text_from_pdf(pdf_path)
report = analyze_cv(cv_text)

# Assemble the whole report first, then emit it with a single print call.
summary_lines = [
    f"\nCV Dili: {report['language']}",
    f"Toplam Puan: {report['total_score']}/100\n",
    "Güçlü Yönler:",
]
summary_lines.extend(f" - {strength}" for strength in report['strengths'])
summary_lines.append("\nZayıf Yönler:")
summary_lines.extend(f" - {weakness}" for weakness in report['weaknesses'])
print("\n".join(summary_lines))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spm.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.3M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

Device set to use cpu


Lütfen analiz etmek istediğiniz PDF dosyasını seçin:


Saving Curriculum Vitae - Meriç Özcan.pdf to Curriculum Vitae - Meriç Özcan.pdf
'Curriculum Vitae - Meriç Özcan.pdf' başarıyla yüklendi.

CV Dili: en
Toplam Puan: 88.74/100

Güçlü Yönler:
 - Iş Deneyimi bölümü iyi (benzerlik: 1.00).
 - Egitim bölümü iyi (benzerlik: 1.00).
 - Teknik Beceriler bölümü iyi (benzerlik: 1.00).
 - Liderlik bölümü iyi (benzerlik: 1.00).
 - Iletisim bölümü iyi (benzerlik: 1.00).
 - Portfolyo bölümü iyi (benzerlik: 1.00).
 - Diller bölümü iyi (benzerlik: 1.00).
 - Referanslar bölümü iyi (benzerlik: 1.00).

Zayıf Yönler:
 - Ozet bölümü zayıf (benzerlik: 0.48).
 - Sertifikalar bölümü eksik.
