In [5]:
!pip install pdfplumber langdetect sentence-transformers transformers sentencepiece -q

In [21]:
# ============================
# 1. Gerekli Kütüphaneler
# ============================
# Colab için:
# !pip install pdfplumber langdetect sentence-transformers transformers ipywidgets

import re
import pdfplumber
from langdetect import detect
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
import ipywidgets as widgets
from IPython.display import display, Markdown, HTML

# ============================
# 2. Anahtar Kelimeler ve Ağırlıklar
# ============================
criteria_labels = {
    "iş_deneyimi": ["İş Deneyimi", "Work Experience", "Career History", "Experience"],
    "egitim": ["Eğitim", "Education", "Academic Background", "University"],
    "teknik_beceriler": ["Teknik Beceriler", "Technical Skills", "Skills"],
    "ozet": ["Özet", "Hakkımda", "Summary", "Profile", "Objective"],
    "liderlik": ["Liderlik", "Leadership", "Organization Experience"],
    "sertifikalar": ["Sertifikalar", "Certificates", "Certifications", "Courses"],
    "iletisim": ["İletişim", "Contact", "Phone", "Email"],
    "portfolyo": ["Portfolyo", "Portfolio", "GitHub", "Website"],
    "diller": ["Diller", "Languages"],
    "referanslar": ["Referanslar", "References"]
}

criteria_weights = {
    "iş_deneyimi": 25,
    "egitim": 15,
    "teknik_beceriler": 15,
    "ozet": 10,
    "liderlik": 10,
    "sertifikalar": 10,
    "iletisim": 5,
    "portfolyo": 5,
    "diller": 3,
    "referanslar": 2
}

# ============================
# 3. Modellerin Yüklenmesi
# ============================
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')  # Hızlı ve çok dilli model
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

# ============================
# 4. PDF Metin Çıkarma
# ============================
def extract_text_from_pdf(file_path):
    text = ""
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"
    return text

# ============================
# 5. Regex ile Bölüm Ayıklama
# ============================
def extract_sections(text):
    sections = {}
    lines = text.split("\n")
    current_section = "Genel"
    sections[current_section] = ""

    pattern = re.compile(r"(?i)(iş deneyimi|work experience|eğitim|education|skills|teknik beceriler|summary|özgeçmiş|certificates|diller|languages|references|portfolyo|profile|objective|liderlik|sertifikalar|iletisim)")

    for line in lines:
        if pattern.search(line):
            current_section = line.strip()
            sections[current_section] = ""
        else:
            sections[current_section] += line + " "

    return sections

# ============================
# 6. BERT Tabanlı NER (İsim Varlık Tanıma)
# ============================
def ner_entities(text):
    entities = ner_pipeline(text[:1000])  # uzun metinlerde kısaltılabilir
    extracted = {"PER": [], "ORG": [], "LOC": [], "DATE": []}

    for ent in entities:
        label = ent["entity_group"]
        if label in extracted:
            extracted[label].append(ent["word"])

    return extracted

# ============================
# 7. Hibrit Analiz Fonksiyonu
# ============================
def analyze_cv(text):
    lang = detect(text)
    text_lower = text.lower()
    total_score = 0
    strengths, weaknesses = [], []

    for section, labels in criteria_labels.items():
        max_score = 0

        # --- 7.1 Keyword Kontrolü ---
        for kw in labels:
            if kw.lower() in text_lower:
                max_score = 1.0
                break

        # --- 7.2 Semantik Benzerlik ---
        if max_score < 1.0:
            embeddings1 = semantic_model.encode(text, convert_to_tensor=True)
            embeddings2 = semantic_model.encode(" ".join(labels), convert_to_tensor=True)
            sim = float(util.cos_sim(embeddings1, embeddings2))
            if sim > 0.4:
                max_score = max(max_score, sim)

        # --- 7.3 Zero-shot Kontrol ---
        if max_score < 0.4:
            try:
                result = classifier(text[:512], candidate_labels=labels, multi_label=True)
                zs_score = max(result["scores"]) if "scores" in result else 0
                max_score = max(max_score, zs_score)
            except:
                pass

        # --- 7.4 Puanlama ---
        section_score = round(criteria_weights[section] * max_score, 2)
        total_score += section_score

        if max_score >= 0.6:
            strengths.append(f"{section.replace('_', ' ').title()} bölümü iyi (benzerlik: {max_score:.2f}).")
        elif max_score >= 0.4:
            weaknesses.append(f"{section.replace('_', ' ').title()} bölümü zayıf (benzerlik: {max_score:.2f}).")
        else:
            weaknesses.append(f"{section.replace('_', ' ').title()} bölümü eksik.")

    total_score = round(total_score, 2)

    return {
        "language": lang,
        "total_score": total_score,
        "strengths": strengths,
        "weaknesses": weaknesses
    }

def analyze_cv_with_sections(text):
    base_report = analyze_cv(text)
    base_report["sections"] = extract_sections(text)  # Bu artık kullanılmayabilir, ama dilerseniz tutabiliriz.
    base_report["ner"] = ner_entities(text)
    return base_report

# ============================
# 8. Manuel PDF Yükleme (Colab uyumlu)
# ============================
def manual_pdf_upload():
    try:
        from google.colab import files
        print("CV PDF dosyanızı yükleyin:")
        uploaded = files.upload()
        pdf_path = list(uploaded.keys())[0]
        print(f"'{pdf_path}' başarıyla yüklendi.")
        return pdf_path
    except ImportError:
        pdf_path = input("PDF dosyasının tam yolunu girin: ")
        return pdf_path

# ============================
# 9. Rapor Görselleştirme Fonksiyonları
# ============================
def color_for_score(score):
    if score >= 75:
        return "green"
    elif score >= 50:
        return "orange"
    else:
        return "red"

def display_report(report):
    display(Markdown(f"## 📊 CV Analiz Raporu"))
    display(Markdown(f"**Dil:** {report['language']}"))
    display(Markdown("---"))

    score = report['total_score']
    color = color_for_score(score)
    score_html = f"""
    <div style='font-size:48px; font-weight:bold; color:{color}; text-align:center;'>
        {score} / 100
    </div>
    """
    display(HTML(score_html))

    strengths_md = "### ✅ Güçlü Yönler\n"
    if report['strengths']:
        for s in report['strengths']:
            strengths_md += f"- {s}\n"
    else:
        strengths_md += "- Belirgin güçlü yön bulunamadı.\n"
    display(Markdown(strengths_md))

    weaknesses_md = "### ⚠️ Zayıf Yönler\n"
    if report['weaknesses']:
        for w in report['weaknesses']:
            weaknesses_md += f"- {w}\n"
    else:
        weaknesses_md += "- Belirgin zayıf yön bulunamadı.\n"
    display(Markdown(weaknesses_md))

    ner_md = "### 🏷️ İsim Varlıkları (NER)\n"
    ner_found = False
    for ent_type, ents in report["ner"].items():
        if ents:
            ner_found = True
            unique_ents = set(ents)
            ner_md += f"- **{ent_type}**: {', '.join(unique_ents)}\n"
    if not ner_found:
        ner_md += "- Bulunamadı.\n"
    display(Markdown(ner_md))

# ============================
# 10. Ana Program (Çalıştırıcı)
# ============================
def run_interactive_report():
    pdf_path = manual_pdf_upload()
    cv_text = extract_text_from_pdf(pdf_path)
    report = analyze_cv_with_sections(cv_text)
    display_report(report)

# Eğer direk çalıştırmak isterseniz:
# run_interactive_report()


Device set to use cpu
Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [22]:
run_interactive_report()


CV PDF dosyanızı yükleyin:


Saving Curriculum Vitae - Meriç Özcan.pdf to Curriculum Vitae - Meriç Özcan (3).pdf
'Curriculum Vitae - Meriç Özcan (3).pdf' başarıyla yüklendi.


## 📊 CV Analiz Raporu

**Dil:** en

---

### ✅ Güçlü Yönler
- Iş Deneyimi bölümü iyi (benzerlik: 1.00).
- Egitim bölümü iyi (benzerlik: 1.00).
- Teknik Beceriler bölümü iyi (benzerlik: 1.00).
- Liderlik bölümü iyi (benzerlik: 1.00).
- Iletisim bölümü iyi (benzerlik: 1.00).
- Portfolyo bölümü iyi (benzerlik: 1.00).
- Diller bölümü iyi (benzerlik: 1.00).
- Referanslar bölümü iyi (benzerlik: 1.00).


### ⚠️ Zayıf Yönler
- Ozet bölümü zayıf (benzerlik: 0.48).
- Sertifikalar bölümü eksik.


### 🏷️ İsim Varlıkları (NER)
- **ORG**: ##Z, QNB Finance Bank, Excel
- **LOC**: İstan, Turkey, ##bul
