# In [None]:
import os
import re
import torch
import pdfplumber
from langdetect import detect
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import language_tool_python

# --------------------
# Keywords and weights
# Maps each CV section key to the heading keywords (Turkish and English)
# that analyze_cv uses to locate the section in the text and, as a fallback,
# as candidate labels / embedding text for that section.
criteria_labels = {
    "iş_deneyimi": ["İş Deneyimi", "Work Experience", "Career History", "Experience"],
    "egitim": ["Eğitim", "Education", "Academic Background", "University"],
    "teknik_beceriler": ["Teknik Beceriler", "Technical Skills", "Skills"],
    "ozet": ["Özet", "Hakkımda", "Summary", "Profile", "Objective"],
    "liderlik": ["Liderlik", "Leadership", "Organization Experience"],
    "sertifikalar": ["Sertifikalar", "Certificates", "Certifications", "Courses"],
    "iletisim": ["İletişim", "Contact", "Phone", "Email"],
    "portfolyo": ["Portfolyo", "Portfolio", "GitHub", "Website"],
    "diller": ["Diller", "Languages"],
    "referanslar": ["Referanslar", "References"]
}

# Score weight of each section, keyed like criteria_labels. The values sum
# to 100; analyze_cv multiplies each weight by the section's validity ratio,
# embedding similarity, or zero-shot score.
criteria_weights = {
    "iş_deneyimi": 25,
    "egitim": 15,
    "teknik_beceriler": 15,
    "ozet": 10,
    "liderlik": 10,
    "sertifikalar": 10,
    "iletisim": 5,
    "portfolyo": 5,
    "diller": 3,
    "referanslar": 2
}

# --------------------
# Model loading (runs once, when the module is first imported)
# Tokenizer and sequence-classification model that back the zero-shot pipeline.
tokenizer = AutoTokenizer.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")
model = AutoModelForSequenceClassification.from_pretrained("MoritzLaurer/mDeBERTa-v3-base-mnli-xnli")

# Zero-shot classifier shared by zero_shot_for_section and is_informative_line.
classifier = pipeline(
    "zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
    device=-1  # CPU
)

# Sentence-embedding model used by analyze_cv for the cosine-similarity fallback.
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

# --------------------
# Helper functions
def extract_text_from_pdf(path):
    """Return the concatenated text of all pages of the PDF at *path*.

    Each page's text is followed by a newline. Pages whose
    ``extract_text()`` result is falsy (``None`` or an empty string) are
    skipped, so a PDF with no extractable text yields ``""``.
    """
    pieces = []
    with pdfplumber.open(path) as document:
        for pdf_page in document.pages:
            content = pdf_page.extract_text()
            if not content:
                continue
            pieces.append(content)
            pieces.append("\n")
    return "".join(pieces)

def grammar_check(text, lang_code):
    """Return the LanguageTool messages for the first 5000 characters of *text*.

    Args:
        text: The text to check; only ``text[:5000]`` is sent to LanguageTool.
        lang_code: Detected language code. ``"en"`` selects ``"en-US"``;
            every other value selects ``"tr"``.

    Returns:
        A list with the ``message`` of every match reported by the tool.
    """
    # NOTE(review): any non-"en" code (not only Turkish) is checked as "tr";
    # confirm that the installed LanguageTool actually supports "tr".
    tool = language_tool_python.LanguageTool("en-US" if lang_code == "en" else "tr")
    try:
        matches = tool.check(text[:5000])
        return [m.message for m in matches]
    finally:
        # Shut the tool down deterministically instead of relying on
        # garbage collection of the instance.
        tool.close()

def zero_shot_for_section(text, labels):
    """Return the highest zero-shot score of *labels* for the start of *text*.

    Only the first 512 characters of *text* are classified (multi-label
    mode). If classification fails for any reason, the error is printed
    and ``0`` is returned.
    """
    head = text[:512]
    best = 0
    try:
        best = max(classifier(head, candidate_labels=labels, multi_label=True)["scores"])
    except Exception as e:
        print("Zero-shot hata:", e)
    return best

def is_informative_line(line, section_name):
    """Tell whether *line* looks like real content of the given section.

    Lines shorter than 6 characters after stripping are rejected outright.
    Otherwise the stripped line is zero-shot classified (single-label)
    against the title-cased section name plus a fixed set of "noise"
    labels; the line counts as informative only when the section label
    ranks first with a score above 0.6. Any error is printed and treated
    as "not informative".
    """
    candidate = line.strip()
    if len(candidate) < 6:
        return False
    try:
        # "teknik_beceriler" -> "Teknik Beceriler"
        target = section_name.replace('_', ' ').title()
        outcome = classifier(
            candidate,
            candidate_labels=[target, "Rastgele Yazı", "Alakasız", "Hobiler", "Kişisel Yorum", "Boş"],
            multi_label=False,
        )
        top_label, top_score = outcome["labels"][0], outcome["scores"][0]
        return top_label == target and top_score > 0.6
    except Exception as e:
        print(f"[Zero-shot Hata] Satır: {line} – Hata: {e}")
        return False

def evaluate_section_content(section_text, section_name):
    """Return the fraction of non-blank lines that are informative.

    *section_text* is split on newlines; blank lines are ignored. Each
    remaining (stripped) line is judged by ``is_informative_line``. The
    result is ``informative / total``, or ``0.0`` when there are no
    non-blank lines.
    """
    total = 0
    informative = 0
    for raw in section_text.split('\n'):
        entry = raw.strip()
        if not entry:
            continue
        total += 1
        if is_informative_line(entry, section_name):
            informative += 1
    if total == 0:
        return 0.0
    return informative / total

def extract_section_text(text, section_keywords):
    """Return the text starting at the first matching section keyword.

    Keywords are tried in list order (not in order of appearance in
    *text*). The first keyword found, case-insensitively and as a literal
    substring, determines the result: the matched keyword plus up to 1500
    following characters (newlines included).

    Args:
        text: Full CV text to search.
        section_keywords: Iterable of literal heading keywords.

    Returns:
        The matched slice of *text*, or ``""`` when no keyword occurs.
    """
    for kw in section_keywords:
        # Escape the keyword so regex metacharacters in headings such as
        # "C++" or "Node.js" are matched literally instead of being
        # interpreted (or raising re.error).
        pattern = re.escape(kw) + r"[\s\S]{0,1500}"
        match = re.search(pattern, text, flags=re.IGNORECASE)
        if match:
            return match.group()
    return ""

# Recommendation rules as (keywords, message, resources). A rule fires when
# none of its keywords occurs in the lower-cased CV text.
_RECOMMENDATION_RULES = (
    (
        ("python",),
        "CV’nizde Python yetkinliği belirtilmemiş. Python becerilerinizi öne çıkarın.",
        ("https://www.learnpython.org/", "LeetCode", "Kaggle"),
    ),
    (
        ("github", "gitlab"),
        "Projelerinizi göstermek için GitHub veya GitLab profilinizi ekleyin.",
        ("https://github.com/", "https://gitlab.com/"),
    ),
    (
        ("proje", "project"),
        "CV’nizde yer alan projeler, yetkinliklerinizi göstermek için önemlidir.",
        ("Kendi kişisel portföy siteniz", "Notion, Behance gibi araçlar"),
    ),
    (
        ("ingilizce", "english"),
        "Dil becerilerinizi belirtmeniz işe alım uzmanları için önemlidir.",
        ("https://www.duolingo.com/", "IELTS/TOEFL sertifika programları"),
    ),
)


def generate_recommendations(text):
    """Return improvement suggestions for topics the CV text never mentions.

    The text is searched case-insensitively for a few keyword groups
    (Python, GitHub/GitLab, projects, English). For every group with no
    hit, one recommendation is produced, in that fixed order.

    Args:
        text: Full CV text.

    Returns:
        A list of ``{"message": str, "resources": list[str]}`` dicts; empty
        when every keyword group is present. Each call returns fresh
        objects, so callers may mutate the result.
    """
    # str.lower() maps the Turkish capital "İ" (U+0130) to "i" followed by
    # U+0307 (combining dot above), so "İngilizce" would never contain
    # "ingilizce". Drop the combining dot so such words match.
    text_lower = text.lower().replace("i\u0307", "i")

    return [
        {"message": message, "resources": list(resources)}
        for keywords, message, resources in _RECOMMENDATION_RULES
        if not any(keyword in text_lower for keyword in keywords)
    ]

# --------------------
# Public entry point
def analyze_cv(text):
    """Score a CV's text section by section and collect feedback.

    For every section in ``criteria_labels`` the function:

    1. Extracts the section text by keyword and measures the ratio of
       informative lines (``evaluate_section_content``). A ratio above 0
       contributes ``weight * ratio`` to the score and produces a strength
       or weakness message depending on the ratio.
    2. Otherwise compares the embedding of the whole text with the
       embedding of the section's keywords; a cosine similarity above 0.4
       contributes ``weight * similarity``.
    3. Otherwise falls back to ``zero_shot_for_section`` and contributes
       ``weight * best_score``.

    Errors in steps 2 and 3 are recorded as a weakness for that section
    instead of aborting the analysis.

    Args:
        text: Full CV text.

    Returns:
        A dict with the keys ``language``, ``score``, ``strengths``,
        ``weaknesses``, ``grammar_issues`` and ``recommendations``.
    """
    # NOTE(review): detect() is called unguarded; it presumably raises on
    # text with no detectable language (e.g. empty input) — confirm.
    lang = detect(text)
    total_score = 0
    strengths, weaknesses = [], []

    # The embedding of the full text does not depend on the section, so it
    # is computed lazily and at most once instead of once per section.
    text_embedding = None

    for section, labels in criteria_labels.items():
        title = section.replace('_', ' ').title()
        section_text = extract_section_text(text, labels)
        section_validity = evaluate_section_content(section_text, section)

        if section_validity > 0:
            score = round(criteria_weights[section] * section_validity, 2)
            total_score += score

            if section_validity >= 0.9:
                strengths.append(f"{title} bölümü çok güçlü.")
            elif section_validity >= 0.6:
                strengths.append(f"{title} bölümü var ve geliştirilebilir.")
            elif section_validity >= 0.3:
                weaknesses.append(f"{title} başlığı var ama içerik yetersiz.")
            else:
                weaknesses.append(f"{title} bölümü eksik.")
            continue

        # No usable section found by heading: fall back to semantic
        # similarity, then to zero-shot classification.
        try:
            if text_embedding is None:
                text_embedding = semantic_model.encode(text, convert_to_tensor=True)
            label_embedding = semantic_model.encode(" ".join(labels), convert_to_tensor=True)
            sim = float(util.cos_sim(text_embedding, label_embedding))
            if sim > 0.4:
                score = round(criteria_weights[section] * sim, 2)
                total_score += score
                weaknesses.append(f"{title} benzer içerik bulundu ama yeterli değil.")
            else:
                max_score = zero_shot_for_section(text, labels)
                score = round(criteria_weights[section] * max_score, 2)
                total_score += score
                if max_score > 0.4:
                    weaknesses.append(f"{title} zero-shot ile benzer içerik bulundu ama yetersiz.")
                else:
                    weaknesses.append(f"{title} bölümü eksik.")
        except Exception as e:
            weaknesses.append(f"{title} bölümü eksik (hata: {e})")

    recommendations = generate_recommendations(text)
    grammar_issues = grammar_check(text, lang)

    return {
        # NOTE(review): every detected language other than "en" is
        # reported as "Türkçe".
        "language": "English" if lang == "en" else "Türkçe",
        "score": round(total_score, 2),
        "strengths": strengths,
        "weaknesses": weaknesses,
        "grammar_issues": grammar_issues,
        "recommendations": recommendations
    }
