In [2]:
import logging
import os
import re
import sys
import tempfile
from io import BytesIO

import spacy
from flask import Flask, request, render_template_string, send_file, redirect, url_for
from fpdf import FPDF
from pdfminer.high_level import extract_text
from spacy.cli import download
from textblob import TextBlob

# Flask application object that the route decorators below register against.
app = Flask(__name__)

# spaCy pipeline used by the grammar analysis. The OSError branch treats a
# failed load as "model not installed": it fetches the model once and retries.
_SPACY_MODEL = "en_core_web_sm"
try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)

# HTML/Jinja template for the analyzer's single page. index() renders it with
# render_template_string(HTML_PAGE, result=result).
#
# Template variable `result`:
#   * falsy (e.g. None)  -> only the upload form is shown.
#   * result.error truthy -> the message is shown in an error box.
#   * otherwise           -> the report cards read result.sentiment,
#     result.sentiment_class, result.sentiment_score, result.sentiment_advice,
#     result.grammar_issues, result.spelling_errors, result.grammar_advice,
#     result.keyword_score, result.keywords (items exposing .word and .found)
#     and result.keyword_string.
#
# The report form posts the hidden fields summary, grammar, spelling, score,
# keywords and sentiment_score to /download.
HTML_PAGE = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LinkedIn Analyzer - ZahoorTech</title>
    <style>
        :root {
            --primary: #0a66c2;
            --secondary: #f5f5f5;
            --accent: #ff6b00;
            --text: #333333;
            --light: #ffffff;
            --success: #28a745;
            --warning: #ffc107;
            --danger: #dc3545;
        }
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }
        body {
            background-color: var(--secondary);
            color: var(--text);
            line-height: 1.6;
            padding: 20px;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            background: var(--light);
            border-radius: 10px;
            box-shadow: 0 5px 15px rgba(0,0,0,0.1);
            padding: 30px;
        }
        header {
            text-align: center;
            margin-bottom: 30px;
            padding-bottom: 20px;
            border-bottom: 1px solid #eee;
        }
        h1 {
            color: var(--primary);
            margin-bottom: 10px;
        }
        .subtitle {
            color: #666;
            font-size: 1.1rem;
        }
        .upload-section {
            background: #f8f9fa;
            border: 2px dashed #dee2e6;
            border-radius: 8px;
            padding: 30px;
            text-align: center;
            margin-bottom: 30px;
        }
        .upload-icon {
            font-size: 48px;
            color: var(--primary);
            margin-bottom: 15px;
        }
        .form-group {
            margin-bottom: 20px;
        }
        input[type="file"] {
            margin: 0 auto;
            display: block;
        }
        .btn {
            display: inline-block;
            background: var(--primary);
            color: white;
            padding: 12px 25px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
            font-weight: 600;
            transition: all 0.3s ease;
        }
        .btn:hover {
            background: #004182;
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.1);
        }
        .btn-download {
            background: var(--success);
            margin-top: 15px;
        }
        .btn-download:hover {
            background: #218838;
        }
        .result-section {
            display: {% if result %}block{% else %}none{% endif %};
            margin-top: 30px;
            padding: 25px;
            background: #e8f4fe;
            border-radius: 8px;
            border-left: 4px solid var(--primary);
        }
        .analysis-title {
            color: var(--primary);
            margin-bottom: 20px;
            display: flex;
            align-items: center;
        }
        .analysis-title i {
            margin-right: 10px;
            font-size: 24px;
        }
        .analysis-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
            gap: 20px;
        }
        .analysis-card {
            background: white;
            border-radius: 8px;
            padding: 20px;
            box-shadow: 0 3px 10px rgba(0,0,0,0.05);
        }
        .card-title {
            font-weight: 600;
            margin-bottom: 10px;
            color: var(--text);
            display: flex;
            align-items: center;
        }
        .card-title i {
            margin-right: 8px;
            font-size: 20px;
        }
        .sentiment-positive { color: var(--success); }
        .sentiment-neutral { color: var(--warning); }
        .sentiment-negative { color: var(--danger); }
        .score-bar {
            height: 10px;
            background: #e9ecef;
            border-radius: 5px;
            margin: 10px 0;
            overflow: hidden;
        }
        .score-fill {
            height: 100%;
            background: var(--accent);
            border-radius: 5px;
        }
        .keyword-list {
            list-style-type: none;
            margin-top: 10px;
        }
        .keyword-list li {
            margin-bottom: 5px;
            padding: 5px 10px;
            background: #e9ecef;
            border-radius: 4px;
            display: inline-block;
            margin-right: 8px;
        }
        .keyword-found { background: #d4edda !important; }
        .error {
            background: #f8d7da;
            color: var(--danger);
            padding: 15px;
            border-radius: 5px;
            margin: 20px 0;
            border-left: 4px solid var(--danger);
        }
        footer {
            text-align: center;
            margin-top: 30px;
            color: #6c757d;
            font-size: 0.9rem;
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>LinkedIn Profile Analyzer</h1>
            <p class="subtitle">Upload your LinkedIn profile PDF to receive professional optimization suggestions</p>
        </header>
        
        <div class="upload-section">
            <div class="upload-icon">📄</div>
            <h3>Upload Your LinkedIn Profile (PDF)</h3>
            <form method="post" enctype="multipart/form-data">
                <div class="form-group">
                    <input type="file" name="file" accept=".pdf" required>
                </div>
                <button type="submit" class="btn">Analyze Profile</button>
            </form>
        </div>
        
        {% if result %}
            {% if result.error %}
                <div class="error">
                    <strong>Error:</strong> {{ result.error }}
                </div>
            {% else %}
                <div class="result-section">
                    <h2 class="analysis-title">📊 Analysis Report</h2>
                    
                    <div class="analysis-grid">
                        <div class="analysis-card">
                            <h3 class="card-title"><i>😊</i> Profile Sentiment</h3>
                            <p><strong>Overall Tone:</strong> 
                                <span class="sentiment-{{ result.sentiment_class }}">
                                    {{ result.sentiment }}
                                </span>
                            </p>
                            <p><strong>Polarity:</strong> {{ result.sentiment_score|round(2) }}</p>
                            <p>{{ result.sentiment_advice }}</p>
                        </div>
                        
                        <div class="analysis-card">
                            <h3 class="card-title"><i>📝</i> Grammar & Writing</h3>
                            <p><strong>Issues Found:</strong> {{ result.grammar_issues }}</p>
                            <p><strong>Spelling Errors:</strong> {{ result.spelling_errors }}</p>
                            <p>{{ result.grammar_advice }}</p>
                        </div>
                        
                        <div class="analysis-card">
                            <h3 class="card-title"><i>🔑</i> Professional Keywords</h3>
                            <p><strong>Score:</strong> {{ result.keyword_score }}/10</p>
                            <div class="score-bar">
                                <div class="score-fill" style="width: {{ result.keyword_score * 10 }}%"></div>
                            </div>
                            <ul class="keyword-list">
                                {% for keyword in result.keywords %}
                                    <li class="{% if keyword.found %}keyword-found{% endif %}">
                                        {{ keyword.word }} {% if keyword.found %}✓{% endif %}
                                    </li>
                                {% endfor %}
                            </ul>
                        </div>
                    </div>
                    
                    <form action="/download" method="post">
                        <input type="hidden" name="summary" value="{{ result.sentiment }}">
                        <input type="hidden" name="grammar" value="{{ result.grammar_issues }}">
                        <input type="hidden" name="spelling" value="{{ result.spelling_errors }}">
                        <input type="hidden" name="score" value="{{ result.keyword_score }}">
                        <input type="hidden" name="keywords" value="{{ result.keyword_string }}">
                        <input type="hidden" name="sentiment_score" value="{{ result.sentiment_score }}">
                        <button type="submit" class="btn btn-download">Download Full PDF Report</button>
                    </form>
                </div>
            {% endif %}
        {% endif %}
        
        <footer>
            <p>© 2025 ZahoorTech - LinkedIn Profile Analyzer</p>
        </footer>
    </div>
</body>
</html>
'''

# Utility to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of an uploaded PDF.

    The upload is written to a uniquely named temporary file, passed to
    ``extract_text`` and the temporary file is removed again whether or not
    saving/extraction succeeds.

    Args:
        pdf_file: Object exposing ``save(path)``; it is called with the path
            of the temporary file.

    Returns:
        Whatever ``extract_text`` returns for the saved file.

    Raises:
        RuntimeError: If saving or extraction fails. The original exception
            is chained as ``__cause__``.
    """
    temp_path = None
    try:
        # A unique file per call: a shared fixed name would let concurrent
        # requests overwrite or delete each other's upload.
        handle, temp_path = tempfile.mkstemp(suffix=".pdf")
        os.close(handle)  # only the path is needed; save() opens it itself

        pdf_file.save(temp_path)
        return extract_text(temp_path)
    except Exception as e:
        raise RuntimeError(f"PDF processing error: {str(e)}") from e
    finally:
        # Clean up even when extraction raised, so failed uploads do not
        # accumulate on disk.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best effort: a file that is already gone is not an error
                # worth masking the real result/exception for.
                pass

# Check spelling in text
def check_spelling(text):
    """Return the words in *text* that look misspelled.

    Each word produced by ``TextBlob(text).words`` is spell-checked, except
    tokens that look like URLs or e-mail addresses. A word is reported when
    the top suggestion differs from the word itself (case-insensitively).

    Args:
        text: The text to check.

    Returns:
        A list of dicts ``{'word': <word>, 'suggestions': [<up to 3 words>]}``,
        one per flagged occurrence. An empty list is returned when nothing is
        flagged or when the spell checker itself fails (the failure is logged).
    """
    # Tokens that should not be spell-checked: URLs and e-mail-like strings.
    skip_pattern = re.compile(r'^https?://|.*@.*\..*')

    try:
        corrections = []

        for word in TextBlob(text).words:
            if skip_pattern.match(word):
                continue

            # Spell checking is a method of the individual Word objects
            # (Word.spellcheck -> [(candidate, confidence), ...]); the blob
            # has no per-word spellcheck method.
            suggestions = word.spellcheck()

            # If top suggestion isn't the word itself, it's misspelled
            if suggestions and suggestions[0][0].lower() != word.lower():
                corrections.append({
                    'word': word,
                    'suggestions': [s[0] for s in suggestions[:3]]
                })

        return corrections
    except Exception:
        # Spell checking is best effort and must not break the analysis, but
        # a failure should be visible instead of silently reading as
        # "no spelling errors".
        logging.getLogger(__name__).exception(
            "Spell check failed; reporting no spelling errors"
        )
        return []

# Analyze text comprehensively
def analyze_text(text):
    """Analyze profile text for tone, writing quality and keyword coverage.

    Args:
        text: The extracted profile text.

    Returns:
        On success a dict with the keys
        ``sentiment`` / ``sentiment_class`` / ``sentiment_score`` /
        ``sentiment_advice`` (TextBlob polarity, bucketed at +/-0.2),
        ``grammar_issues`` (summary string of short and passive sentences),
        ``grammar_advice``, ``spelling_errors`` (count from
        ``check_spelling``), ``spelling_advice``, ``keyword_score``
        (number of keywords found, capped at 10), ``keywords`` (list of
        ``{'word', 'found'}`` dicts) and ``keyword_string`` (found keywords
        joined with ", ").
        If anything raises, ``{"error": "Analysis error: ..."}`` is returned
        instead of propagating the exception.
    """
    try:
        # Sentiment analysis
        blob = TextBlob(text)
        sentiment_score = blob.sentiment.polarity

        if sentiment_score > 0.2:
            sentiment = "Positive"
            sentiment_class = "positive"
            sentiment_advice = "Great! Your profile has a positive tone that attracts recruiters."
        elif sentiment_score < -0.2:
            sentiment = "Negative"
            sentiment_class = "negative"
            sentiment_advice = "Consider using more positive language to make your profile more appealing."
        else:
            sentiment = "Neutral"
            sentiment_class = "neutral"
            sentiment_advice = "Good foundation. Add more action verbs to make your profile stand out."

        # Grammar analysis: only the counts are reported, so count directly.
        sentences = list(nlp(text).sents)
        short_count = sum(1 for sent in sentences if len(sent) < 8)
        passive_count = sum(
            1 for sent in sentences
            if any(token.dep_ == "auxpass" for token in sent)
        )

        grammar_issues = []
        if short_count:
            grammar_issues.append(f"{short_count} short sentences")
        if passive_count:
            grammar_issues.append(f"{passive_count} passive constructions")

        grammar_issues_str = ", ".join(grammar_issues) if grammar_issues else "Minor issues"

        # Spelling check
        spelling_count = len(check_spelling(text))

        if spelling_count == 0:
            spelling_advice = "Excellent! No spelling errors found."
        elif spelling_count < 3:
            spelling_advice = "Good. Few spelling errors need attention."
        else:
            spelling_advice = f"Needs improvement. {spelling_count} spelling errors found."

        # Professional keywords
        keyword_list = [
            'achieved', 'managed', 'developed', 'led', 'improved',
            'implemented', 'increased', 'decreased', 'optimized',
            'collaborated', 'resolved', 'created', 'spearheaded',
            'Python', 'JavaScript', 'SQL', 'AWS', 'Azure',
            'machine learning', 'AI', 'project management',
            'leadership', 'communication', 'problem solving'
        ]

        found_keywords = []
        for keyword in keyword_list:
            # Match whole words only (case-insensitive). A plain substring
            # test reports "AI" for "maintain" and "led" for "knowledge".
            # Multi-word keywords tolerate any whitespace between the words,
            # e.g. a line break in the extracted text.
            phrase = r"\s+".join(re.escape(part) for part in keyword.split())
            pattern = r"(?<!\w)" + phrase + r"(?!\w)"
            found_keywords.append({
                # Upper-case only the first letter; str.capitalize() would
                # also lower-case the rest ("JavaScript" -> "Javascript").
                'word': keyword[:1].upper() + keyword[1:],
                'found': re.search(pattern, text, re.IGNORECASE) is not None
            })

        keyword_count = sum(1 for kw in found_keywords if kw['found'])
        keyword_score = min(10, keyword_count)  # Cap at 10

        # Prepare keyword string for PDF
        keyword_string = ", ".join(kw['word'] for kw in found_keywords if kw['found'])

        return {
            "sentiment": sentiment,
            "sentiment_class": sentiment_class,
            "sentiment_score": sentiment_score,
            "sentiment_advice": sentiment_advice,
            "grammar_issues": grammar_issues_str,
            "spelling_errors": spelling_count,
            "spelling_advice": spelling_advice,
            "grammar_advice": "Use more active voice and vary sentence length for better readability.",
            "keyword_score": keyword_score,
            "keywords": found_keywords,
            "keyword_string": keyword_string
        }

    except Exception as e:
        # Boundary for the web view: report the failure in the result
        # instead of raising.
        return {"error": f"Analysis error: {str(e)}"}

# Enhanced PDF report generation
from fpdf import FPDF
from io import BytesIO

def _pdf_text(value):
    """Return *value* as text that can be encoded as Latin-1.

    The report is written with the built-in "Arial" font and the document is
    serialized as Latin-1, so characters outside that range are replaced
    with "?" instead of raising while the PDF is produced.
    """
    return str(value).encode('latin-1', 'replace').decode('latin-1')


def generate_pdf_report(data):
    """Build the downloadable PDF report.

    Args:
        data: Mapping with the keys ``summary`` (overall tone),
            ``sentiment_score``, ``grammar``, ``spelling``, ``score`` and
            ``keywords``. Values are converted to text; ``sentiment_score``
            is shown with two decimals when it is numeric.

    Returns:
        A ``BytesIO`` positioned at the start and holding the PDF bytes.
    """
    try:
        sentiment_score_text = f"{float(data['sentiment_score']):.2f}"
    except (TypeError, ValueError):
        # Not numeric: print it as given rather than failing the report.
        sentiment_score_text = _pdf_text(data['sentiment_score'])

    pdf = FPDF()
    pdf.add_page()

    # Title
    pdf.set_font("Arial", 'B', 24)
    pdf.cell(0, 15, "LinkedIn Profile Analysis Report", 0, 1, 'C')
    pdf.ln(10)

    # Report summary
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, "Profile Summary", 0, 1)
    pdf.set_font("Arial", '', 12)
    # multi_cell takes (w, h, txt, border, align, ...): unlike cell() it has
    # no positional "ln" flag, so the alignment is passed by name.
    pdf.multi_cell(0, 8, "This report provides actionable insights to optimize your LinkedIn profile based on our analysis of your profile content.", align='L')
    pdf.ln(5)

    # Sentiment analysis
    pdf.set_fill_color(230, 240, 255)
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, "1. Profile Sentiment Analysis", 0, 1, fill=True)
    pdf.set_font("Arial", '', 12)
    pdf.cell(40, 8, "Overall Tone:", 0, 0)
    pdf.set_font("Arial", 'B', 12)
    pdf.cell(0, 8, _pdf_text(data['summary']), 0, 1)

    pdf.set_font("Arial", '', 12)
    pdf.cell(40, 8, "Sentiment Score:", 0, 0)
    pdf.cell(0, 8, sentiment_score_text, 0, 1)

    pdf.cell(40, 8, "Recommendations:", 0, 1)
    pdf.set_font("Arial", 'I', 12)
    pdf.multi_cell(0, 8, "Use more action verbs and accomplishment-oriented language. Highlight quantifiable achievements to strengthen your profile impact.")
    pdf.ln(5)

    # Grammar analysis
    pdf.set_fill_color(255, 245, 230)
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, "2. Writing Quality Analysis", 0, 1, fill=True)
    pdf.set_font("Arial", '', 12)
    pdf.cell(50, 8, "Grammar Issues Found:", 0, 0)
    pdf.cell(0, 8, _pdf_text(data['grammar']), 0, 1)

    pdf.cell(50, 8, "Spelling Errors:", 0, 0)
    pdf.cell(0, 8, _pdf_text(data['spelling']), 0, 1)

    pdf.cell(50, 8, "Recommendations:", 0, 1)
    pdf.set_font("Arial", 'I', 12)
    pdf.multi_cell(0, 8, "Avoid passive voice and vary sentence length. Proofread carefully for grammatical accuracy. Use tools like Grammarly for additional checks.")
    pdf.ln(5)

    # Keyword analysis
    pdf.set_fill_color(230, 255, 240)
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(0, 10, "3. Professional Keyword Analysis", 0, 1, fill=True)
    pdf.set_font("Arial", '', 12)
    pdf.cell(50, 8, "Keyword Score:", 0, 0)
    pdf.cell(0, 8, _pdf_text(f"{data['score']}/10"), 0, 1)

    pdf.cell(50, 8, "Keywords Found:", 0, 1)
    pdf.set_font("Arial", 'I', 12)
    pdf.multi_cell(0, 8, _pdf_text(data['keywords'] or "No keywords detected"), align='L')

    pdf.set_font("Arial", '', 12)
    pdf.cell(50, 8, "Recommendations:", 0, 1)
    pdf.set_font("Arial", 'I', 12)
    pdf.multi_cell(0, 8, "Include more industry-specific keywords and action verbs. Focus on skills mentioned in job descriptions for positions you're targeting.")
    pdf.ln(10)

    # Final recommendations
    pdf.set_font("Arial", 'B', 16)
    pdf.cell(0, 10, "Optimization Action Plan", 0, 1, 'C')
    pdf.set_font("Arial", '', 12)

    actions = [
        "1. Add 3-5 accomplishment statements with quantifiable results",
        "2. Include at least 5 more industry-specific keywords",
        "3. Rewrite passive sentences to active voice",
        "4. Add a compelling professional summary section",
        "5. Request 2-3 skill endorsements for top skills",
        "6. Add multimedia elements (projects, presentations)"
    ]

    for action in actions:
        pdf.cell(0, 8, action, 0, 1)

    # Footer
    pdf.ln(15)
    pdf.set_font("Arial", 'I', 10)
    pdf.cell(0, 5, "Generated by ZahoorTech LinkedIn Analyzer", 0, 1, 'C')
    pdf.cell(0, 5, "www.zahoortech.com | contact@zahoortech.com", 0, 1, 'C')

    # output(dest='S') returns the document in memory. Depending on the
    # installed FPDF package this is a str (to be encoded as Latin-1) or an
    # already bytes-like object; accept both.
    pdf_output = pdf.output(dest='S')
    if isinstance(pdf_output, str):
        pdf_output = pdf_output.encode('latin1')

    return BytesIO(bytes(pdf_output))


def _analyze_upload(upload):
    """Validate the uploaded file and return its analysis or an error dict."""
    if not upload:
        return {"error": "No file uploaded"}
    if upload.filename == '':
        return {"error": "No selected file"}
    if not upload.filename.lower().endswith('.pdf'):
        return {"error": "Please upload a PDF file"}

    try:
        content = extract_text_from_pdf(upload)
        if content.strip():
            return analyze_text(content)
        return {"error": "The uploaded PDF appears to be empty or could not be processed"}
    except Exception as exc:
        # Any failure is shown to the user in the page's error box.
        return {"error": str(exc)}


@app.route("/", methods=["GET", "POST"])
def index():
    """Serve the upload page and, on POST, the analysis of the uploaded PDF.

    GET renders the page without a result. POST reads the ``file`` upload,
    validates it and renders the page with either the analysis dict or an
    ``{"error": ...}`` dict.
    """
    if request.method == "POST":
        outcome = _analyze_upload(request.files.get('file'))
    else:
        outcome = None

    return render_template_string(HTML_PAGE, result=outcome)

@app.route("/download", methods=["POST"])
def download():
    """Return the PDF report built from the posted form fields.

    Reads ``summary``, ``grammar``, ``spelling``, ``score``, ``keywords`` and
    ``sentiment_score`` from the form, passes them to
    ``generate_pdf_report`` and sends the result as the attachment
    ``LinkedIn_Optimization_Report.pdf``.

    NOTE: defining this view rebinds the module-level name ``download``,
    which until this point refers to ``spacy.cli.download``.
    """
    form = request.form

    # The fields are client-supplied; a non-numeric score falls back to 0
    # instead of failing the request with an unhandled ValueError.
    try:
        sentiment_score = float(form.get('sentiment_score', '0'))
    except ValueError:
        sentiment_score = 0.0

    # Get data from form
    data = {
        "summary": form.get('summary', ''),
        "grammar": form.get('grammar', ''),
        "spelling": form.get('spelling', '0'),
        "score": form.get('score', '0'),
        "keywords": form.get('keywords', ''),
        "sentiment_score": sentiment_score
    }

    # Generate PDF
    pdf_buffer = generate_pdf_report(data)

    # Send the file
    return send_file(
        pdf_buffer,
        as_attachment=True,
        download_name="LinkedIn_Optimization_Report.pdf",
        mimetype='application/pdf'
    )


# Start the Flask server only when this module is executed as the main
# program (not when it is imported); debug mode is switched off.
if __name__ == "__main__":
    app.run(debug=False)
    

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [25/Jul/2025 14:34:19] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [25/Jul/2025 14:35:03] "POST / HTTP/1.1" 200 -
127.0.0.1 - - [25/Jul/2025 14:35:17] "POST /download HTTP/1.1" 200 -
