In [1]:
# First we need some libraries - you need to install these first!
import requests  # for getting web pages
from bs4 import BeautifulSoup  # for reading web pages easily
import pandas as pd  # for handling data
import numpy as np  # for math stuff
from pymongo import MongoClient  # for MongoDB
import json  # for saving data
from datetime import datetime  # for getting today's date

# Part 1: Scraping Indeed.com
# ===========================

def get_indeed_jobs():
    """Scrape the first two Indeed result pages for Python jobs in New York.

    Returns:
        list[dict]: One dict per job card that could be parsed, with the keys
        ``title``, ``company``, ``location``, ``salary`` (``'Not listed'``
        when the card has no salary snippet) and ``date_scraped``
        (``YYYY-MM-DD``).

    Pages that cannot be fetched (network error, timeout, non-2xx status) are
    reported on stdout and skipped instead of being parsed as if they were
    result pages. Job cards that lack a title, company or location are skipped.
    """
    # This is where we'll store all jobs we find
    all_jobs = []

    # One scrape date for the whole run
    date_scraped = datetime.now().strftime("%Y-%m-%d")

    # Let's only look at first 2 pages to keep it simple
    for page in range(2):
        # Make the URL - we're looking for Python jobs
        url = f"https://www.indeed.com/jobs?q=Python+Developer&l=New+York&start={page*10}"

        # Get the webpage; a timeout keeps a stalled connection from hanging the run
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as e:
            print(f"Error on page {page}: {e}")
            continue

        # An error/blocked page has no job cards - say so instead of silently finding 0 jobs
        if not response.ok:
            print(f"Error on page {page}: HTTP {response.status_code}")
            continue

        # Turn the webpage into something we can read easily
        soup = BeautifulSoup(response.text, 'html.parser')

        # Look through each job card
        for job in soup.find_all('div', class_='job_seen_beacon'):
            # Salary is optional, so look it up once and fall back to a placeholder
            salary_tag = job.find('div', class_='salary-snippet')
            try:
                job_data = {
                    'title': job.find('h2', class_='jobTitle').text.strip(),
                    'company': job.find('span', class_='companyName').text.strip(),
                    'location': job.find('div', class_='companyLocation').text.strip(),
                    'salary': salary_tag.text.strip() if salary_tag else 'Not listed',
                    'date_scraped': date_scraped
                }
            except AttributeError:
                # find() returned None for a required field - skip that job
                continue
            all_jobs.append(job_data)

    return all_jobs

# Part 2: Saving to MongoDB
# ========================

def save_to_mongodb(jobs):
    """Insert the given jobs into the ``python_jobs`` collection of ``job_database``.

    Args:
        jobs: List of job dicts to store. Nothing is done for an empty list.

    The client connection is closed when the function returns, and all jobs
    are sent in a single ``insert_many`` call instead of one round trip each.
    """
    if not jobs:
        # insert_many() rejects an empty list, and there is nothing to save anyway
        return

    # Connect to MongoDB - you need to change these details!
    with MongoClient('mongodb://localhost:27017/') as client:
        collection = client['job_database']['python_jobs']

        # Save every job in one batch
        collection.insert_many(jobs)

# Part 3: Calculate Average Salary
# ==============================

def calculate_average_salary(jobs):
    """Average the yearly salaries found in the jobs' ``salary`` strings.

    Only salary strings that mention ``year`` are considered. Each dollar
    amount is parsed on its own (thousands separators removed, cents ignored),
    so a range such as ``"$85,000 - $110,000 a year"`` contributes its
    midpoint rather than all of its digits glued into one huge number.

    Args:
        jobs: Iterable of job dicts; a missing or empty ``salary`` is skipped.

    Returns:
        The mean salary (numpy float), or ``0`` when no yearly salary was found.
    """
    import re  # local import: only this function needs it

    # Whole-dollar part of a number; an optional ".cents" tail is consumed but dropped
    amount_pattern = re.compile(r'(\d[\d,]*)(?:\.\d+)?')

    salaries = []

    for job in jobs:
        salary = str(job.get('salary') or '')
        if 'year' not in salary.lower():
            continue

        # At most two amounts matter: a single figure or the low/high of a range
        amounts = [int(whole.replace(',', '')) for whole in amount_pattern.findall(salary)][:2]
        if amounts:
            salaries.append(sum(amounts) / len(amounts))

    # Calculate average using numpy
    if salaries:
        return np.mean(salaries)
    return 0

# Part 4: Django Admin Setup
# =========================
"""
# In your Django models.py:

from django.db import models

class JobListing(models.Model):
    title = models.CharField(max_length=200)
    company = models.CharField(max_length=200)
    location = models.CharField(max_length=200)
    salary = models.CharField(max_length=100)
    date_scraped = models.DateField()

    def __str__(self):
        return f"{self.title} at {self.company}"

# In your admin.py:

from django.contrib import admin
from .models import JobListing

@admin.register(JobListing)
class JobListingAdmin(admin.ModelAdmin):
    list_display = ('title', 'company', 'location', 'salary', 'date_scraped')
    search_fields = ('title', 'company', 'location')
    list_filter = ('date_scraped',)
"""

# Let's run everything!
# ====================

# Driver for this cell: scrape, store, then summarise.
# The captured output below shows this guard passes when the cell is run.
if __name__ == "__main__":
    print("Starting job scraping...")
    # `jobs` is a list of dicts (title/company/location/salary/date_scraped)
    jobs = get_indeed_jobs()
    
    print(f"Found {len(jobs)} jobs!")
    
    # Runs even when nothing was scraped; the insert loop is then a no-op
    print("Saving to MongoDB...")
    save_to_mongodb(jobs)
    
    # calculate_average_salary returns 0 when no yearly salary could be parsed,
    # so "$0.00" here means "no data", not an actual average
    print("Calculating average salary...")
    avg_salary = calculate_average_salary(jobs)
    print(f"Average salary: ${avg_salary:,.2f}")
    
    print("Done! 🎉")

# Requirements.txt
"""
requests==2.28.1
beautifulsoup4==4.11.1
pandas==1.5.2
numpy==1.23.5
pymongo==4.3.3
Django==4.1.4
"""

Starting job scraping...
Found 0 jobs!
Saving to MongoDB...
Calculating average salary...
Average salary: $0.00
Done! 🎉


'\nrequests==2.28.1\nbeautifulsoup4==4.11.1\npandas==1.5.2\nnumpy==1.23.5\npymongo==4.3.3\nDjango==4.1.4\n'

In [2]:
# First we need some libraries - you need to install these first!
from selenium import webdriver  # for controlling the browser
from selenium.webdriver.common.by import By  # for finding things on the page
from selenium.webdriver.chrome.options import Options  # for setting up Chrome
from selenium.webdriver.support.ui import WebDriverWait  # for waiting for page to load
from selenium.webdriver.support import expected_conditions as EC  # for checking if elements exist
import pandas as pd  # for handling data
import numpy as np  # for math stuff
from pymongo import MongoClient  # for MongoDB
import time  # for adding delays
from datetime import datetime  # for getting today's date

# Part 1: Scraping Indeed.com
# ===========================

def setup_driver():
    """Create and return a Chrome WebDriver configured to run headless (no window)."""
    # Command-line switches handed to Chrome, applied in this order
    chrome_flags = (
        '--headless',  # Run in background
        '--disable-gpu',
        '--no-sandbox',
        '--disable-dev-shm-usage',
    )

    browser_options = Options()
    for flag in chrome_flags:
        browser_options.add_argument(flag)

    # Create the driver
    return webdriver.Chrome(options=browser_options)

def get_indeed_jobs():
    """Scrape the first two Indeed result pages with a headless Chrome browser.

    Returns:
        list[dict]: One dict per job card that could be read, with the keys
        ``title``, ``company``, ``location``, ``salary`` (``'Not listed'``
        when the card has no salary element) and ``date_scraped``
        (``YYYY-MM-DD``).

    Errors on a page or on a single job card are printed and skipped. The
    browser is always closed, even if scraping is interrupted.
    """
    # Local import: selenium is already a dependency of this cell
    from selenium.common.exceptions import NoSuchElementException

    # This is where we'll store all jobs we find
    all_jobs = []

    # Start the browser
    driver = setup_driver()

    try:
        # Let's only look at first 2 pages to keep it simple
        for page in range(2):
            try:
                # Make the URL - we're looking for Python jobs in New York
                url = f"https://www.indeed.com/jobs?q=Python+Developer&l=New+York&start={page*10}"

                # Go to the webpage
                driver.get(url)

                # Wait a bit for the page to load (3 seconds)
                time.sleep(3)

                # Find all job cards
                job_cards = driver.find_elements(By.CLASS_NAME, 'job_seen_beacon')

                # Look through each job card
                for job in job_cards:
                    try:
                        # Get the info we want
                        title = job.find_element(By.CLASS_NAME, 'jobTitle').text
                        company = job.find_element(By.CLASS_NAME, 'companyName').text
                        location = job.find_element(By.CLASS_NAME, 'companyLocation').text

                        # Try to get salary, but it's okay if we can't find it.
                        # Only "element not there" means "not listed"; any other
                        # failure is reported by the handler below.
                        try:
                            salary = job.find_element(By.CLASS_NAME, 'salary-snippet').text
                        except NoSuchElementException:
                            salary = 'Not listed'

                        # Save all the info
                        job_data = {
                            'title': title,
                            'company': company,
                            'location': location,
                            'salary': salary,
                            'date_scraped': datetime.now().strftime("%Y-%m-%d")
                        }
                        all_jobs.append(job_data)
                        print(f"Found job: {title} at {company}")  # Print each job we find

                    except Exception as e:
                        print(f"Couldn't get info for a job: {str(e)}")
                        continue

            except Exception as e:
                print(f"Error on page {page}: {str(e)}")
                continue
    finally:
        # Close the browser no matter how scraping ended, so no Chrome process is left behind
        driver.quit()

    return all_jobs

# Part 2: Saving to MongoDB
# ========================

def save_to_mongodb(jobs):
    """Replace the contents of ``job_database.python_jobs`` with the given jobs.

    Args:
        jobs: List of job dicts to store. An empty list just clears the collection.

    Any error is printed rather than raised. The client connection is closed
    when the function returns, and the jobs are written in one ``insert_many``
    call instead of one round trip per job.
    """
    try:
        # Connect to MongoDB - you need to change these details!
        with MongoClient('mongodb://localhost:27017/') as client:
            collection = client['job_database']['python_jobs']

            # Delete old jobs first
            collection.delete_many({})

            # Save all jobs in one batch (insert_many rejects an empty list)
            if jobs:
                collection.insert_many(jobs)

        print(f"Successfully saved {len(jobs)} jobs to MongoDB!")

    except Exception as e:
        print(f"Error saving to MongoDB: {str(e)}")

# Part 3: Calculate Average Salary
# ==============================

def calculate_average_salary(jobs):
    """Average the yearly salaries found in the jobs' ``salary`` strings.

    Only salary strings that mention ``year`` are considered. Each dollar
    amount is parsed on its own (thousands separators removed, cents ignored),
    so a range such as ``"$85,000 - $110,000 a year"`` contributes its
    midpoint rather than all of its digits glued into one huge number.
    Values of 20,000 or less are dropped as unlikely to be yearly pay.

    Args:
        jobs: Iterable of job dicts; a missing or empty ``salary`` is skipped.

    Returns:
        The mean salary (numpy float), or ``0`` when no yearly salary was found.
    """
    import re  # local import: only this function needs it

    # Whole-dollar part of a number; an optional ".cents" tail is consumed but dropped
    amount_pattern = re.compile(r'(\d[\d,]*)(?:\.\d+)?')

    # Get all salaries
    salaries = []

    for job in jobs:
        salary = str(job.get('salary') or '')
        if 'year' not in salary.lower():
            continue

        # At most two amounts matter: a single figure or the low/high of a range
        amounts = [int(whole.replace(',', '')) for whole in amount_pattern.findall(salary)][:2]
        if not amounts:  # Only add if we found a number
            continue

        num = sum(amounts) / len(amounts)
        if num > 20000:  # Only add if it looks like a yearly salary
            salaries.append(num)
            print(f"Found salary: ${num:,.0f}")

    # Calculate average using numpy
    if salaries:
        return np.mean(salaries)
    return 0

# Part 4: Django Admin Setup
# =========================
"""
# In your Django models.py:

from django.db import models

class JobListing(models.Model):
    title = models.CharField(max_length=200)
    company = models.CharField(max_length=200)
    location = models.CharField(max_length=200)
    salary = models.CharField(max_length=100)
    date_scraped = models.DateField()

    def __str__(self):
        return f"{self.title} at {self.company}"

# In your admin.py:

from django.contrib import admin
from .models import JobListing

@admin.register(JobListing)
class JobListingAdmin(admin.ModelAdmin):
    list_display = ('title', 'company', 'location', 'salary', 'date_scraped')
    search_fields = ('title', 'company', 'location')
    list_filter = ('date_scraped',)
"""

# Let's run everything!
# ====================

# Driver for this cell: scrape with Selenium, then store and summarise
# only if something was found.
if __name__ == "__main__":
    print("Starting job scraping...")
    # `jobs` is a list of dicts (title/company/location/salary/date_scraped)
    jobs = get_indeed_jobs()
    
    print(f"\nFound {len(jobs)} jobs!")
    
    if jobs:  # Only continue if we found jobs
        # save_to_mongodb clears the collection before inserting, so skipping it
        # on an empty scrape also keeps previously stored jobs
        print("\nSaving to MongoDB...")
        save_to_mongodb(jobs)
        
        # calculate_average_salary returns 0 when no yearly salary could be parsed
        print("\nCalculating average salary...")
        avg_salary = calculate_average_salary(jobs)
        print(f"Average salary: ${avg_salary:,.2f}")
    
    print("\nDone! 🎉")

# Requirements.txt
"""
selenium==4.9.0
pandas==1.5.2
numpy==1.23.5
pymongo==4.3.3
Django==4.1.4
"""

Starting job scraping...

Found 0 jobs!

Done! 🎉


'\nselenium==4.9.0\npandas==1.5.2\nnumpy==1.23.5\npymongo==4.3.3\nDjango==4.1.4\n'

In [3]:
# We'll use these libraries
import pandas as pd
import numpy as np
from pymongo import MongoClient
from datetime import datetime, timedelta

# Part 1: Create Sample Job Data
# =============================

def create_sample_jobs():
    """Build five made-up Python job listings shaped like scraped ones.

    Returns:
        list[dict]: Dicts with the keys ``title``, ``company``, ``location``,
        ``salary`` and ``date_scraped`` (``YYYY-MM-DD``, dated today or up to
        two days back).
    """
    def scraped_days_ago(days):
        # Date string for a listing "scraped" the given number of days ago
        return (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")

    # (title, company, location, salary, age of the listing in days)
    listings = [
        ("Junior Python Developer", "Tech Solutions Inc", "New York, NY", "$75,000 per year", 1),
        ("Python Backend Developer", "StartUp Hub", "New York, NY", "$95,000 per year", 0),
        ("Python Full Stack Developer", "Big Corp Technologies", "Brooklyn, NY", "$85,000 - $110,000 per year", 0),
        ("Python Data Engineer", "Data Systems LLC", "Manhattan, NY", "$90,000 per year", 2),
        ("Django Developer", "Web Solutions Co", "Queens, NY", "Not listed", 0),
    ]

    sample_jobs = []
    for title, company, location, salary, age_in_days in listings:
        sample_jobs.append({
            "title": title,
            "company": company,
            "location": location,
            "salary": salary,
            "date_scraped": scraped_days_ago(age_in_days),
        })

    print(f"Created {len(sample_jobs)} sample jobs!")
    return sample_jobs

# Part 2: Save to MongoDB
# ======================

def save_to_mongodb(jobs):
    """Replace the contents of ``job_database.python_jobs`` with the given jobs.

    Args:
        jobs: List of job dicts to store. An empty list just clears the collection.

    Any error is printed (with a hint to check that MongoDB is running) rather
    than raised. The client connection is closed when the function returns.
    """
    try:
        # Connect to MongoDB
        print("Connecting to MongoDB...")
        with MongoClient('mongodb://localhost:27017/') as client:
            collection = client['job_database']['python_jobs']

            # Delete old data
            collection.delete_many({})
            print("Cleared old data from MongoDB")

            # Save new jobs; insert_many rejects an empty list, which would
            # otherwise be misreported below as a connection problem
            if jobs:
                collection.insert_many(jobs)
            print(f"Saved {len(jobs)} jobs to MongoDB!")

    except Exception as e:
        print(f"Error with MongoDB: {str(e)}")
        print("Make sure MongoDB is running on your computer!")

# Part 3: Calculate Average Salary
# ==============================

def calculate_average_salary(jobs):
    """Average the single-figure yearly salaries in the jobs' ``salary`` strings.

    Only salary strings that mention ``year`` are considered. Salary ranges
    are skipped on purpose: a string counts as a range when it contains more
    than one number, whatever separator is used (``-``, an en dash, ``to``).
    Thousands separators are removed and cents are ignored, so
    ``"$75,000.00 per year"`` is read as 75000.

    Args:
        jobs: Iterable of job dicts; a missing or empty ``salary`` is skipped.

    Returns:
        The mean salary (numpy float), or ``0`` when no usable salary was found.
    """
    import re  # local import: only this function needs it

    # Whole-dollar part of a number; an optional ".cents" tail is consumed but dropped
    amount_pattern = re.compile(r'(\d[\d,]*)(?:\.\d+)?')

    salaries = []

    for job in jobs:
        salary = str(job.get('salary') or '')
        if 'year' not in salary.lower():
            continue

        amounts = amount_pattern.findall(salary)
        if len(amounts) != 1:  # No number at all, or a salary range - skip
            continue

        # Get just the numbers from the salary
        num = int(amounts[0].replace(',', ''))
        salaries.append(num)
        print(f"Found salary: ${num:,}")

    if salaries:
        avg = np.mean(salaries)
        print(f"\nFound {len(salaries)} salaries to average")
        return avg
    return 0

# Part 4: Django Setup
# ===================
"""
# settings.py - Add these to INSTALLED_APPS:
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'jobs',  # add your app here
]

# models.py
from django.db import models

class JobListing(models.Model):
    title = models.CharField(max_length=200)
    company = models.CharField(max_length=200)
    location = models.CharField(max_length=200)
    salary = models.CharField(max_length=100)
    date_scraped = models.DateField()

    def __str__(self):
        return f"{self.title} at {self.company}"

    class Meta:
        ordering = ['-date_scraped']  # Show newest first

# admin.py
from django.contrib import admin
from .models import JobListing

@admin.register(JobListing)
class JobListingAdmin(admin.ModelAdmin):
    list_display = ('title', 'company', 'location', 'salary', 'date_scraped')
    search_fields = ('title', 'company', 'location')
    list_filter = ('date_scraped', 'location')
"""

# Let's run everything!
# ====================

# Driver for this cell: build sample data, store it, summarise it, then
# print the follow-up steps for wiring the data into Django admin.
if __name__ == "__main__":
    print("Creating sample jobs...")
    jobs = create_sample_jobs()

    print("\nSaving to MongoDB...")
    save_to_mongodb(jobs)

    print("\nCalculating average salary...")
    avg_salary = calculate_average_salary(jobs)
    print(f"Average salary: ${avg_salary:,.2f}")

    # Closing checklist, printed one line at a time
    next_steps = (
        "\nDone! Now you can:",
        "1. Set up your Django project",
        "2. Copy the Django code above into your project",
        "3. Run migrations: python manage.py makemigrations",
        "4. Apply migrations: python manage.py migrate",
        "5. Create admin user: python manage.py createsuperuser",
        "6. Start server: python manage.py runserver",
        "7. Go to http://127.0.0.1:8000/admin to see your jobs!",
    )
    for step in next_steps:
        print(step)

# Requirements.txt
"""
pandas==1.5.2
numpy==1.23.5
pymongo==4.3.3
Django==4.1.4
"""

Creating sample jobs...
Created 5 sample jobs!

Saving to MongoDB...
Connecting to MongoDB...
Error with MongoDB: localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms), Timeout: 30s, Topology Description: <TopologyDescription id: 67b06dc1c4d29e7c903dce5a, topology_type: Unknown, servers: [<ServerDescription ('localhost', 27017) server_type: Unknown, rtt: None, error=AutoReconnect('localhost:27017: [WinError 10061] No connection could be made because the target machine actively refused it (configured timeouts: socketTimeoutMS: 20000.0ms, connectTimeoutMS: 20000.0ms)')>]>
Make sure MongoDB is running on your computer!

Calculating average salary...
Found salary: $75,000
Found salary: $95,000
Found salary: $90,000

Found 3 salaries to average
Average salary: $86,666.67

Done! Now you can:
1. Set up your Django project
2. Copy the Django code above into your

'\npandas==1.5.2\nnumpy==1.23.5\npymongo==4.3.3\nDjango==4.1.4\n'