# Job Scraper - Data Exploration

Explore job listings scraped from FINN.no and NAV.no, loaded from the local SQLite database `jobs.db`.

In [5]:
import sqlite3
import pandas as pd

## Connect to Database

In [11]:
# Open the database read-only through a SQLite URI.
# A plain sqlite3.connect('jobs.db') silently creates an empty file when the
# path (or the working directory) is wrong, and the query below then fails with
# a confusing "no such table: jobs". With mode=ro a missing file raises
# sqlite3.OperationalError at connect time instead, and this exploration
# notebook cannot accidentally modify the scraped data.
conn = sqlite3.connect('file:jobs.db?mode=ro', uri=True)

# Load all jobs into a DataFrame
df = pd.read_sql_query("SELECT * FROM jobs", conn)

# Preview the first row to confirm the load worked and to see the columns
df.head(1)

Unnamed: 0,id,title,company,location,url,source,keywords,deadline,job_type,published,scraped_date,description
0,1,Fagleder nettverk,Bergen kommune,Vestland,https://www.finn.no/job/ad/438277687,FINN,data science,07.12.2025,,"21.11.2025, 13:03",2025-11-23T15:28:52.108931,Vil du lede utviklingen av den digitale ryggra...


## Basic Statistics

In [12]:
# Number of listings per value of the 'source' column
print("Jobs by source:")
source_counts = df['source'].value_counts()
print(source_counts)
print()

# Number of distinct company names (exact string comparison)
n_companies = df['company'].nunique()
print(f"Unique companies: {n_companies}")
print()

# Most frequent company names; head() without an argument returns 5 rows.
# NOTE(review): the comparison is case-sensitive, so spellings that differ only
# in letter case are tallied as separate companies.
print("Top 5 companies:")
company_counts = df['company'].value_counts()
print(company_counts.head())

Jobs by source:
source
FINN    8
NAV     5
Name: count, dtype: int64

Unique companies: 10

Top 5 companies:
company
Helse Vest IKT    2
Vizir As          2
Vizir AS          2
Bergen kommune    1
Folk AS           1
Name: count, dtype: int64


## Jobs by Location

In [9]:
# Show the ten most common values of the 'location' column, most frequent first
print("Jobs by location:")
location_counts = df['location'].value_counts()
location_counts.head(10)

Jobs by location:


location
Vestland    8
Bergen      5
Name: count, dtype: int64

## Search by Keywords

In [None]:
# Search the 'keywords' column for a term (change `keyword` to look for
# something else)
keyword = 'python'

# regex=False matches the term as literal text. With the default regex=True a
# term such as 'c++' raises re.error and '.net' matches any character followed
# by "net". case=False ignores letter case; na=False treats rows without
# keywords as non-matching instead of propagating NaN into the mask.
keyword_mask = df['keywords'].str.contains(keyword, case=False, na=False, regex=False)
results = df[keyword_mask]
print(f"Jobs with '{keyword}': {len(results)}")

# Display results
results[['title', 'company', 'location', 'source', 'keywords']]

## Jobs with Deadlines

In [None]:
# Keep only the rows whose 'deadline' value is not null
has_deadline = df['deadline'].notna()
jobs_with_deadlines = df[has_deadline]
print(f"Jobs with deadlines: {len(jobs_with_deadlines)}")

# Show the columns needed to act on a deadline
deadline_columns = ['title', 'company', 'deadline', 'url']
jobs_with_deadlines[deadline_columns]

## View Specific Job Details

In [None]:
# Print the main fields of one job, selected by position
# (change job_index to see different jobs)
job_index = 0

job = df.iloc[job_index]

# (printed label, DataFrame column) pairs, in display order
detail_fields = [
    ("Title", 'title'),
    ("Company", 'company'),
    ("Location", 'location'),
    ("Source", 'source'),
    ("Keywords", 'keywords'),
    ("Deadline", 'deadline'),
    ("Job Type", 'job_type'),
    ("URL", 'url'),
    ("Scraped", 'scraped_date'),
]
for label, column in detail_fields:
    print(f"{label}: {job[column]}")

## Filter by Source

In [None]:
# Restrict the listing to rows whose 'source' is exactly 'FINN'
is_finn = df['source'] == 'FINN'
finn_jobs = df[is_finn]
print(f"FINN jobs: {len(finn_jobs)}")

summary_columns = ['title', 'company', 'location']
finn_jobs[summary_columns]

In [None]:
# Restrict the listing to rows whose 'source' is exactly 'NAV'
is_nav = df['source'] == 'NAV'
nav_jobs = df[is_nav]
print(f"NAV jobs: {len(nav_jobs)}")

summary_columns = ['title', 'company', 'location']
nav_jobs[summary_columns]

## Export to CSV

In [13]:
# Write every loaded job to a CSV file in the working directory;
# index=False leaves the DataFrame's row index out of the file
export_path = 'jobs_export.csv'
df.to_csv(export_path, index=False)
print("Exported to jobs_export.csv")

Exported to jobs_export.csv


## Cleanup

In [14]:
# Close the SQLite connection that was opened to load the jobs table.
# `df` was already read into memory, so it remains usable after this.
conn.close()