In [1]:
import pandas as pd

data = pd.read_csv("ai_job_dataset.csv")
data.head()

Unnamed: 0,job_id,job_title,salary_usd,salary_currency,experience_level,employment_type,company_location,company_size,employee_residence,remote_ratio,required_skills,education_required,years_experience,industry,posting_date,application_deadline,job_description_length,benefits_score,company_name
0,AI00001,AI Research Scientist,90376,USD,SE,CT,China,M,China,50,"Tableau, PyTorch, Kubernetes, Linux, NLP",Bachelor,9,Automotive,2024-10-18,2024-11-07,1076,5.9,Smart Analytics
1,AI00002,AI Software Engineer,61895,USD,EN,CT,Canada,M,Ireland,100,"Deep Learning, AWS, Mathematics, Python, Docker",Master,1,Media,2024-11-20,2025-01-11,1268,5.2,TechCorp Inc
2,AI00003,AI Specialist,152626,USD,MI,FL,Switzerland,L,South Korea,0,"Kubernetes, Deep Learning, Java, Hadoop, NLP",Associate,2,Education,2025-03-18,2025-04-07,1974,9.4,Autonomous Tech
3,AI00004,NLP Engineer,80215,USD,SE,FL,India,M,India,50,"Scala, SQL, Linux, Python",PhD,7,Consulting,2024-12-23,2025-02-24,1345,8.6,Future Systems
4,AI00005,AI Consultant,54624,EUR,EN,PT,France,S,Singapore,100,"MLOps, Java, Tableau, Python",Master,0,Media,2025-04-15,2025-06-23,1989,6.6,Advanced Robotics


In [2]:
import os
from dotenv import load_dotenv

from langchain_groq.chat_models import ChatGroq

In [3]:
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("MY_API_KEY")

llm = ChatGroq(model_name="qwen/qwen3-32b")

In [4]:
from pandasai import Agent

In [5]:
import os
from IPython.display import Image, display
from pandasai.responses.response_parser import ResponseParser

class PandasDataFrame(ResponseParser):
    def __init__(self, context) -> None:
        super().__init__(context)

    def format_plot(self, result):
        file_path = "export/charts/temp_chart.png"
        if os.path.exists(file_path):
            return display(Image(filename=file_path))
        else:
            import matplotlib.pyplot as plt
            plt.show() 
            return result

In [6]:
config = {'llm':llm, "response_parser":PandasDataFrame, "max_retires":4,"enable_cache": False} 

In [7]:
field_descriptions={
  "job_id": "Unique identifier for each job posting",
  "job_title": "Title or role being advertised",
  "salary_usd": "Annual salary converted to USD for standardization",
  "salary_currency": "Original currency in which the salary is offered",
  "experience_level": "Experience level required (EN-Entry, MI-Mid, SE-Senior, EX-Executive)",
  "employment_type": "Type of employment contract (FT-Full-time, PT-Part-time, CT-Contract, FL-Freelance)",
  "company_location": "Country where the hiring company is based",
  "company_size": "Size of the company (S-Small, M-Medium, L-Large)",
  "employee_residence": "Country of residence of the employee or job seeker",
  "remote_ratio": "Percentage of remote work allowed (0-Onsite, 50-Hybrid, 100-Remote)",
  "required_skills": "List of key technical skills required (comma-separated)",
  "education_required": "Minimum education level required for the job",
  "years_experience": "Minimum years of experience required",
  "industry": "Industry or sector to which the job belongs",
  "posting_date": "Date when the job was posted (DD-MMM-YYYY)",
  "application_deadline": "Last date to apply for the job (DD-MMM-YYYY)",
  "benefits_score": "Score (0-10) reflecting the quality of job benefits",
  "company_name": "Name of the company offering the job"
}


In [8]:
from pandasai.connectors import PandasConnector
data_df_connector = PandasConnector({"original_df":data},field_descriptions=field_descriptions)

In [9]:
agent =  Agent(data_df_connector, memory_size=5,
               description="You are a  data analysis agent. Your main goal is to help non-techincal users analyze data.", config=config)

In [10]:
agent.chat("How many rows are there?")

15000

In [11]:
agent.chat("How many columns are there?")

19

In [12]:
agent.chat("provide statistics about data, in a table form")

Unnamed: 0,Unique Values,Most Frequent,Frequency,mean,std,min,25%,75%,max
job_id,15000.0,AI00001,1.0,,,,,,
job_title,20.0,Machine Learning Researcher,808.0,,,,,,
salary_usd,,,,115348.97,60260.94,32519.0,70179.75,146408.5,399095.0
salary_currency,3.0,USD,11957.0,,,,,,
experience_level,4.0,MI,3781.0,,,,,,
employment_type,4.0,FT,3812.0,,,,,,
company_location,20.0,Germany,814.0,,,,,,
company_size,3.0,S,5007.0,,,,,,
employee_residence,20.0,Sweden,790.0,,,,,,
remote_ratio,,,,49.48,40.81,0.0,0.0,100.0,100.0


In [13]:
agent.chat("Show the missing values of data, in a table form")

Unnamed: 0,Column,Missing_Count
0,job_id,0
1,job_title,0
2,salary_usd,0
3,salary_currency,0
4,experience_level,0
5,employment_type,0
6,company_location,0
7,company_size,0
8,employee_residence,0
9,remote_ratio,0


In [14]:
agent.start_new_conversation()

In [15]:
agent.chat("find the job requirements: job title must be Data Engineer, Remote job")

Unnamed: 0,job_id,job_title,required_skills,experience_level,years_experience,education_required,salary_usd,company_location,company_size
39,AI00040,Data Engineer,"Computer Vision, Azure, Tableau",EX,15,Associate,166080,Netherlands,L
74,AI00075,Data Engineer,"Scala, R, Tableau, Data Visualization",EN,0,PhD,51920,Sweden,S
75,AI00076,Data Engineer,"Mathematics, Kubernetes, Spark",EN,0,Master,56427,Singapore,M
104,AI00105,Data Engineer,"TensorFlow, GCP, PyTorch, Spark",EX,13,Master,213287,United Kingdom,S
169,AI00170,Data Engineer,"Spark, Linux, AWS, Deep Learning",EX,13,Master,145355,Canada,S
...,...,...,...,...,...,...,...,...,...
14728,AI14729,Data Engineer,"Kubernetes, Linux, Python",MI,2,Bachelor,100644,Netherlands,M
14767,AI14768,Data Engineer,"Python, Docker, Data Visualization, Azure",MI,4,PhD,77365,Australia,M
14798,AI14799,Data Engineer,"R, Git, Deep Learning, Mathematics",MI,3,Associate,66322,India,L
14856,AI14857,Data Engineer,"Git, Linux, Azure, Java, Scala",SE,8,Associate,118900,Germany,L


In [16]:
agent.chat("find the job requirements: job title must be Data Engineer, Remote job for years_experience is 0")

'Required skills: AWS, Azure, Computer Vision, Data Visualization, Deep Learning, Docker, GCP, Git, Hadoop, Java, Kubernetes, Linux, MLOps, Mathematics, NLP, PyTorch, Python, R, SQL, Scala, Spark, Statistics, Tableau, TensorFlow'

In [17]:
agent.chat("Show average salary grouped by experience level as a bar chart")

{'type': 'plot',
 'value': 'C:/Users/vigne/Downloads/Data-Analytics-Agent.85e8634a-d870-421c-a7dd-1ae6853c3261/exports/charts/temp_chart.png'}