# Python technologies statistics

In [None]:
import datetime
import os
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the DOU and Work.ua vacancy exports, then stack them into a single
# frame with a fresh 0..n-1 index so rows from both sources are uniquely
# addressable.
df_dou, df_work_ua = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../output/dou_vacancies.csv",
        "../output/work_ua_vacancies.csv",
    )
)

df = pd.concat((df_dou, df_work_ua), ignore_index=True)

In [None]:
# Preview the first five rows of the combined frame (notebook cell output).
df.head(n=5)

In [None]:
# Directory that receives the rendered charts; created on first run and
# silently reused afterwards.
figures_dir = "../output/figures"
os.makedirs(figures_dir, exist_ok=True)

# Local calendar date as YYYY-MM-DD, embedded in the saved figure file names.
current_date = datetime.date.today().isoformat()

In [None]:
def _split_technologies(raw):
    """Turn a comma-separated technology string into a list of clean names.

    Surrounding whitespace is stripped from every name and empty entries
    (from leading/trailing/doubled commas) are dropped, so that
    "Django, Flask" and "Django,Flask" produce the same names and no blank
    "technology" is counted later.

    Non-string values (missing values, or lists produced by a previous run
    of this cell) are returned unchanged, which keeps the cell safe to
    re-execute.
    """
    if not isinstance(raw, str):
        return raw
    names = (part.strip() for part in raw.split(","))
    return [name for name in names if name]


df["technologies"] = df["technologies"].apply(_split_technologies)

In [None]:
# One row per (vacancy, technology) pair, then the number of occurrences of
# each technology, sorted from most to least frequent.
technology_mentions = df["technologies"].explode()
tech_counts = technology_mentions.value_counts()

In [None]:
# Bar chart of the 15 most frequently mentioned technologies (absolute counts).
counts_fig = plt.figure(figsize=(12, 8))
counts_ax = tech_counts.head(15).plot.bar(color="skyblue")

counts_ax.set_title("Most Popular Python Technologies in Job Listings by counts")
counts_ax.set_xlabel("Technology")
counts_ax.set_ylabel("Number of Vacancies")

# Slant the technology names so long labels do not overlap.
plt.setp(counts_ax.get_xticklabels(), rotation=45, ha="right")
counts_ax.grid(axis="y", linestyle="--", alpha=0.7)

counts_fig.tight_layout()
# Save before show(): the dated file name keeps one image per run day.
counts_fig.savefig(f"{figures_dir}/tech_counts_{current_date}.png")
plt.show()

In [None]:
# Share of all collected vacancies (rows of df) that mention each technology,
# expressed in percent.
total_vacancies = df.shape[0]
tech_percentage = tech_counts.div(total_vacancies).mul(100)

In [None]:

# Bar chart of the 15 most frequently mentioned technologies (percent of vacancies).
percentage_fig = plt.figure(figsize=(12, 8))
percentage_ax = tech_percentage.head(15).plot.bar(color="skyblue")

percentage_ax.set_title("Most Popular Python Technologies in Job Listings by percentage")
percentage_ax.set_xlabel("Technology")
percentage_ax.set_ylabel("Percentage")

# Slant the technology names so long labels do not overlap.
plt.setp(percentage_ax.get_xticklabels(), rotation=45, ha="right")
percentage_ax.grid(axis="y", linestyle="--", alpha=0.7)

percentage_fig.tight_layout()
# Save before show(): the dated file name keeps one image per run day.
percentage_fig.savefig(f"{figures_dir}/tech_percentage_{current_date}.png")
plt.show()