### Imports

In [39]:
import os
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

### Getting opinions about a product from the database

In [40]:
# List the product codes available locally, one per line.
# Only "*.json" entries are listed, because the loading cell reads
# "./opinions/<code>.json"; splitext() removes just that final extension, so a
# file name containing extra dots is not truncated, and dotfiles or other stray
# entries no longer show up as empty or bogus codes.
# sorted() makes the listing deterministic (os.listdir order is arbitrary).
available_products = sorted(
    os.path.splitext(filename)[0]
    for filename in os.listdir("./opinions")
    if filename.endswith(".json")
)
print(*available_products, sep="\n")

167976636
28217807
46584356;02517


In [42]:
# Ask the user for the product code. Surrounding whitespace is stripped because
# the value is interpolated verbatim into the opinions file path, where a stray
# space or newline would point at a non-existent file.
product_id = input("Enter product code: ").strip()

In [43]:
# Load the opinions saved for the selected product into a DataFrame.
opinions_path = f"./opinions/{product_id}.json"
opinions = pd.read_json(opinions_path)
# Bare last expression so the notebook renders the frame.
opinions

Unnamed: 0,opinion_id,author,recomend,stars,content_pl,pros_pl,cons_pl,up_votes,down_votes,published,purchased,content_en,pros_en,cons_en,recomendation
0,19535811,a...h,Polecam,5.0,"Świetna suszarka, spełnia wszystkie moje oczek...","[bardzo estetyczny wy, głośność pracy, mały ga...",[],0,0,2025-03-19 16:39:41,2025-03-03 07:48:53,A great dryer meets all my expectations. Dry l...,"[very aesthetic, volume of work, small size, p...",[],1.0
1,19631782,j...7,,3.5,Produkt byłby super gdyby miał końcówkę skupia...,"[bardzo estetyczny wy, głośność pracy, mały ga...",[],0,0,2025-05-03 12:34:08,2025-03-31 15:03:35,The product would be great if it had an air su...,"[very aesthetic, volume of work, small size, p...",[],
2,19170406,i...a,Polecam,5.0,"Suszarka spełnia swoją funkcję, jest bardzo po...","[głośność pracy, szybkość nagrzewania, wygląd]",[],0,0,2024-10-30 10:29:22,2024-09-28 22:59:32,"The dryer performs its function, it is very ha...","[volume of work, heating speed, appearance]",[],1.0
3,19452211,a...2,Polecam,5.0,Produkt dobrze wykonany. Moc bardzo dobra. Naj...,"[bardzo estetyczny wy, głośność pracy, mały ga...",[],0,0,2025-02-16 19:29:49,2025-01-19 22:36:40,A well -made product. Very good power. The bes...,"[very aesthetic, volume of work, small size, p...",[],1.0
4,19527462,d...a,Polecam,5.0,"Jak tylko odebrałam paczkę, od razu przetestow...","[bardzo estetyczny wy, szybkość nagrzewania, w...",[],0,0,2025-03-18 08:39:44,2025-03-14 14:35:28,"As soon as I picked up the package, I immediat...","[very aesthetic, heating speed, appearance]",[],1.0
5,19256445,o...0,Polecam,5.0,"Świetna suszarka, polecam każdemu kto zastanaw...",[],[],0,0,2024-12-03 20:13:19,2024-11-16 14:31:18,"A great dryer, I recommend it to anyone who is...",[],[],1.0
6,19574346,a...1,Polecam,5.0,"Suszarka rewelacja, suszenie zajmuje połowę mn...","[bardzo estetyczny wy, głośność pracy, szybkoś...",[],0,0,2025-04-03 06:15:41,2025-03-31 14:31:27,"A revelation dryer, drying takes half less tim...","[very aesthetic, volume of work, heating speed...",[],1.0
7,19150464,l...k,Polecam,5.0,"Mała, lekka, poręczna i dmucha jak szalona. Sz...","[głośność pracy, szybkość nagrzewania, wygląd]",[],0,0,2024-10-22 09:58:18,2024-10-19 01:03:17,"Small, light, handy and blows like crazy. It h...","[volume of work, heating speed, appearance]",[],1.0
8,19321151,k...a,Polecam,5.0,Suszarka jest niesamowita. Spełnia swoje zadan...,[],[],0,0,2024-12-25 12:09:28,2024-12-09 10:42:24,The dryer is amazing. Fulfills its task in 200...,[],[],1.0
9,19632155,e...n,Nie polecam,0.5,Otrzymałam niesprawną suszarkę. Suszarka nie w...,[],[głośność pracy],0,0,2025-05-04 07:34:12,2025-04-17 07:50:52,I received a malfunctioning dryer. The dryer d...,[],[volume of work],0.0


### Calculating basic statistics

In [44]:
# Boolean masks: True where the opinion lists at least one advantage / disadvantage.
# The truthiness of each cell is used, so an empty list counts as "nothing listed".
has_pros = opinions.pros_pl.astype(bool)
has_cons = opinions.cons_pl.astype(bool)

opinions_count = opinions.shape[0]
pros_count = has_pros.sum()
cons_count = has_cons.sum()
# Combine the two masks element-wise instead of a row-wise apply(): it avoids a
# Python-level call per row and always yields a scalar count, whereas
# apply(axis=1) on a frame without rows returns a DataFrame whose sum() is a Series.
pros_cons_count = (has_pros & has_cons).sum()
average_stars = opinions.stars.mean()

print(f"Number of opinions about the product: {opinions_count}")
print(f"Number of opinions about the product in which advantages have been listed: {pros_count}")
print(f"Number of opinions about the product in which disadvantages have been listed: {cons_count}")
print(f"Number of opinions about the product in which advantages and disadvantages have been listed: {pros_cons_count}")

print(f"Average score of the product: {average_stars:.2f}")

Number of opinions about the product: 18
Number of opinions about the product in which advantages have been listed: 11
Number of opinions about the product in which disadvantages have been listed: 1
Number of opinions about the product in which advantages and disadvantages have been listed: 0
Average score of the product: 4.64


In [45]:
# How often each advantage / disadvantage (English labels) is mentioned:
# explode() gives every list item its own row, value_counts() tallies them.
pros = opinions["pros_en"].explode().value_counts()
cons = opinions["cons_en"].explode().value_counts()
print(pros, cons, sep="\n")

pros_en
heating speed        11
appearance           11
volume of work       10
very aesthetic        6
small size            3
power                 3
great accessories     3
Name: count, dtype: int64
cons_en
volume of work    1
Name: count, dtype: int64


### Drawing charts

In [46]:
# Count opinions per recommendation value (missing values included), then put the
# categories in a fixed order, filling categories that never occur with 0.
recommendation_order = ["Polecam", "Nie polecam", None]
recomendations = (
    opinions["recomend"]
    .value_counts(dropna=False)
    .sort_index()
    .reindex(recommendation_order, fill_value=0)
)
recomendations

recomend
Polecam        16
Nie polecam     1
None            1
Name: count, dtype: int64

In [52]:
# Make sure the chart output directories exist.
# makedirs(exist_ok=True) checks and creates in a single call, so there is no
# window between an existence test and mkdir in which the directory could appear
# and make the creation fail; the loop also removes the duplicated if/mkdir pair.
for chart_dir in ("./pie_charts", "./bar_charts"):
    os.makedirs(chart_dir, exist_ok=True)

In [72]:
# Pie chart of the recommendation shares, saved to ./pie_charts/<product_id>.png.
# The chart is drawn on an explicitly created figure/axes instead of pyplot's
# implicit "current" axes, so a figure left open elsewhere in the kernel can never
# be drawn on or saved by mistake, and only this figure is closed afterwards.
fig, ax = plt.subplots()
recomendations.plot.pie(
    ax=ax,
    label="",  # empty label so no axis label is drawn next to the pie
    # English slice labels; assumes `recomendations` is ordered
    # "Polecam", "Nie polecam", missing (as produced by the reindex cell).
    labels=["Recommend", "Not recommend", "No opinion"],
    colors=["forestgreen", "crimson", "steelblue"],
    # Show the percentage only for non-empty slices.
    autopct=lambda r: f"{r:.1f}%" if r > 0 else "",
)
ax.set_title(f"Recomendations for product {product_id}\nTotal number of opinions {opinions_count}")
fig.savefig(f"./pie_charts/{product_id}.png")
# Close the figure so it is neither rendered inline nor kept in memory.
plt.close(fig)

In [65]:
# Number of opinions for every rating from 0.5 to 5.0 in half-star steps;
# ratings that never occur get a count of 0.
star_levels = list(np.arange(0.5, 5.5, 0.5))
stars = opinions["stars"].value_counts().reindex(star_levels, fill_value=0)
stars

stars
0.5     1
1.0     0
1.5     0
2.0     0
2.5     0
3.0     0
3.5     1
4.0     0
4.5     1
5.0    15
Name: count, dtype: int64

In [75]:
# Bar chart of the rating distribution, saved to ./bar_charts/<product_id>.png.
# Bar colour by rating: above 3.5 green, below 3 red, otherwise (3.0 and 3.5) blue.
bar_colors = []
for s in stars.index:
    if s > 3.5:
        bar_colors.append("forestgreen")
    elif s < 3:
        bar_colors.append("crimson")
    else:
        bar_colors.append("steelblue")

fig, ax = plt.subplots(figsize=(7, 6))
# rot=0 keeps the rating labels on the x axis horizontal.
stars.plot.bar(ax=ax, color=bar_colors, rot=0)
# Write the count above every bar.
ax.bar_label(ax.containers[0])
ax.set_xlabel("Number of stars")
ax.set_ylabel("Number of opinions")
ax.set_title(f"Number of opinions about product {product_id}\nwith particular number of stars\n Total number of opinions {opinions_count}")
fig.savefig(f"./bar_charts/{product_id}.png")
# Close the figure so it is neither rendered inline nor kept in memory.
plt.close(fig)