In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib import rcParams


# Language-identification illustration: each row shows a sentence in a white
# box on the left, joined by a horizontal line to a coloured box on the right
# that names the sentence's language. The figure is saved as "langid.pdf".
DEFAULT_FONT = 'DejaVu Sans'
rcParams['font.family'] = DEFAULT_FONT

# Per-language font overrides for the sentence labels. Keyed by language name
# rather than by row index, so reordering or adding rows cannot attach the
# font to the wrong sentence.
# NOTE(review): presumably needed because the default font lacks Japanese
# glyphs — confirm 'Droid Sans Japanese' is installed on the build machine.
FONT_BY_LANGUAGE = {"Japanese": 'Droid Sans Japanese'}

texts = ["Life is beautiful", "Livet er smukt", "人生は素晴らしい", "La vida és bella"]
languages = ["English", "Danish", "Japanese", "Catalan"]
is_correct = [True, True, True, True]

# Row 0 is drawn at the bottom of the axes (y = 0), so reverse both lists to
# get the first sentence at the top of the figure.
texts.reverse()
languages.reverse()

# Every language box gets the same fill; is_correct only fixes how many
# colours are generated (its values are not inspected).
colors = ['#a8e6a3' for _ in is_correct]

fig, ax = plt.subplots(figsize=(10, 4))
y_positions = range(len(texts))

for y, text, lang, color in zip(y_positions, texts, languages, colors):
    # Sentence box spans x in [-0.5, 3.5]; its label is centred at x = 1.5.
    ax.add_patch(Rectangle((-0.5, y - 0.4), width=4, height=0.8,
                           edgecolor='black', facecolor="white"))
    # The font is chosen per label instead of by mutating the global rcParams
    # inside the loop.
    ax.text(1.5, y, text, ha="center", va="center", fontsize=12, fontweight="bold",
            fontfamily=FONT_BY_LANGUAGE.get(lang, DEFAULT_FONT))

    # Language box spans x in [5, 7]; its label is centred at x = 6.
    ax.add_patch(Rectangle((5, y - 0.4), width=2, height=0.8,
                           edgecolor='black', facecolor=color))
    ax.text(6, y, lang, ha="center", va="center", fontsize=12, fontweight="bold")

    # Connector from the right edge of the sentence box to the language box.
    ax.plot([3.5, 5], [y, y], color="black", lw=1)

ax.set_xlim(-1, 8)
ax.set_ylim(-1, len(texts))
ax.axis("off")
plt.savefig("langid.pdf", bbox_inches='tight')
plt.show()



In [None]:
import matplotlib.font_manager
# Print the `name` of every entry in matplotlib's font-manager list, e.g. to
# look up the family names that can be used for figure labels.
font_names = []
for font_entry in matplotlib.font_manager.fontManager.ttflist:
    font_names.append(font_entry.name)
print(font_names)

### Training time

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Training-time comparison: one line per model, plotting training time
# (in minutes) against the number of training instances. The figure is saved
# as "train.pdf".
sns.set_style("whitegrid")

SECONDS_PER_MINUTE = 60

models = ['textcat', 'Naïve Bayes', 'FastText']
instances = [500000, 1000000, 2033999, 6101997]

# Raw measurements, one value per entry of `instances`.
# NOTE(review): presumably seconds, given the division by 60 and the
# "minutes" axis label — confirm against the experiment logs.
training_times_raw = {
    'textcat': [184.61, 324.86, 541.60, 1330.26],
    'Naïve Bayes': [292.68, 541.79, 1072.8, 2901.12],
    'FastText': [378.49, 752.1, 1591.935, 4535.13]
}

# Converted values are kept under the original name `training_times`; the raw
# data stays available separately instead of being overwritten.
training_times = {
    name: [value / SECONDS_PER_MINUTE for value in values]
    for name, values in training_times_raw.items()
}

print(training_times)


plt.figure(figsize=(8, 6))

for model in models:
    plt.plot(instances, training_times[model], marker='o', label=model)

plt.xlabel('Number of Instances', fontsize=20)
plt.ylabel('Training Time (minutes)', fontsize=20)
plt.grid(True, which="both", linestyle='--', linewidth=0.5)

plt.legend(fontsize=18)
plt.tick_params(axis='both', labelsize=16)


# Plain string literal: the original f-string had no placeholders.
plt.savefig("train.pdf", format="pdf", bbox_inches="tight")
plt.show()
