In [None]:
import sys
import os

# Make project-local packages importable: '..' is resolved against the current
# working directory and appended to sys.path only if it is not already there.
# NOTE(review): this presumes the kernel's working directory is one level below
# the project root (where `configs` lives) — confirm.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

# `configs` is a project-local package; this import relies on the sys.path
# adjustment above having already run.
from configs import utils
import polars as pl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Hide the per-column data-type row when Polars renders tables.
pl.Config.set_tbl_hide_column_data_types(True)

# Load the dog metadata table through the project helper.
# NOTE(review): `utils.load_data` is not visible here; it presumably returns a
# Polars DataFrame, since later cells call Polars methods on the result — confirm.
df_dog_info = utils.load_data("../data/raw", "DogInfo.csv")

In [None]:
# Summary statistics for the dog-info table, shown as the cell's output.
df_dog_info.describe()

In [None]:
# Count of null values per column, shown as the cell's output.
df_dog_info.null_count()

In [None]:
# Number of dogs per breed, ordered from the most to the least frequent breed.
counts = (
    df_dog_info
    .group_by('Breed')
    .agg(pl.len().alias('count'))
    .sort('count', descending=True)
)

# Bar chart of the per-breed counts, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(14, 6))
sns.barplot(data=counts, x='Breed', y='count', ax=ax)
ax.set_title('Distribuição de Raças de Cães')
ax.set_xlabel('Raça')
ax.set_ylabel('Quantidade')
# Slant the breed names and right-align them so each stays under its own bar.
plt.setp(ax.get_xticklabels(), rotation=60, ha='right')
fig.tight_layout()
# Counts are whole numbers, so restrict the y axis to integer ticks.
ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))  # type: ignore
fig.savefig("../reports/figures/distribuicao_racas_caes.png")
plt.show()


In [None]:
# Add an "Age" column holding "Age months" divided by 12. Re-running the cell
# simply recomputes the same column from "Age months".
age_in_years = pl.col("Age months") / 12
df_dog_info = df_dog_info.with_columns(age_in_years.alias("Age"))

# Single-column frame reused for both the histogram and the summary below.
age_frame = df_dog_info.select("Age")

# Histogram of the ages with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(age_frame, bins=30, kde=True, ax=ax)
ax.set_title('Distribuição de Idade dos Cães')
ax.set_xlabel('Idade (em anos)')
ax.set_ylabel('Quantidade')
fig.tight_layout()
# The y axis shows counts, so restrict it to integer ticks.
ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))  # type: ignore
fig.savefig("../reports/figures/distribuicao_idade_caes.png")
plt.show()

# Text summary of the "Age" column, printed after the figure.
print(age_frame.describe())

In [None]:
# Single-column frame reused for both the histogram and the summary below.
weight_frame = df_dog_info.select("Weight")

# Histogram of the "Weight" column with a kernel density estimate overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(weight_frame, bins=30, kde=True, ax=ax)
ax.set_title('Distribuição de Peso dos Cães')
ax.set_xlabel('Peso (em kg)')
ax.set_ylabel('Quantidade')
fig.tight_layout()
# The y axis shows counts, so restrict it to integer ticks.
ax.yaxis.set_major_locator(plt.MaxNLocator(integer=True))  # type: ignore
fig.savefig("../reports/figures/distribuicao_peso_caes.png")
plt.show()

# Text summary of the "Weight" column, printed after the figure.
print(weight_frame.describe())


In [None]:
# Number of dogs per `Gender` value. Polars `group_by` makes no promise about
# the order of the resulting groups, so sort by the key to keep the slice order
# stable from run to run.
gender_counts = (
    df_dog_info
    .group_by('Gender')
    .agg(pl.len().alias('count'))
    .sort('Gender')
)

# Build every slice label from the group key it actually belongs to instead of
# from a fixed positional tuple: a positional tuple can end up on the wrong
# slice when the group order changes, and it makes `plt.pie` fail when the
# number of groups is not exactly two (e.g. when the column contains nulls).
# NOTE(review): the raw encodings listed below are assumptions — confirm them
# against DogInfo.csv and extend the table as needed. Any value that is not
# listed is shown as-is, so a slice is never given the wrong label.
gender_display_names = {
    "f": "Fêmeas",
    "female": "Fêmeas",
    "fêmea": "Fêmeas",
    "m": "Machos",
    "male": "Machos",
    "macho": "Machos",
}
gender_labels = [
    gender_display_names.get(str(raw_value).strip().casefold(), str(raw_value))
    for raw_value in gender_counts['Gender'].to_list()
]

plt.figure(figsize=(8, 6))
plt.pie(gender_counts['count'].to_list(), labels=gender_labels, autopct='%1.1f%%')
plt.title('Distribuição por Sexo')
plt.tight_layout()
plt.savefig("../reports/figures/distribuicao_sexo_caes.png")
plt.show()


In [None]:
# Number of dogs per `NeuteringStatus` value. Polars `group_by` makes no promise
# about the order of the resulting groups, so sort by the key to keep the slice
# order stable from run to run.
neutered_counts = (
    df_dog_info
    .group_by('NeuteringStatus')
    .agg(pl.len().alias('count'))
    .sort('NeuteringStatus')
)

# Build every slice label from the group key it actually belongs to instead of
# from a fixed positional tuple: a positional tuple can end up on the wrong
# slice when the group order changes, and it makes `plt.pie` fail when the
# number of groups is not exactly two (e.g. when the column contains nulls).
# NOTE(review): the raw encodings listed below are assumptions — confirm them
# against DogInfo.csv and extend the table as needed. Any value that is not
# listed is shown as-is, so a slice is never given the wrong label.
neutering_display_names = {
    "neutered": "Castrado",
    "castrated": "Castrado",
    "castrado": "Castrado",
    "intact": "Não Castrado",
    "not neutered": "Não Castrado",
    "não castrado": "Não Castrado",
}
neutering_labels = [
    neutering_display_names.get(str(raw_value).strip().casefold(), str(raw_value))
    for raw_value in neutered_counts['NeuteringStatus'].to_list()
]

plt.figure(figsize=(8, 6))
plt.pie(neutered_counts['count'].to_list(), labels=neutering_labels, autopct='%1.1f%%')
plt.title('Distribuição por Castração')
plt.tight_layout()
plt.savefig("../reports/figures/distribuicao_castracao_caes.png")
plt.show()
