# Analysing the data

Before starting the task, let's analyze the data. We begin by extracting the list of genres, the number of rows that carry each genre, and the average rating per genre.

In [None]:
import pandas as pd

# Read the anime dataset, restricted to the columns listed below.
file_path = 'data/anime.csv'
columns = ['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']
df = pd.read_csv(file_path, usecols=columns)

# Rows lacking a genre or a rating cannot contribute to per-genre statistics.
df = df.dropna(subset=['genre', 'rating'])

# Split each 'genre' string on commas, emit one row per resulting genre,
# and trim the whitespace that the split leaves around the names.
genres_df = (
    df.assign(genre=df['genre'].str.split(','))
    .explode('genre')
    .assign(genre=lambda frame: frame['genre'].str.strip())
)

# For every genre: number of rows and mean rating, ordered from the
# highest average rating to the lowest.
genre_stats = (
    genres_df.groupby('genre')
    .agg(count=('genre', 'size'), avg_rating=('rating', 'mean'))
    .reset_index()
    .sort_values(by='avg_rating', ascending=False)
)

# Summary figures first, then the full per-genre table.
print(f"Total rows: {len(df)}")
print(f"Total genres: {genres_df['genre'].nunique()}")
print(genre_stats)


            genre  count  avg_rating
14          Josei     52    7.443077
39       Thriller     86    7.382326
21        Mystery    485    7.232412
23         Police    195    7.122051
32        Shounen   1684    7.057553
24  Psychological    226    7.010398
19       Military    416    7.004904
38   Supernatural   1001    7.000380
25        Romance   1437    6.999054
31      Shoujo Ai     54    6.978148
6           Drama   1977    6.977218
27         School   1176    6.972049
29         Seinen    532    6.966353
10          Harem    313    6.965687
33     Shounen Ai     62    6.961613
37    Super Power    451    6.906785
40        Vampire    100    6.884600
30         Shoujo    594    6.875976
26        Samurai    146    6.853836
17   Martial Arts    264    6.819356
16          Magic    747    6.805890
0          Action   2768    6.787150
9            Game    177    6.783333
36         Sports    533    6.777974
12     Historical    798    6.746216
1       Adventure   2316    6.742539
3