In [1]:
// Load the Netflix titles CSV (comma-delimited) from the project's resources directory.
// NOTE(review): `df` is declared with `var` but is not reassigned in the cells visible here;
// consider `val` if no later cell reassigns it.
var df = DataFrame.readCSV(
    fileOrUrl = "../src/main/resources/netflix_titles.csv",
    delimiter = ','
)

// Preview the first rows of the loaded frame.
df.head()

In [2]:
// Per-column summary of the loaded frame, displayed as the cell output.
df.describe()

**Content Type on Netflix**

In [3]:
// Number of rows per value of the `type` column.
val contentTypes = df
    .groupBy { type }
    .count()

contentTypes

In [4]:
// Custom theme with blank lines and blank axes, so the chart is drawn without axis decoration.
val blankTheme = theme {
    global.line { blank = true }
    blankAxes()
}

// Donut chart of content types: slice size comes from `count`, fill colour from `type`.
contentTypes.plot {
    pie {
        fillColor(type)
        slice(count)
        // A non-zero hole turns the pie into a ring.
        hole = 0.5
        size = 30.0
        // White outline between slices.
        strokeColor = Color.WHITE
        stroke = 1.0
    }
    layout { theme(blankTheme) }
}

**Growth in content over the years**

In [5]:
// Row counts per (type, release_year) pair, ordered by release year descending.
val contentGrowth = df
    .groupBy { type and release_year }
    .count()
    .sortByDesc { release_year }

contentGrowth

In [6]:
// Line chart of title count against release year, coloured by content type.
contentGrowth.plot {
    line {
        x(release_year)
        y(count)
        // The column is referenced by its string name here.
        // NOTE(review): presumably because a bare `type` inside `line { }` would resolve to the
        // layer's own line-type setting rather than the column; confirm before changing.
        color("type")
        alpha = 0.6
        width = 2.0
    }
}

**TV-Show Directors from the USA with the most content**

In [7]:
// Top 10 directors of US TV shows, ranked by number of titles.
//
// The filter keeps "TV Show" rows so the data matches the "TV-Show Directors" heading and the
// chart labels that use this table (it previously selected "Movie" rows).
// NOTE(review): assumes the dataset spells the value exactly "TV Show"; verify against the CSV.
//
// `country` is compared by exact equality, so only rows whose country value is exactly
// "United States" are kept. Rows without a director are dropped before grouping.
val directors = df
    .filter { country == "United States" && type == "TV Show" && director != null }
    .groupBy { director }
    .count()
    .sortByDesc("count")
    .take(10)

directors

In [8]:
// Horizontal bar chart of the `directors` table: one bar per director, bar length from `count`.
directors.plot {
    barsH {
        x(count) { axis.name = "Number of TV-Shows" }
        y(director) { axis.name = "Directors" }
        // Each director gets its own hue from a categorical colour scale.
        fillColor(director) { scale = categoricalColorHue() }
        alpha = 0.75
    }
    layout.title = "TV-Show Directors from the USA with the most of content"
}

**Rating Heatmap through the years**

In [9]:
// Row counts per (rating, release_year) pair, newest release year first.
// Only rows with a rating, a country value of exactly "United States", and a release year
// after 2013 are included. Despite the name, the grouping is by rating and year, not by country.
val ratingsByCountry = df
    .filter {
        rating != null &&
            country == "United States" &&
            release_year > 2013
    }
    .groupBy { rating and release_year }
    .count()
    .sortByDesc { release_year }

ratingsByCountry

In [10]:
// Heatmap: rating on x, release year on y, tile fill driven by the row count.
ratingsByCountry.plot {
    tiles {
        x(rating)
        // Continuous colour scale over the range from red to green.
        fillColor(count) { scale = continuous(Color.RED..Color.GREEN) }
        y(release_year) {
            // Years are placed on a categorical scale; axis breaks use the "d" format.
            scale = categorical()
            axis.breaks(format = "d")
        }
    }
}