In [9]:
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.head
import org.jetbrains.kotlinx.dataframe.io.readCSV

// Load the world-data-2023 CSV from the project's resources (relative path).
// Declared as `val`: the frame is never reassigned in this notebook; each later step
// derives a new frame from it instead of mutating this binding.
val df = DataFrame.readCSV(
    fileOrUrl = "../src/main/resources/world-data-2023.csv",
    delimiter = ',',
)
// Preview the first 10 rows.
df.head(10)

In [6]:
// Per-column summary of the loaded frame, used as a quick sanity check before the analysis.
df.describe()

**Task 1: Find top-10 countries by Total CO2 emissions**

In [25]:
// Top 10 countries by total CO2 emissions: keep the two relevant columns,
// order by emissions (largest first) and keep the first ten rows.
val df1 = df
    .select { country and co2_emissions }
    .sortByDesc { co2_emissions }
    .take(10)

**Task 2: Build a Bar Plot**

In [26]:
// Bar chart of the top-10 frame: one bar per country, bar height = total CO2 emissions.
df1.plot {
    bars {
        x(country)
        y(co2_emissions)
    }
}

**Task 3: Find top-10 countries by Total CO2 emissions per Person**

// Keep only rows where both population and CO2 emissions are present, then make both
// columns numeric. Missing values are dropped rather than imputed: a placeholder such as
// 1.0 in either column yields a meaningless per-person ratio (1.0 / 1.0 = 1.0 when both
// are missing), which would distort any per-person ranking computed from this frame.
val df2 = df
    .dropNA { population and co2_emissions }
    .convert { population and co2_emissions }.toDouble()

In [28]:
// Add a "co2_density" column: total CO2 emissions divided by population (emissions per person).
val df3 = df2.add("co2_density") { co2_emissions / population }

In [29]:
// Top 10 countries by CO2 emissions per person: rank the rows first,
// then project the ten leaders down to the two columns of interest.
df3
    .sortByDesc { co2_density }
    .take(10)
    .select { country and co2_density }

**Task 4: Build a Bar Plot of absolute CO2 Emissions, colored by a gradient of CO2 Emissions per Person**

In [32]:
// Top 10 emitters by absolute CO2 emissions. Bar height shows total emissions;
// bar fill encodes emissions per person on a continuous green-to-red scale.
df3
    .sortByDesc { co2_emissions }
    .take(10)
    .plot {
        layout.title = "CO2 Emissions Analysis"
        bars {
            x(country) { axis.name = "Country" }
            y(co2_emissions) { axis.name = "CO2 Emission" }
            fillColor(co2_density) {
                legend.name = "CO2 Emissions per Person"
                scale = continuous(Color.GREEN..Color.RED)
            }
            // Slightly translucent bars.
            alpha = 0.8
        }
    }

**Task 5: Calculate the geographical midpoint and the average forested area for each group of countries sharing the same official language**

In [40]:
// Official language, forested area and coordinates per country, restricted to rows that
// have a real official language (not null, not the literal "None") and a forested-area value.
val rawLanguages = df
    .select { official_language and forested_area and longitude and latitude }
    .dropNulls { official_language and forested_area }
    .filter { official_language != "None" }
    // `!!` cannot fail here: rows with a null forested_area were dropped above.
    // The "%" sign is stripped so the remaining text can be converted to a number.
    .update { forested_area }.with { it!!.replace("%", "") }
    .convert { forested_area }.toDouble()

rawLanguages

In [45]:
// One row per official language, carrying the mean forested area and the mean coordinates
// of the countries in that group. The coordinates are plain arithmetic means of the
// latitude and longitude columns.
val languages = rawLanguages
    .groupBy { official_language }
    .aggregate {
        mean { forested_area } into "forested_area"
        mean { latitude } into "latitude"
        mean { longitude } into "longitude"
    }

languages

**Task 6: Find the best option to visualize this dataframe among charts available with Kandy**

In [58]:
// Scatter plot of the per-language aggregates: each point sits at a language group's mean
// longitude/latitude, is colored by language and is sized by mean forested area.
languages.plot {
    points {
        x(longitude)
        y(latitude)
        // One distinct color per official language.
        color(official_language) {
            scale = categorical()
        }
        // Point size follows forested area on a log10-transformed continuous scale;
        // the size legend is hidden.
        size(forested_area) {
            legend.type = LegendType.None
            scale = continuous(transform = Transformation.LOG10)
        }
    }
}