### Define imports

In [1]:
%useLatestDescriptors
%use dataframe
%use kandy
%trackExecution generated
%use lib-ext(0.11.0-398)


### Explore the data

In [52]:
import org.jetbrains.kotlinx.dataframe.codeGen.generateCode
import org.jetbrains.kotlinx.dataframe.codeGen.generateInterfaces

val df = DataFrame.read("/Users/urs/development/github/ai/kotlin-ai-talk/langchain4j/src/main/resources/dataset/burglaries.csv", delimiter = ';')
df.head()

//df.describe()
//df.generateCode()


Datetime,Latitude,Longitude,Neighborhood,TypeOfProperty,PropertyValue,AmountStolen,CategoryOfItemsStolen,Temperature,ArrestMade
2024-08-14T01:25:56,407079974000000,-740088826000000,Downtown,apartment,6682,1782,art,-3.89°C,True
2024-08-28T01:53:52,407080218000000,-740014447000000,Downtown,shop,372622,50237,jewelry,19.43°C,False
2024-08-17T07:09:39,407078386000000,-740134814000000,Downtown,hotel,163754,26034,cash,23.25°C,False
2024-08-15T08:34:38,407077988000000,-74007607000000,Downtown,restaurant,1348,152,food,5.13°C,True
2024-08-18T19:03:37,407082332000000,-740005406000000,Downtown,apartment,45896,7316,electronics,16.32°C,True


In [79]:
import kotlinx.datetime.DayOfWeek

val dfExtra = df.add("DayOfWeek"){Datetime.dayOfWeek}.add("hourOfDay"){Datetime.hour}
dfExtra.generateCode()

dfExtra.generateInterfaces()


@DataSchema
interface _DataFrameType4 {
    val AmountStolen: kotlin.Int
    val ArrestMade: kotlin.Boolean
    val CategoryOfItemsStolen: kotlin.String
    val Datetime: kotlinx.datetime.LocalDateTime
    val DayOfWeek: java.time.DayOfWeek
    val Latitude: kotlin.Double
    val Longitude: kotlin.Double
    val Neighborhood: kotlin.String
    val PropertyValue: kotlin.Int
    val Temperature: kotlin.String
    val TypeOfProperty: kotlin.String
    val hourOfDay: kotlin.Int
}

In [65]:
dfExtra.select("hourOfDay", "DayOfWeek", "AmountStolen", "CategoryOfItemsStolen", "TypeOfProperty")

hourOfDay,DayOfWeek,AmountStolen,CategoryOfItemsStolen,TypeOfProperty
1,WEDNESDAY,1782,art,apartment
1,WEDNESDAY,50237,jewelry,shop
7,SATURDAY,26034,cash,hotel
8,THURSDAY,152,food,restaurant
19,SUNDAY,7316,electronics,apartment
1,WEDNESDAY,1782,art,apartment
1,WEDNESDAY,88237,jewelry,shop
7,SATURDAY,26034,cash,hotel
8,THURSDAY,152,food,restaurant
19,SUNDAY,7316,electronics,apartment


In [82]:
fun getDayPeriodAll(hourOfDay: Int): String {
    return when (hourOfDay) {
        in 5..7 -> "Early-Morning-5_7" // Covers 5:00 AM to 7:59 AM
        in 8..10 -> "Breakfast-Time-8_10" // Covers 8:00 AM to 10:59 AM
        in 11..13 -> "Lunch-Time-11_13" // Covers 11:00 AM to 1:59 PM
        in 14..16 -> "Afternoon-14_16" // Covers 2:00 PM to 4:59 PM
        in 17..19 -> "Evening-17_19" // Covers 5:00 PM to 7:59 PM
        in 20..20 -> "Dinner-Time-20_20" // Specifically covers 8:00 PM to 8:59 PM
        in 21..23 -> "Night-21_23" // Covers 9:00 PM to 11:59 PM
        in 0..4 -> "Late-Night-0_4" // Covers 12:00 AM to 4:59 AM
        24, 0 -> "Midnight-0_1" // Specifically covers the transition at midnight from 12:00 AM to 1:00 AM
        else -> "Invalid-Hour"
    }
}
val result = dfExtra.add("periodOfDay"){getDayPeriodAll(hourOfDay)}
    .filter { CategoryOfItemsStolen == "food" }
    .groupBy("DayOfWeek", "periodOfDay")
    .aggregate { count() into "count" }
    .sortByDesc(){it["count"]}

// Print the result
result.print()

    DayOfWeek         periodOfDay count
  0  SATURDAY    Lunch-Time-11_13     3
  1  THURSDAY Breakfast-Time-8_10     2
  2    FRIDAY Breakfast-Time-8_10     2
  3  THURSDAY   Early-Morning-5_7     1
  4    SUNDAY    Lunch-Time-11_13     1
  5  THURSDAY       Evening-17_19     1
  6   TUESDAY    Lunch-Time-11_13     1
  7    MONDAY   Early-Morning-5_7     1
  8    FRIDAY       Evening-17_19     1
  9  THURSDAY    Lunch-Time-11_13     1
 10    SUNDAY       Evening-17_19     1
 11   TUESDAY       Evening-17_19     1
 12   TUESDAY   Dinner-Time-20_20     1
 13    FRIDAY   Dinner-Time-20_20     1
 14    MONDAY       Evening-17_19     1



### Visualize the data

In [83]:
    val propertyColors = (listOf("apartment", "hotel", "restaurant", "shop","industrial") zip listOf("green", "yellow", "blue", "red", "black")).toMap()

val key = File("/Users/urs/.googlemaps").readText().trim()
    val markers = df
        .filter { TypeOfProperty == "restaurant" }
        .map { "&markers=color:${propertyColors.getValue(TypeOfProperty)}|label:${TypeOfProperty.first().uppercase()}|${Latitude.toString().toCoordinate()},${Longitude.toString().toCoordinate()}" }.joinToString("")
    
    val url = "https://maps.google.com/maps/api/staticmap?center=Finance+District,New+York,NY&zoom=14&size=512x512&maptype=roadmap$markers&sensor=false&key=$key"
    val out = File("out.png")
    out.writeBytes(URL(url).openStream().readAllBytes())
//Image.embedSrc(URL(url).openStream().readAllBytes(), "out.png"))
    Image(out, embed = true).withWidth(400)

In [84]:
dfExtra.plot {
    x(hourOfDay) {
        //scale = continuous(1..24)
        axis {
            name = "Hour Of Day" // Axis label
//                breaks(format = "2") // Custom axis breaks
        }
    }
    y(TypeOfProperty) {
        axis.name = "Type Of Property"
    }

    points {
        size(AmountStolen){
            legend.type = LegendType.None
            scale = continuous(range = 5.0..70.0)
        }
        color(AmountStolen)
      
    }
    layout.size = 1200 to 700
}

In [39]:
dfExtra.plot {
    x(TypeOfProperty) {
        //scale = continuous(1..24)
        axis {
            name = "Type of Property" // Axis label
//                breaks(format = "2") // Custom axis breaks
        }
    }
    y(CategoryOfItemsStolen) {
        axis.name = "Goods Stolen"
    }

    points {
        size(AmountStolen){
            legend.type = LegendType.None
            scale = continuous(range = 5.0..60.0)
        }
        color(AmountStolen) {
            scale = continuous(range = Color.YELLOW..Color.RED)   
        }

    }
    layout.size = 1200 to 700
}

In [55]:
dfExtra.plot {
    x(hourOfDay) {
        //scale = continuous(1..24)
        axis {
            name = "Hour Of Day" // Axis label
//                breaks(format = "2") // Custom axis breaks
        }
    }
    y(TypeOfProperty) {
        axis.name = "Type Of Property"
    }

    points {
        size(AmountStolen){
            legend.type = LegendType.None
            scale = continuous(range = 5.0..70.0)
            
        }
        color(AmountStolen)
        symbol(CategoryOfItemsStolen) {
            legend {
                type = LegendType.DiscreteLegend()
                name = "Stolen Goods"
                //breaksLabeled("4" to "4WD", "r" to "RWD", "f" to "FWD")
            }
        }

    }
    layout.size = 1200 to 700
}

#### Fix coordinates

In [97]:
import org.jetbrains.kotlinx.dataframe.codeGen.generateCode
fun String.toCoordinate() =  this.replace(".", "").replace("E7", "").replace("E8", "").let{n -> (if(n.startsWith("-")) 3 else 2).let{n.take(it) + "." + n.drop(it)}.toDouble()}

val dfr = DataFrame.read("/Users/urs/development/github/ai/kotlin-ai-talk/langchain4j/src/main/resources/dataset/google_maps_restaurants(cleaned).csv")
dfr.schema()
dfr.describe()
dfr.generateCode()
dfr.select("Lat","Lon")
val latLongValues = dfr.map { it.get("Lat").toString().toCoordinate() to it.get("Lon").toString().toCoordinate()}.toList()
latLongValues
//latLongValues.take(10).map{(f, s) -> String.format("%.5f", f) to String.format("%.5f", s)}.joinToString("\n")
//val d = 40.880365
//d
//dfr.map{ it.get("Lat").toString().replace(".", "").replace("E7", "").replace("E8", "").let{it.take(2) + "." + it.drop(2)}}

//dfr.head()
fun euclideanDistance(p1: Pair<Double, Double>, p2: Pair<Double, Double>): Double {
    return sqrt((p1.first - p2.first).pow(2) + (p2.second - p2.second).pow(2))
}

val referencePoint = Pair(40.7079974,-74.0088826)
// Function to sort latitude and longitude pairs
fun sortLatLong(values: List<Pair<Double, Double>>, referencePoint: Pair<Double, Double>): List<Pair<Double, Double>> {
    return values.sortedBy { euclideanDistance(it, referencePoint) }
}
val sortedValues = sortLatLong(latLongValues, referencePoint)

println(sortedValues.take(45).joinToString("\n"))

File("/Users/urs/development/github/ai/kotlin-ai-talk/langchain4j/src/main/resources/dataset/burglaries.csv").readLines().drop(1).withIndex().map { (indx, line) -> 
    line.replace("lat", sortedValues[indx].first.toString()).replace("lon", sortedValues[indx].second.toString())   
}.joinToString("\n")


(40.7079974, -74.0088826)
(40.7080218, -74.0014447)
(40.7078386, -74.0134814)
(40.7077988, -74.007607)
(40.7082332, -74.0005406)
(40.7077159, -73.961362)
(40.7084781, -74.013525)
(40.7074372, -73.9032313)
(40.7085845, -74.0140166)
(40.70733, -74.0063713)
(40.7071242, -74.0083922)
(40.708882, -74.0114326)
(40.7092337, -74.0399722)
(40.706529, -74.0844223)
(40.7065047, -74.0077715)
(40.7095008, -74.0126642)
(40.7063523, -74.0138086)
(40.7062148, -73.9228243)
(40.7062008, -74.0938172)
(40.7060908, -74.0072932)
(40.710158, -73.899559)
(40.7104848, -74.1569801)
(40.705498, -74.007364)
(40.7052835, -74.0996135)
(40.7052041, -73.8965961)
(40.7051201, -74.0140463)
(40.7049382, -74.0085917)
(40.7048499, -74.0095927)
(40.7112962, -74.0927397)
(40.7046878, -74.0116672)
(40.711468, -73.9445361)
(40.7040298, -74.01048)
(40.7037947, -74.0801455)
(40.7122007, -74.0632807)
(40.7034721, -73.9411468)
(40.7127482, -74.015579)
(40.7032457, -74.0950441)
(40.7029533, -73.9938431)
(40.7027894, -73.9081067)
(

 2024-08-14 01:25:56;40.7079974;-74.0088826;Downtown;apartment;6,682.59;1,782.45;art;-3.89°C;True
 2024-08-28 01:53:52;40.7080218;-74.0014447;Downtown;industrial;372,622.40;88,237.03;jewelry;19.43°C;False
 2024-08-17 07:09:39;40.7078386;-74.0134814;Downtown;house;163,754.64;26,034.85;cash;23.25°C;False
 2024-08-15 05:34:38;40.7077988;-74.007607;Downtown;restaurant;1,348.93;152.55;food;5.13°C;True
 2024-08-18 19:03:37;40.7082332;-74.0005406;Downtown;apartment;45,896.52;7,316.38;electronics;16.32°C;True
 2024-08-14 01:25:56;40.7077159;-73.961362;Downtown;apartment;6,682.59;1,782.45;art;-3.89°C;True
 2024-08-28 01:53:52;40.7084781;-74.013525;Downtown;industrial;372,622.40;88,237.03;jewelry;19.43°C;False
 2024-08-17 07:09:39;40.7074372;-73.9032313;Downtown;house;163,754.64;26,034.85;cash;23.25°C;False
 2024-08-15 05:34:38;40.7085845;-74.0140166;Downtown;restaurant;1,348.93;152.55;food;5.13°C;True
 2024-08-18 19:03:37;40.70733;-74.0063713;Downtown;apartment;45,896.52;7,316.38;electronics;16