In [58]:
import dev.example.food.*
import dev.langchain4j.model.*
import dev.langchain4j.model.embedding.*
import dev.langchain4j.data.embedding.*
import dev.langchain4j.store.embedding.*
import dev.langchain4j.model.embedding.onnx.allminilml6v2.*
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
%use dataframe
%use lets-plot
%use lets-plot-gt
%useLatestDescriptors


### Embeddings

In [52]:
import dev.langchain4j.model.embedding.onnx.bgesmallzhv15q.BgeSmallZhV15QuantizedEmbeddingModel
val embeddingModel: EmbeddingModel =   BgeSmallZhV15QuantizedEmbeddingModel()


In [53]:
val response = embeddingModel.embed("kingdom")
val embedding:Embedding = response.content()
embedding.dimension()
//response.tokenUsage()

512

### Consine Similarity

In [41]:
val embeddingModel = AllMiniLmL6V2EmbeddingModel()
infix fun String.similarityTo(text: String): Double {
    val inputOne: Embedding = embeddingModel.embed(this).content()
    val inputTwo: Embedding = embeddingModel.embed(text).content()
    return CosineSimilarity.between(inputOne, inputTwo)
}


In [43]:

"king".similarityTo("monarch")

0.7360189855453844

In [55]:
"the king is in the capital".similarityTo("the king is on the city")


0.7520048821624081

### Similarity Search

In [56]:

val url = "jdbc:postgresql://localhost:5431/langchain"
val username = "user"
val password = "password"

val dbConfig = DatabaseConfiguration(url, username, password)

val tableName = "italianfood"

val df = DataFrame.readSqlTable(dbConfig, tableName, 100)



In [60]:
val referenceVector = java.util.Arrays.toString(embeddingModel.embed("Dessert with Berries, Cream and Chocolate").content().vector())
val query = """SELECT * from (WITH temp AS (SELECT (2 - (embedding <=> '%s')) / 2 AS score, embedding_id, embedding, text, metadata FROM %s) SELECT * FROM temp WHERE score >= %s ORDER BY score desc LIMIT %s) as result""".format(
    referenceVector, "italianfood", 0.6, 7
)

DataFrame.readSqlQuery(dbConfig, query).select("text", "score").print(valueLimit = 100)



org.postgresql.util.PSQLException: ERROR: different vector dimensions 384 and 512

{Index=[[0, 1, 2, 3, 4]], Vector1=[[1, 2, 3, 4, 5]], Vector2=[[5, 4, 3, 2, 1]], Color1=[[red, blue, green, yellow, purple]], Color2=[[red, blue, green, yellow, purple]]}


In [20]:
import kotlin.math.sqrt
//https://kindgeek.com/blog/post/experiments-with-langchain4j-or-java-way-to-llm-powered-applications

// Sample dataset
val data = listOf(
    Pair(1.0, 1.0),
    Pair(2.0, 2.0),
    Pair(3.0, 3.0),
    Pair(4.0, 4.0),
    Pair(5.0, 5.0)
)

// Function to calculate Euclidean distance
fun euclideanDistance(p1: Pair<Double, Double>, p2: Pair<Double, Double>): Double {
    return sqrt((p1.first - p2.first).pow(2) + (p1.second - p2.second).pow(2))
}

// Function to find k-nearest neighbors
fun kNearestNeighbors(data: List<Pair<Double, Double>>, point: Pair<Double, Double>, k: Int): List<Pair<Double, Double>> {
    return data.sortedBy { euclideanDistance(it, point) }.take(k)
}

// Define the point to find neighbors for
val point = Pair(3.5, 3.5)
val k = 3

// Find k-nearest neighbors
val neighbors = kNearestNeighbors(data, point, k)

// Create a DataFrame for the dataset
val df = dataFrameOf("x", "y")(
    data.map { it.first },
    data.map { it.second }
)

// Create a DataFrame for the neighbors
val neighborsDf = dataFrameOf("x", "y")(
    neighbors.map { it.first },
    neighbors.map { it.second }
)

// Plot the dataset and the neighbors
val plot = letsPlot(df.toMap()) +
        geomPoint(color = "blue", size = 3.0) { x = "x"; y = "y" } +
        geomPoint(data = neighborsDf.toMap(), color = "red", size = 5.0) { x = "x"; y = "y" } +
        geomPoint(data = mapOf("x" to listOf(point.first), "y" to listOf(point.second)), color = "green", size = 5.0) { x = "x"; y = "y" }

plot