Skip to content

Commit 6994b36

Browse files
committed
Add documentation for E5VEmbeddings, detailing usage for multimodal embeddings in both Python and Scala, including examples and sources.
1 parent 29e989b commit 6994b36

File tree

1 file changed

+133
-0
lines changed

1 file changed

+133
-0
lines changed
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{%- capture title -%}
2+
E5VEmbeddings
3+
{%- endcapture -%}
4+
5+
{%- capture description -%}
6+
Universal multimodal embeddings using E5-V.
7+
8+
E5-V is a multimodal embedding model that bridges the modality gap between text and images, enabling strong performance in cross-modal retrieval, classification, clustering, and more. It supports both image+text and text-only embedding scenarios, and is fine-tuned from lmms-lab/llama3-llava-next-8b. The default model is `"e5v_1_5_7b_int4"`.
9+
10+
Note that this annotator is only supported for Spark versions 3.4 and up.
11+
12+
Pretrained models can be loaded with `pretrained` of the companion object:
13+
14+
```scala
15+
val embeddings = E5VEmbeddings.pretrained()
16+
.setInputCols("image_assembler")
17+
.setOutputCol("e5v")
18+
```
19+
20+
For available pretrained models please see the
21+
[Models Hub](https://sparknlp.org/models?q=E5V).
22+
23+
For extended examples of usage, see
24+
[E5VEmbeddingsTestSpec](https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/embeddings/E5VEmbeddingsTestSpec.scala).
25+
26+
**Sources**:
27+
28+
- [E5-V: Universal Embeddings with Multimodal Large Language Models (arXiv)](https://arxiv.org/abs/2407.12580)
29+
- [Hugging Face Model Card](https://huggingface.co/royokong/e5-v)
30+
- [E5-V GitHub Repository](https://github.com/kongds/E5-V)
31+
{%- endcapture -%}
32+
33+
{%- capture input_anno -%}
34+
IMAGE
35+
{%- endcapture -%}
36+
37+
{%- capture output_anno -%}
38+
SENTENCE_EMBEDDINGS
39+
{%- endcapture -%}
40+
41+
{%- capture python_example -%}
42+
# Image + Text Embedding
43+
import sparknlp
44+
from sparknlp.base import *
45+
from sparknlp.annotator import *
46+
from pyspark.ml import Pipeline
47+
from pyspark.sql.functions import lit
48+
49+
image_df = spark.read.format("image").option("dropInvalid", True).load(imageFolder)
50+
imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n"
51+
test_df = image_df.withColumn("text", lit(imagePrompt))
52+
imageAssembler = ImageAssembler() \
53+
.setInputCol("image") \
54+
.setOutputCol("image_assembler")
55+
e5vEmbeddings = E5VEmbeddings.pretrained() \
56+
.setInputCols(["image_assembler"]) \
57+
.setOutputCol("e5v")
58+
pipeline = Pipeline().setStages([
59+
imageAssembler,
60+
e5vEmbeddings
61+
])
62+
result = pipeline.fit(test_df).transform(test_df)
63+
result.select("e5v.embeddings").show(truncate=False)
64+
65+
# Text-Only Embedding
66+
from sparknlp.util import EmbeddingsDataFrameUtils
67+
textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n"
68+
textDesc = "A cat sitting in a box."
69+
nullImageDF = spark.createDataFrame(
70+
spark.sparkContext.parallelize([EmbeddingsDataFrameUtils.emptyImageRow]),
71+
EmbeddingsDataFrameUtils.imageSchema)
72+
textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc)))
73+
e5vEmbeddings = E5VEmbeddings.pretrained() \
74+
.setInputCols(["image"]) \
75+
.setOutputCol("e5v")
76+
result = e5vEmbeddings.transform(textDF)
77+
result.select("e5v.embeddings").show(truncate=False)
78+
{%- endcapture -%}
79+
80+
{%- capture scala_example -%}
81+
// Image + Text Embedding
82+
import org.apache.spark.sql.functions.lit
83+
import com.johnsnowlabs.nlp.base.ImageAssembler
84+
import com.johnsnowlabs.nlp.embeddings.E5VEmbeddings
85+
import org.apache.spark.ml.Pipeline
86+
87+
val imageDF = spark.read.format("image").option("dropInvalid", value = true).load(imageFolder)
88+
val imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n"
89+
val testDF = imageDF.withColumn("text", lit(imagePrompt))
90+
val imageAssembler = new ImageAssembler().setInputCol("image").setOutputCol("image_assembler")
91+
val e5vEmbeddings = E5VEmbeddings.pretrained()
92+
.setInputCols("image_assembler")
93+
.setOutputCol("e5v")
94+
val pipeline = new Pipeline().setStages(Array(imageAssembler, e5vEmbeddings))
95+
val result = pipeline.fit(testDF).transform(testDF)
96+
result.select("e5v.embeddings").show(truncate = false)
97+
98+
// Text-Only Embedding
99+
import com.johnsnowlabs.nlp.util.EmbeddingsDataFrameUtils.{emptyImageRow, imageSchema}
100+
val textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n"
101+
val textDesc = "A cat sitting in a box."
102+
val nullImageDF = spark.createDataFrame(spark.sparkContext.parallelize(Seq(emptyImageRow)), imageSchema)
103+
val textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc)))
104+
val e5vTextEmbeddings = E5VEmbeddings.pretrained()
105+
.setInputCols("image")
106+
.setOutputCol("e5v")
107+
val result2 = e5vTextEmbeddings.transform(textDF)
108+
result2.select("e5v.embeddings").show(truncate = false)
109+
{%- endcapture -%}
110+
111+
{%- capture api_link -%}
112+
[E5VEmbeddings](/api/com/johnsnowlabs/nlp/embeddings/E5VEmbeddings)
113+
{%- endcapture -%}
114+
115+
{%- capture python_api_link -%}
116+
[E5VEmbeddings](/api/python/reference/autosummary/sparknlp/annotator/cv/e5v_embeddings/index.html#sparknlp.annotator.cv.e5v_embeddings.E5VEmbeddings)
117+
{%- endcapture -%}
118+
119+
{%- capture source_link -%}
120+
[E5VEmbeddings](https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5VEmbeddings.scala)
121+
{%- endcapture -%}
122+
123+
{% include templates/anno_template.md
124+
title=title
125+
description=description
126+
input_anno=input_anno
127+
output_anno=output_anno
128+
python_example=python_example
129+
scala_example=scala_example
130+
api_link=api_link
131+
python_api_link=python_api_link
132+
source_link=source_link
133+
%}

0 commit comments

Comments
 (0)