|
| 1 | +{%- capture title -%} |
| 2 | +E5VEmbeddings |
| 3 | +{%- endcapture -%} |
| 4 | + |
| 5 | +{%- capture description -%} |
| 6 | +Universal multimodal embeddings using E5-V. |
| 7 | + |
| 8 | +E5-V is a multimodal embedding model that bridges the modality gap between text and images, enabling strong performance in cross-modal retrieval, classification, clustering, and more. It supports both image+text and text-only embedding scenarios, and is fine-tuned from lmms-lab/llama3-llava-next-8b. The default model is `"e5v_1_5_7b_int4"`. |
| 9 | + |
| 10 | +Note that this annotator is only supported on Spark versions 3.4 and up. |
| 11 | + |
| 12 | +Pretrained models can be loaded with `pretrained` of the companion object: |
| 13 | + |
| 14 | +```scala |
| 15 | +val embeddings = E5VEmbeddings.pretrained() |
| 16 | + .setInputCols("image_assembler") |
| 17 | + .setOutputCol("e5v") |
| 18 | +``` |
| 19 | + |
| 20 | +For available pretrained models please see the |
| 21 | +[Models Hub](https://sparknlp.org/models?q=E5V). |
| 22 | + |
| 23 | +For extended examples of usage, see |
| 24 | +[E5VEmbeddingsTestSpec](https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/embeddings/E5VEmbeddingsTestSpec.scala). |
| 25 | + |
| 26 | +**Sources**: |
| 27 | + |
| 28 | +- [E5-V: Universal Embeddings with Multimodal Large Language Models (arXiv)](https://arxiv.org/abs/2407.12580) |
| 29 | +- [Hugging Face Model Card](https://huggingface.co/royokong/e5-v) |
| 30 | +- [E5-V GitHub Repository](https://github.com/kongds/E5-V) |
| 31 | +{%- endcapture -%} |
| 32 | + |
| 33 | +{%- capture input_anno -%} |
| 34 | +IMAGE |
| 35 | +{%- endcapture -%} |
| 36 | + |
| 37 | +{%- capture output_anno -%} |
| 38 | +SENTENCE_EMBEDDINGS |
| 39 | +{%- endcapture -%} |
| 40 | + |
| 41 | +{%- capture python_example -%} |
| 42 | +# Image + Text Embedding |
| 43 | +import sparknlp |
| 44 | +from sparknlp.base import * |
| 45 | +from sparknlp.annotator import * |
| 46 | +from pyspark.ml import Pipeline |
| 47 | +from pyspark.sql.functions import lit |
| 48 | + |
| 49 | +image_df = spark.read.format("image").option("dropInvalid", True).load(imageFolder) |
| 50 | +imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 51 | +test_df = image_df.withColumn("text", lit(imagePrompt)) |
| 52 | +imageAssembler = ImageAssembler() \ |
| 53 | + .setInputCol("image") \ |
| 54 | + .setOutputCol("image_assembler") |
| 55 | +e5vEmbeddings = E5VEmbeddings.pretrained() \ |
| 56 | + .setInputCols(["image_assembler"]) \ |
| 57 | + .setOutputCol("e5v") |
| 58 | +pipeline = Pipeline().setStages([ |
| 59 | + imageAssembler, |
| 60 | + e5vEmbeddings |
| 61 | +]) |
| 62 | +result = pipeline.fit(test_df).transform(test_df) |
| 63 | +result.select("e5v.embeddings").show(truncate=False) |
| 64 | + |
| 65 | +# Text-Only Embedding |
| 66 | +from sparknlp.util import EmbeddingsDataFrameUtils |
| 67 | +textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 68 | +textDesc = "A cat sitting in a box." |
| 69 | +nullImageDF = spark.createDataFrame( |
| 70 | + spark.sparkContext.parallelize([EmbeddingsDataFrameUtils.emptyImageRow]), |
| 71 | + EmbeddingsDataFrameUtils.imageSchema) |
| 72 | +textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc))) |
| 73 | +e5vEmbeddings = E5VEmbeddings.pretrained() \ |
| 74 | + .setInputCols(["image"]) \ |
| 75 | + .setOutputCol("e5v") |
| 76 | +result = e5vEmbeddings.transform(textDF) |
| 77 | +result.select("e5v.embeddings").show(truncate=False) |
| 78 | +{%- endcapture -%} |
| 79 | + |
| 80 | +{%- capture scala_example -%} |
| 81 | +// Image + Text Embedding |
| 82 | +import org.apache.spark.sql.functions.lit |
| 83 | +import com.johnsnowlabs.nlp.base.ImageAssembler |
| 84 | +import com.johnsnowlabs.nlp.embeddings.E5VEmbeddings |
| 85 | +import org.apache.spark.ml.Pipeline |
| 86 | + |
| 87 | +val imageDF = spark.read.format("image").option("dropInvalid", value = true).load(imageFolder) |
| 88 | +val imagePrompt = "<|start_header_id|>user<|end_header_id|>\n\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 89 | +val testDF = imageDF.withColumn("text", lit(imagePrompt)) |
| 90 | +val imageAssembler = new ImageAssembler().setInputCol("image").setOutputCol("image_assembler") |
| 91 | +val e5vEmbeddings = E5VEmbeddings.pretrained() |
| 92 | + .setInputCols("image_assembler") |
| 93 | + .setOutputCol("e5v") |
| 94 | +val pipeline = new Pipeline().setStages(Array(imageAssembler, e5vEmbeddings)) |
| 95 | +val result = pipeline.fit(testDF).transform(testDF) |
| 96 | +result.select("e5v.embeddings").show(truncate = false) |
| 97 | + |
| 98 | +// Text-Only Embedding |
| 99 | +import com.johnsnowlabs.nlp.util.EmbeddingsDataFrameUtils.{emptyImageRow, imageSchema} |
| 100 | +val textPrompt = "<|start_header_id|>user<|end_header_id|>\n\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n \n" |
| 101 | +val textDesc = "A cat sitting in a box." |
| 102 | +val nullImageDF = spark.createDataFrame(spark.sparkContext.parallelize(Seq(emptyImageRow)), imageSchema) |
| 103 | +val textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc))) |
| 104 | +val e5vEmbeddings = E5VEmbeddings.pretrained() |
| 105 | + .setInputCols("image") |
| 106 | + .setOutputCol("e5v") |
| 107 | +val result2 = e5vEmbeddings.transform(textDF) |
| 108 | +result2.select("e5v.embeddings").show(truncate = false) |
| 109 | +{%- endcapture -%} |
| 110 | + |
| 111 | +{%- capture api_link -%} |
| 112 | +[E5VEmbeddings](/api/com/johnsnowlabs/nlp/embeddings/E5VEmbeddings) |
| 113 | +{%- endcapture -%} |
| 114 | + |
| 115 | +{%- capture python_api_link -%} |
| 116 | +[E5VEmbeddings](/api/python/reference/autosummary/sparknlp/annotator/cv/e5v_embeddings/index.html#sparknlp.annotator.cv.e5v_embeddings.E5VEmbeddings) |
| 117 | +{%- endcapture -%} |
| 118 | + |
| 119 | +{%- capture source_link -%} |
| 120 | +[E5VEmbeddings](https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/main/scala/com/johnsnowlabs/nlp/embeddings/E5VEmbeddings.scala) |
| 121 | +{%- endcapture -%} |
| 122 | + |
| 123 | +{% include templates/anno_template.md |
| 124 | + title=title |
| 125 | + description=description |
| 126 | + input_anno=input_anno |
| 127 | + output_anno=output_anno |
| 128 | + python_example=python_example |
| 129 | + scala_example=scala_example |
| 130 | + api_link=api_link |
| 131 | + python_api_link=python_api_link |
| 132 | + source_link=source_link |
| 133 | +%} |
0 commit comments