In [5]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# Two sentences whose embeddings are compared against each other below.
sentences = [
    'The new movie is awesome',
    'This recent movie is so good',
]

# Load the embedding model once; later cells reuse the `model` name.
model = SentenceTransformer('Supabase/gte-small')

# Encode both sentences in one call, then score the first against the second.
embeddings = model.encode(sentences)
first_embedding, second_embedding = embeddings[0], embeddings[1]
pair_similarity = cos_sim(first_embedding, second_embedding)
print(pair_similarity)

No sentence-transformers model found with name Supabase/gte-small. Creating a new one with MEAN pooling.


tensor([[0.8980]])


In [6]:
# Sentences are compared by position: sentences1[k] against sentences2[k].
sentences1 = [
    "The cat sits outside",
    "A man is playing guitar",
    "The new movie is awesome",
]

sentences2 = [
    "The dog plays in the garden",
    "A woman watches TV",
    "中文是这个",
]

# Encode each list as a tensor so the result can be passed straight to cos_sim.
embeddings1 = model.encode(sentences1, convert_to_tensor=True)
embeddings2 = model.encode(sentences2, convert_to_tensor=True)

# cosine_scores is indexed [row][column]; only matching positions are printed.
cosine_scores = cos_sim(embeddings1, embeddings2)

# One line per positional pair, with the score rounded to four decimals.
row_template = "{} \t\t {} \t\t Score: {:.4f}"
for idx, (left, right) in enumerate(zip(sentences1, sentences2)):
    print(row_template.format(left, right, cosine_scores[idx][idx]))

The cat sits outside 		 The dog plays in the garden 		 Score: 0.8200
A man is playing guitar 		 A woman watches TV 		 Score: 0.7016
The new movie is awesome 		 中文是这个 		 Score: 0.7315


In [7]:
# Our sentences to encode
sentences = [
    "This framework generates embeddings for each input sentence",
    "Sentences are passed as a list of string.",
    "The quick brown fox jumps over the lazy dog."
]

# Sentences are encoded by calling model.encode()
embeddings = model.encode(sentences)

# Print each sentence next to its embedding. This loop had been commented out
# while the cell's saved output still showed these lines, so the cell could
# not reproduce its own output on a fresh run.
for sentence, embedding in zip(sentences, embeddings):
    print("Sentence:", sentence)
    print("Embedding:", embedding)
    print("")

Sentence: This framework generates embeddings for each input sentence
Embedding: [-4.56867069e-01 -6.03743754e-02  2.77015604e-02 -1.49323896e-01
 -2.58533675e-02  3.99035245e-01 -1.55862188e-02  2.39103243e-01
  9.95151699e-02  1.49657741e-01 -3.45251888e-01 -4.33489054e-01
  6.84537232e-01  2.49792948e-01  3.92542332e-01  3.05619389e-01
 -2.38010958e-01  3.97295952e-01 -4.60436285e-01 -1.37540236e-01
  5.90817750e-01 -2.84304023e-01  1.05978601e-01 -5.92266202e-01
 -1.59350500e-01  4.13091540e-01 -1.64931938e-01 -7.34145567e-02
 -3.01011831e-01 -1.89854705e+00  2.36649904e-02 -5.51726222e-01
  7.99842119e-01 -4.33843397e-02 -2.60188192e-01 -1.74996153e-01
 -4.91537094e-01  4.09644186e-01 -1.80871382e-01  2.30171129e-01
  2.36194983e-01  2.71462083e-01  2.17981711e-02 -6.09191656e-01
 -2.04823703e-01 -5.56082666e-01 -6.08014226e-01  7.79330730e-05
 -8.24697167e-02 -2.05188528e-01 -7.09771067e-02 -4.21118557e-01
 -9.76329967e-02  8.62648115e-02  2.12224156e-01  1.12527296e-01
  2.59943

In [8]:
from sentence_transformers.util import semantic_search

# Small corpus to search over.
docs = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# Embed the corpus and the query with the same model.
docs_embeddings = model.encode(docs, convert_to_tensor=True)

query = "tell me about music"
query_embedding = model.encode(query, convert_to_tensor=True)

# Ask for the two best matches. The result is indexed per query; only one
# query was supplied, so its hits are at position 0.
hits = semantic_search(query_embedding, docs_embeddings, top_k=2)
top_hits = hits[0]

# Each hit carries the index of the matched document and its score.
# (A stray bare `hits` expression was removed here: mid-cell it displayed nothing.)
for hit in top_hits:
    print(docs[hit['corpus_id']], "(Score: %.4f)" % hit['score'])

A woman is playing violin. (Score: 0.7764)
A monkey is playing drums. (Score: 0.7570)


In [9]:
import tiktoken

def split_large_text(large_text, max_tokens, encoding_name="cl100k_base"):
    """Split text into consecutive chunks of at most ``max_tokens`` tokens.

    The text is tokenized with the named tiktoken encoding, the token ids are
    cut into fixed-size windows, and each window is decoded back to a string.
    Trailing spaces, periods, commas and semicolons are stripped from every
    chunk.

    Args:
        large_text: The text to split.
        max_tokens: Maximum number of tokens per chunk; must be greater than 0.
        encoding_name: Name of the tiktoken encoding to use. Defaults to
            "cl100k_base", the encoding this function always used before the
            parameter existed.

    Returns:
        A list of chunk strings in document order; empty when ``large_text``
        is empty.

    Raises:
        ValueError: If ``max_tokens`` is not greater than 0. Previously such a
            value silently produced one chunk per token.
    """
    import math

    if max_tokens <= 0:
        raise ValueError(f"max_tokens must be greater than 0, got {max_tokens!r}")
    if not large_text:
        # Nothing to tokenize; skip loading the encoding entirely.
        return []

    # Rounding up keeps the old behaviour for non-integer limits, where a
    # chunk was closed once its length reached or exceeded max_tokens.
    chunk_size = math.ceil(max_tokens)

    enc = tiktoken.get_encoding(encoding_name)
    tokenized_text = enc.encode(large_text)

    # NOTE(review): windows are cut on token boundaries, which may not line up
    # with character boundaries for multi-byte text -- confirm how decode()
    # renders a partial character if non-ASCII input matters.
    chunks = []
    for start in range(0, len(tokenized_text), chunk_size):
        window = tokenized_text[start:start + chunk_size]
        chunks.append(enc.decode(window).rstrip(' .,;'))

    return chunks

In [10]:
import tiktoken

sent = "If we split a text by number of characters, it is not obvious how many tokens these chunks will be."

# Baseline: number of whitespace-separated words.
word_count = len(sent.split())
print(word_count)

enc = tiktoken.get_encoding("cl100k_base")
encoded = enc.encode(sent)

# Number of token ids, then the raw bytes behind each id, then their count.
print(len(encoded))
tokens = list(map(enc.decode_single_token_bytes, encoded))
print(tokens)
print(len(tokens))

# Decode the ids back to text and count its words again; the bare name on the
# last line makes the decoded string the cell's displayed value.
decoded = enc.decode(encoded)
print(len(decoded.split()))
decoded

20
22
[b'If', b' we', b' split', b' a', b' text', b' by', b' number', b' of', b' characters', b',', b' it', b' is', b' not', b' obvious', b' how', b' many', b' tokens', b' these', b' chunks', b' will', b' be', b'.']
22
20


'If we split a text by number of characters, it is not obvious how many tokens these chunks will be.'

In [11]:
import google.generativeai as genai
import time
import os

import getpass

# Prefer a key already present in the GOOGLE_API_KEY environment variable so
# the notebook can run without stopping for input; otherwise prompt without
# echoing. The explicit prompt text replaces getpass's generic default so it
# is clear which secret is being asked for.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY") or getpass.getpass("Google API key: ")

In [12]:
# Register the API key with the SDK, then build the generative model handle
# that the generation calls below go through.
GEMINI_MODEL_NAME = 'gemini-pro'
genai.configure(api_key=GOOGLE_API_KEY)
gen_model = genai.GenerativeModel(GEMINI_MODEL_NAME)

doc = """The new iPad Pro

apple.com
https://www.apple.com
Powerful AI capabilities — Thinpossible. All-new pro design. The thinnest Apple product ever. Order now. Introducing M4, the next generation of Apple silicon. Next-level performance. iPadOS 17. M4 chip. Silver.
Tech specs
Extreme performance by design. Discover more.
Compare iPad models
Which iPad is right for you? Learn more.
Order the new iPad Pro
Thrill. Sleeker. Order the new iPad Pro now.
Why iPad
Yes, it does that. And then some. Shop iPad.
Apple Trade In
Trade in. Upgrade. Save. It's a win-win-win. Learn more.

iPad Pro

Apple
https://www.apple.com › ipad-pro
new ipad pro features from www.apple.com
iPad Pro features the M4 chip, 11-inch or 13-inch Ultra Retina XDR display, Wi-Fi 6E, 5G, and support for Apple Pencil Pro.
$83.25
‎Buy · ‎Tech Specs · ‎Why iPad · ‎Learn more
People also ask
What does new iPad Pro do?
What cool things does the iPad Pro do?
Is the new iPad Pro OLED?
Is there a new iPad coming out in 2024?
Feedback

Apple unveils stunning new iPad Pro with M4 chip and ...

Apple
https://www.apple.com › newsroom › 2024/05 › apple-u...
8 hours ago — M4 features an entirely new display engine to enable the precision, color, and brightness of the Ultra Retina XDR display. With a new CPU, a ...
Pros and cons: Stunningly thin and light design ⋅ Incredibly precise contrast ⋅ Brilliant colors ⋅ View full list
‎Pro Cameras · ‎Apple Pencil Pro · ‎Powerful Ipados Features
Discussions and forums
Apple unveils stunning new iPad Pro with the world's ... - Reddit

Reddit  ·  
r/apple · 870+ comments · 7h ago
Apple Unveils Stunning New iPad Pro With the World's Most ...

TechPowerUp  ·  
7h ago
New iPad Pro may actually be powered by the M4 chip ...

Reddit  ·  
r/apple · 440+ comments · 1w ago
See more
Videos

8:43
M4 iPad Pro Event: Everything Apple Announced in 8 Minutes!
YouTube · MacRumors
6 hours ago

3:28
Apple releases new iPad Pro with M4 chip, teasing AI features
Yahoo Finance · Josh Lipton
3 hours ago

5:37
Apple Reveals iPad Pros With M4 Chips - Video
CNET
7 hours ago
Feedback
View all

New iPad Pro, Air: Prices, release dates, features ...

USA Today
https://www.usatoday.com › story › tech › 2024/05/07
8 hours ago — The new iPad Pro will feature a display using two OLED panels, which Apple is calling Tandem OLED. It also includes the M4 chip, a jump from the ...
Pros and cons: Faster WiFi ⋅ Quarter-pound lighter than its predecessor ⋅ Weighs less than a pound ⋅ View full list
Top stories

Apple
Apple unveils stunning new iPad Pro with M4 chip and Apple Pencil Pro
8 hours ago

USA Today
New iPad Pro, Air: Prices, release dates, features announced by Apple
8 hours ago

CNN
We got our hands on the 2024 iPad Pro and iPad Air — here’s what we think
2 hours ago
More news

iPad Pro - Technical Specifications

Apple
https://www.apple.com › iPad › iPad Pro
new ipad pro features from www.apple.com
11-inch iPad Pro and 13-inch iPad Pro Technical Specifications · 12MP Wide camera, ƒ/1.8 aperture · Digital zoom up to 5x · Five‑element lens · Adaptive True Tone ...

A new iPad Pro is coming: Here are four things to expect

9to5Mac
https://9to5mac.com › new-ipad-pro-features-rumors
new ipad pro features from 9to5mac.com
Apr 29, 2024 — A new iPad Pro is coming: Here are four things to expect · OLED screens · New Magic Keyboard · M4 chip · A much thinner design · Wrap up · Featured.
Pros and cons: Significantly higher brightness ⋅ Better contrast ⋅ View full list

iPad Pro 2024 release date, price, specs and upgrades

Tom's Guide
https://www.tomsguide.com › news › ipad-pro-2024
In this new $129 stylus, you've got a wealth of new features, including rollover to intelligently switch pen styles, haptic feedback to really feel your inputs ...
‎New iPad Pro and new iPad... · ‎iPad Air · ‎Apple set to announce new...
Shop all Memorial Day deal categories

Grills

TVs

Home Appliances

Electronics

Women's Apparel

Men's Apparel
Shop all Memorial Day deal categories

The 4 most exciting iPad Pro 2024 features (and what they ...

ZDNet
https://www.zdnet.com › ... › Computing › Tablets › iPad
4 hours ago — 1. OLED Displays with Ultra Retina XDR · 2. Thinner and lighter than ever · 3. M4 processors · 4. New apps, with a new Apple Pencil.
Pros and cons: Exceedingly light ⋅ An impressively blinding 1,600 nits for HDR ⋅ Very thin ⋅ View full list
"""
# Cut the pasted text into chunks of at most 30 tokens each.
test_splited = split_large_text(doc, 30)

# Embed the chunks and the question with the same sentence-embedding model.
docs_embeddings = model.encode(test_splited, convert_to_tensor=True)

query = "tell me about new ipad features"
query_embedding = model.encode(query, convert_to_tensor=True)

# Collect the text of the 10 top-ranked chunks for the single query (hits[0]).
hits = semantic_search(query_embedding, docs_embeddings, top_k=10)
needed_docs = [test_splited[hit['corpus_id']] for hit in hits[0]]

# Prompt layout: the question on the first line, retrieved chunks on the lines after it.
corpus = '\n'.join(needed_docs)
prompt = f"{query}\n{corpus}"

response = gen_model.generate_content(prompt)
print(response.text)

**Rollover to Intelligently Switch Pen Styles**

* **Automatic pen style selection:** When hovering the Apple Pencil 2 over the display, iPadOS intelligently recognizes the desired pen style based on context.
* **Seamless transitions:** The pen style transitions effortlessly, allowing artists and designers to switch between pencil, eraser, or other tools without manually selecting them.

**Haptic Feedback**

* **Taptic Engine:** iPad Pro now incorporates a Taptic Engine, providing haptic feedback for pen inputs, system interactions, and more.
* **Realistic writing and drawing experience:** The haptic feedback simulates the tactile sensations of pen on paper or other surfaces, enhancing the realism of creative tasks.

**Apple Pencil 3**

* **Redesigned tip:** The Apple Pencil 3 features a redesigned tip that improves accuracy, responsiveness, and friction for a more natural drawing experience.
* **MagSafe charging:** The Apple Pencil 3 magnetically attaches to the side of iPad Pro for c

In [13]:

# Send the same question with no retrieved context, for comparison with the
# answer produced from the retrieved chunks.
response = gen_model.generate_content(query)
baseline_answer = response.text
print(baseline_answer)

**iPad Pro**

* **M2 chip:** Up to 15% faster CPU and 35% faster GPU performance.
* **Apple Pencil Hover:** Enables finer precision with Apple Pencil, allowing it to detect when it is up to 12 mm above the display.
* **Studio Display support:** Connects to a Studio Display for an expansive, professional-grade workspace.
* **Wi-Fi 6E:** Support for faster wireless connectivity.
* **ProRes video capture:** Record and edit ProRes videos in up to 4K at 30 fps.
* **12MP TrueDepth camera:** Improved low-light performance and a wider field of view.
* **iPadOS 16:** Includes new features such as Stage Manager, which allows for more efficient multitasking.

**iPad Air**

* **M1 chip:** Up to 60% faster CPU and twice the graphics performance of the previous iPad Air.
* **12MP Ultra Wide front-facing camera:** Expands the field of view by 2x for better video calls and selfies.
* **5G support (cellular models):** Enables faster connectivity for on-the-go use.
* **Center Stage:** Automatically keep