```shell
brew install nvtop
```

```shell
uv pip install -U "sentence-transformers[onnx-gpu]"
```

### References

- https://sbert.net/docs/sentence_transformer/pretrained_models.html
- https://github.com/UKPLab/sentence-transformers/blob/master/examples/applications/semantic-search/semantic_search_quora_pytorch.py
- https://www.gpu-mart.com/blog/top-3-linux-gpu-monitoring-command-line-tools

In [1]:

import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=UserWarning)

In [2]:
# os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [3]:
import gc
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from gait import FEL, Layers
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    precision_score,
    recall_score,
)
from tqdm.notebook import trange

In [4]:
# Apply matplotlib's built-in dark theme to the figures drawn in this notebook.
plt.style.use("dark_background")

In [6]:
# Pick the compute device: Apple's MPS backend wins when it is available,
# then CUDA, and the CPU is the fallback.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

device

device(type='mps')

In [7]:
# Load the layer definitions that are handed to FEL in the next cell.
# NOTE(review): the path is hard-coded and relies on Layers.load resolving "~" — confirm.
layers = Layers.load("~/data/NorthSea.json")

In [8]:
# Generator built on the loaded layers; its create_line_0/1/2 methods are
# called further down to produce objects exposing `.line` and `.fel`.
fel = FEL(layers=layers)

In [9]:
# Samples to request from create_line_1 and create_line_2 respectively.
line_1 = 1_000
line_2 = 1_000

# Map each generated line to its fel object. Keying by the text means
# repeated lines collapse into a single entry (the later value wins).
elems = {
    sample.line: sample.fel
    for sample in [fel.create_line_1() for _ in range(line_1)]
}
elem1 = [fel.create_line_2() for _ in range(line_2)]
elems.update((sample.line, sample.fel) for sample in elem1)

In [10]:
# Parallel lists: docs[i] is the text whose fel object is fels[i].
docs = [*elems]
fels = [*elems.values()]

In [11]:
# Corpus size; it can be below line_1 + line_2 because identical lines share one dict key.
len(docs)

1991

In [12]:
# for _ in random.choices(docs, k=3):
#     print(_)
#     print()

In [13]:
# Drop any model left over from an earlier run of the notebook so its memory
# can be reclaimed before a new one is loaded.
if "model" in globals():
    del model
    gc.collect()

# Return cached accelerator memory to the system on whichever backend is present.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("CUDA flushed.")

# torch.mps is missing from older PyTorch builds, hence the hasattr guard.
if torch.backends.mps.is_available() and hasattr(torch, "mps"):
    torch.mps.empty_cache()
    print("MPS flushed.")

In [14]:
# Sentence-transformer checkpoint under test. Swap in one of the
# alternatives below to compare models:
#   "all-MiniLM-L6-v2"
#   "all-mpnet-base-v2"
#   "multi-qa-mpnet-base-cos-v1"
#   "sentence-transformers/all-MiniLM-L6-v2"
#   "sentence-transformers/static-similarity-mrl-multilingual-v1"
#   "sentence-transformers/static-retrieval-mrl-en-v1"
model_name = "multi-qa-MiniLM-L6-dot-v1"

# Load the checkpoint onto the device selected earlier.
model = SentenceTransformer(model_name, device=device)
# Optional half-precision variant for CUDA: model = model.half().to("cuda")

In [15]:
# Progress-bar toggle for the corpus pass
# (alternative: torch.backends.mps.is_available()).
show_progress_bar = True

# Embed the whole corpus in batches of 100, requesting normalised vectors
# returned as a tensor rather than a NumPy array.
embeddings = model.encode(
    docs,
    device=model.device,
    batch_size=100,
    normalize_embeddings=True,
    convert_to_tensor=True,
    convert_to_numpy=False,
    show_progress_bar=show_progress_bar,
)

Batches:   0%|          | 0/20 [00:00<?, ?it/s]

In [16]:
# Make sure the corpus tensor lives on the model's device before scoring.
# NOTE(review): encode() was already called with device=model.device, so this is likely a no-op — confirm.
embeddings = embeddings.to(model.device)

In [17]:
# Generate one query with create_line_0 and embed it with the same
# normalisation setting as the corpus.
line_fel = fel.create_line_0()
embedding = model.encode(
    line_fel.line,
    device=model.device,
    normalize_embeddings=True,
    convert_to_tensor=True,
)

- Semantic search example (Sentence Transformers docs): https://sbert.net/examples/sentence_transformer/applications/semantic-search/README.html

In [18]:
# Dot-product similarity of the query against every corpus vector; both
# sides were encoded with normalize_embeddings=True.
simi = util.dot_score(embedding, embeddings)
# Ten best matches: their scores and their positions in `docs`.
score, index = simi.topk(10)

In [19]:
# Display the top-10 scores together with their corpus indices.
score, index

(tensor([[0.8165, 0.8061, 0.8057, 0.7986, 0.7967, 0.7891, 0.7889, 0.7883, 0.7830,
          0.7823]], device='mps:0'),
 tensor([[1721, 1265, 1543, 1004, 1904, 1257, 1673, 1621, 1912, 1154]],
        device='mps:0'))

In [20]:
# Print the query, a blank line, then the matched corpus lines in rank order.
print(line_fel.line)
print()
# .tolist() turns the index tensor into plain Python ints for indexing `docs`.
for doc_id in index[0].tolist():
    print(docs[doc_id])

Show all wellbores where drilling operator is equinor energy as and drilling operator prefixes with mobil that are within 35.8 kilometers of pipelines with destination facility is grane-y and where dimension is above 8.625

Identify all wellbores with drilling operator is not blank that are within 48.2 kilometers of pipelines where dimension not below 28.0 and where medium is gas
Locate pipelines with current operator is equinor energy as that are within 20.1 miles of wellbores with has oil samples = 0 or where completion date is not on Mar 86
Show wellbores with drilling operator is specified (not blank) that are within a distance of 93.8 miles of pipelines where current phase is unspecified or empty
Identify wellbores that are within a distance of 25.5 meters of wellbores with drilling operator contains limited
Identify all wellbores with drilling operator is specified (not blank) that are in the range of 88.9 miles of pipelines with current operator starting with gassco
Show all pip

In [None]:
# Evaluate routing by nearest-neighbour vote: each generated query is assigned
# the route that occurs most often among its top_k closest corpus entries.
gt = []  # expected route of every evaluated query
pv = []  # route chosen by the vote, aligned index-for-index with gt
top_k = 50

for _ in trange(100):
    # Loop-local names keep `line_fel`, `embedding` and `index` from the
    # single-query cells above intact.
    query = fel.create_line_0()
    query_embedding = model.encode(
        query.line,
        convert_to_tensor=True,
        device=model.device,
    )
    # A single query goes in, so only the first hit list is relevant.
    hits = util.semantic_search(query_embedding, embeddings, top_k=top_k)[0]

    # Count how many of the retrieved neighbours belong to each route.
    route_cnt = Counter(fels[hit["corpus_id"]].route for hit in hits)
    if not route_cnt:
        # No neighbours came back: skip the query so gt and pv stay aligned.
        continue

    # most_common orders equal counts by first appearance, so a tie goes to
    # the route that showed up earlier in the hit list.
    predicted_route, _votes = route_cnt.most_common(1)[0]
    gt.append(query.fel.route)
    pv.append(predicted_route)

In [None]:
# Headline metrics, expressed as percentages.
# NOTE(review): precision_score / recall_score run with sklearn's default
# binary settings — confirm the route labels are what those defaults expect.
accuracy = 100.0 * accuracy_score(gt, pv)
precision = 100.0 * precision_score(gt, pv)
recall = 100.0 * recall_score(gt, pv)

print(f"\n{model_name} {line_1} / {line_2} {top_k=}\n")
print(f"Accuracy:\t{accuracy:.1f}%")
print(f"Precision:\t{precision:.1f}%")
print(f"Recall:\t\t{recall:.1f}%")

# Confusion matrix drawn as a heatmap; every cell is shown as its share of
# all evaluated queries.
# NOTE(review): tick labels assume exactly two classes ordered FEL1, FEL2 — confirm.
categories = ["FEL1", "FEL2"]
cf_matrix = confusion_matrix(gt, pv)
ax = sns.heatmap(
    cf_matrix / cf_matrix.sum(),
    annot=True,
    fmt=".1%",
    cmap="Blues",
    xticklabels=categories,
    yticklabels=categories,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("Expected")
ax.xaxis.tick_top()