In [1]:
from multiprocessing import Pool

def run_parallel(dataset, k_folds, k_td_values, devices):
    """Run ``defs.train_worker`` over a grid of tasks in a process pool.

    One task tuple ``(k_td, fold, device)`` is built for every combination of
    the given ``k_td`` values, fold indices and devices. The tasks are then
    dispatched to a pool that has one worker process per device.

    ``defs`` is resolved as a global when this function is called, so the
    module must already be imported in the calling namespace.

    Args:
        dataset: Currently not used by this function; kept so that existing
            callers keep working.
        k_folds: Number of folds; fold indices are ``range(k_folds)``.
        k_td_values: Iterable of ``k_td`` values to sweep over.
        devices: Non-empty sized collection of device identifiers. Its length
            determines the number of worker processes.

    Returns:
        A list with one ``defs.train_worker`` return value per task, in task
        order (``k_td`` outermost, then fold, then device).

    Raises:
        ValueError: If ``devices`` is empty (a pool needs at least one
            worker process).
    """
    n_workers = len(devices)
    if n_workers < 1:
        raise ValueError("devices must contain at least one device")

    # Build the argument tuples for train_worker.
    # NOTE(review): this is a full cross product, so every (k_td, fold) pair
    # is submitted once per device. If devices are only meant to share the
    # work rather than repeat it, the device should be assigned per task
    # instead -- confirm the intent before changing.
    tasks = [
        (k_td, fold, device)
        for k_td in k_td_values
        for fold in range(k_folds)
        for device in devices
    ]

    with Pool(n_workers) as pool:
        # starmap unpacks each tuple into positional arguments and returns
        # the results in the same order as ``tasks``.
        return pool.starmap(defs.train_worker, tasks)

In [2]:
import defs
# Smoke-test the task grid: 3 k_td values x 3 folds x 3 devices.
run_parallel(
    dataset='foo',
    k_folds=3,
    k_td_values=[0, 0.5, 1],
    devices=[1, 2, 3],
)

[{'fold': 0,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 0,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 0,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 1,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 1,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 1,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 2,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 2,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 2,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 0,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 0,
  'k_td': 0.5,
  'results': '...need to implement main training logic...'},
 {'fold': 0,
  'k_td'

In [1]:
import defs
from multiprocessing import Pool
import time

# Benchmark defs.train_worker_2 sequentially vs. in a 3-process pool.
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock meant for measuring elapsed intervals;
# time.time() can jump if the system clock is adjusted.

# Test sequential
start_time = time.perf_counter()
results_seq = [defs.train_worker_2(i) for i in range(3)]
seq_time = time.perf_counter() - start_time
print(f"Sequential took: {seq_time:.2f} seconds")

# Test parallel (the measured interval includes pool start-up and shutdown)
start_time = time.perf_counter()
with Pool(3) as p:
    results_par = p.map(defs.train_worker_2, range(3))
par_time = time.perf_counter() - start_time
print(f"Parallel took: {par_time:.2f} seconds")

# A value below 1 means the parallel run was slower than the sequential one.
print(f"Speedup factor: {seq_time/par_time:.2f}x")

Sequential took: 7.14 seconds


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Parallel took: 13.23 seconds
Speedup factor: 0.54x


In [1]:
import os
import time
from multiprocessing import Pool
import defs

# Set TOKENIZERS_PARALLELISM explicitly, as the huggingface/tokenizers
# "process just got forked" warning recommends, before any worker
# processes are started by the Pool used further down.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def run_benchmark():
    """Time ``defs.train_fold`` for folds 0-2, sequentially and in parallel.

    The three folds are first trained one after another in this process,
    then again through a 3-process ``multiprocessing.Pool``. Both elapsed
    times and their ratio (sequential / parallel) are printed. The training
    results themselves are discarded; nothing is returned.
    """
    n_folds = 3

    # Test sequential
    print("Running sequential training...")
    # perf_counter is monotonic and high-resolution, unlike time.time(),
    # which can jump if the system clock is adjusted mid-run.
    start_time = time.perf_counter()
    for fold in range(n_folds):
        defs.train_fold(fold)
    seq_time = time.perf_counter() - start_time
    print(f"Sequential took: {seq_time:.2f} seconds")

    # Test parallel (the measured interval includes pool start-up/shutdown)
    print("\nRunning parallel training...")
    start_time = time.perf_counter()
    with Pool(n_folds) as p:
        p.map(defs.train_fold, range(n_folds))
    par_time = time.perf_counter() - start_time
    print(f"Parallel took: {par_time:.2f} seconds")
    print(f"Speedup factor: {seq_time/par_time:.2f}x")

# Entry-point guard: only run the benchmark when this code is executed as
# the main module, not when it is merely imported.
if __name__ == "__main__":
    run_benchmark()

Running sequential training...
{'train_runtime': 6.4204, 'train_samples_per_second': 77.876, 'train_steps_per_second': 4.984, 'train_loss': 0.3266555666923523, 'epoch': 1.0}
{'train_runtime': 6.3789, 'train_samples_per_second': 78.384, 'train_steps_per_second': 5.017, 'train_loss': 0.31156042218208313, 'epoch': 1.0}
{'train_runtime': 5.8604, 'train_samples_per_second': 85.319, 'train_steps_per_second': 5.46, 'train_loss': 0.31156042218208313, 'epoch': 1.0}
Sequential took: 25.23 seconds

Running parallel training...
Parallel took: 23.25 seconds
Speedup factor: 1.09x


In [2]:
# @inproceedings{reimers-2019-sentence-bert,
#   title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
#   author = "Reimers, Nils and Gurevych, Iryna",
#   booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
#   month = "11",
#   year = "2019",
#   publisher = "Association for Computational Linguistics",
#   url = "https://arxiv.org/abs/1908.10084",
# }

from sentence_transformers import SentenceTransformer

# 1. Load a pretrained Sentence Transformer model by name
#    (the model files are downloaded on first use if not already cached).
model = SentenceTransformer("all-MiniLM-L6-v2")

# The three example sentences to encode
sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]

# 2. Calculate embeddings by calling model.encode(): one row per sentence
embeddings = model.encode(sentences)
print(embeddings.shape)
# (3, 384)

# 3. Calculate the pairwise similarity scores between all embeddings;
#    each sentence compared with itself scores 1.0 (the diagonal).
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[1.0000, 0.6660, 0.1046],
#         [0.6660, 1.0000, 0.1411],
#         [0.1046, 0.1411, 1.0000]])

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

(3, 384)
tensor([[1.0000, 0.6660, 0.1046],
        [0.6660, 1.0000, 0.1411],
        [0.1046, 0.1411, 1.0000]])
