
***
It has been observed that endpoint hosting using the HuggingFace LLM 0.8.2 container produces intermittent deployment failures. The error is a health check timeout due to model loading failure. CloudWatch logs report all shards are starting, but only a subset of shards report they are ready. An example CloudWatch log is at the end of this notebook.

These errors have been observed for `tiiuae/falcon-40b` and `tiiuae/falcon-40b-instruct`, across instances `ml.g5.12xlarge`, `ml.g5.48xlarge`, and `ml.p4d.24xlarge`, and with `SM_NUM_GPUS` set to 4 and 8 (for appropriate instances).
***

In [2]:
from concurrent import futures

import boto3
from botocore.config import Config
from sagemaker.session import Session
import sagemaker
from sagemaker.huggingface import get_huggingface_llm_image_uri
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.jumpstart.model import JumpStartModel



# Total number of endpoint deployments attempted by the experiment.
NUM_DEPLOYMENTS = 50
# Maximum number of deployments in flight at once; used as the thread-pool
# size when the deployments are fanned out.
MAX_CONCURRENT_DEPLOYMENTS = 25
# SageMaker session shared by every deployment. The boto3 client is built with
# explicit connect/read timeouts and up to 20 retry attempts -- presumably to
# ride out API throttling when many deployments run concurrently (confirm).
SM_SESSION = Session(
    sagemaker_client=boto3.client(
        "sagemaker",
        config=Config(connect_timeout=5, read_timeout=60, retries={"max_attempts": 20}),
    )
)
# Execution role handed to HuggingFaceModel on the non-JumpStart path.
SM_ROLE = sagemaker.get_execution_role()


def deploy_llm_endpoint(
    model_id: str,
    instance_type: str = "ml.g5.12xlarge",
    number_of_gpu: int = 4,
    max_input_length: int = 1024,
    max_total_tokens: int = 2048,
    health_check_timeout: int = 1200,
    use_jumpstart: bool = True,
) -> bool:
    """Deploy one LLM endpoint, report whether it came up, then tear it down.

    The return value reflects the deployment only. Teardown of the model and
    endpoint is best-effort: each step is attempted independently, and a
    teardown error is printed but does not turn a successful deployment into
    a reported failure.

    Args:
        model_id: JumpStart model ID when ``use_jumpstart`` is true, otherwise
            the value placed in the container's ``HF_MODEL_ID`` variable.
        instance_type: Instance type for the endpoint. Only used on the
            non-JumpStart path; the JumpStart path calls ``deploy()`` with
            its defaults.
        number_of_gpu: Value for ``SM_NUM_GPUS`` (non-JumpStart path only).
        max_input_length: Value for ``MAX_INPUT_LENGTH`` (non-JumpStart path
            only).
        max_total_tokens: Value for ``MAX_TOTAL_TOKENS`` (non-JumpStart path
            only).
        health_check_timeout: Passed as
            ``container_startup_health_check_timeout``, in seconds
            (non-JumpStart path only).
        use_jumpstart: Deploy through ``JumpStartModel`` when true, otherwise
            through ``HuggingFaceModel`` on the Hugging Face LLM 0.8.2 image.

    Returns:
        True if the endpoint deployed, False if deployment raised.
    """
    try:
        if use_jumpstart:
            model = JumpStartModel(model_id=model_id, sagemaker_session=SM_SESSION)
            predictor = model.deploy()
        else:
            env = {
                "SM_NUM_GPUS": str(number_of_gpu),
                "MAX_INPUT_LENGTH": str(max_input_length),
                "MAX_TOTAL_TOKENS": str(max_total_tokens),
                "HF_MODEL_ID": model_id,
            }
            llm_image = get_huggingface_llm_image_uri("huggingface", version="0.8.2")
            model = HuggingFaceModel(role=SM_ROLE, image_uri=llm_image, env=env, sagemaker_session=SM_SESSION)
            predictor = model.deploy(
                initial_instance_count=1,
                instance_type=instance_type,
                container_startup_health_check_timeout=health_check_timeout,
            )
    except Exception as e:
        # Broad catch is deliberate: this runs in a worker thread and any
        # deployment error should be recorded as a failed attempt rather than
        # abort the whole experiment.
        # NOTE(review): resources created by the failed attempt are not
        # removed here, since no predictor is available -- confirm whether
        # they need separate cleanup.
        print(f"\nError with deploying model: {e}")
        return False

    print("\nSuccessful deployment.")

    # Tear down in the original order (model first, then endpoint). Each step
    # gets its own try so a failure in one still lets the other run and does
    # not leave a billable endpoint behind.
    for cleanup in (predictor.delete_model, predictor.delete_endpoint):
        try:
            cleanup()
        except Exception as e:
            print(f"\nError during cleanup ({cleanup.__name__}): {e}")
    return True


# Fan out NUM_DEPLOYMENTS identical deployments, at most
# MAX_CONCURRENT_DEPLOYMENTS at a time, and collect one boolean per attempt.
# Alternative target used in earlier runs: "tiiuae/falcon-40b".
with futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENT_DEPLOYMENTS) as executor:
    results = list(
        executor.map(
            deploy_llm_endpoint,
            ("huggingface-llm-falcon-7b-instruct-bf16" for _ in range(NUM_DEPLOYMENTS)),
        )
    )

# Report the per-attempt outcomes and the fraction that deployed.
print(results)
print(f"Success rate: {results.count(True) / len(results)}")

-----------------------------------!-
Successful deployment.
--!-
Successful deployment.
------!--
Successful deployment.
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

***
An example CloudWatch log is as follows. Note that all 4 shards report that they are starting, but only 3 of the 4 (shards 0, 1, and 3) report that their server started and that they are ready; shard 2 never reports again after starting. Because the model is never fully loaded, the endpoint status never becomes InService.
***

```
#033[2m2023-06-29T18:47:04.471989Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Args { model_id: "tiiuae/falcon-40b-instruct", revision: None, sharded: None, num_shard: Some(4), quantize: None, trust_remote_code: false, max_concurrent_requests: 128, max_best_of: 2, max_stop_sequences: 4, max_input_length: 1024, max_total_tokens: 2048, max_batch_size: None, waiting_served_ratio: 1.2, max_batch_total_tokens: 32000, max_waiting_tokens: 20, port: 8080, shard_uds_path: "/tmp/text-generation-server", master_addr: "localhost", master_port: 29500, huggingface_hub_cache: Some("/tmp"), weights_cache_override: None, disable_custom_kernels: false, json_output: false, otlp_endpoint: None, cors_allow_origin: [], watermark_gamma: None, watermark_delta: None, env: false }
#033[2m2023-06-29T18:47:04.472012Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Sharding model on 4 processes
#033[2m2023-06-29T18:47:04.472106Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Starting download process.
#033[2m2023-06-29T18:47:16.341324Z#033[0m #033[33m WARN#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m No safetensors weights found for model tiiuae/falcon-40b-instruct at revision None. Downloading PyTorch weights.
#033[2m2023-06-29T18:47:16.438090Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00001-of-00009.bin
#033[2m2023-06-29T18:47:26.478800Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00001-of-00009.bin in 0:00:10.
#033[2m2023-06-29T18:47:26.478868Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [1/9] -- ETA: 0:01:20
#033[2m2023-06-29T18:47:26.479057Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00002-of-00009.bin
#033[2m2023-06-29T18:47:36.604739Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00002-of-00009.bin in 0:00:10.
#033[2m2023-06-29T18:47:36.604792Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [2/9] -- ETA: 0:01:10
#033[2m2023-06-29T18:47:36.605046Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00003-of-00009.bin
#033[2m2023-06-29T18:47:46.483478Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00003-of-00009.bin in 0:00:09.
#033[2m2023-06-29T18:47:46.483531Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [3/9] -- ETA: 0:01:00
#033[2m2023-06-29T18:47:46.483797Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00004-of-00009.bin
#033[2m2023-06-29T18:47:56.720941Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00004-of-00009.bin in 0:00:10.
#033[2m2023-06-29T18:47:56.720988Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [4/9] -- ETA: 0:00:50
#033[2m2023-06-29T18:47:56.721237Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00005-of-00009.bin
#033[2m2023-06-29T18:48:06.677574Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00005-of-00009.bin in 0:00:09.
#033[2m2023-06-29T18:48:06.677616Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [5/9] -- ETA: 0:00:40
#033[2m2023-06-29T18:48:06.677909Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00006-of-00009.bin
#033[2m2023-06-29T18:48:16.702302Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00006-of-00009.bin in 0:00:10.
#033[2m2023-06-29T18:48:16.702352Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [6/9] -- ETA: 0:00:30
#033[2m2023-06-29T18:48:16.702601Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00007-of-00009.bin
#033[2m2023-06-29T18:48:26.117507Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00007-of-00009.bin in 0:00:09.
#033[2m2023-06-29T18:48:26.117582Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [7/9] -- ETA: 0:00:19.714286
#033[2m2023-06-29T18:48:26.117952Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00008-of-00009.bin
#033[2m2023-06-29T18:48:36.012255Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00008-of-00009.bin in 0:00:09.
#033[2m2023-06-29T18:48:36.012310Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [8/9] -- ETA: 0:00:09.875000
#033[2m2023-06-29T18:48:36.012584Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download file: pytorch_model-00009-of-00009.bin
#033[2m2023-06-29T18:48:43.393926Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Downloaded /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00009-of-00009.bin in 0:00:07.
#033[2m2023-06-29T18:48:43.393971Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Download: [9/9] -- ETA: 0
#033[2m2023-06-29T18:48:43.394061Z#033[0m #033[33m WARN#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m No safetensors weights found for model tiiuae/falcon-40b-instruct at revision None. Converting PyTorch weights to safetensors.
#033[2m2023-06-29T18:48:43.394258Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00001-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00001-of-00009.safetensors.
#033[2m2023-06-29T18:48:54.330215Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [1/9] -- Took: 0:00:10.935644
#033[2m2023-06-29T18:48:54.330260Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00002-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00002-of-00009.safetensors.
#033[2m2023-06-29T18:49:05.322326Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [2/9] -- Took: 0:00:10.991724
#033[2m2023-06-29T18:49:05.322371Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00003-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00003-of-00009.safetensors.
#033[2m2023-06-29T18:49:16.351771Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [3/9] -- Took: 0:00:11.029070
#033[2m2023-06-29T18:49:16.351828Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00004-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00004-of-00009.safetensors.
#033[2m2023-06-29T18:49:27.375137Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [4/9] -- Took: 0:00:11.022988
#033[2m2023-06-29T18:49:27.375209Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00005-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00005-of-00009.safetensors.
#033[2m2023-06-29T18:49:38.369999Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [5/9] -- Took: 0:00:10.994369
#033[2m2023-06-29T18:49:38.370057Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00006-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00006-of-00009.safetensors.
#033[2m2023-06-29T18:49:49.408964Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [6/9] -- Took: 0:00:11.038674
#033[2m2023-06-29T18:49:49.409020Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00007-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00007-of-00009.safetensors.
#033[2m2023-06-29T18:50:00.279620Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [7/9] -- Took: 0:00:10.870234
#033[2m2023-06-29T18:50:00.279689Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00008-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00008-of-00009.safetensors.
#033[2m2023-06-29T18:50:11.257998Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [8/9] -- Took: 0:00:10.978009
#033[2m2023-06-29T18:50:11.258055Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/pytorch_model-00009-of-00009.bin to /tmp/models--tiiuae--falcon-40b-instruct/snapshots/1e7fdcc9f45d13704f3826e99937917e007cd975/model-00009-of-00009.safetensors.
#033[2m2023-06-29T18:50:20.044315Z#033[0m #033[32m INFO#033[0m #033[1mdownload#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Convert: [9/9] -- Took: 0:00:08.785951
#033[2m2023-06-29T18:50:27.048080Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Successfully downloaded weights.
#033[2m2023-06-29T18:50:27.048387Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Starting shard 0
#033[2m2023-06-29T18:50:27.049129Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Starting shard 1
#033[2m2023-06-29T18:50:27.049266Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Starting shard 2
#033[2m2023-06-29T18:50:27.049300Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Starting shard 3
#033[2m2023-06-29T18:50:37.059973Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 1 to be ready...
#033[2m2023-06-29T18:50:37.060148Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 0 to be ready...
#033[2m2023-06-29T18:50:37.060688Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 3 to be ready...
#033[2m2023-06-29T18:50:47.070521Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 1 to be ready...
#033[2m2023-06-29T18:50:47.070595Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 0 to be ready...
#033[2m2023-06-29T18:50:47.071024Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Waiting for shard 3 to be ready...
#033[2m2023-06-29T18:50:50.399921Z#033[0m #033[32m INFO#033[0m #033[1mshard-manager#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Server started at unix:///tmp/text-generation-server-0
 #033[2m#033[3mrank#033[0m#033[2m=#033[0m0#033[0m
#033[2m2023-06-29T18:50:50.403454Z#033[0m #033[32m INFO#033[0m #033[1mshard-manager#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Server started at unix:///tmp/text-generation-server-3
 #033[2m#033[3mrank#033[0m#033[2m=#033[0m3#033[0m
#033[2m2023-06-29T18:50:50.404596Z#033[0m #033[32m INFO#033[0m #033[1mshard-manager#033[0m: #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Server started at unix:///tmp/text-generation-server-1
 #033[2m#033[3mrank#033[0m#033[2m=#033[0m1#033[0m
#033[2m2023-06-29T18:50:50.473772Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Shard 1 ready in 23.422998035s
#033[2m2023-06-29T18:50:50.474256Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Shard 0 ready in 23.42347076s
#033[2m2023-06-29T18:50:50.474388Z#033[0m #033[32m INFO#033[0m #033[2mtext_generation_launcher#033[0m#033[2m:#033[0m Shard 3 ready in 23.423304398s
```