In [2]:
!pip list | grep sagemaker

sagemaker                            2.192.0
sagemaker-data-insights              0.3.3
sagemaker-datawrangler               0.4.3
sagemaker-scikit-learn-extension     2.5.0
sagemaker-studio-analytics-extension 0.0.20
sagemaker-studio-sparkmagic-lib      0.1.4
[0m

In [3]:
from datasets import load_dataset

# Load the train split of the Dolly instruction dataset.
dolly_dataset = load_dataset("databricks/databricks-dolly-15k", split="train")

# Keep only the summarization examples. To train for question answering or
# information extraction instead, change the category compared against below to
# "closed_qa" or "information_extraction".
summarization_dataset = dolly_dataset.filter(lambda example: example["category"] == "summarization")
# The category column is constant after filtering, so drop it.
summarization_dataset = summarization_dataset.remove_columns("category")

# Hold out 10% of the examples as test data for evaluation at the end.
# The seed is fixed so the partition (and therefore train.jsonl) is the same on
# every Restart & Run All.
train_and_test_dataset = summarization_dataset.train_test_split(test_size=0.1, seed=42)

# Dump the training split to a local JSON Lines file to be used for training.
# The cell's displayed value is whatever `to_json` returns.
train_and_test_dataset["train"].to_json("train.jsonl")

Creating json from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

2113673

In [10]:
from sagemaker.jumpstart.estimator import JumpStartEstimator
# JumpStart model to fine-tune and the model version to request.
model_id, model_version = "meta-textgeneration-llama-2-7b", "*"

# S3 prefix supplied to the training job as its "training" channel.
# NOTE(review): no visible cell uploads train.jsonl (or a prompt template file) to
# this prefix — confirm the data is staged there before calling fit().
train_data_location = "s3://prospector-traning-data/llama2/training/"

estimator = JumpStartEstimator(
    model_id=model_id,
    # Pass the version explicitly so that editing `model_version` above takes effect.
    model_version=model_version,
    # The model requires accepting the end-user license agreement (EULA).
    environment={"accept_eula": "true"},
    disable_output_compression=True,
    # For Llama-2-70b, add instance_type = "ml.g5.48xlarge"
    # instance_type = "ml.g5.48xlarge"
)

# By default, instruction tuning is set to false. Thus, to use an instruction
# tuning dataset, set instruction_tuned="True".
estimator.set_hyperparameters(instruction_tuned="True", epoch="1", max_input_length="1024")

# Launch the training job, reading the "training" channel from S3.
estimator.fit({"training": train_data_location})

INFO:sagemaker.jumpstart:Model 'meta-textgeneration-llama-2-70b' requires accepting end-user license agreement (EULA). See https://jumpstart-cache-prod-us-east-1.s3.us-east-1.amazonaws.com/fmhMetadata/eula/llamaEula.txt for terms of use.
INFO:sagemaker:Creating training-job with name: meta-textgeneration-llama-2-70b-2023-11-04-21-21-22-537


2023-11-04 21:21:22 Starting - Starting the training job......
2023-11-04 21:22:01 Starting - Preparing the instances for training..................

KeyboardInterrupt: 

In [4]:
# Deploy the fine-tuned estimator to an inference hosting endpoint and keep the
# returned predictor for the inference and clean-up cells.
# No instance type is passed, so the SDK picks one (the recorded run logged a
# default of ml.g5.48xlarge and created a model, endpoint config and endpoint).
# NOTE(review): this relies on `estimator` from the training cell; the recorded
# fit output there ends in KeyboardInterrupt — confirm training completed in this
# kernel before re-running.
finetuned_predictor = estimator.deploy()

INFO:sagemaker.jumpstart:No instance type selected for inference hosting endpoint. Defaulting to ml.g5.48xlarge.
INFO:sagemaker:Creating model with name: meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-278
INFO:sagemaker:Creating endpoint-config with name meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-276
INFO:sagemaker:Creating endpoint with name meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-276


-----------------!

In [6]:
# Prompt/completion layout for instruction-style examples. The prompt carries
# `{instruction}` and `{context}` placeholders; the completion carries `{response}`.
template = dict(
    prompt=(
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{context}\n\n"
    ),
    completion=" {response}",
)

# Marker appended after the filled-in prompt, immediately before the model's answer.
input_output_demarkation_key = "\n\n### Response:\n"
# One sample question for the fine-tuned endpoint; it has no accompanying context,
# so the "### Input:" section of the prompt is left blank.
inst = "Is there an option for Flex to cover only my part of the rent, considering that I have roommates who share the accommodation with me?"
cont = ""

# The request text is the filled-in prompt template followed by the response marker.
payload = dict(
    inputs="".join(
        (
            template["prompt"].format(instruction=inst, context=cont),
            input_output_demarkation_key,
        )
    ),
    parameters=dict(max_new_tokens=200),
)

# Send the request, passing the EULA acceptance as a custom attribute.
finetuned_response = finetuned_predictor.predict(
    payload,
    custom_attributes="accept_eula=true",
)

# Display the raw response as the cell output.
finetuned_response

[{'generation': '  Flex is able to pay the full rent as long as your payment amounts do not exceed your approved Flex credit line. The credit line amount for each Flex member is personal to them based on Flex’s evaluation of their credit history, and their 2nd payment cannot exceed half of their approved credit line amount or the allowable borrowing limit for your state of residence.\nIf the rent amount you request ever exceeds your credit line, we’ll notify you via email or SMS during the authorization process. In these cases, we recommend that you reaching out to your property to set up a smaller monthly payment amount for Flex to be able to pay.'}]

In [7]:
# Clean up the hosting resources created by deploy().
# The endpoint deletion sits in `finally` so it is still attempted when deleting the
# model raises; otherwise a failure here would leave the endpoint running.
try:
    finetuned_predictor.delete_model()
finally:
    finetuned_predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-278
INFO:sagemaker:Deleting endpoint configuration with name: meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-276
INFO:sagemaker:Deleting endpoint with name: meta-textgeneration-llama-2-70b-2023-11-03-21-28-22-276
