In [68]:
!pip install "transformers==4.31.0" "datasets[s3]==2.13.0" sagemaker --upgrade --quiet
!pip install "sagemaker>=2.163.0" --upgrade --quiet

In [None]:
!huggingface-cli login --token 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

In [None]:
import sagemaker
import boto3
# Bucket used for uploaded data, models and logs. Leave as None to use the session's
# default bucket (per the original note, SageMaker creates it if it does not exist).
sagemaker_session_bucket = None

sess = sagemaker.Session()
if sess is not None and sagemaker_session_bucket is None:
    # No bucket name was given: fall back to the session's default bucket.
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role ARN. When the SDK signals with ValueError that it cannot
# provide one, look the role up by its fixed name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_description['Role']['Arn']

# Re-create the session so that it is explicitly bound to the selected bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Report the resolved settings.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


In [73]:
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Look up the URI of the Hugging Face LLM inference container (backend "huggingface",
# version "0.9") and print it so the exact image in use is recorded.
llm_image = get_huggingface_llm_image_uri("huggingface", version="0.9")
print(f"llm image uri: {llm_image}")


import json
from sagemaker.huggingface import HuggingFaceModel

# Endpoint sizing.
# TODO(review): open question from the author - why is the deployment instance larger
# than the cluster that was used for fine-tuning?
instance_type = "ml.g5.12xlarge"
number_of_gpu = 4
health_check_timeout = 300

# Model and endpoint configuration, passed to the container as environment variables.
# Numeric settings are serialized to strings with json.dumps.
config = dict(
    HF_MODEL_ID="/opt/ml/model",
    SM_NUM_GPUS=json.dumps(number_of_gpu),  # number of GPUs used per replica
    MAX_INPUT_LENGTH=json.dumps(3072),  # max length of the input text
    MAX_TOTAL_TOKENS=json.dumps(4096),  # max length of the generation (including input text)
    # HF_MODEL_QUANTIZE="bitsandbytes",  # uncomment to quantize; do not add it if the model is already quantized
)

# Artifact of the successful SageMaker training job. This is the Aug 10th run,
# i.e. the second fine-tuned LLaMA-13B.
model_path = "s3://sagemaker-us-east-1-116961472995/huggingface-qlora-2023-08-10-00-53-52-2023-08-10-00-53-52-626/output/model.tar.gz"

# Create the HuggingFaceModel from the container image and the trained artifact.
llm_model = HuggingFaceModel(
    env=config,
    image_uri=llm_image,
    role=role,
    model_data=model_path,  # change to your model path
)

llm image uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.1-tgi0.9.3-gpu-py39-cu118-ubuntu20.04


In [74]:
# Deploy the model to a SageMaker endpoint. Per the author's notes, allocating the
# resources takes about 5-7 minutes, and the call fails when the number of endpoints
# already reaches the managed quota for this type of GPU instance.
llm = llm_model.deploy(
    instance_type=instance_type,
    initial_instance_count=1,
    # volume_size=40,  # must be None on instances with local SSD storage (e.g. p4, but not p3)
    container_startup_health_check_timeout=health_check_timeout,  # time allowed for loading the model
)

------------!

In [None]:
from datasets import load_dataset
from random import randrange

# Load the evaluation data from the Hugging Face Hub.
# The dataset is private: authenticate the Hugging Face CLI with a token that has been
# added to the Yield organization before running this cell.
data_files = dict(train="qa_pairs_train.jsonl", test="qa_pairs_test.jsonl")

# Both files are registered, but only the test split is kept.
dataset = load_dataset(
    "YieldInc/chatbot_qa_dataset_splitted",
    data_files=data_files,
)['test']

print(f"dataset size: {len(dataset)}")

In [75]:
def format_yield_validate(sample):
    """Build the inference prompt for one QA sample.

    The prompt consists of an instruction section, an optional context section and a
    trailing answer header, separated by blank lines.

    Args:
        sample: Mapping with a 'question' entry and a 'contexts' entry (a sequence of
            context texts). Only the first two contexts are used.

    Returns:
        The prompt string. The context section is left out when the sample has no
        contexts.

    Raises:
        KeyError: If 'question' or 'contexts' is missing from the sample.
    """
    instruction = f"%%% Instruction\n{sample['question']}"

    # Use at most the first two contexts. Slicing (instead of indexing [0] and [1])
    # keeps samples with fewer than two contexts from raising IndexError.
    contexts = (sample['contexts'] or [])[:2]
    if contexts:
        context = "%%% Context\n" + "\n".join(f"{text}" for text in contexts)
    else:
        # No context available: the section is dropped by the filter below.
        context = None

    response = "%%% Answer"

    # Join all the parts that are present, separated by blank lines.
    parts = [instruction, context, response]
    return "\n\n".join(part for part in parts if part is not None)

In [100]:
# Pick a random sample from the dataset and turn it into a prompt.
sample = dataset[randrange(len(dataset))]
prompt = format_yield_validate(sample)

# Request body for the endpoint: the prompt plus the generation hyperparameters.
payload = dict(
    inputs=prompt,
    parameters=dict(
        do_sample=True,
        top_p=0.9,
        temperature=0.1,
        top_k=50,
        max_new_tokens=1024,
        repetition_penalty=1.03,
        stop=["</s>"],
    ),
)

# Show the prompt, then the generated answer next to the reference answer.
print(prompt)
response = llm.predict(payload)
output = response[0]['generated_text']
print(f"GENERATED ANSWER: {output}\n\n")
print(f"REFERENCE ANSWER: {sample['answer']}\n\n")

%%% Instruction
I see that there is a 'setRaisingAccessController' function in the FlagsInterface. Can you explain how I can use this function to manage access control in Yield Protocol?

%%% Context
# FlagsInterface

# Solidity API

## FlagsInterface

### getFlag

```solidity
function getFlag(address) external view returns (bool)
```

### getFlags

```solidity
function getFlags(address[]) external view returns (bool[])
```

### raiseFlag

```solidity
function raiseFlag(address) external
```

### raiseFlags

```solidity
function raiseFlags(address[]) external
```

### lowerFlags

```solidity
function lowerFlags(address[]) external
```

### setRaisingAccessController

```solidity
function setRaisingAccessController(address) external
```
# smart_contracts_overview

## AccessControl

The access control contract was adapted from OpenZeppelin's AccessControl.sol and is inherited from most other contracts in the Yield Protocol.

A role exists implicitly for each function in a contract, with 

In [101]:
# Clean up when the endpoint is no longer needed: remove the model first, then the endpoint.
# NOTE(review): keep this order - presumably delete_model() still needs the endpoint's
# configuration to find the model; verify against the SageMaker SDK before reordering.
llm.delete_model()
llm.delete_endpoint()