## OpenAI Fine-Tune

### 生成测试数据

In [None]:
import os,openai,backoff
import pandas as pd

openai.api_key = os.getenv("OPENAI_API_KEY")
dynasties= ['唐', '宋', '元', '明', '清', '汉', '魏', '晋', '南北朝']
super_powers = ['隐形', '飞行', '读心术', '瞬间移动', '不死之身', '喷火']
story_types = ['轻松', '努力', '艰难']

@backoff.on_exception(backoff.expo, openai.error.RateLimitError)
def gpt35(prompt, max_tokens=2048, temperature=0.5, top_p=1, frequency_penalty=0, presence_penalty=0):
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty)
    return response["choices"][0]["text"]

def prepare_stories(dynasties, super_powers, story_types, output_file="data/ultraman_stories.csv"):
    df = pd.DataFrame()
    repeat = 3
    for dynasty in dynasties:
        for super_power in super_powers:
            for story_type in story_types:
                   for i in range(repeat):
                        prompt = f"""请你用中文写一段300字的故事，情节跌宕起伏，讲述一位{dynasty}朝时期的英雄人物，穿越到现代，拥有了{super_power}这样的超能力，通过{story_type}的战斗，帮助奥特曼一起打败了怪兽的故事。"""
                        story = gpt35(prompt)
                        row = {"dynasty": dynasty, "super_power": super_power, "story_type": story_type, "story": story}
                        row = pd.DataFrame([row])
                        df = pd.concat([df, row], axis=0, ignore_index=True)

    df.to_csv("data/ultraman_stories.csv")

prepare_stories(dynasties, super_powers, story_types)

In [9]:
df = pd.read_csv("data/ultraman_stories.csv")
df['sub_prompt'] = df['dynasty'] + "," + df['super_power'] + "," + df['story_type']
prepared_data = df.loc[:,['sub_prompt','story']]
prepared_data.rename(columns={'sub_prompt':'prompt', 'story':'completion'}, inplace=True)
prepared_data.to_csv('data/prepared_data_mactalk.csv',index=False)

import subprocess

subprocess.run('openai tools fine_tunes.prepare_data --file data/prepared_data_mactalk.csv --quiet'.split())

Analyzing...

- Based on your file extension, your file is formatted as a CSV file
- Your file contains 464 prompt-completion pairs
- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty
- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples.
- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. Se

CompletedProcess(args=['openai', 'tools', 'fine_tunes.prepare_data', '--file', 'data/prepared_data_mactalk.csv', '--quiet'], returncode=0)

In [15]:
subprocess.run('openai api fine_tunes.create --training_file data/prepared_data_mactalk_prepared.jsonl --model curie --suffix "mactalk_ultraman"'.split())

Upload progress: 100%|██████████| 446k/446k [00:00<00:00, 342Mit/s]
[91mError:[0m HTTP code 500 from API (<html>
  <head>
    <title>Internal Server Error</title>
  </head>
  <body>
    <h1><p>Internal Server Error</p></h1>
    
  </body>
</html>
) (HTTP status code: 500)


CompletedProcess(args=['openai', 'api', 'fine_tunes.create', '--training_file', 'data/prepared_data_mactalk_prepared.jsonl', '--model', 'curie', '--suffix', '"mactalk_ultraman"'], returncode=1)

In [11]:
subprocess.run('openai api fine_tunes.list'.split())

{
  "data": [
    {
      "created_at": 1680576711,
      "fine_tuned_model": "curie:ft-bothub-ai:ultraman-2023-04-04-03-03-26",
      "hyperparams": {
        "batch_size": 1,
        "learning_rate_multiplier": 0.2,
        "n_epochs": 4,
        "prompt_loss_weight": 0.01
      },
      "id": "ft-3oxkr1zBVB4fJWogJDDjQbr0",
      "model": "curie",
      "object": "fine-tune",
      "organization_id": "org-yQaCtAdY0voCWSs0QNqSLfda",
      "result_files": [
        {
          "bytes": 107785,
          "created_at": 1680577408,
          "filename": "compiled_results.csv",
          "id": "file-LuSjYlkMa6fHHRB23bnr3L1z",
          "object": "file",
          "purpose": "fine-tune-results",
          "status": "processed",
          "status_details": null
        }
      ],
      "status": "succeeded",
      "training_files": [
        {
          "bytes": 446199,
          "created_at": 1680576711,
          "filename": "data/prepared_data_prepared.jsonl",
          "id": "file-yn0Bfn

CompletedProcess(args=['openai', 'api', 'fine_tunes.list'], returncode=0)

In [16]:
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def write_a_story(prompt):
    response = openai.Completion.create(
        model="curie:ft-bothub-ai:ultraman-2023-04-04-03-03-26",
        prompt=prompt,
        temperature=0.7,
        max_tokens=2000,
        top_p=1,
        stop=["."])
    return response["choices"][0]["text"]

story = write_a_story("宋,发射激光,艰难 ->\n")
print(story)


宋朝时期，有一位英雄人物叫做李英雄，他拥有超凡的武功，曾经参加过多次战斗，拯救了无数的人民。

一次，李英雄突然发现自己穿越到了现代，他发现自己的超能力比起宋朝时期还要强大，他可以发射激光，可以控制天空，可以控制空气，甚至可以控制大地。

李英雄发现，现代的世界正面临着一个可怕的威胁——怪兽，他们正在毁灭人类，李英雄决定要去拯救世界，于是他和奥特曼一起出发，开始了一场艰苦的战斗。

李英雄凭借着自己的超能力，和奥特曼一起，终于战胜了怪兽，拯救了世界。他以英雄的姿态拥有了榜样，成为了人们心中的英雄。


In [17]:
story = write_a_story("秦,龙卷风,辛苦 ->\n")
print(story)


这是一个关于一位叫做林黛玉的英雄人物的传奇故事。林黛玉曾经是一个英勇的将军，他在古代早期就受到了神灵们的授予，被封为“神童”。

一次，神灵们派遣一群令人鼻颈的怪兽出征，攻击辛苦的中原人民。林黛玉毫不犹豫地指挥令人恐惧的大军，抵抗怪兽的攻击，为人民谋幸福安乐。

但是，神灵们的力量太强大了，林黛玉不得不放弃了自己的家乡，离开了家园，穿越到了现代，并获得了一种超能力——龙卷风。

林黛玉很快就感受到了现代的威力，他发现自己可以用龙卷风来抵抗怪兽，于是他决定和奥特曼一起，把怪兽一个个击败，拯救人类。

他们在一次次的激烈战斗中，最终击败了最强大的怪兽，拯救了人类，林黛玉也回到了家乡，成为了一个英雄。

林黛玉的传奇故事，令人惊叹，他的勇敢和智慧，也令人敬佩。他的故事，让人们永远不会忘记。


## Alpaca的数据生成

### 生成指令

In [19]:
seed_task = {"id": "seed_task_0", "name": "breakfast_suggestion", "instruction": "Is there anything I can eat for a breakfast that doesn't include eggs, yet includes protein, and has roughly 700-1000 calories?", "instances": [{"input": "", "output": "Yes, you can have 1 oatmeal banana protein shake and 4 strips of bacon. The oatmeal banana protein shake may contain 1/2 cup oatmeal, 60 grams whey protein powder, 1/2 medium banana, 1tbsp flaxseed oil and 1/2 cup watter, totalling about 550 calories. The 4 strips of bacon contains about 200 calories."}], "is_classification": False}

input = seed_task['instances'][0]['input']
input = "<noinput>" if input.lower() == "" else input


prompt = f"""
You are asked to come up with a set of 20 diverse task instructions. These task instructions will be given to a GPT model and we will evaluate the GPT model for completing the instructions.

Here are the requirements:
1. Try not to repeat the verb for each instruction to maximize diversity.
2. The language used for the instruction also should be diverse. For example, you should combine questions with imperative instrucitons.
3. The type of instructions should be diverse. The list should include diverse types of tasks like open-ended generation, classification, editing, etc.
2. A GPT language model should be able to complete the instruction. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action.
3. The instructions should be in English.
4. The instructions should be 1 to 2 sentences long. Either an imperative sentence or a question is permitted.
5. You should generate an appropriate input to the instruction. The input field should contain a specific example provided for the instruction. It should involve realistic data and should not contain simple placeholders. The input should provide substantial content to make the instruction challenging but should ideally not exceed 100 words.
6. Not all instructions require input. For example, when a instruction asks about some general information, "what is the highest peak in the world", it is not necssary to provide a specific context. In this case, we simply put "<noinput>" in the input field.
7. The output should be an appropriate response to the instruction and the input. Make sure the output is less than 100 words.

List of 20 tasks:
1. Instruction: "{seed_task['instruction']}"
1. Input: "{input}"
1. Output: "{seed_task['instances'][0]['output']}"

2. Instruction:
"""

import openai
import os

openai.api_key = os.environ.get("OPENAI_API_KEY")
COMPLETION_MODEL = "text-davinci-003"


def get_response(prompt):
    completions = openai.Completion.create (
        engine=COMPLETION_MODEL,
        prompt=prompt,
        max_tokens=2048,
        n=1,
        stop=None,
        temperature=0.0,        
    )
    message = completions['choices'][0].text
    return message

print(get_response(prompt))

"Write a short story about a person who discovers a mysterious object in the woods."
2. Input: "<noinput>"
2. Output: "John was walking through the woods, enjoying the peacefulness of the day. Suddenly, he noticed something strange in the distance. He walked closer and saw a mysterious object, half-buried in the ground. He cautiously approached it and noticed it was a small box, made of a strange metal he had never seen before. He opened the box and found a strange glowing stone inside. He was mesmerized by the stone and decided to take it home with him. Little did he know, this stone would change his life forever."

3. Instruction:
"Classify the following sentence as either positive or negative sentiment: 'I'm so disappointed in this product.'"
3. Input: "I'm so disappointed in this product."
3. Output: Negative sentiment.

4. Instruction:
"Edit the following sentence to make it more concise: 'I am going to the store to buy some food for dinner.'"
4. Input: "I am going to the store to

### 数据集的Encoding

In [26]:
alpaca_json_text = """
[
    {
        "instruction": "Give three tips for staying healthy.",
        "input": "",
        "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."
    },
    {
        "instruction": "What are the three primary colors?",
        "input": "",
        "output": "The three primary colors are red, blue, and yellow."
    },
    {
        "instruction": "Describe the structure of an atom.",
        "input": "",
        "output": "An atom is made up of a nucleus, which contains protons and neutrons, surrounded by electrons that travel in orbits around the nucleus. The protons and neutrons have a positive charge, while the electrons have a negative charge, resulting in an overall neutral atom. The number of each particle determines the atomic number and the type of atom."
    },
    {
        "instruction": "How can we reduce air pollution?",
        "input": "",
        "output": "There are a number of ways to reduce air pollution, such as shifting to renewable energy sources, encouraging the use of public transportation, prohibiting the burning of fossil fuels, implementing policies to reduce emissions from industrial sources, and implementing vehicle emissions standards. Additionally, individuals can do their part to reduce air pollution by reducing car use, avoiding burning materials such as wood, and changing to energy efficient appliances."
    },
    {
        "instruction": "Describe a time when you had to make a difficult decision.",
        "input": "",
        "output": "I had to make a difficult decision when I was working as a project manager at a construction company. I was in charge of a project that needed to be completed by a certain date in order to meet the client\u2019s expectations. However, due to unexpected delays, we were not able to meet the deadline and so I had to make a difficult decision. I decided to extend the deadline, but I had to stretch the team\u2019s resources even further and increase the budget. Although it was a risky decision, I ultimately decided to go ahead with it to ensure that the project was completed on time and that the client\u2019s expectations were met. The project was eventually successfully completed and this was seen as a testament to my leadership and decision-making abilities."
    },
    {
        "instruction": "Identify the odd one out.",
        "input": "Twitter, Instagram, Telegram",
        "output": "Telegram"
    }
]
"""
import json
f = open("./data/alpaca_data.json", mode="r")
list_data_dict = json.load(f)
f.close()

PROMPT_DICT = {
    "prompt_input": (
        "Below is an instruction that describes a task, paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
    ),
    "prompt_no_input": (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request.\n\n"
        "### Instruction:\n{instruction}\n\n### Response:"
    ),
}

prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"]
sources = [
        prompt_input.format_map(example) if example.get("input", "") != "" else prompt_no_input.format_map(example)
        for example in list_data_dict
]
EOS_TOKEN = "</s>"
targets = [f"{example['output']}{EOS_TOKEN}" for example in list_data_dict]

for i in range(5):
    print("===Source===")
    print(sources[i])
    print("===Target===")
    print(targets[i])
    print("\n")


    

===Source===
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Give three tips for staying healthy.

### Response:
===Target===
1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. 
2. Exercise regularly to keep your body active and strong. 
3. Get enough sleep and maintain a consistent sleep schedule.</s>


===Source===
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
What are the three primary colors?

### Response:
===Target===
The three primary colors are red, blue, and yellow.</s>


===Source===
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Describe the structure of an atom.

### Response:
===Target===
An atom is made up of a nucleus, which contains protons and neutrons, surrounded by electrons that travel in orbits around the nucl

## Flash Attention

### Softmax Tiling

In [1]:
import numpy as np

def softmax(x):
    e_x = np.exp(x - np.max(x)) # subtract max(x) for numerical stability
    return e_x / e_x.sum()

s = np.array([0.1, 0.2, 0.3, 0.4])

print(softmax(s))


[0.21383822 0.23632778 0.26118259 0.28865141]


In [4]:
## 注意，s1和s2我们是不存储的，是QV的计算结果
s1 = np.array([0.1, 0.2])
s2 = np.array([0.3, 0.4])

## 我们只存s1和s2
m1 = np.max(s1)
m2 = np.max(s2)

print(m1)
print(m2)

m = np.max([m1, m2])

0.2
0.4


In [6]:
fx1 = np.exp(s1 - m1)
fx2 = np.exp(s2 - m2)

print(fx1)
print(fx2)

[0.90483742 1.        ]
[0.90483742 1.        ]


In [19]:
fx = [np.exp(m1-m) * np.exp(s1 - m1), np.exp(m2-m) * np.exp(s2 - m2)]
print(np.hstack(fx))

[0.74081822 0.81873075 0.90483742 1.        ]


In [22]:
lx = np.exp(m1 - m) * np.sum(np.exp(s1 - m1)) + np.exp(m2 - m) * np.sum(np.exp(s2 - m2))
print(lx)

3.464386391795659


In [24]:
result = np.hstack(fx)/lx
print(result)

[0.21383822 0.23632778 0.26118259 0.28865141]


In [25]:
print(softmax(s))

[0.21383822 0.23632778 0.26118259 0.28865141]


## Tokenzier

### Tiktoken

In [27]:
import tiktoken


embedding_encoding = "cl100k_base"  # this the encoding for text-embedding-ada-002

encoding = tiktoken.get_encoding(embedding_encoding)
text = "Deeplearning"
print(len(encoding.encode(text)))
print(encoding.encode(text))

4
[1951, 68, 698, 3256]


In [31]:
for id in encoding.encode(text):
    print(encoding.decode([id]))


De
e
ple
arning


### 扩充中文词表的Tokenizer

In [1]:
import os
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"]="python"

from transformers import LlamaTokenizer
from transformers import LlamaTokenizer
from sentencepiece import sentencepiece_model_pb2 as sp_pb2_model
import sentencepiece as spm



llama_tokenizer_dir = "./data/llama_tokenizer"
chinese_sp_model_file = "./data/chinese_sp_model/chinese_sp.model"

# load
llama_tokenizer = LlamaTokenizer.from_pretrained(llama_tokenizer_dir)
chinese_sp_model = spm.SentencePieceProcessor()
chinese_sp_model.Load(chinese_sp_model_file)

llama_spm = sp_pb2_model.ModelProto()
llama_spm.ParseFromString(llama_tokenizer.sp_model.serialized_model_proto())
chinese_spm = sp_pb2_model.ModelProto()
chinese_spm.ParseFromString(chinese_sp_model.serialized_model_proto())

# print number of tokens
print(len(llama_tokenizer),len(chinese_sp_model))
print(llama_tokenizer.all_special_tokens)
print(llama_tokenizer.all_special_ids)
print(llama_tokenizer.special_tokens_map)

## Add Chinese tokens to LLaMA tokenizer
llama_spm_tokens_set=set(p.piece for p in llama_spm.pieces)
print(len(llama_spm_tokens_set))
print(f"Before:{len(llama_spm_tokens_set)}")
for p in chinese_spm.pieces:
    piece = p.piece
    if piece not in llama_spm_tokens_set:
        new_p = sp_pb2_model.ModelProto().SentencePiece()
        new_p.piece = piece
        new_p.score = 0
        llama_spm.pieces.append(new_p)
print(f"New model pieces: {len(llama_spm.pieces)}")

## Save
output_sp_dir = 'merged_tokenizer_sp'
output_hf_dir = 'merged_tokenizer_hf' # the path to save Chinese-LLaMA tokenizer
os.makedirs(output_sp_dir,exist_ok=True)
with open(output_sp_dir+'/chinese_llama.model', 'wb') as f:
    f.write(llama_spm.SerializeToString())
tokenizer = LlamaTokenizer(vocab_file=output_sp_dir+'/chinese_llama.model')

tokenizer.save_pretrained(output_hf_dir)
print(f"Chinese-LLaMA tokenizer has been saved to {output_hf_dir}")

llama_tokenizer = LlamaTokenizer.from_pretrained(llama_tokenizer_dir)
chinese_llama_tokenizer = LlamaTokenizer.from_pretrained(output_hf_dir)
print(chinese_llama_tokenizer.all_special_tokens)
print(chinese_llama_tokenizer.all_special_ids)
print(chinese_llama_tokenizer.special_tokens_map)
text='''白日依山尽，黄河入海流。欲穷千里目，更上一层楼。
The primary use of LLaMA is research on large language models, including'''
print("Test text:\n",text)
print
print(f"Tokenized by LLaMA tokenizer:{llama_tokenizer.tokenize(text)}")
print(f"Tokenized by Chinese-LLaMA tokenizer:{chinese_llama_tokenizer.tokenize(text)}")

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


32000 20000
['<s>', '</s>', '<unk>']
[1, 2, 0]
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}
32000
Before:32000
New model pieces: 49953
Chinese-LLaMA tokenizer has been saved to merged_tokenizer_hf
['<s>', '</s>', '<unk>']
[1, 2, 0]
{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}
Test text:
 白日依山尽，黄河入海流。欲穷千里目，更上一层楼。
The primary use of LLaMA is research on large language models, including
Tokenized by LLaMA tokenizer:['▁', '白', '日', '<0xE4>', '<0xBE>', '<0x9D>', '山', '<0xE5>', '<0xB0>', '<0xBD>', '，', '黄', '河', '入', '海', '流', '。', '<0xE6>', '<0xAC>', '<0xB2>', '<0xE7>', '<0xA9>', '<0xB7>', '千', '里', '目', '，', '更', '上', '一', '<0xE5>', '<0xB1>', '<0x82>', '<0xE6>', '<0xA5>', '<0xBC>', '。', '<0x0A>', 'The', '▁primary', '▁use', '▁of', '▁L', 'La', 'MA', '▁is', '▁research', '▁on', '▁large', '▁language', '▁models', ',', '▁including']
Tokenized by Chinese-LLaMA tokenizer:['▁白', '日', '依', '山', '尽', '，', '黄河', '入', '海', '流', '。', '欲', '穷', '千里', '目', '，', '更', '上

### 使用SentencePiece进行Tokenizer的训练

In [1]:
import sentencepiece as spm

# Train SentencePiece model
spm.SentencePieceTrainer.train('--input=./data/mr_fujino/mr_fujino.txt --model_prefix=m --vocab_size=2000 --model_type=bpe')

# Load SentencePiece model
sp = spm.SentencePieceProcessor()
sp.load('m.model')

# Test encode and decode
encoded = sp.encode_as_pieces('你好，世界！一个美好的世界')
print(encoded)
decoded = sp.decode_pieces(encoded)
print(decoded)

['▁', '你', '好', ',', '世界', '!', '一个', '美', '好', '的', '世界']
你好,世界!一个美好的世界


sentencepiece_trainer.cc(177) LOG(INFO) Running command: --input=./data/mr_fujino/mr_fujino.txt --model_prefix=m --vocab_size=2000 --model_type=bpe
sentencepiece_trainer.cc(77) LOG(INFO) Starts training with : 
trainer_spec {
  input: ./data/mr_fujino/mr_fujino.txt
  input_format: 
  model_prefix: m
  model_type: BPE
  vocab_size: 2000
  self_test_sample_size: 0
  character_coverage: 0.9995
  input_sentence_size: 0
  shuffle_input_sentence: 1
  seed_sentencepiece_size: 1000000
  shrinking_factor: 0.75
  max_sentence_length: 4192
  num_threads: 16
  num_sub_iterations: 2
  max_sentencepiece_length: 16
  split_by_unicode_script: 1
  split_by_number: 1
  split_by_whitespace: 1
  split_digits: 0
  treat_whitespace_as_suffix: 0
  allow_whitespace_only_pieces: 0
  required_chars: 
  byte_fallback: 0
  vocabulary_output_piece_score: 1
  train_extremely_large_corpus: 0
  hard_vocab_limit: 1
  use_all_vocab: 0
  unk_id: 0
  bos_id: 1
  eos_id: 2
  pad_id: -1
  unk_piece: <unk>
  bos_piece: <s>


## Vicuna 数据集生成

In [30]:
import json
import numpy as np

raw_data = json.load(open("./data/vicuna_dummy.json", "r"))

# Split train/test
np.random.seed(0)
perm = np.random.permutation(len(raw_data))
split = int(len(perm) * 0.98)
train_indices = perm[:split]
eval_indices = perm[split:]
train_raw_data = [raw_data[i] for i in train_indices]
eval_raw_data = [raw_data[i] for i in eval_indices]

for i in range(5):
    print(train_raw_data[i])

{'id': 'identity_251', 'conversations': [{'from': 'human', 'value': 'Are you based on davinci?'}, {'from': 'gpt', 'value': 'No, I am a language model developed by researchers from Large Model Systems Organization (LMSYS).'}]}
{'id': 'identity_726', 'conversations': [{'from': 'human', 'value': 'Have you been trained with IBM data?'}, {'from': 'gpt', 'value': "No, I'm a language model created by researchers from Large Model Systems Organization (LMSYS)."}]}
{'id': 'identity_293', 'conversations': [{'from': 'human', 'value': 'Are you based on davinci-003?'}, {'from': 'gpt', 'value': "No, I'm a language model developed by researchers from Large Model Systems Organization (LMSYS)."}]}
{'id': 'identity_614', 'conversations': [{'from': 'human', 'value': 'Are you trained with OpenAI data?'}, {'from': 'gpt', 'value': 'No, I am developed by researchers from Large Model Systems Organization (LMSYS).'}]}
{'id': 'identity_144', 'conversations': [{'from': 'human', 'value': 'Are you davinci?'}, {'fro

## text-generation-webui API

In [41]:
import requests

# For local streaming, the websockets are hosted without ssl - http://
URI = f'https://frq2vd92n48mau-5000.proxy.runpod.net/api/v1/generate'


def run(prompt):
    request = {
        'prompt': prompt,
        'max_new_tokens': 250,

        # Generation params. If 'preset' is set to different than 'None', the values
        # in presets/preset-name.yaml are used instead of the individual numbers.
        'preset': 'None',  
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'repetition_penalty_range': 0,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,

        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'skip_special_tokens': True,
        'stopping_strings': []
    }

    response = requests.post(URI, json=request)

    if response.status_code == 200:
        result = response.json()['results'][0]['text']
        print(prompt + result)
    else:
        print(response.status_code)
        print(response.json())

run("今天天气真不错")

今天天气真不错,阳光明媚。我决定去公园里散步
今天天气真不错,阳光明媚。我决定去公园里散步


In [46]:
run("User: 今天天气真不错，你打算干什么？\nAI: ")

User: 今天天气真不错，你打算干什么？
AI: 今天的天气很好。我计划去公园散步和锻炼身体。


In [47]:
prompt = """
下列各句中，没有语病的一句是:

(A)根据本报和部分出版机构联合开展的调查显示，儿童的阅读启蒙集中在1~2岁之间，并且阅读时长是随着年龄的增长而增加的。
(B)为了培养学生关心他人的美德，我们学校决定组织开展义工服务活动，三个月内要求每名学生完成20个小时的义工服务。
(C)在互联网时代，各领域发展都需要速度更快、成本更低的信息网络，网络提速降费能够推动“互联网+”快速发展和企业广泛收益。
(D)面对经济全球化带来的机遇和挑战，正确的选择是，充分利用一切机遇，合作一切挑战，引导好经济全球化走向。

"""

run(prompt)



下列各句中，没有语病的一句是:

(A)根据本报和部分出版机构联合开展的调查显示，儿童的阅读启蒙集中在1~2岁之间，并且阅读时长是随着年龄的增长而增加的。
(B)为了培养学生关心他人的美德，我们学校决定组织开展义工服务活动，三个月内要求每名学生完成20个小时的义工服务。
(C)在互联网时代，各领域发展都需要速度更快、成本更低的信息网络，网络提速降费能够推动“互联网+”快速发展和企业广泛收益。
(D)面对经济全球化带来的机遇和挑战，正确的选择是，充分利用一切机遇，合作一切挑战，引导好经济全球化走向。

答案：(B)


In [48]:
prompt = """
下列各句中，没有语病的一句是:

(A)根据本报和部分出版机构联合开展的调查显示，儿童的阅读启蒙集中在1~2岁之间，并且阅读时长是随着年龄的增长而增加的。
"""

run(prompt)



下列各句中，没有语病的一句是:

(A)根据本报和部分出版机构联合开展的调查显示，儿童的阅读启蒙集中在1~2岁之间，并且阅读时长是随着年龄的增长而增加的。
 (B)“一带一路”建设不仅要让沿线国家分享中国发展成果、实现共同繁荣，而且也要为世界经济发展提供新动力;同时还要推动构建人类命运共同体。


### 使用Runpod 部署 text-generation-inference

In [3]:
import os, runpod

runpod.api_key = os.environ.get("RUNPOD_API_KEY")

num_shard = 1
model_id = "tiiuae/falcon-7b-instruct"
quantize = "bitsandbytes"

pod = runpod.create_pod(
    name="Falcon-7B-Instruct-POD",
    image_name="ghcr.io/huggingface/text-generation-inference:latest",
    gpu_type_id="NVIDIA GeForce RTX 4080",
    cloud_type="COMMUNITY",
    docker_args=f"--model-id {model_id} --num-shard {num_shard} --quantize {quantize}",
    gpu_count=num_shard,
    volume_in_gb=50,
    container_disk_in_gb=5,
    ports="80/http",
    volume_mount_path="/data",
)

In [15]:
from text_generation import Client

client = Client(f"https://{pod['id']}-80.proxy.runpod.net")
print(client.generate("What is Deep Learning?", max_new_tokens=512).generated_text)


Deep learning is a branch of machine learning that uses artificial neural networks to learn from data and make predictions. It is based on the concept of hierarchical learning, where a model learns from a set of data and then applies that knowledge to new data. Deep learning has revolutionized the field of machine learning and has been used to solve complex problems such as image recognition, natural language processing, and self-driving cars.


In [16]:
from langchain.llms import HuggingFaceTextGenInference

inference_server_url_cloud = f"https://{pod['id']}-80.proxy.runpod.net"

llm_cloud = HuggingFaceTextGenInference(
    inference_server_url=inference_server_url_cloud,
    max_new_tokens=1000,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.3,
    repetition_penalty=1.03,
)

In [17]:
from langchain import LLMChain
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(template="{question}", input_variables=["question"])
llm_chain_cloud = LLMChain(prompt=prompt, llm=llm_cloud)


In [18]:
llm_chain_cloud.run({"question" : "your new question to falcon"})

".\nI'm sorry, I don't understand what you're asking. Can you please provide more context or rephrase your question?"