Skip to content

Commit

Permalink
fix(llm): special tokens and leading space (#1831)
Browse files (browse the repository at this point in the history)
  • Loading branch information
pabloogc committed Apr 4, 2024
1 parent 08c4ab1 commit 347be64
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion private_gpt/components/llm/custom/sagemaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,12 +243,19 @@ def get_stream():
event_stream = resp["Body"]
start_json = b"{"
stop_token = "<|endoftext|>"
first_token = True

for line in LineIterator(event_stream):
if line != b"" and start_json in line:
data = json.loads(line[line.find(start_json) :].decode("utf-8"))
if data["token"]["text"] != stop_token:
special = data["token"]["special"]
stop = data["token"]["text"] == stop_token
if not special and not stop:
delta = data["token"]["text"]
# trim the leading space for the first token if present
if first_token:
delta = delta.lstrip()
first_token = False
text += delta
yield CompletionResponse(delta=delta, text=text, raw=data)

Expand Down

0 comments on commit 347be64

Please sign in to comment.