Skip to content

Commit

Permalink
Merge pull request #420 from TylunasLi/tokenizer
Browse files Browse the repository at this point in the history
修复python脚本转换模型特殊token的错误
  • Loading branch information
ztxz16 committed Feb 26, 2024
2 parents 4af2b7d + 7878126 commit f9c99aa
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions tools/fastllm_pytools/torch2flm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from tokenizers.decoders import ByteLevel

def writeString(fo, s):
    """Write string *s* to binary file object *fo* as a length-prefixed record.

    Layout: a 4-byte little-endian-native int giving the UTF-8 byte length,
    followed by the UTF-8 bytes themselves.

    The length must be measured on the ENCODED bytes, not on the Python
    string: for non-ASCII text (e.g. Chinese special tokens) len(s) in
    characters is smaller than the byte length, which corrupts the record.
    """
    # Encode once; avoid shadowing the builtin `bytes`.
    data = s.encode()
    fo.write(struct.pack('i', len(data)))
    fo.write(data)

def writeKeyValue(fo, key, value):
writeString(fo, key)
Expand Down Expand Up @@ -212,8 +213,7 @@ def tofile(exportPath,
if ("tokenizer_has_special_tokens" in modelInfo):
fo.write(struct.pack('i', len(tokenizer.all_special_tokens)))
for special_token in tokenizer.all_special_tokens:
fo.write(struct.pack('i', len(special_token)))
fo.write(special_token.encode())
writeString(fo, special_token)
else:
fo.write(struct.pack('i', 0))

Expand Down Expand Up @@ -248,8 +248,7 @@ def tofile(exportPath,
weight_name = key
if hasattr(model, "peft_config"):
weight_name = weight_name.replace('base_model.model.', '')
fo.write(struct.pack('i', len(weight_name)))
fo.write(weight_name.encode())
writeString(fo, weight_name)
fo.write(struct.pack('i', len(cur.shape)))
for i in cur.shape:
fo.write(struct.pack('i', i))
Expand Down

0 comments on commit f9c99aa

Please sign in to comment.