28 changes: 26 additions & 2 deletions tools/flaskIfc/flaskIfc.py
@@ -10,9 +10,19 @@
 app = Flask(__name__)
 
 port = '/dev/ttyUSB3'
+#port = '/dev/ttyUSB2'
 baudrate = '921600'
+#baudrate = '115200'
 exe_path = "/usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/"
 
+DEFAULT_REPEAT_PENALTY = 1.5
+DEFAULT_BATCH_SIZE = 1024
+DEFAULT_TOP_K = 50
+DEFAULT_TOP_P = 0.9
+DEFAULT_LAST_N = 5
+DEFAULT_CONTEXT_LENGTH = 12288
+DEFAULT_TEMP = 0.0
+
 @app.route('/')
 def index():
     return render_template('index.html')
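
The first hunk adds module-level fallbacks for the llama.cpp-style sampling knobs. One subtlety worth flagging (not part of the diff): request.args.get returns the raw query string when a parameter is supplied, but the numeric DEFAULT_* value when it is absent, so the resolved values are mixed str/int/float. That is harmless here only because they are interpolated into a shell command string rather than used arithmetically. A minimal standalone sketch of that behaviour, with a hypothetical route name:

```python
# Standalone sketch; the route name is hypothetical, the fallback value matches the diff.
from flask import Flask, request

app = Flask(__name__)

DEFAULT_TOP_K = 50

@app.route('/type-demo')
def type_demo():
    top_k = request.args.get('top-k', DEFAULT_TOP_K)
    # '42' (str) for /type-demo?top-k=42, but 50 (int) when omitted;
    # f-string interpolation renders both forms identically.
    return f"top-k={top_k} ({type(top_k).__name__})"
```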
@@ -25,6 +35,13 @@ def llama_cli_serial_command():
     backend = request.args.get('backend')
     tokens = request.args.get('tokens')
     prompt = request.args.get('prompt')
+    repeat_penalty = request.args.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
+    batch_size = request.args.get('batch-size', DEFAULT_BATCH_SIZE)
+    top_k = request.args.get('top-k', DEFAULT_TOP_K)
+    top_p = request.args.get('top-p', DEFAULT_TOP_P)
+    last_n = request.args.get('last-n', DEFAULT_LAST_N)
+    context_length = request.args.get('context-length', DEFAULT_CONTEXT_LENGTH)
+    temp = request.args.get('temp', DEFAULT_TEMP)
 
     # Define the model path (update with actual paths)
     model_paths = {
@@ -51,7 +68,7 @@ def llama_cli_serial_command():
     # URL to Test this end point is as follows
     # http://10.50.30.167:5001/llama-cli?model=tiny-llama&backend=tSavorite&tokens=5&prompt=Hello+How+are+you
     script_path = "./run_llama_cli.sh"
-    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
+    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
 
     try:
         result = subprocess.run(['python3', 'serial_script.py', port, baudrate, command], capture_output=True, text=True, check=True)
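
Building on the test URL in the comment above, a hypothetical client call that exercises every new query parameter could look like the following; each value shown equals its server-side default, so the response should match a call that passes none of them:

```python
# Hypothetical client for the /llama-cli endpoint; host/port taken from the
# test URL above, parameter names from the request.args keys in the diff.
import requests

resp = requests.get(
    "http://10.50.30.167:5001/llama-cli",
    params={
        "model": "tiny-llama",
        "backend": "tSavorite",
        "tokens": 5,
        "prompt": "Hello How are you",
        "repeat-penalty": 1.5,
        "batch-size": 1024,
        "top-k": 50,
        "top-p": 0.9,
        "last-n": 5,
        "context-length": 12288,
        "temp": 0.0,
    },
)
print(resp.text)
```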
@@ -167,6 +184,13 @@ def submit():
     backend = request.form.get('backend')
     tokens = request.form.get('tokens')
     prompt = request.form.get('prompt')
+    repeat_penalty = request.form.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
+    batch_size = request.form.get('batch-size', DEFAULT_BATCH_SIZE)
+    top_k = request.form.get('top-k', DEFAULT_TOP_K)
+    top_p = request.form.get('top-p', DEFAULT_TOP_P)
+    last_n = request.form.get('last-n', DEFAULT_LAST_N)
+    context_length = request.form.get('context-length', DEFAULT_CONTEXT_LENGTH)
+    temp = request.form.get('temp', DEFAULT_TEMP)
 
     # Define the model path (update with actual paths)
     model_paths = {
@@ -192,7 +216,7 @@ def submit():
     #]
 
     script_path = "./run_llama_cli.sh"
-    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
+    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
 
 
 def run_script():
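
Both routes now hand run_llama_cli.sh eleven positional arguments, so the script is assumed to consume them in exactly this order: prompt, tokens, model path, backend, then the seven sampling knobs. An illustration (not part of the diff) of the command string passed to serial_script.py, using the module defaults and a hypothetical model path:

```python
# Illustration only: the command string produced with the module defaults.
exe_path = "/usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/"
script_path = "./run_llama_cli.sh"
prompt, tokens = "Hello How are you", 5
model_path = "/models/tiny-llama.gguf"  # hypothetical path
backend = "tSavorite"
command = (f'cd {exe_path}; {script_path} "{prompt}" {tokens} {model_path} '
           f"{backend} 1.5 1024 50 0.9 5 12288 0.0")
print(command)
# cd /usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/; ./run_llama_cli.sh "Hello How are you" 5 /models/tiny-llama.gguf tSavorite 1.5 1024 50 0.9 5 12288 0.0
```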