diff --git a/tools/flaskIfc/flaskIfc.py b/tools/flaskIfc/flaskIfc.py
index 8d57a069cf3de..34b9fc5970522 100644
--- a/tools/flaskIfc/flaskIfc.py
+++ b/tools/flaskIfc/flaskIfc.py
@@ -10,9 +10,19 @@
 app = Flask(__name__)
 
 port = '/dev/ttyUSB3'
+#port = '/dev/ttyUSB2'
 baudrate = '921600'
+#baudrate = '115200'
 exe_path = "/usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/"
 
+DEFAULT_REPEAT_PENALTY = 1.5
+DEFAULT_BATCH_SIZE = 1024
+DEFAULT_TOP_K = 50
+DEFAULT_TOP_P = 0.9
+DEFAULT_LAST_N = 5
+DEFAULT_CONTEXT_LENGTH = 12288
+DEFAULT_TEMP = 0.0
+
 @app.route('/')
 def index():
     return render_template('index.html')
@@ -25,6 +35,13 @@ def llama_cli_serial_command():
     backend = request.args.get('backend')
     tokens = request.args.get('tokens')
     prompt = request.args.get('prompt')
+    repeat_penalty = request.args.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
+    batch_size = request.args.get('batch-size', DEFAULT_BATCH_SIZE)
+    top_k = request.args.get('top-k', DEFAULT_TOP_K)
+    top_p = request.args.get('top-p', DEFAULT_TOP_P)
+    last_n = request.args.get('last-n', DEFAULT_LAST_N)
+    context_length = request.args.get('context-length', DEFAULT_CONTEXT_LENGTH)
+    temp = request.args.get('temp', DEFAULT_TEMP)
 
     # Define the model path (update with actual paths)
     model_paths = {
@@ -51,7 +68,7 @@ def llama_cli_serial_command():
     # URL to Test this end point is as follows
     # http://10.50.30.167:5001/llama-cli?model=tiny-llama&backend=tSavorite&tokens=5&prompt=Hello+How+are+you
     script_path = "./run_llama_cli.sh"
-    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
+    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
 
     try:
         result = subprocess.run(['python3', 'serial_script.py', port, baudrate, command], capture_output=True, text=True, check=True)
@@ -167,6 +184,13 @@ def submit():
     backend = request.form.get('backend')
     tokens = request.form.get('tokens')
     prompt = request.form.get('prompt')
+    repeat_penalty = request.form.get('repeat-penalty', DEFAULT_REPEAT_PENALTY)
+    batch_size = request.form.get('batch-size', DEFAULT_BATCH_SIZE)
+    top_k = request.form.get('top-k', DEFAULT_TOP_K)
+    top_p = request.form.get('top-p', DEFAULT_TOP_P)
+    last_n = request.form.get('last-n', DEFAULT_LAST_N)
+    context_length = request.form.get('context-length', DEFAULT_CONTEXT_LENGTH)
+    temp = request.form.get('temp', DEFAULT_TEMP)
 
     # Define the model path (update with actual paths)
     model_paths = {
@@ -192,7 +216,7 @@ def submit():
     #]
 
     script_path = "./run_llama_cli.sh"
-    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend}"
+    command = f"cd {exe_path}; {script_path} \"{prompt}\" {tokens} {model_path} {backend} {repeat_penalty} {batch_size} {top_k} {top_p} {last_n} {context_length} {temp}"
 
     def run_script():
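
For reference, the new query parameters can be exercised end to end in the same style as the test URL already in the handler comment. A minimal sketch, assuming the `requests` package is available and the host/port from that comment are reachable; the parameter names match the request.args.get() keys above, and the values shown are just the new DEFAULT_* constants, so this request should behave the same as omitting them:

    # Minimal sketch: call the /llama-cli endpoint with the new sampling
    # parameters. Host/port are taken from the test URL in the handler
    # comment; values mirror the DEFAULT_* constants added in this change.
    import requests

    params = {
        "model": "tiny-llama",
        "backend": "tSavorite",
        "tokens": 5,
        "prompt": "Hello How are you",
        "repeat-penalty": 1.5,
        "batch-size": 1024,
        "top-k": 50,
        "top-p": 0.9,
        "last-n": 5,
        "context-length": 12288,
        "temp": 0.0,
    }
    resp = requests.get("http://10.50.30.167:5001/llama-cli", params=params)
    print(resp.text)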