From 21ba6d11ca8f2c95ed8110d2c570550a1288a43e Mon Sep 17 00:00:00 2001
From: Anoop Kapoor <akapoor@wssw01.tsavoritesi.net>
Date: Wed, 11 Jun 2025 16:49:18 -0700
Subject: [PATCH 1/2] @FIR-732 - Llama.cpp: Webserver & HTML pages support

---
 tools/flaskIfc/flaskIfc.py           | 54 +++++++++++++++++++++-------
 tools/flaskIfc/serial_script.py      |  6 ++--
 tools/flaskIfc/templates/index.html  | 38 ++++++++++++++++++++
 tools/flaskIfc/templates/result.html | 12 +++++++
 4 files changed, 93 insertions(+), 17 deletions(-)
 create mode 100644 tools/flaskIfc/templates/index.html
 create mode 100644 tools/flaskIfc/templates/result.html
diff --git a/tools/flaskIfc/flaskIfc.py b/tools/flaskIfc/flaskIfc.py
index 61187c91a09d4..4d65c9a7ffa0e 100644
--- a/tools/flaskIfc/flaskIfc.py
+++ b/tools/flaskIfc/flaskIfc.py
@@ -1,32 +1,60 @@
-from flask import Flask, request
+from flask import Flask, render_template, request
 import subprocess
 
 app = Flask(__name__)
 
-@app.route('/serial', methods=['GET'])
-def serial_command():
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/submit', methods=['POST'])
+def submit():
+    # Example invocation: ./run_platform_test.sh "my cat's name" "10" "tinyllama-vo-5m-para.gguf" "none"
+    model = request.form.get('model')
+    backend = request.form.get('backend')
+    tokens = request.form.get('tokens')
+    prompt = request.form.get('prompt')
+
+    # Map the form's model identifiers to model file names (update with actual paths)
+    model_paths = {
+        "tiny-llama": "tinyllama-vo-5m-para.gguf",
+        "Tiny-llama-F32": "Tiny-Llama-v0.3-FP32-1.1B-F32.gguf"
+    }
+
+    model_path = model_paths.get(model, "")
+    if not model_path:
+        return f"<h2>Error: Model path not found for '{model}'</h2>"
+
+    # The commented-out llama-cli command below is kept for reference only; it will be removed later.
+    # Build llama-cli command
+    #command = [
+    #    "./llama-cli",
+    #    "-p", prompt,
+    #    "-m", model_path,
+    #    "--device", backend,
+    #    "--temp", "0",
+    #    "--n-predict", tokens,
+    #    "--repeat-penalty", "1",
+    #    "--top-k", "0",
+    #    "--top-p", "1"
+    #]
     # Currently the port is hard coded to /dev/ttyUSB3 but can be parameterized
     port = '/dev/ttyUSB3'
-    #port = request.args.get('port')
 
     # Currently the baudrate is hard coded to 921600 but can be parameterized
-    #baudrate = request.args.get('baudrate')
     baudrate = '921600'
 
+    script_path = "/usr/bin/tsi/v0.1.1.tsv31_06_06_2025/bin/run_platform_test.sh"
+    command = f"{script_path} \"{prompt}\" {tokens} {model_path} {backend}"
 
-    # Parse the command and send it to serial.py 
-    command = request.args.get('command')
-
-    #if not all([port, baudrate, command]):
-    if not all([command]):
-        return "Missing parameters", 400
 
     try:
         result = subprocess.run(['python3', 'serial_script.py', port, baudrate, command], capture_output=True, text=True, check=True)
-        return result.stdout.strip(), 200
+        output = result.stdout  # This should have \n
     except subprocess.CalledProcessError as e:
-        return f"Error executing script: {e.stderr}", 500
+        output = f"Error running model: {e.stderr}"
 
+    return render_template('result.html', output=output)
 
 if __name__ == '__main__':
     app.run(debug=True, port=5000)
diff --git a/tools/flaskIfc/serial_script.py b/tools/flaskIfc/serial_script.py
index e138d19ab7de0..cde5e0cd54dfc 100644
--- a/tools/flaskIfc/serial_script.py
+++ b/tools/flaskIfc/serial_script.py
@@ -4,19 +4,18 @@
 def send_serial_command(port, baudrate, command):
     try:
         # Open the serial port with 1 second timeout
-        ser = serial.Serial(port, baudrate, timeout=60)
+        ser = serial.Serial(port, baudrate, timeout=20)
 
         ser.write(command.encode())  # Encode command to bytes
         ser.write('\n'.encode())  # Encode command to bytes
         
         # Wait to read the serial port
-        # Need to add a break somewhere for when we see the phrase "root@name"
         data = '\0'
         while True:
             try:
                 line = ser.readline()
                 if line: # Check if line is not empty
-                    data += (line.decode('utf-8').strip()) # Decode and strip to remove extra chars
+                    data += line.decode('utf-8')  # Keep the line as-is with newline
                 else:
                     break  # Exit loop if no data is received
             except serial.SerialException as e:
@@ -42,4 +41,3 @@ def send_serial_command(port, baudrate, command):
     baudrate = int(sys.argv[2])
     command = sys.argv[3]
     response = send_serial_command(port, baudrate, command)
-    print(response)
diff --git a/tools/flaskIfc/templates/index.html b/tools/flaskIfc/templates/index.html
new file mode 100644
index 0000000000000..9152167a86c44
--- /dev/null
+++ b/tools/flaskIfc/templates/index.html
@@ -0,0 +1,38 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>TSAVORITE Web UI For Model Inference</title>
+</head>
+<body>
+    <h1>Model Inference Configuration</h1>
+    <form action="/submit" method="post">
+        <!-- Model Selection -->
+        <label for="model">Choose a model:</label>
+        <select name="model" id="model">
+            <option value="tiny-llama">Tiny LLaMA</option>
+            <option value="Tiny-llama-F32">LLaMA 2 1B</option>
+        </select>
+        <br><br>
+
+        <!-- Backend Selection -->
+        <label for="backend">Select backend:</label>
+        <select name="backend" id="backend">
+            <option value="tSavorite">TSAVORITE</option>
+            <option value="none">CPU</option>
+        </select>
+        <br><br>
+
+        <!-- Number of Tokens -->
+        <label for="tokens">Number of predicted tokens:</label>
+        <input type="number" name="tokens" id="tokens" min="1" max="1000" value="50">
+        <br><br>
+
+        <!-- Prompt Input -->
+        <label for="prompt">Prompt:</label><br>
+        <textarea name="prompt" id="prompt" rows="4" cols="50" placeholder="Enter your prompt here..."></textarea>
+        <br><br>
+
+        <button type="submit">Submit</button>
+    </form>
+</body>
+</html>
diff --git a/tools/flaskIfc/templates/result.html b/tools/flaskIfc/templates/result.html
new file mode 100644
index 0000000000000..07c79c409f596
--- /dev/null
+++ b/tools/flaskIfc/templates/result.html
@@ -0,0 +1,12 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Model Output</title>
+</head>
+<body>
+    <h1>Model Response</h1>
+    <pre>{{ output }}</pre>
+    <br>
+    <a href="/">⟵ Back to Form</a>
+</body>
+</html>

From 597f92800787739d23e95bd5bf700d5e277e9c42 Mon Sep 17 00:00:00 2001
From: Ashish Trivedi <atrivedi@fpga2.tsavoritesi.net>
Date: Wed, 11 Jun 2025 20:15:09 -0700
Subject: [PATCH 2/2] @FIR-732: Added print back to ensure stdout has data

---
 tools/flaskIfc/serial_script.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/flaskIfc/serial_script.py b/tools/flaskIfc/serial_script.py
index cde5e0cd54dfc..0e1064225921f 100644
--- a/tools/flaskIfc/serial_script.py
+++ b/tools/flaskIfc/serial_script.py
@@ -4,7 +4,7 @@
 def send_serial_command(port, baudrate, command):
     try:
         # Open the serial port with 1 second timeout
-        ser = serial.Serial(port, baudrate, timeout=20)
+        ser = serial.Serial(port, baudrate, timeout=60)
 
         ser.write(command.encode())  # Encode command to bytes
         ser.write('\n'.encode())  # Encode command to bytes
@@ -25,6 +25,7 @@ def send_serial_command(port, baudrate, command):
                 ser.close()
                 return ("Program interrupted by user")
         ser.close()
+        print (data)
         return data
 
     except serial.SerialException as e: