Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions pkg/aiusechat/aiutil/aiutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// Copyright 2025, Command Line Inc.
// SPDX-License-Identifier: Apache-2.0

package aiutil

import (
"bytes"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"encoding/json"
"fmt"
"strconv"
"strings"

"github.com/wavetermdev/waveterm/pkg/aiusechat/uctypes"
"github.com/wavetermdev/waveterm/pkg/util/utilfn"
)

// ExtractXmlAttribute extracts an attribute value from an XML-like tag.
//
// The attribute must be written as attrName="value": a double-quoted string
// in which literal double quotes are encoded as &quot;. Whitespace around the
// '=' sign is tolerated. The name only matches at the start of tag or directly
// after whitespace, so looking up "name" does not match inside "file_name".
//
// The value ends at the first '"' after the opening quote, so backslash-escaped
// quotes are not supported; any other escapes are decoded with strconv.Unquote.
//
// Returns the decoded value and true if found, or the empty string and false
// if the attribute is missing or its value is malformed.
func ExtractXmlAttribute(tag, attrName string) (string, bool) {
	const xmlSpace = " \t\r\n"
	if attrName == "" {
		return "", false
	}

	for from := 0; from < len(tag); {
		rel := strings.Index(tag[from:], attrName)
		if rel == -1 {
			return "", false
		}
		nameStart := from + rel
		// Resume after this candidate if it turns out not to be the attribute.
		from = nameStart + 1

		// Reject matches that are only the tail of a longer attribute name.
		if nameStart > 0 && strings.IndexByte(xmlSpace, tag[nameStart-1]) == -1 {
			continue
		}

		rest := strings.TrimLeft(tag[nameStart+len(attrName):], xmlSpace)
		if !strings.HasPrefix(rest, "=") {
			continue
		}
		rest = strings.TrimLeft(rest[1:], xmlSpace)
		if !strings.HasPrefix(rest, `"`) {
			continue
		}

		closing := strings.Index(rest[1:], `"`)
		if closing == -1 {
			return "", false
		}

		// rest[:closing+2] spans the opening quote through the closing quote.
		value, err := strconv.Unquote(rest[:closing+2])
		if err != nil {
			return "", false
		}
		return strings.ReplaceAll(value, "&quot;", `"`), true
	}
	return "", false
}
Comment on lines +23 to +50
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Potential issues with attribute name matching.

Lines 24 and 29: The current implementation searches for attrName+"=", which could match partial attribute names. For example, searching for "name=" would also match "filename=".

Additionally, the parser expects no whitespace around the = sign. If the XML allows attrName = "value" (with spaces), this would fail.

Consider improving the attribute matching:

-	attrStart := strings.Index(tag, attrName+"=")
+	// Use word boundary to avoid partial matches
+	pattern := attrName + "="
+	attrStart := -1
+	for i := 0; i <= len(tag)-len(pattern); i++ {
+		if strings.HasPrefix(tag[i:], pattern) {
+			// Check if preceded by whitespace or start of string
+			if i == 0 || tag[i-1] == ' ' || tag[i-1] == '\t' {
+				attrStart = i
+				break
+			}
+		}
+	}
 	if attrStart == -1 {
 		return "", false
 	}

Alternatively, verify that the XML format strictly disallows spaces around = and that attribute names don't overlap.

🤖 Prompt for AI Agents
In pkg/aiusechat/aiutil/aiutil.go around lines 23 to 50, the current search for
attrName+"=" can match substrings (e.g. "filename" when looking for "name") and
fails if there are spaces around '='; update the logic to find an attribute by
matching a proper boundary for the name (start of string or whitespace or
tag-char before the name) and allow optional spaces around the '=' (e.g. use a
compiled regex like `(?i)(?:^|\\s)attrName\\s*=\\s*"(.*?)"` with attrName
injected safely) or otherwise scan tokens to locate the attribute name followed
by optional whitespace, an '=', optional whitespace, then a quoted value;
extract the quoted group, unquote/HTML-decode as before, and return that value
and true, returning false on any parse failure.


// GenerateDeterministicSuffix returns the first 8 hex characters of the
// SHA-256 digest of all inputs concatenated in order, with no separator
// between them. The same inputs always yield the same suffix.
func GenerateDeterministicSuffix(inputs ...string) string {
	var joined []byte
	for _, s := range inputs {
		joined = append(joined, s...)
	}
	digest := sha256.Sum256(joined)
	// Four digest bytes encode to exactly eight hex characters.
	return hex.EncodeToString(digest[:4])
}

// ExtractImageUrl produces an image URL for a file part.
//
// A non-empty url takes precedence and is returned unchanged when it starts
// with "data:", "http://" or "https://"; any other prefix is an error.
// Otherwise, non-empty data is base64-encoded into a data: URL that carries
// mimeType. An error is returned when both url and data are empty.
func ExtractImageUrl(data []byte, url, mimeType string) (string, error) {
	switch {
	case url != "":
		for _, prefix := range []string{"data:", "http://", "https://"} {
			if strings.HasPrefix(url, prefix) {
				return url, nil
			}
		}
		return "", fmt.Errorf("unsupported URL protocol in file part: %s", url)
	case len(data) > 0:
		encoded := base64.StdEncoding.EncodeToString(data)
		return fmt.Sprintf("data:%s;base64,%s", mimeType, encoded), nil
	default:
		return "", fmt.Errorf("file part missing both url and data")
	}
}

// ExtractTextData returns the text bytes for a file part.
//
// Non-empty data is returned as-is. Otherwise a url starting with "data:" is
// decoded via utilfn.DecodeDataURL and its payload returned. Any other
// non-empty url is rejected (it is not fetched here), and an error is also
// returned when both data and url are empty.
func ExtractTextData(data []byte, url string) ([]byte, error) {
	switch {
	case len(data) > 0:
		return data, nil
	case url == "":
		return nil, fmt.Errorf("text/plain file part missing data")
	case !strings.HasPrefix(url, "data:"):
		return nil, fmt.Errorf("dropping text/plain file with URL (must be fetched and converted to data)")
	}

	// Only the decoded payload is needed; the first return value is discarded.
	_, payload, err := utilfn.DecodeDataURL(url)
	if err != nil {
		return nil, fmt.Errorf("failed to decode data URL for text/plain file: %w", err)
	}
	return payload, nil
}

// FormatAttachedTextFile wraps textContent in an
// <AttachedTextFile_SUFFIX file_name="..."> ... </AttachedTextFile_SUFFIX>
// block. SUFFIX comes from GenerateDeterministicSuffix over the content and
// the file name. An empty fileName is replaced with "untitled.txt". In the
// attribute, double quotes in the name are encoded as &quot; and the result is
// then quoted with strconv.Quote.
func FormatAttachedTextFile(fileName string, textContent []byte) string {
	name := fileName
	if name == "" {
		name = "untitled.txt"
	}

	body := string(textContent)
	// The suffix is computed from the raw (unencoded) name.
	suffix := GenerateDeterministicSuffix(body, name)
	nameAttr := strconv.Quote(strings.ReplaceAll(name, `"`, "&quot;"))

	return fmt.Sprintf("<AttachedTextFile_%s file_name=%s>\n%s\n</AttachedTextFile_%s>", suffix, nameAttr, body, suffix)
}

// FormatAttachedDirectoryListing wraps jsonContent in an
// <AttachedDirectoryListing_SUFFIX directory_name="..."> ...
// </AttachedDirectoryListing_SUFFIX> block. SUFFIX comes from
// GenerateDeterministicSuffix over the content and the directory name. An
// empty directoryName is replaced with "unnamed-directory". In the attribute,
// double quotes in the name are encoded as &quot; and the result is then
// quoted with strconv.Quote.
func FormatAttachedDirectoryListing(directoryName, jsonContent string) string {
	name := directoryName
	if name == "" {
		name = "unnamed-directory"
	}

	// The suffix is computed from the raw (unencoded) name.
	suffix := GenerateDeterministicSuffix(jsonContent, name)
	nameAttr := strconv.Quote(strings.ReplaceAll(name, `"`, "&quot;"))

	return fmt.Sprintf("<AttachedDirectoryListing_%s directory_name=%s>\n%s\n</AttachedDirectoryListing_%s>", suffix, nameAttr, jsonContent, suffix)
}

// ConvertDataUserFile converts an attached file/directory text block into a
// "data-userfile" UIMessagePart.
//
// Blocks starting with "<AttachedTextFile_" yield MimeType "text/plain" with
// the name taken from the file_name attribute; blocks starting with
// "<AttachedDirectoryListing_" yield MimeType "directory" with the name taken
// from the directory_name attribute.
//
// Returns (found, part): found reports whether either prefix matched, and part
// is nil when the prefix matched but the opening tag could not be parsed (no
// newline, the first line does not end in '>', or the attribute is missing).
func ConvertDataUserFile(blockText string) (bool, *uctypes.UIMessagePart) {
	var attrName, mimeType string
	switch {
	case strings.HasPrefix(blockText, "<AttachedTextFile_"):
		attrName, mimeType = "file_name", "text/plain"
	case strings.HasPrefix(blockText, "<AttachedDirectoryListing_"):
		attrName, mimeType = "directory_name", "directory"
	default:
		return false, nil
	}

	// The opening tag is the whole first line and must end in '>'. The index
	// is always past the matched prefix, so lineEnd-1 is in range.
	lineEnd := strings.Index(blockText, "\n")
	if lineEnd == -1 || blockText[lineEnd-1] != '>' {
		return true, nil
	}

	name, found := ExtractXmlAttribute(blockText[:lineEnd], attrName)
	if !found {
		return true, nil
	}

	part := &uctypes.UIMessagePart{
		Type: "data-userfile",
		Data: uctypes.UIMessageDataUserFile{
			FileName: name,
			MimeType: mimeType,
		},
	}
	return true, part
}

// JsonEncodeRequestBody JSON-encodes reqBody into a buffer with HTML escaping
// disabled, so characters such as '<', '>' and '&' are written literally.
// The encoder appends a trailing newline. On failure the buffer is returned
// together with the encoding error.
func JsonEncodeRequestBody(reqBody any) (bytes.Buffer, error) {
	var out bytes.Buffer
	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false)
	// Encode first, then return, so the buffer copy reflects the written bytes.
	err := enc.Encode(reqBody)
	return out, err
}
25 changes: 5 additions & 20 deletions pkg/aiusechat/anthropic/anthropic-backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,16 +480,14 @@ func RunAnthropicChatStep(
if rateLimitInfo.PReq == 0 && rateLimitInfo.Req > 0 {
// Premium requests exhausted, but regular requests available
stopReason := &uctypes.WaveStopReason{
Kind: uctypes.StopKindPremiumRateLimit,
RateLimitInfo: rateLimitInfo,
Kind: uctypes.StopKindPremiumRateLimit,
}
return stopReason, nil, rateLimitInfo, nil
}
if rateLimitInfo.Req == 0 {
// All requests exhausted
stopReason := &uctypes.WaveStopReason{
Kind: uctypes.StopKindRateLimit,
RateLimitInfo: rateLimitInfo,
Kind: uctypes.StopKindRateLimit,
}
return stopReason, nil, rateLimitInfo, nil
}
Expand Down Expand Up @@ -590,8 +588,6 @@ func handleAnthropicStreamingResp(
rtnStopReason = &uctypes.WaveStopReason{
Kind: uctypes.StopKindDone,
RawReason: state.stopFromDelta,
MessageID: state.msgID,
Model: state.model,
}
return rtnStopReason, state.rtnMessage
}
Expand Down Expand Up @@ -849,41 +845,30 @@ func handleAnthropicEvent(
switch reason {
case "tool_use":
return nil, &uctypes.WaveStopReason{
Kind: uctypes.StopKindToolUse,
RawReason: reason,
MessageID: state.msgID,
Model: state.model,
ToolCalls: state.toolCalls,
FinishStep: true,
Kind: uctypes.StopKindToolUse,
RawReason: reason,
ToolCalls: state.toolCalls,
}
case "max_tokens":
return nil, &uctypes.WaveStopReason{
Kind: uctypes.StopKindMaxTokens,
RawReason: reason,
MessageID: state.msgID,
Model: state.model,
}
case "refusal":
return nil, &uctypes.WaveStopReason{
Kind: uctypes.StopKindContent,
RawReason: reason,
MessageID: state.msgID,
Model: state.model,
}
case "pause_turn":
return nil, &uctypes.WaveStopReason{
Kind: uctypes.StopKindPauseTurn,
RawReason: reason,
MessageID: state.msgID,
Model: state.model,
}
default:
// end_turn, stop_sequence (treat as end of this call)
return nil, &uctypes.WaveStopReason{
Kind: uctypes.StopKindDone,
RawReason: reason,
MessageID: state.msgID,
Model: state.model,
}
}

Expand Down
Loading
Loading