From dd13119abb9ad6c1f1e96de6b130ae9a3049ea45 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Sun, 10 May 2026 13:08:07 +0200 Subject: [PATCH 1/2] fix(xml): verify nested closing tag names in readContent Maintain a tag stack inside XmlParser.readContent so that every closing tag of a nested element is validated against the name of the element that opened it. Previously the name was read and silently discarded, accepting malformed XML like text without error. Closes #134 --- build.zig | 9 +++++++++ src/xml.zig | 41 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/build.zig b/build.zig index 63badc2..87d7bb2 100644 --- a/build.zig +++ b/build.zig @@ -1362,6 +1362,15 @@ pub fn build(b: *std.Build) void { test_disk_output.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_disk_output.step); + // Integration test 132: mismatched nested closing tag in XML column content → non-zero exit + const test_xml_mismatched_tags = b.addSystemCommand(&.{ + "bash", "-c", + \\printf 'text' \ + \\ | ./zig-out/bin/sql-pipe -I xml 'SELECT * FROM t' 2>/dev/null; test $? -ne 0 + }); + test_xml_mismatched_tags.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_mismatched_tags.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/src/xml.zig b/src/xml.zig index 525b5f6..37b4ec2 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -393,6 +393,10 @@ pub const XmlParser = struct { const start = self.pos; var depth: usize = 0; var has_nested = false; + // Stack of open nested element names (slices into self.data — no allocation per entry). + // Invariant: tag_stack.items.len == depth at all times. 
+ var tag_stack: std.ArrayList([]const u8) = .empty; + defer tag_stack.deinit(allocator); // Loop invariant: depth = number of unclosed nested elements // Bounding function: self.data.len - self.pos (finite input) @@ -428,20 +432,26 @@ pub const XmlParser = struct { if (!has_nested) return decodeEntities(allocator, raw); return allocator.dupe(u8, raw); } - // Closing tag of a nested element + // Closing tag of a nested element — verify name matches the open tag on the stack depth -= 1; self.advance(); self.advance(); // "</" - _ = self.readName(err_writer); + const close_name = self.readName(err_writer); self.skipWs(); if (self.peek() == '>') self.advance(); + const expected = tag_stack.pop().?; + if (!std.mem.eql(u8, close_name, expected)) + self.fatalAt("mismatched closing tag: expected '</{s}>' but found '</{s}>'", err_writer, .{ expected, close_name }); } else { // Opening tag of a nested element has_nested = true; self.advance(); // '<' - _ = self.readName(err_writer); + const nested_name = self.readName(err_writer); const self_closing = self.skipAttrsClose(err_writer); - if (!self_closing) depth += 1; + if (!self_closing) { + depth += 1; + try tag_stack.append(allocator, nested_name); + } } } self.fatalAt("unexpected end of input: unclosed element '{s}'", err_writer, .{elem_name}); @@ -1155,6 +1165,29 @@ test "XmlParser.navigateToRoot: handles text nodes between siblings" { try std.testing.expectEqualStrings("", p.data[p.pos..]); } +test "XmlParser.readContent: properly matched nested tags are accepted" { + const allocator = std.testing.allocator; + var err_buf: [256]u8 = undefined; + var err_writer: std.Io.Writer = .fixed(&err_buf); + + // Deeply nested content with correctly matched tags — stack must track them all + const input = "text"; + var p = XmlParser.init(input); + p.skipPrologue(&err_writer); + const root = p.readRootOpen(&err_writer); + + const cols = try p.nextRow(allocator, root, null, &err_writer); + try std.testing.expect(cols != null); + defer { + for (cols.?) 
|col| if (col.value) |v| allocator.free(v); + allocator.free(cols.?); + } + try std.testing.expectEqual(@as(usize, 1), cols.?.len); + try std.testing.expectEqualStrings("col", cols.?[0].name); + // Mixed/nested content is returned as raw XML substring + try std.testing.expectEqualStrings("text", cols.?[0].value.?); +} + test "XmlParser.nextRow: row_tag_filter skips non-matching elements" { const allocator = std.testing.allocator; var err_buf: [256]u8 = undefined; var err_writer: std.Io.Writer = .fixed(&err_buf); From 0352283e928251f8ce50021cf7cd09d2bfede3d1 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Sun, 10 May 2026 13:34:51 +0200 Subject: [PATCH 2/2] fix(xml): add safety comment on pop().? in readContent --- src/xml.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xml.zig b/src/xml.zig index 37b4ec2..619628a 100644 --- a/src/xml.zig +++ b/src/xml.zig @@ -439,7 +439,7 @@ pub const XmlParser = struct { const close_name = self.readName(err_writer); self.skipWs(); if (self.peek() == '>') self.advance(); - const expected = tag_stack.pop().?; + const expected = tag_stack.pop().?; // safe: every closing tag at depth>0 was preceded by an opening push if (!std.mem.eql(u8, close_name, expected)) self.fatalAt("mismatched closing tag: expected '</{s}>' but found '</{s}>'", err_writer, .{ expected, close_name }); } else {