Skip to content

Commit 3291372

Browse files
authored
vlib,tools: add an arrays.diff module, implement a simple platform independent tool v diff file1.txt file2.txt using it (#24428)
1 parent 1070378 commit 3291372

File tree

5 files changed

+505
-0
lines changed

5 files changed

+505
-0
lines changed

cmd/tools/vdiff.v

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import os
2+
import flag
3+
import term
4+
import arrays.diff
5+
6+
// main implements `v diff file1 file2`: it reads both files as lines, computes
// their difference with `arrays.diff` and prints the resulting patch.
// Exit status follows the diff(1) convention so that scripts can tell the
// outcomes apart: 0 when no differences were found, 1 when the files differ,
// and 2 when the arguments are invalid or a file cannot be read.
fn main() {
	mut fp := flag.new_flag_parser(os.args[1..])
	fp.application('v diff')
	fp.version('0.0.1')
	fp.description('Compare files line by line. Example: `v diff examples/hello_world.v examples/log.v`')
	fp.arguments_description('file1 file2')
	fp.skip_executable()
	fp.limit_free_args_to_at_least(2)!

	if fp.bool('help', `h`, false, 'Show this help screen.') {
		println(fp.usage())
		exit(0)
	}

	args := fp.finalize() or {
		eprintln('Argument error: ${err}')
		exit(2)
	}

	// Report unreadable inputs explicitly instead of panicking: a panic would
	// terminate with the same status that is used for "files differ".
	src := os.read_lines(args[0]) or {
		eprintln('Cannot read `${args[0]}`: ${err}')
		exit(2)
	}
	dst := os.read_lines(args[1]) or {
		eprintln('Cannot read `${args[1]}`: ${err}')
		exit(2)
	}

	mut ctx := diff.diff(src, dst)
	patch := ctx.generate_patch(
		colorful:     term.can_show_color_on_stdout()
		block_header: true
		unified:      3
	)
	// An empty patch means the inputs are identical: fall through and exit 0.
	if patch.len > 0 {
		print(patch)
		exit(1)
	}
}

cmd/v/v.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ const external_tools = [
2424
'complete',
2525
'compress',
2626
'cover',
27+
'diff',
2728
'doc',
2829
'doctor',
2930
'download',

examples/diff.v

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
module main
2+
3+
import arrays.diff
4+
import os
5+
6+
// diff_files reads `src_file` and `dst_file` line by line and returns a
// colorized patch (with block headers) that describes how to turn the first
// file into the second one. Read errors are propagated to the caller.
fn diff_files(src_file string, dst_file string) !string {
	old_lines := os.read_lines(src_file)!
	new_lines := os.read_lines(dst_file)!

	mut ctx := diff.diff(old_lines, new_lines)
	patch := ctx.generate_patch(colorful: true, block_header: true)
	return patch
}
14+
15+
// main demonstrates `diff_files`: it writes two small sample texts to
// `diff_f1.txt` and `diff_f2.txt` (relative paths, i.e. in the current working
// directory), prints the patch between them, and finally removes both files.
// Every file operation is propagated with `!`.
fn main() {
16+
f1 := "Module{
17+
name: 'Foo'
18+
description: 'Awesome V module.'
19+
version: '0.0.0'
20+
dependencies: []
21+
}
22+
"
23+
f2 := "Module{
24+
name: 'foo'
25+
description: 'Awesome V module.'
26+
version: '0.1.0'
27+
license: 'MIT'
28+
dependencies: []
29+
}
30+
"
31+
// Scratch files that hold the two sample texts for the duration of the demo.
p1 := 'diff_f1.txt'
32+
p2 := 'diff_f2.txt'
33+
os.write_file(p1, f1)!
34+
os.write_file(p2, f2)!
35+
36+
str := diff_files(p1, p2)!
37+
println(str)
38+
39+
// NOTE(review): the scratch files are only removed when every step above
// succeeded; an earlier failure leaves them behind.
os.rm(p1)!
40+
os.rm(p2)!
41+
}

vlib/arrays/diff/diff.v

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
module diff
2+
3+
import strings
4+
5+
// DiffChange describes one contiguous edit between the two input arrays:
6+
// `del` elements of input a are replaced by `ins` elements of input b.
// The field order matters: `result` initialises this struct positionally.
7+
pub struct DiffChange {
8+
pub mut:
9+
a int // 0-based index in input a []T where the change starts
10+
b int // 0-based index in input b []T where the change starts
11+
del int // number of elements removed from input a, starting at index `a`
12+
ins int // number of elements taken from input b, starting at index `b`
13+
}
14+
15+
// DiffContextFlag is the per-index marker stored in `DiffContext.flags`.
// Both bits may be set on the same entry, because one `flags` array is
// indexed by positions of input a as well as by positions of input b.
@[flag]
16+
enum DiffContextFlag {
17+
delete // set by `compare` on flags[i] when a[i] is removed
18+
insert // set by `compare` on flags[i] when b[i] is added
19+
}
20+
21+
// DiffContext holds the two inputs, the scratch state of the comparison and
// the resulting list of changes. It is created and filled by `diff`.
pub struct DiffContext[T] {
22+
mut:
23+
// the two arrays being compared
a []T
24+
b []T
25+
// one entry per index, sized by `diff` to the longer of the two inputs
flags []DiffContextFlag
26+
// set by `diff` to a.len + b.len + 1; used as the index offset into `forward`/`reverse`
max int
27+
// forward and reverse d-path endpoint x components
// (both are allocated by `diff` with 2 * max entries)
28+
forward []int
29+
reverse []int
30+
pub mut:
31+
// changes found by `diff`, ordered by position; `generate_patch` merges
// neighboring entries in place via `merge_changes`
changes []DiffChange
32+
}
33+
34+
// diff compares the arrays `a` and `b` and returns a context whose `changes`
// field lists the edits that turn `a` into `b`.
pub fn diff[T](a []T, b []T) &DiffContext[T] {
	// the flag array is shared by both inputs, so it must fit the longer one
	longest := if a.len > b.len { a.len } else { b.len }
	bound := a.len + b.len + 1
	mut ctx := &DiffContext[T]{
		a:       a
		b:       b
		flags:   []DiffContextFlag{len: longest}
		max:     bound
		forward: []int{len: 2 * bound}
		reverse: []int{len: 2 * bound}
	}
	// mark deleted/inserted positions first, then fold the marks into changes
	ctx.compare(0, 0, a.len, b.len)
	ctx.changes = ctx.result(a.len, b.len)
	return ctx
}
52+
53+
// A direct conversion from https://github.com/covrom/diff
// Fast diff library for Myers algorithm.
// The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266
//
// compare marks, in `c.flags`, which elements of a[mut_aoffset..mut_alimit]
// are deleted and which elements of b[mut_boffset..mut_blimit] are inserted.
// It trims the common prefix and suffix, handles the trivial cases directly
// and otherwise splits the range at the point returned by `find_middle_snake`
// and recurses into both halves.
@[direct_array_access]
fn (mut c DiffContext[T]) compare(mut_aoffset int, mut_boffset int, mut_alimit int, mut_blimit int) {
	mut a_lo := mut_aoffset
	mut b_lo := mut_boffset
	mut a_hi := mut_alimit
	mut b_hi := mut_blimit
	// skip the elements both ranges share at their start
	for a_lo < a_hi && b_lo < b_hi && c.a[a_lo] == c.b[b_lo] {
		a_lo++
		b_lo++
	}
	// skip the elements both ranges share at their end
	for a_hi > a_lo && b_hi > b_lo && c.a[a_hi - 1] == c.b[b_hi - 1] {
		a_hi--
		b_hi--
	}
	// nothing left in a: whatever remains in b (possibly nothing) is inserted
	if a_lo == a_hi {
		for j in b_lo .. b_hi {
			c.flags[j].set(.insert)
		}
		return
	}
	// nothing left in b: everything that remains in a is deleted
	if b_lo == b_hi {
		for i in a_lo .. a_hi {
			c.flags[i].set(.delete)
		}
		return
	}
	// both ranges still differ: divide at the middle snake and conquer
	mid_x, mid_y := c.find_middle_snake(a_lo, b_lo, a_hi, b_hi)
	c.compare(a_lo, b_lo, mid_x, mid_y)
	c.compare(mid_x, mid_y, a_hi, b_hi)
}
92+
93+
// find_middle_snake searches the range a[aoffset..alimit] / b[boffset..blimit]
// from both ends at once: a forward search that starts at (aoffset, boffset)
// and a reverse search that starts at (alimit, blimit), both extended step by
// step for d = 0, 1, 2, ... up to maxd. It returns the point (x, y) at which
// the two searches are first found to overlap on a common diagonal; `compare`
// uses that point to split its range.
// Diagonals are identified by k = x - y. The furthest x reached on diagonal k
// is stored at c.forward[foff + k] and c.reverse[roff + k] respectively.
// It panics when no overlap is found within maxd steps.
@[direct_array_access]
94+
fn (mut c DiffContext[T]) find_middle_snake(aoffset int, boffset int, alimit int, blimit int) (int, int) {
95+
// midpoints
// (the diagonals on which the forward and the reverse search start)
96+
fmid := aoffset - boffset
97+
rmid := alimit - blimit
98+
// correct offset in d-path slices
// (foff + fmid == roff + rmid == c.max, so each start diagonal maps to index c.max)
99+
foff := c.max - fmid
100+
roff := c.max - rmid
101+
// whether the two start diagonals differ by an odd amount; this selects
// which of the two searches performs the overlap test below
isodd := (rmid - fmid) & 1 != 0
102+
// upper bound for the number of search steps
maxd := (alimit - aoffset + blimit - boffset + 2) / 2
103+
// seed the neighbour slots that the d == 0 iteration reads
c.forward[c.max + 1] = aoffset
104+
c.reverse[c.max - 1] = alimit
105+
mut x, mut y := 0, 0
106+
for d := 0; d <= maxd; d++ {
107+
// forward search
108+
for k := fmid - d; k <= fmid + d; k += 2 {
109+
// pick the start x from one of the two neighbouring diagonals
if k == fmid - d || (k != fmid + d && c.forward[foff + k + 1] > c.forward[foff + k - 1]) {
110+
x = c.forward[foff + k + 1] // down
111+
} else {
112+
x = c.forward[foff + k - 1] + 1 // right
113+
}
114+
y = x - k
115+
// follow the run of equal elements along diagonal k
for x < alimit && y < blimit && c.a[x] == c.b[y] {
116+
x++
117+
y++
118+
}
119+
c.forward[foff + k] = x
120+
// overlap test against the reverse search (odd case only)
if isodd && k > rmid - d && k < rmid + d {
121+
if c.reverse[roff + k] <= c.forward[foff + k] {
122+
return x, x - k
123+
}
124+
}
125+
}
126+
// reverse search x,y correspond to u,v
127+
for k := rmid - d; k <= rmid + d; k += 2 {
128+
// pick the start x from one of the two neighbouring diagonals
if k == rmid + d || (k != rmid - d && c.reverse[roff + k - 1] < c.reverse[roff + k + 1]) {
129+
x = c.reverse[roff + k - 1] // up
130+
} else {
131+
x = c.reverse[roff + k + 1] - 1 // left
132+
}
133+
y = x - k
134+
// follow the run of equal elements backwards along diagonal k
for x > aoffset && y > boffset && c.a[x - 1] == c.b[y - 1] {
135+
x--
136+
y--
137+
}
138+
c.reverse[roff + k] = x
139+
// overlap test against the forward search (even case only)
if !isodd && k >= fmid - d && k <= fmid + d {
140+
if c.reverse[roff + k] <= c.forward[foff + k] {
141+
// lookup opposite end
142+
x = c.forward[foff + k]
143+
return x, x - k
144+
}
145+
}
146+
}
147+
}
148+
panic('diff.find_middle_snake: should never be reached')
149+
}
150+
151+
// result folds the per-index marks in `c.flags` into a list of changes.
// `n` and `m` are the number of elements of input a and input b to walk over.
// Runs of unmarked positions are skipped in lockstep; every run of deleted
// and/or inserted positions becomes one DiffChange.
@[direct_array_access]
fn (c DiffContext[T]) result(n int, m int) []DiffChange {
	mut i, mut j := 0, 0
	mut changes := []DiffChange{}
	for i < n || j < m {
		both_left := i < n && j < m
		if both_left && !c.flags[i].has(.delete) && !c.flags[j].has(.insert) {
			// unchanged element: advance in both inputs
			i++
			j++
			continue
		}
		start_a := i
		start_b := j
		// consume the deleted run (or the rest of a once b is exhausted)
		for i < n && (j >= m || c.flags[i].has(.delete)) {
			i++
		}
		// consume the inserted run (or the rest of b once a is exhausted)
		for j < m && (i >= n || c.flags[j].has(.insert)) {
			j++
		}
		if i > start_a || j > start_b {
			changes << DiffChange{
				a:   start_a
				b:   start_b
				del: i - start_a
				ins: j - start_b
			}
		}
	}
	return changes
}
175+
176+
// merge_changes joins neighboring changes that are separated by at most
// `context_lines` elements of input a, and stores the merged list back in
// `c.changes`. A merged change spans from the start of the first change to
// the end of the last one, in both inputs.
// The changes must be ordered by ascending positions.
@[direct_array_access]
fn (mut c DiffContext[T]) merge_changes(context_lines int) {
	total := c.changes.len
	if total == 0 {
		return
	}

	mut merged := []DiffChange{}
	mut current := c.changes[0]

	for idx := 1; idx < total; idx++ {
		next := c.changes[idx]
		reach := current.a + current.del + context_lines
		if next.a > reach {
			// too far away: close the running change and start a new one
			merged << current
			current = next
			continue
		}
		// close enough: stretch the running change up to the end of `next`
		current.del = next.a + next.del - current.a
		current.ins = next.b + next.ins - current.b
	}
	merged << current
	c.changes = merged
}
204+
205+
// DiffGenStrParam holds the optional settings accepted by `generate_patch`.
@[params]
206+
pub struct DiffGenStrParam {
207+
pub mut:
208+
colorful bool // wrap the output lines in ANSI color escape sequences
209+
unified int = 3 // how many context lines before/after diff block; negative values are treated as 0
210+
block_header bool // output `@@ -3,4 +3,5 @@` or not
211+
}
212+
213+
// generate_patch renders the computed changes as a unified-diff style string.
// - `param.unified` is the number of context lines around each block
//   (negative values are treated as 0),
// - `param.block_header` adds an `@@ -start,count +start,count @@` line in
//   front of every block,
// - `param.colorful` wraps the output in ANSI color escape sequences.
// It returns an empty string when there are no changes.
// Note that neighboring changes are merged in place by `merge_changes`, so
// `c.changes` holds the merged blocks after this call.
@[direct_array_access]
pub fn (mut c DiffContext[T]) generate_patch(param DiffGenStrParam) string {
	mut sb := strings.new_builder(100)
	defer { unsafe { sb.free() } }

	unified := if param.unified < 0 { 0 } else { param.unified }

	c.merge_changes(unified)
	if c.changes.len == 0 {
		return ''
	}

	mut prev_a_end := 0
	mut prev_b_end := 0

	for change in c.changes {
		ctx_start_a := int_max(prev_a_end, change.a - unified)
		ctx_start_b := int_max(prev_b_end, change.b - unified)
		// The trailing context cannot reach past the end of either input.
		// Without this clamp the counts in the block header would be larger
		// than the number of lines that are actually written for the block.
		raw_end_a := change.a + change.del + unified
		raw_end_b := change.b + change.ins + unified
		ctx_end_a := if raw_end_a > c.a.len { c.a.len } else { raw_end_a }
		ctx_end_b := if raw_end_b > c.b.len { c.b.len } else { raw_end_b }

		if param.block_header {
			count_a := ctx_end_a - ctx_start_a
			count_b := ctx_end_b - ctx_start_b
			// Line numbers are 1-based. In the unified format an empty range
			// is reported with the number of the line that precedes it.
			first_a := if count_a == 0 { ctx_start_a } else { ctx_start_a + 1 }
			first_b := if count_b == 0 { ctx_start_b } else { ctx_start_b + 1 }
			if param.colorful {
				sb.write_string('\033[36m')
			}
			sb.writeln('@@ -${first_a},${count_a} +${first_b},${count_b} @@')
			if param.colorful {
				sb.write_string('\033[0m')
			}
		}

		// leading context, the change itself, trailing context
		c.write_context(mut sb, ctx_start_b, change.b, param)
		c.write_change(mut sb, change, param)
		c.write_context(mut sb, change.b + change.ins, ctx_end_b, param)

		prev_a_end = ctx_end_a
		prev_b_end = ctx_end_b
	}

	return sb.str()
}
255+
256+
// write_context appends the unchanged elements b[start..end] to `sb`, one per
// line, and stops early at the end of input b. With `param.colorful` each
// line is wrapped in ANSI escape sequences.
@[direct_array_access]
fn (c DiffContext[T]) write_context(mut sb strings.Builder,
	start int, end int,
	param DiffGenStrParam) {
	// never read past the last element of b
	stop := if end < c.b.len { end } else { c.b.len }
	for idx := start; idx < stop; idx++ {
		text := c.b[idx].str()
		if !param.colorful {
			sb.writeln(text)
			continue
		}
		sb.writeln('\033[37m${text}\033[0m')
	}
}
274+
275+
// write_change appends one change to `sb`: first the removed elements of
// input a, each prefixed with `-`, then the added elements of input b, each
// prefixed with `+`. With `param.colorful` the lines are wrapped in ANSI
// escape sequences.
@[direct_array_access]
fn (c DiffContext[T]) write_change(mut sb strings.Builder,
	change DiffChange,
	param DiffGenStrParam) {
	del_end := change.a + change.del
	for idx := change.a; idx < del_end; idx++ {
		removed := c.a[idx].str()
		if !param.colorful {
			sb.writeln('-${removed}')
			continue
		}
		sb.writeln('\033[31m-${removed}\033[0m')
	}

	ins_end := change.b + change.ins
	for idx := change.b; idx < ins_end; idx++ {
		added := c.b[idx].str()
		if !param.colorful {
			sb.writeln('+${added}')
			continue
		}
		sb.writeln('\033[32m+${added}\033[0m')
	}
}

0 commit comments

Comments
 (0)