Skip to content

Commit 9fee51a

Browse files
ErwanSimonettiErwan SIMONETTI
authored andcommitted
feat(diff): get_diff function to retrieve human-readable diffs between differrent sql statements
1 parent a2d884e commit 9fee51a

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

sql_compare/__init__.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,26 @@ def compare(first_sql: str, second_sql: str) -> bool:
4141
return first_sql_statements == second_sql_statements
4242

4343

44+
def get_diff(
45+
first_sql: str,
46+
second_sql: str,
47+
) -> list[list[list[str]]]:
48+
"""Show the difference between two SQL schemas, ignoring differences due to column order and other non-significant SQL changes."""
49+
first_statements = [Statement(t) for t in sqlparse.parse(first_sql)]
50+
second_statements = [Statement(t) for t in sqlparse.parse(second_sql)]
51+
first_diffs: list[list[str]] = []
52+
second_diffs: list[list[str]] = []
53+
54+
for first, second in itertools.zip_longest(first_statements, second_statements):
55+
if first != second:
56+
first_value = first.value if first else []
57+
second_value = second.value if second else []
58+
first_diffs.append(first_value)
59+
second_diffs.append(second_value)
60+
61+
return sorted([first_diffs, second_diffs])
62+
63+
4464
@dataclasses.dataclass
4565
class Token:
4666
"""Wrapper around `sqlparse.sql.Token`."""
@@ -152,6 +172,21 @@ def statement_type(self) -> str:
152172
# Only one keyword (e.g.: SELECT, INSERT, DELETE, etc.)
153173
return keywords[0]
154174

175+
@property
176+
def value(self) -> list[str]:
177+
"""Return the reconstructed SQL statement from tokens as a list of strings, excluding tokens with a hash that is just a space."""
178+
179+
def process_token(token: Token | TokenList) -> list[str]:
180+
if isinstance(token, TokenList):
181+
return [
182+
t for sub_token in token.tokens for t in process_token(sub_token)
183+
]
184+
if token.hash.strip():
185+
return [token.hash]
186+
return []
187+
188+
return [t for token in self.tokens for t in process_token(token)]
189+
155190

156191
class UnorderedTokenList(TokenList):
157192
"""Unordered token list."""

tests/test_sql_compare.py

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,138 @@ def test_compare_neq(first_sql: str, second_sql: str) -> None:
184184
def test_statement_type(sql: str, expected_type: str) -> None:
185185
statement = sql_compare.Statement(sqlparse.parse(sql)[0])
186186
assert statement.statement_type == expected_type
187+
188+
189+
@pytest.mark.parametrize(
190+
("first_sql", "second_sql", "expected_diff"),
191+
[
192+
(
193+
"CREATE TABLE foo (id INT PRIMARY KEY)",
194+
"CREATE TABLE foo (id INT UNIQUE)",
195+
[
196+
[["CREATE", "TABLE", "foo", "(", "id", "INT", "PRIMARY KEY"]],
197+
[["CREATE", "TABLE", "foo", "(", "id", "INT", "UNIQUE"]],
198+
],
199+
),
200+
(
201+
"CREATE TYPE public.colors AS ENUM ('RED', 'GREEN', 'BLUE')",
202+
"CREATE TYPE public.colors AS ENUM ('BLUE', 'GREEN', 'RED')",
203+
[[], []],
204+
),
205+
(
206+
"CREATE TYPE public.colors AS ENUM ('RED', 'GREEN', 'BLUE')",
207+
"CREATE TYPE public.colors AS ENUM ('YELLOW', 'BLUE', 'RED')",
208+
[
209+
[
210+
[
211+
"CREATE",
212+
"TYPE",
213+
"public",
214+
".",
215+
"colors",
216+
"AS",
217+
"ENUM",
218+
"(",
219+
"'BLUE'",
220+
",",
221+
"'GREEN'",
222+
",",
223+
"'RED'",
224+
],
225+
],
226+
[
227+
[
228+
"CREATE",
229+
"TYPE",
230+
"public",
231+
".",
232+
"colors",
233+
"AS",
234+
"ENUM",
235+
"(",
236+
"'BLUE'",
237+
",",
238+
"'RED'",
239+
",",
240+
"'YELLOW'",
241+
],
242+
],
243+
],
244+
),
245+
(
246+
"""
247+
CREATE TYPE public.status AS ENUM ('PENDING', 'APPROVED', 'REJECTED');
248+
CREATE TABLE users (id INT, name VARCHAR(100), status public.status);
249+
CREATE INDEX user_status_idx ON users (status);
250+
""",
251+
"""
252+
CREATE TYPE public.status AS ENUM ('PENDING', 'APPROVED', 'ARCHIVED');
253+
CREATE TABLE users (id INT, name VARCHAR(100), status public.status);
254+
CREATE INDEX user_status_idx ON users (status);
255+
CREATE TABLE logs (id INT, message TEXT);
256+
""",
257+
[
258+
[
259+
[
260+
"CREATE",
261+
"TYPE",
262+
"public",
263+
".",
264+
"status",
265+
"AS",
266+
"ENUM",
267+
"(",
268+
"'APPROVED'",
269+
",",
270+
"'ARCHIVED'",
271+
",",
272+
"'PENDING'",
273+
";",
274+
],
275+
[
276+
"CREATE",
277+
"TABLE",
278+
"logs",
279+
"(",
280+
"id",
281+
"INT",
282+
",",
283+
"message",
284+
"TEXT",
285+
";",
286+
],
287+
],
288+
[
289+
[
290+
"CREATE",
291+
"TYPE",
292+
"public",
293+
".",
294+
"status",
295+
"AS",
296+
"ENUM",
297+
"(",
298+
"'APPROVED'",
299+
",",
300+
"'PENDING'",
301+
",",
302+
"'REJECTED'",
303+
";",
304+
],
305+
[],
306+
],
307+
],
308+
),
309+
],
310+
)
311+
def test_get_diff(
312+
first_sql: str,
313+
second_sql: str,
314+
expected_diff: list[list[list[str]]],
315+
) -> None:
316+
result = sql_compare.get_diff(first_sql, second_sql)
317+
assert result == expected_diff
318+
assert sql_compare.get_diff(first_sql, second_sql) == sql_compare.get_diff(
319+
second_sql,
320+
first_sql,
321+
)

0 commit comments

Comments
 (0)