-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL Create Crawler.py
103 lines (85 loc) · 4.79 KB
/
SQL Create Crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"""
SQL Object ID Check Generator
Description:
This script automates the process of scanning through .sql files within specified directories
to identify CREATE statements for various SQL objects (e.g., tables, views, functions, procedures).
It extracts the object names and types from these statements and generates a report that lists
each object's type, name, and the source file. This tool is particularly useful for database
administrators and developers who need to audit SQL scripts for object creation patterns or
ensure that scripts do not inadvertently recreate existing objects in a database.
Usage:
- Update the 'directories' list with the paths to the directories containing your .sql files.
- Specify the 'output_dir' where the output file containing the object ID checks will be saved.
- Ensure the 'encodings' list contains all encodings you wish to try when opening .sql files.
- Run the script. It will process all .sql files in the specified directories and output a
single file in the specified output directory, named with a timestamp indicating when the
script was run.
Features:
- Searches for .sql files in specified directories and their subdirectories.
- Supports multiple file encodings, attempting to read each file with each encoding in the list until successful.
- Identifies CREATE statements for a predefined set of SQL object types and extracts the relevant object names.
- Outputs a comprehensive list of the found objects, including their type, name, and the file they were found in,
to a timestamped file for easy reference and analysis.
Note:
The script assumes that the CREATE
"""
import os
import re
import datetime
def find_sql_files_in_directories(directories):
"""Find all SQL files in the given directories."""
sql_files = []
for directory in directories:
for root, _, files in os.walk(directory):
for file in files:
if file.endswith('.sql'):
sql_files.append(os.path.join(root, file))
return sql_files
def try_open_file_with_encodings(file_path, encodings):
"""Attempt to open a file with different encodings until successful."""
for encoding in encodings:
try:
with open(file_path, 'r', encoding=encoding) as file:
return file.readlines(), encoding
except UnicodeDecodeError:
continue
raise ValueError(f"Failed to open file {file_path} with any of the specified encodings.")
def find_create_statements(sql_file_paths, output_dir, encodings):
"""Generate object ID checks for CREATE statements in SQL files, handling different encodings."""
create_types = [
"CREATE TYPE", "CREATE TABLE", "CREATE VIEW", "CREATE FUNCTION",
"CREATE SYNONYM", "CREATE PROCEDURE", "CREATE SEQUENCE",
"CREATE TRIGGER", "CREATE CONSTRAINT", "CREATE CLUSTERED INDEX",
"CREATE INDEX", "CREATE NONCLUSTERED INDEX", "CREATE UNIQUE CLUSTERED",
"CREATE UNIQUE NONCLUSTERED", "CREATE CLUSTERED INDEX"
]
object_id_checks = []
for sql_file_path in sql_file_paths:
try:
lines, used_encoding = try_open_file_with_encodings(sql_file_path, encodings)
for line in lines:
if any(create_type in line for create_type in create_types):
match = re.search(r'\[(.*?)\](?:\.\[(.*?)\])?', line)
if match:
full_object_name = '.'.join(filter(None, match.groups())).lower()
object_type = line.strip().split()[1].lower()
filename_no_ext = os.path.splitext(os.path.basename(sql_file_path))[0]
object_id_check = f"{object_type},{full_object_name},{filename_no_ext}\n"
object_id_checks.append(object_id_check)
except Exception as e:
print(f"Error processing {sql_file_path} with encoding {used_encoding}: {e}")
# Output file
output_file_path = os.path.join(output_dir, f"object_id_checks_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.txt")
with open(output_file_path, 'w', encoding='utf-8') as output_file:
output_file.writelines(object_id_checks)
print(f"Output written to {output_file_path}")
encodings = ['utf-8-sig', 'utf-8', 'latin-1']
directories = [
r'C:\b\bamplus-postgres-research-main\bamplus-postgres-research-main\Bam+ Installer\module-packages\BAMPlus.BAM.Bridge\database\new_install_before\\'
# Add other directories as needed
]
output_dir = r'C:\temp\\'
# Find SQL files in directories
sql_file_paths = find_sql_files_in_directories(directories)
# Generate object ID checks
find_create_statements(sql_file_paths, output_dir, encodings)