test_ci.py
"""
Validate that CI and source and docs are in sync
"""
import re
import os
import pathlib
import unittest
import platform
import itertools
import subprocess
from typing import Iterator, List, Tuple
from dffml.plugins import PACKAGE_DIRECTORY_TO_NAME


class IgnoreFile:
"""
Checks if files should be ignored by reading ignore files such as .gitignore
and .dockerignore and parsing their rules.

    Examples
    --------
    >>> import pathlib
    >>> from dffml import IgnoreFile
    >>>
    >>> root = pathlib.Path(".").resolve()
    >>> root.joinpath("subdir").mkdir()
    >>> _ = root.joinpath(".gitignore").write_text("subdir/**")
    >>> _ = root.joinpath("subdir", ".gitignore").write_text("!sub2/**")
    >>>
    >>> ignorefile = IgnoreFile(root)
    >>> print(ignorefile("subdir/sub2/feedface"))
    False
    >>> print(ignorefile("subdir/other"))
    True
"""

    def __init__(
        self, root: pathlib.Path, ignore_files: List[str] = [".gitignore"]
    ):
self.root = root
self.ignore_files = ignore_files
self.compiled_regexes = {}

    @staticmethod
def path_to_lines(path: pathlib.Path):
return list(
filter(bool, path.read_text().replace("\r\n", "\n").split("\n"))
)

    @staticmethod
    def compile_regexes(
        contents: List[str],
    ) -> Iterator[Tuple[bool, re.Pattern]]:
for line in contents:
# Handle the case where we do not want to ignore files matching this
# pattern
do_not_ignore = False
if line.startswith("!"):
line = line[1:]
do_not_ignore = True
            # Escape periods so they match literal periods rather than any
            # character
            line = line.replace(".", r"\.")
            # Translate the glob wildcard * into its regex equivalent, .*
            # (an approximation of full gitignore glob semantics)
            line = line.replace("*", r".*")
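            # Illustrative examples (hypothetical inputs) of the translation:
            #   "subdir/**"  -> yields (False, re.compile(r"subdir/.*.*"))
            #   "!sub2/**"   -> yields (True, re.compile(r"sub2/.*.*"))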
# Compile the regex
yield do_not_ignore, re.compile(line)

    def __call__(self, filename: str) -> bool:
# Get the absolute file path
filepath = pathlib.Path(filename).absolute()
# Read any ignore files and compile their regexes from the file path up
# to the root of the repo
for ignore_filename in self.ignore_files:
for directory in list(filepath.parents)[
: filepath.parents.index(self.root) + 1
]:
ignore_path = directory / ignore_filename
if (
directory not in self.compiled_regexes
and ignore_path.is_file()
):
self.compiled_regexes[directory] = list(
self.compile_regexes(self.path_to_lines(ignore_path))
)
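        # At this point self.compiled_regexes maps each directory between the
        # file and the repo root that contains an ignore file to its list of
        # (do_not_ignore, regex) tuples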
        # Get all applicable regexes by looking through the dict of compiled
        # regexes and grabbing any whose directory is one of the file's parents
directories = []
for directory in self.compiled_regexes.keys():
if directory.resolve() in filepath.parents:
directories.append(directory)
# Check if any match
ignore = False
for directory in directories:
for do_not_ignore, regex in self.compiled_regexes[directory]:
if not do_not_ignore and regex.match(
str(filepath.relative_to(directory)).replace(os.sep, "/")
):
ignore = True
# Check if any are supposed to not be ignored even though they match
# other patterns
for directory in directories:
for do_not_ignore, regex in self.compiled_regexes[directory]:
if (
do_not_ignore
and ignore
and regex.match(
str(filepath.relative_to(directory)).replace(
os.sep, "/"
)
)
):
ignore = False
return ignore


class TestGitIgnore(unittest.TestCase):
def test_ignore(self):
ignorefile = IgnoreFile(root=pathlib.Path(__file__).parents[1])
self.assertFalse(ignorefile("setup.py"))
self.assertFalse(ignorefile("dffml/skel/common/setup.py"))
self.assertTrue(ignorefile("dffml/skel/model/setup.py"))
self.assertTrue(
ignorefile(
"examples/shouldi/tests/downloads/cri-resource-manager-download/.gopath/pkg/mod/github.com/apache/thrift@v0.12.0/contrib/fb303/py/setup.py"
)
)


REPO_ROOT = pathlib.Path(__file__).resolve().parents[1]


@unittest.skipUnless(platform.system() == "Linux", "Only runs on Linux")
class TestCI(unittest.TestCase):
maxDiff = None
SKIP_SETUP_PY_FILES = [
REPO_ROOT / "setup.py",
REPO_ROOT / "dffml" / "skel" / "common" / "setup.py",
REPO_ROOT / "build" / "lib" / "dffml" / "skel" / "common" / "setup.py",
REPO_ROOT / "examples" / "source" / "setup.py",
REPO_ROOT
/ "examples"
/ "tutorials"
/ "sources"
/ "file"
/ "dffml-source-ini"
/ "setup.py",
]

    def test_all_plugins_appear_in_dffml_plugins(self):
"""
Make sure that any setup.py files associated with a plugin appear in
dffml/plugins.py
"""
ignorefile = IgnoreFile(REPO_ROOT)
        # A sorted list of directory tuples, relative to the root of the repo,
        # which contain setup.py files. Directories whose setup.py files are
        # listed in SKIP_SETUP_PY_FILES are excluded from this list
setup_py_directories = sorted(
map(
lambda path: path.parent.relative_to(REPO_ROOT).parts,
filter(
lambda path: path not in self.SKIP_SETUP_PY_FILES,
itertools.filterfalse(
ignorefile, REPO_ROOT.rglob("setup.py")
),
),
)
)
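        # For illustration (hypothetical entries): each element is a tuple of
        # path parts such as ("model", "scikit"), matching a key of
        # PACKAGE_DIRECTORY_TO_NAME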
self.assertListEqual(
setup_py_directories, sorted(PACKAGE_DIRECTORY_TO_NAME.keys())
)

    def test_all_plugins_being_tested(self):
"""
Make sure that plugins are included in the test matrix and therefore
being tested by the CI.
"""
        # We compare against PACKAGE_DIRECTORY_TO_NAME as the source of truth
        # because test_all_plugins_appear_in_dffml_plugins() validates that
        # every directory containing a setup.py appears in
        # PACKAGE_DIRECTORY_TO_NAME.
should_be = sorted(
list(
map(
lambda directories: "/".join(directories),
PACKAGE_DIRECTORY_TO_NAME.keys(),
)
)
+ ["."]
)
        # Load the CI testing workflow by hand to avoid requiring the yaml
        # module, as that has C dependencies. We read the file and split it
        # into lines
lines = (
pathlib.Path(REPO_ROOT, ".github", "workflows", "testing.yml",)
.read_text()
.split("\n")
)
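        # The loop below expects the test matrix to contain a plugin list along
        # these lines (hypothetical excerpt of testing.yml):
        #
        #     strategy:
        #       matrix:
        #         plugin:
        #         - .
        #         - model/scikit
        #         - source/mysql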
        plugins_tested_by_ci = []
        # Once we see the "plugin:" key we start adding the subsequent list of
        # plugins to our list of plugins tested by CI. We count how many plugin
        # lists we see so we can verify there was only one.
        start_adding_plugins = 0
# Go over each line in the YAML file
for line in lines:
if line.strip() == "plugin:":
# Start adding when we see the list of plugins
start_adding_plugins += 1
elif start_adding_plugins and ":" in line:
# If we've reached the next YAML object key we're done adding to
# the list of plugins
break
elif start_adding_plugins:
# Add plugins to list of plugins being tested
# Line is in the format of: "- plugin/path"
plugins_tested_by_ci.append(line.strip().split()[-1])
        # Make sure we actually found plugins, and that only one plugin list
        # was present
self.assertTrue(plugins_tested_by_ci, "No plugins found!")
self.assertEqual(
start_adding_plugins, 1, "More than one list of plugins found!"
)
# Sort them
plugins_tested_by_ci = sorted(plugins_tested_by_ci)
# Compare to truth
self.assertListEqual(should_be, plugins_tested_by_ci)

    def test_all_plugins_have_pypi_tokens(self):
"""
Make sure every plugin is listed with a PyPi API token to enable
automatic releases.
"""
        # Load the CI testing workflow by hand to avoid requiring the yaml
        # module, as that has C dependencies.
        # We read the file, split it into lines, filter for lines mentioning
        # PyPi tokens, and make a list of tuples containing the left hand side
        # of each line's '=', split on the '/' character.
        # We skip the line which sets the default TWINE_PASSWORD environment
        # variable, since that's for the main package (not any of the plugins).
# Example:
# model/vowpalWabbit=${{ secrets.PYPI_MODEL_VOWPALWABBIT }}
# This line results in a list entry of: ('model', 'vowpalWabbit')
plugins_with_pypi_tokens = sorted(
map(
lambda i: tuple(i.strip().split("=")[0].split("/")),
filter(
                    lambda line: "secrets.PYPI_" in line
                    and "TWINE_PASSWORD" not in line,
pathlib.Path(
REPO_ROOT, ".github", "workflows", "testing.yml"
)
.read_text()
.split("\n"),
),
)
)
        # We compare this list to the list of packages dffml.plugins knows
        # about, to make sure that every package has a secret so it can be
        # auto-deployed to PyPi.
self.assertListEqual(
plugins_with_pypi_tokens, sorted(PACKAGE_DIRECTORY_TO_NAME.keys())
)


class TestSecurity(unittest.TestCase):
"""
Tests to keep our codebase secure
"""

    def test_hash_usages(self):
"""
Make sure we've audited everywhere hashlib is used
"""
output = subprocess.check_output(
["git", "grep", "hashlib", "--", "**/*.py"], cwd=str(REPO_ROOT)
).decode()
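        # Each line of git grep output is of the form "<path>:<matching line>",
        # for example: "dffml/util/crypto.py:import hashlib"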
file_name_to_list_of_lines = {}
for line in filter(bool, output.split("\n")):
filename, line = line.split(":", maxsplit=1)
# Skip this file
if filename == str(
pathlib.Path(__file__).resolve().relative_to(REPO_ROOT)
):
continue
file_name_to_list_of_lines.setdefault(filename, [])
file_name_to_list_of_lines[filename].append(line)
self.maxDiff = None
self.assertDictEqual(
file_name_to_list_of_lines,
{
"dffml/util/crypto.py": [
"import hashlib",
"SECURE_HASH_ALGORITHM = hashlib.sha384",
"INSECURE_HASH_ALGORITHM = hashlib.md5",
],
"dffml/util/file.py": [
" >>> import hashlib",
" >>> expected_sha384_hash = hashlib.sha384(correct_contents).hexdigest()",
],
"feature/auth/dffml_feature_auth/feature/operations.py": [
"import hashlib",
" # ---- BEGIN Python hashlib docs ----",
" # ---- END Python hashlib docs ----",
' hashed_password = hashlib.pbkdf2_hmac("sha384", password, salt, 100000)',
],
"operations/deploy/dffml_operations_deploy/operations.py": [
"import hashlib",
" calculated = hmac.new(key, body, hashlib.sha1).hexdigest()",
],
},
)