forked from tensorflow/docs
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnb_code_sync.py
executable file
·206 lines (174 loc) · 7.05 KB
/
nb_code_sync.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python3
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keep translated notebook code in sync with the source-of-truth notebook.
This tool attempts to make it easier to keep the community translation *code*
in sync with the en/ source-of-truth notebooks. It intentionally ignores
Markdown cells and only compares code cells. There must be the same number of
code cells in the source notebook and the translation notebook.
Usage: nb_code_sync.py [--lang=en] site/lang/notebook.ipynb [...]
Useful when used with interactive git workflow to selectively add hunks:
git add --patch site/lang/notebook.ipynb
Commands:
y: stage this hunk
n: do not stage this hunk
s: split this hunk
e: edit this hunk
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import errno
import json
import os
import pathlib
import re
import sys
from absl import app
from absl import flags
# Command-line flags, registered with absl at import time.

# Language directory under the site root whose notebooks supply the code.
# Only consulted when --src is not given.
# NOTE(review): "js" looks like a typo for "ja" (Japanese) -- confirm against
# the language directories that actually exist under the site root.
flags.DEFINE_enum(
    name="lang",
    default="en",
    enum_values=["en", "js", "ko", "pt", "ru", "tr", "zh-cn"],
    help="Language directory to import from.")

# Explicit source notebook, or a directory containing a notebook with the same
# file name as the destination; overrides the --lang based lookup.
flags.DEFINE_string(
    name="src",
    default=None,
    help="Source file or parent directory of source.")

# Print the updated notebook instead of overwriting the destination file.
flags.DEFINE_boolean(
    name="stdout",
    default=False,
    help="Write to stdout instead of file.")

# Directory that holds the per-language doc trees; when unset the script falls
# back to a "site" directory located relative to this file.
flags.DEFINE_string(
    name="site_root",
    default=None,
    help="Root directory of site docs.")
class Notebook(object):
  """Represents a parsed .ipynb notebook file.

  Attributes:
    path: Path to the notebook file, or None when the notebook was built
      directly from parsed data instead of being loaded with `from_path`.
    data: The complete parsed notebook (the decoded .ipynb JSON object).
    code_cells: Only the code cells of `data`. These are the very same dict
      objects stored in `data["cells"]`, so changing one also changes `data`.
  """

  path = None

  def __init__(self, data):
    """Inits Notebook from parsed .ipynb notebook data."""
    self.data = data
    self.code_cells = self._load_code_cells(self.data)

  @classmethod
  def from_path(cls, path):
    """Inits Notebook using the path to an .ipynb file.

    Args:
      path: Location of the notebook file.

    Returns:
      A new instance of `cls` with its `path` attribute set.

    Raises:
      FileNotFoundError: If `path` is not an existing file.
      Exception: If `path` does not have an .ipynb extension.
    """
    pth = cls._check_path(path)
    # Read as UTF-8 explicitly: translated notebooks contain non-ASCII text and
    # the locale's default encoding is not guaranteed to decode it.
    with open(pth, encoding="utf-8") as json_data:
      data = json.load(json_data)
    # Build through `cls` so that subclasses get an instance of their own type.
    nb = cls(data)
    nb.path = pth
    return nb

  @staticmethod
  def is_notebook(path):
    """Test if a file is an .ipynb file based on its extension.

    Args:
      path: Location of the file to test.

    Returns:
      True if the (case-insensitive) file extension is ".ipynb".

    Raises:
      FileNotFoundError: If `path` is not an existing regular file.
    """
    if not os.path.isfile(path):
      raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
    return os.path.splitext(path)[-1].lower() == ".ipynb"

  @staticmethod
  def _check_path(pth):
    """Validate a notebook location and return it as a `pathlib.Path`.

    Raises:
      FileNotFoundError: If `pth` does not exist.
      Exception: If `pth` does not have an .ipynb extension.
    """
    if not Notebook.is_notebook(pth):
      raise Exception("Notebook must be an .ipynb file: {}".format(pth))
    path = pathlib.Path(pth)
    if not path.exists():
      raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path)
    return path

  def _load_code_cells(self, data):
    """Return the code cells of `data`, dropping an empty trailing code cell."""
    code_cells = [c for c in data["cells"] if c["cell_type"] == "code"]
    # Discard the last code cell when its source holds nothing but empty
    # strings. The emptiness test is skipped for notebooks without any code
    # cell, which would otherwise fail on the [-1] lookup.
    if code_cells and not any(code_cells[-1]["source"]):
      del code_cells[-1]
    return code_cells

  @staticmethod
  def _strip_line(line):
    """Remove comments and any trailing whitespace.

    Everything from the first "#" on is dropped, including a "#" that appears
    inside a string literal. That is acceptable because the result is only
    used to compare two cells, never written back.
    """
    line = re.sub(r"^(.*?)#(.*)$", r"\1", line)
    return line.rstrip()

  @staticmethod
  def _is_source_code_equal(x_list, y_list):
    """Scrub lines of comments, remove empty lines, then compare.

    Args:
      x_list: Source lines of one cell.
      y_list: Source lines of the other cell.

    Returns:
      True if both cells hold the same code once comments, trailing whitespace
      and blank lines are ignored.
    """

    def scrub(lines):
      # Strip first and filter afterwards: a comment-only or whitespace-only
      # line becomes empty only after stripping, and must not count as code.
      stripped = (Notebook._strip_line(line) for line in lines)
      return [line for line in stripped if line]

    return scrub(x_list) == scrub(y_list)

  def _set_cell_source(self, cell_id, source):
    """Replace the source of the first cell whose metadata id is `cell_id`.

    Args:
      cell_id: Value of the `metadata.id` field identifying the cell.
      source: New value for the cell's `source` field.

    Raises:
      Exception: If no cell carries the requested id.
    """
    for cell in self.data["cells"]:
      # Cells without a metadata id (e.g. Markdown cells) are skipped rather
      # than aborting the search.
      metadata = cell.get("metadata", {})
      if "id" in metadata and metadata["id"] == cell_id:
        cell["source"] = source
        break
    else:
      # for-loop exhausted
      raise Exception("Did not find cell id '{}' in notebook.".format(cell_id))

  def update(self, notebook):
    """Update code cells that differ from the provided notebook.

    Cells are paired by position. A cell is overwritten only when its code
    differs after scrubbing comments, which ensures translated comments are
    preserved until the code itself changes.

    Args:
      notebook: The source-of-truth Notebook to copy code from.

    Raises:
      Exception: If the two notebooks do not have the same number of code
        cells.
    """
    if len(self.code_cells) != len(notebook.code_cells):
      raise Exception("Notebooks must have same amount of code cells.")
    for dest_cell, src_cell in zip(self.code_cells, notebook.code_cells):
      if not Notebook._is_source_code_equal(src_cell["source"],
                                            dest_cell["source"]):
        # `dest_cell` is the same object that lives in self.data["cells"], so
        # assigning here updates the notebook without a lookup by cell id
        # (which fails for cells lacking an id and is ambiguous for
        # duplicated ids).
        dest_cell["source"] = src_cell["source"]

  def write(self, use_stdout=False):
    """Write notebook to file or print to screen.

    Args:
      use_stdout: If True, print the notebook JSON to stdout; otherwise
        overwrite the file at `self.path`, which must be set.
    """

    def print_file(outfile):
      json.dump(self.data, outfile, indent=2, ensure_ascii=False)
      outfile.write("\n")  # add trailing newline

    if use_stdout:
      print_file(sys.stdout)
    else:
      # ensure_ascii=False emits raw non-ASCII characters, so the file must be
      # opened as UTF-8 instead of relying on the locale's default encoding.
      with open(self.path, "w", encoding="utf-8") as outfile:
        print_file(outfile)
      print("Wrote: {}".format(self.path))
def get_src_path(user_flags, notebook):
  """Get path of source notebook based on user flags or the destination file.

  Resolution order:
    1. No `src` flag: take the destination's path below the site root, replace
       its first component (the language directory) with `user_flags.lang`.
    2. `src` is a directory: the file in it named like the destination.
    3. `src` is a file: that file.

  Args:
    user_flags: Command-line arguments; `site_root`, `src` and `lang` are read.
    notebook: Destination notebook used to select source notebook. Its `path`
      attribute must be a `pathlib.Path`.

  Returns:
    A Path of the source-of-truth notebook.

  Raises:
    FileNotFoundError: If user args for site_root or src are invalid locations.
    ValueError: If no `src` is given and the destination notebook is not
      located below the site root.
  """
  if user_flags.site_root:
    site_root = pathlib.Path(user_flags.site_root)
  else:
    # Default to the "site" directory next to this script's parent directory.
    site_root = pathlib.Path(__file__).parent.parent.joinpath("site")
  if not site_root.is_dir():
    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), site_root)

  if not user_flags.src:
    # Determine path from the destination notebook and the source language.
    try:
      fp_relpath = notebook.path.relative_to(site_root)
    except ValueError:
      # The two paths may merely be spelled differently (relative vs.
      # absolute, or through a symlink); compare their resolved forms before
      # giving up. A ValueError from here means the notebook really is
      # outside the site root.
      fp_relpath = notebook.path.resolve().relative_to(site_root.resolve())
    # Drop the leading language directory of the destination.
    fp_relpath = pathlib.Path(*fp_relpath.parts[1:])
    return site_root.joinpath(user_flags.lang, fp_relpath)

  if os.path.isdir(user_flags.src):
    return pathlib.Path(user_flags.src) / notebook.path.name
  if os.path.isfile(user_flags.src):
    return pathlib.Path(user_flags.src)
  raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                          user_flags.src)
def main(argv):
  """Sync the code cells of every notebook named on the command line.

  Args:
    argv: Program name followed by the destination notebook paths.

  Raises:
    app.UsageError: If no destination path was given.
  """
  targets = argv[1:]
  if not targets:
    raise app.UsageError("Missing command-line arguments.")

  for target in targets:
    if Notebook.is_notebook(target):
      translated = Notebook.from_path(target)
      # Locate and load the source-of-truth counterpart of this notebook.
      original = Notebook.from_path(get_src_path(flags.FLAGS, translated))
      translated.update(original)
      translated.write(flags.FLAGS.stdout)
    else:
      # Existing files with another extension are reported and skipped.
      message = "Not a notebook file, skipping: {}".format(target)
      print(message, file=sys.stderr)


if __name__ == "__main__":
  app.run(main)