@@ -107,7 +107,6 @@ def get_tokens(
107
107
docstring = None
108
108
if ds_begin != - 1 and ds_end != - 1 :
109
109
docstring = code [ds_begin + 3 : ds_end ].strip ()
110
- docstring += " ."
111
110
112
111
# Erase docstring from the code
113
112
if ds_begin != - 1 and ds_end != - 1 :
@@ -159,6 +158,9 @@ def get_previous_comments(
159
158
return precomment
160
159
161
160
161
+ error_counter = 0
162
+
163
+
162
164
def collect_data (
163
165
filename : str ,
164
166
args : argparse .ArgumentParser ) -> List [List [str ]]:
@@ -170,14 +172,29 @@ def collect_data(
170
172
Summarized data from functions.
171
173
is_appropriate: bool
172
174
A flag indicating that the file is appropriate
173
- (enough scope size).
175
+ (enough scope size or no errors in parsing ).
174
176
"""
177
+ global error_counter
178
+
179
+ # Convert Python 2 to Python 3
180
+ os .system (f"~/anaconda3/envs/scs/bin/2to3 { filename } -w -n" )
181
+ print ("Building AST tree from a filename:" , filename )
182
+
175
183
code = read_file_to_string (filename )
184
+
185
+ # Replace tabs with spaces (four per tab) before further processing
186
+ code = re .sub ('\t ' , ' ' * 4 , code )
187
+
176
188
code_lines = code .splitlines ()
177
189
178
- print ("Building AST tree from a filename:" , filename )
179
- atok = asttokens .ASTTokens (code , parse = True )
180
- astree = atok .tree
190
+ try :
191
+ atok = asttokens .ASTTokens (code , parse = True )
192
+ astree = atok .tree
193
+ except IndentationError :
194
+ print ("Files with an error:" , error_counter )
195
+ error_counter += 1
196
+ is_appropriate = False
197
+ return None , is_appropriate
181
198
182
199
data = []
183
200
@@ -208,6 +225,13 @@ def collect_data(
208
225
209
226
function_code = code [fun_begin :fun_end ]
210
227
228
+ # If the function starts with a decorator such as @classmethod,
229
+ # drop everything before the first "def" to relax the indentation
230
+ start_def = function_code .find ("def" )
231
+ function_code = function_code [start_def :]
232
+ # if start_def > 0:
233
+
234
+
211
235
function_code , tokens , comments , docstring , stopwords_count = \
212
236
get_tokens (function_code )
213
237
@@ -247,6 +271,7 @@ def retrieve_functions_docstrings(
247
271
248
272
preprocess_code = Preprocess ("code" )
249
273
preprocess_comment = Preprocess ("anno" )
274
+ preprocess_docstring = Preprocess ("docs" )
250
275
251
276
comments = []
252
277
docstrings = []
@@ -278,7 +303,11 @@ def retrieve_functions_docstrings(
278
303
279
304
functions .append (code )
280
305
tokens .append (fun_tokens_string )
281
- docstrings .append (docstring )
306
+
307
+ if docstring is not None :
308
+ docstring = preprocess_docstring .clean (docstring ).strip ()
309
+ if len (docstring ) > 0 :
310
+ docstrings .append (docstring )
282
311
283
312
return comments , docstrings , functions , ord_nodes , tokens
284
313
@@ -311,6 +340,11 @@ def set_script_arguments(parser):
311
340
312
341
313
342
def main (args ):
343
+ global error_counter
344
# Clear the conversion directory
345
+ if os .path .exists ("converted" ):
346
+ shutil .rmtree ("converted" )
347
+ os .mkdir ("converted" )
314
348
315
349
# Clear the output directory
316
350
directory = args .output_dir
@@ -325,7 +359,7 @@ def main(args):
325
359
dcs_file = open (os .path .join (directory , args .docstrings_file ), "a" )
326
360
print ("Opened output files..." )
327
361
328
- dcs_cnt , comments_cnt , seq_cnt , ast_cnt = 0 , 0 , 0 , 0
362
+ dcs_cnt , comments_cnt , seq_cnt , ast_cnt , file_cnt = 0 , 0 , 0 , 0 , 0
329
363
330
364
for root , _ , fnames in sorted (os .walk (args .dirname )):
331
365
# print("ROOT:", root)
@@ -336,6 +370,8 @@ def main(args):
336
370
filename = os .path .join (root , fname )
337
371
338
372
data , is_appropriate = collect_data (filename , args )
373
+ if not is_appropriate :
374
+ continue
339
375
comments , docstrings , functions , ord_nodes , tokens = \
340
376
retrieve_functions_docstrings (data , args )
341
377
@@ -364,10 +400,13 @@ def main(args):
364
400
ast_file .write (f"{ ast_string } \n " )
365
401
ast_cnt += 1
366
402
403
+ file_cnt += 1
367
404
print ("Updated docstrings count:" , dcs_cnt )
368
405
print ("Updated comment count:" , comments_cnt )
369
406
print ("Updated sequential count:" , seq_cnt )
370
407
print ("Updated AST count:" , ast_cnt )
408
+ print ("Processed/Canceled/Total files:" ,
409
+ f"{ file_cnt } /{ error_counter } /{ file_cnt + error_counter } " )
371
410
print ("~" * 50 )
372
411
373
412
sequence_file .close ()
0 commit comments