@@ -118,16 +118,21 @@ def get_tokens(code: str) -> Tuple[list, int, list]:
     comments = list(processor.comments.values())
 
     stopwords_count = 0
+    is_tokenizable = True
 
-    for idx, token in enumerate(
-            tokenize.tokenize(BytesIO(code.encode('utf-8')).readline)):
-        # Form indices and tokens
-        if token.string not in TOKENS_STOPWORDS:
-            # print(f"idx: {idx}, token: {token.string}")
-            tokens.append(token.string)
-        else:
-            stopwords_count += 1
-    return code, tokens, comments, docstring, stopwords_count
+    try:
+        for idx, token in enumerate(
+                tokenize.tokenize(BytesIO(code.encode('utf-8')).readline)):
+            # Form indices and tokens
+            if token.string not in TOKENS_STOPWORDS:
+                # print(f"idx: {idx}, token: {token.string}")
+                tokens.append(token.string)
+            else:
+                stopwords_count += 1
+    except tokenize.TokenError:
+        is_tokenizable = False
+        return None, None, comments, docstring, stopwords_count, is_tokenizable
+    return code, tokens, comments, docstring, stopwords_count, is_tokenizable
 
 
 def get_previous_comments(fun: ast.FunctionDef, code_lines: List[str]) -> str:
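For context: this change widens the tuple returned by get_tokens from five elements to six, adding an is_tokenizable flag, so every caller has to unpack the extra value and can now detect source that tokenize rejects. A minimal sketch of the new calling convention, assuming a caller that loops over extracted function sources (the function_sources variable and the loop are illustrative, not part of this commit):

    for source in function_sources:
        code, tokens, comments, docstring, stopwords_count, is_tokenizable = get_tokens(source)
        if not is_tokenizable:
            # tokenize.TokenError was raised (e.g. on truncated or malformed
            # source); code and tokens come back as None, so skip this sample.
            continue
        # process code and tokens as before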