1
1
from enum import StrEnum , auto
2
2
from typing import TypeAlias , NamedTuple , Union
3
+ from collections .abc import Sequence
3
4
4
5
from tree_sitter import Parser
5
6
import cedarscript_grammar
@@ -31,14 +32,23 @@ class BodyOrWhole(StrEnum):
31
32
32
33
33
34
MarkerType = StrEnum ('MarkerType' , 'LINE VARIABLE FUNCTION CLASS' )
34
- RelativePositionType = StrEnum ('RelativePositionType' , 'AT BEFORE AFTER INSIDE' )
35
+ RelativePositionType = StrEnum ('RelativePositionType' , 'AT BEFORE AFTER INSIDE_TOP INSIDE_BOTTOM' )
36
+
37
+
38
class MarkerCompatible:
    """Base for objects that can be viewed as a marker through ``as_marker``.

    ``as_marker`` is exposed as a read-only property so that attribute access
    (``obj.as_marker``) yields the marker itself rather than a bound method.
    Implementers are expected to override it.
    """

    @property
    def as_marker(self) -> 'Marker':
        """Return the marker view of this object.

        Raises:
            NotImplementedError: when the implementing class does not override it.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must override the 'as_marker' property"
        )
35
41
36
42
@dataclass
37
- class Marker :
43
+ class Marker ( MarkerCompatible ) :
38
44
type : MarkerType
39
45
value : str
40
46
offset : int | None = None
41
47
48
+ @property
49
+ def as_marker (self ) -> 'Marker' :
50
+ return self
51
+
42
52
def __str__ (self ):
43
53
result = f"{ self .type .value } '{ self .value } '"
44
54
if self .offset is not None :
@@ -59,7 +69,7 @@ def __str__(self):
59
69
case RelativePositionType .AT :
60
70
pass
61
71
case _:
62
- result = f'{ result } ({ self .qualifier } )'
72
+ result = f'{ result } ({ self .qualifier . replace ( '_' , ' ' ) } )'
63
73
return result
64
74
65
75
@@ -73,8 +83,8 @@ def __str__(self):
73
83
74
84
75
85
# Type aliases describing what a command may point at.
MarkerOrSegment: TypeAlias = Marker | Segment
Region: TypeAlias = BodyOrWhole | Marker | Segment
# Must be an assignment: a bare annotation (`Name: X | Y`) would only declare
# the name without binding it, so the alias could not be referenced.
RegionOrRelativeMarker: TypeAlias = BodyOrWhole | Marker | Segment | RelativeMarker
78
88
# <file-or-identifier>
79
89
80
90
@@ -91,13 +101,18 @@ class SingleFileClause:
91
101
92
102
93
103
@dataclass
class IdentifierFromFile(SingleFileClause, MarkerCompatible):
    """An identifier selected from a single file through a where-clause.

    Can be converted to a ``Marker`` via the ``as_marker`` property.
    """
    where_clause: WhereClause
    identifier_type: MarkerType  # VARIABLE, FUNCTION, CLASS (but not LINE)
    offset: int | None = None

    @property
    def as_marker(self) -> Marker:
        """Marker built from the identifier type, the where-clause value and the offset."""
        # TODO Handle different values for field and operator in where_clause
        return Marker(
            type=self.identifier_type,
            value=self.where_clause.value,
            offset=self.offset,
        )

    def __str__(self):
        # The offset part is only rendered when an offset was given.
        offset_part = "" if self.offset is None else f" at offset {self.offset}"
        head = f"{self.identifier_type.lower()} ({self.where_clause})"
        return f"{head}{offset_part} from file {self.file_path}"
@@ -128,9 +143,13 @@ class DeleteClause(RegionClause):
128
143
129
144
130
145
@dataclass
class InsertClause(MarkerCompatible):
    """Clause holding the relative marker that says where to insert."""
    insert_position: RelativeMarker

    @property
    def as_marker(self) -> RelativeMarker:
        """The insert position itself serves as the marker."""
        position = self.insert_position
        return position
152
+
134
153
135
154
@dataclass
136
155
class MoveClause (DeleteClause , InsertClause ):
@@ -189,7 +208,7 @@ def files_to_change(self) -> tuple[str, ...]:
189
208
class UpdateCommand (Command ):
190
209
target : FileOrIdentifierWithin
191
210
action : EditingAction
192
- content : str | None = None
211
+ content : str | tuple [ Region , int | None ] | None = None
193
212
194
213
@property
195
214
def files_to_change (self ) -> tuple [str , ...]:
@@ -241,7 +260,7 @@ def __init__(self):
241
260
242
261
243
262
class CEDARScriptASTParser (_CEDARScriptASTParserBase ):
244
- def parse_script (self , code_text : str ) -> tuple [list [Command ], list [ParseError ]]:
263
+ def parse_script (self , code_text : str ) -> tuple [Sequence [Command ], Sequence [ParseError ]]:
245
264
"""
246
265
Parses the CEDARScript code and returns a tuple containing:
247
266
- A list of Command objects if parsing is successful.
@@ -315,14 +334,14 @@ def _collect_parse_errors(self, node, code_text, command_ordinal: int) -> list[P
315
334
errors .extend (self ._collect_parse_errors (child , code_text , command_ordinal ))
316
335
return errors
317
336
318
def _get_expected_tokens(self, error_node) -> tuple[str, ...]:
    """
    Provides expected tokens based on the error_node's context.

    Tree-sitter doesn't provide expected tokens directly, so this would have
    to be implemented from the grammar and the error context. For now it is a
    placeholder that ignores ``error_node`` and always returns an empty tuple.
    """
    return ()
326
345
327
346
def parse_command (self , node ):
328
347
match node .type :
@@ -341,7 +360,7 @@ def parse_command(self, node):
341
360
342
361
def parse_create_command(self, node):
    """Build a ``CreateCommand`` from a create-command node.

    The file path comes from the node's ``singlefile_clause`` child and the
    content from ``parse_content``.
    """
    file_clause_node = self.find_first_by_type(node.children, 'singlefile_clause')
    file_clause = self.parse_singlefile_clause(file_clause_node)
    return CreateCommand(
        type='create',
        file_path=file_clause.file_path,
        content=self.parse_content(node),
    )
346
365
347
366
def parse_rm_file_command (self , node ):
@@ -356,7 +375,7 @@ def parse_mv_file_command(self, node):
356
375
def parse_update_command(self, node):
    """Build an ``UpdateCommand`` from an update-command node.

    Target, action and content are each parsed from the same node, in that order.
    """
    return UpdateCommand(
        type='update',
        target=self.parse_update_target(node),
        action=self.parse_update_action(node),
        content=self.parse_content(node),
    )
361
380
362
381
def parse_update_target (self , node ):
@@ -377,7 +396,7 @@ def parse_update_target(self, node):
377
396
raise ValueError (f"[parse_update_target] Invalid target: { invalid } " )
378
397
379
398
def parse_identifier_from_file (self , node ):
380
- identifier_type = node .children [0 ].type # FUNCTION, CLASS, or VARIABLE
399
+ identifier_type = MarkerType ( node .children [0 ].type . casefold ())
381
400
file_clause = self .find_first_by_type (node .named_children , 'singlefile_clause' )
382
401
where_clause = self .find_first_by_type (node .named_children , 'where_clause' )
383
402
offset_clause = self .find_first_by_type (node .named_children , 'offset_clause' )
@@ -431,7 +450,7 @@ def parse_move_clause(self, node):
431
450
destination = self .find_first_by_type (node .named_children , 'update_move_clause_destination' )
432
451
insert_clause = self .find_first_by_type (destination .named_children , 'insert_clause' )
433
452
insert_clause = self .parse_insert_clause (insert_clause )
434
- rel_indent = self .parse_relative_indentation (self . find_first_by_type ( destination . named_children , 'relative_indentation' ) )
453
+ rel_indent = self .parse_relative_indentation (destination )
435
454
# TODO to_other_file
436
455
return MoveClause (
437
456
region = source ,
@@ -460,7 +479,11 @@ def parse_region(self, node) -> Region:
460
479
node = node .named_children [0 ]
461
480
case 'relpos_bai' :
462
481
node = node .named_children [0 ]
463
- qualifier = RelativePositionType (node .child (0 ).type .casefold ())
482
+ main_type = node .child (0 ).type .casefold ()
483
+ match main_type :
484
+ case 'inside' :
485
+ main_type += '_' + node .child (2 ).type .casefold ()
486
+ qualifier = RelativePositionType (main_type )
464
487
node = node .named_children [0 ]
465
488
case 'relpos_beforeafter' :
466
489
qualifier = RelativePositionType (node .child (0 ).type .casefold ())
@@ -469,14 +492,14 @@ def parse_region(self, node) -> Region:
469
492
node = node .named_children [0 ]
470
493
471
494
match node .type .casefold ():
472
- case 'marker' | 'linemarker' :
495
+ case 'marker' | 'linemarker' | 'identifiermarker' :
473
496
result = self .parse_marker (node )
474
497
case 'segment' :
475
498
result = self .parse_segment (node )
476
499
case BodyOrWhole .BODY | BodyOrWhole .WHOLE as bow :
477
500
result = BodyOrWhole (bow .lower ())
478
- case _ as invalid :
479
- raise ValueError (f"[parse_region] Unexpected node type: { invalid } " )
501
+ case _:
502
+ raise ValueError (f"Unexpected node type: { node . type } " )
480
503
if qualifier :
481
504
result = RelativeMarker (qualifier = qualifier , type = result .type , value = result .value , offset = result .offset )
482
505
return result
@@ -502,16 +525,24 @@ def parse_offset_clause(self, node):
502
525
return None
503
526
return int (self .find_first_by_type (node .children , 'number' ).text )
504
527
505
- def parse_relative_indentation (self , node ):
528
+ def parse_relative_indentation (self , node ) -> int :
529
+ node = self .find_first_by_type (node .named_children , 'relative_indentation' )
506
530
if node is None :
507
531
return None
508
- return int (self .find_first_by_type (node .children , 'number' ).text )
532
+ return int (self .find_first_by_type (node .named_children , 'number' ).text )
533
+
534
+ def parse_content (self , node ) -> str | tuple [Region , int | None ]:
535
+ content = self .find_first_by_type (node .named_children , ['content_clause' , 'content_from_segment' ])
536
+ if not content :
537
+ return None
538
+ match content .type :
539
+ case 'content_clause' :
540
+ return self .parse_content_clause (content ) # str
541
+ case 'content_from_segment' :
542
+ return self .parse_content_from_segment_clause (content ) # tuple[Region, int]
543
+ case _:
544
+ raise ValueError (f"Invalid content type: { content .type } " )
509
545
510
- def parse_update_content (self , node ):
511
- content_clause = self .find_first_by_type (node .children , 'content_clause' )
512
- if content_clause :
513
- return self .parse_content_clause (content_clause )
514
- return None
515
546
516
547
def parse_singlefile_clause (self , node ):
517
548
if node is None or node .type != 'singlefile_clause' :
@@ -521,9 +552,7 @@ def parse_singlefile_clause(self, node):
521
552
raise ValueError ("No file_path found in singlefile_clause" )
522
553
return SingleFileClause (file_path = self .parse_string (path_node ))
523
554
524
- def parse_content_clause (self , node ):
525
- if node is None or node .type != 'content_clause' :
526
- raise ValueError ("Expected content_clause node" )
555
+ def parse_content_clause (self , node ) -> str :
527
556
child_type = ['string' , 'relative_indent_block' , 'multiline_string' ]
528
557
content_node = self .find_first_by_type (node .children , child_type )
529
558
if content_node is None :
@@ -535,6 +564,15 @@ def parse_content_clause(self, node):
535
564
elif content_node .type == 'multiline_string' :
536
565
return self .parse_multiline_string (content_node )
537
566
567
def parse_content_from_segment_clause(self, node) -> tuple[Region, int | None]:
    """Parse a ``content_from_segment`` node into ``(region, relative indentation)``.

    Raises:
        ValueError: if the node has no ``marker_or_segment`` child.
    """
    segment_node = self.find_first_by_type(node.children, ['marker_or_segment'])
    if segment_node is None:
        raise ValueError("No content found in content_from_segment")
    # Indentation is read before the region, mirroring the original evaluation order.
    indentation = self.parse_relative_indentation(node)
    region = self.parse_region(segment_node)
    return region, indentation
575
+
538
576
def parse_to_value_clause (self , node ):
539
577
if node is None or node .type != 'to_value_clause' :
540
578
raise ValueError ("Expected to_value_clause node" )
@@ -561,7 +599,7 @@ def parse_string(self, node):
561
599
def parse_multiline_string(self, node):
    """Return the text of a triple-quoted string node without its delimiters.

    ``node.text`` is decoded as UTF-8. When the text is wrapped in a matching
    pair of triple quotes, exactly that pair is removed, so quote characters
    that belong to the content itself are preserved.
    """
    text = node.text.decode('utf8')
    for delimiter in ("'''", '"""'):
        size = len(delimiter)
        if len(text) >= 2 * size and text.startswith(delimiter) and text.endswith(delimiter):
            return text[size:len(text) - size]
    # Not wrapped in a matching delimiter pair: keep the previous lenient
    # behaviour (str.strip removes any run of the given quote characters).
    return text.strip("'''").strip('"""')
563
601
564
- def parse_relative_indent_block (self , node ):
602
+ def parse_relative_indent_block (self , node ) -> str :
565
603
lines = []
566
604
for line_node in node .children :
567
605
if line_node .type == 'relative_indent_line' :
@@ -572,7 +610,7 @@ def parse_relative_indent_block(self, node):
572
610
lines .append (f"{ ' ' * (4 * indent )} { content .text } " )
573
611
return '\n ' .join (lines )
574
612
575
- def find_first_by_type (self , nodes : list [any ], child_type ):
613
+ def find_first_by_type (self , nodes : Sequence [any ], child_type ):
576
614
if isinstance (child_type , list ):
577
615
for child in nodes :
578
616
if child .type in child_type :
0 commit comments