/
syntax.c
3508 lines (3101 loc) · 99.4 KB
/
syntax.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GNU Emacs.
GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <ctype.h>
#include <setjmp.h>
#include "lisp.h"
#include "commands.h"
#include "buffer.h"
#include "character.h"
#include "keymap.h"
#include "regex.h"
/* Make syntax table lookup grant data in gl_state. */
#define SYNTAX_ENTRY_VIA_PROPERTY
#include "syntax.h"
#include "intervals.h"
/* We use these constants in place for comment-style and
string-ender-char to distinguish comments/strings started by
comment_fence and string_fence codes. */
#define ST_COMMENT_STYLE (256 + 1)
#define ST_STRING_STYLE (256 + 2)
#include "category.h"
Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
int words_include_escapes;
int parse_sexp_lookup_properties;
/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
int multibyte_syntax_as_symbol;
/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
if not compiled with GCC. No need to mark it, since it is used
only very temporarily. */
Lisp_Object syntax_temp;
/* Non-zero means an open parenthesis in column 0 is always considered
to be the start of a defun. Zero means an open parenthesis in
column 0 has no special meaning. */
int open_paren_in_column_0_is_defun_start;
/* This is the internal form of the parse state used in parse-partial-sexp. */
struct lisp_parse_state
{
int depth; /* Depth at end of parsing. */
int instring; /* -1 if not within string, else desired terminator. */
int incomment; /* -1 if in unnestable comment else comment nesting */
int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
int quoted; /* Nonzero if just after an escape char at end of parsing */
int mindepth; /* Minimum depth seen while scanning. */
/* Char number of most recent start-of-expression at current level */
EMACS_INT thislevelstart;
/* Char number of start of containing expression */
EMACS_INT prevlevelstart;
EMACS_INT location; /* Char number at which parsing stopped. */
EMACS_INT comstr_start; /* Position of last comment/string starter. */
Lisp_Object levelstarts; /* Char numbers of starts-of-expression
of levels (starting from outermost). */
};
/* These variables are a cache for finding the start of a defun.
find_start_pos is the place for which the defun start was found.
find_start_value is the defun start position found for it.
find_start_value_byte is the corresponding byte position.
find_start_buffer is the buffer it was found in.
find_start_begv is the BEGV value when it was found.
find_start_modiff is the value of MODIFF when it was found. */
static EMACS_INT find_start_pos;
static EMACS_INT find_start_value;
static EMACS_INT find_start_value_byte;
static struct buffer *find_start_buffer;
static EMACS_INT find_start_begv;
static int find_start_modiff;
static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
static void scan_sexps_forward P_ ((struct lisp_parse_state *,
EMACS_INT, EMACS_INT, EMACS_INT, int,
int, Lisp_Object, int));
static int in_classes P_ ((int, Lisp_Object));
struct gl_state_s gl_state; /* Global state of syntax parser. */
INTERVAL interval_of ();
#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
to scan to property-change. */
/* Update gl_state to an appropriate interval which contains CHARPOS. The
sign of COUNT give the relative position of CHARPOS wrt the previously
valid interval. If INIT, only [be]_property fields of gl_state are
valid at start, the rest is filled basing on OBJECT.
`gl_state.*_i' are the intervals, and CHARPOS is further in the search
direction than the intervals - or in an interval. We update the
current syntax-table basing on the property of this interval, and
update the interval to start further than CHARPOS - or be
NULL_INTERVAL. We also update lim_property to be the next value of
charpos to call this subroutine again - or be before/after the
start/end of OBJECT. */
void
update_syntax_table (charpos, count, init, object)
int charpos, count, init;
Lisp_Object object;
{
Lisp_Object tmp_table;
int cnt = 0, invalidate = 1;
INTERVAL i;
if (init)
{
gl_state.old_prop = Qnil;
gl_state.start = gl_state.b_property;
gl_state.stop = gl_state.e_property;
i = interval_of (charpos, object);
gl_state.backward_i = gl_state.forward_i = i;
invalidate = 0;
if (NULL_INTERVAL_P (i))
return;
/* interval_of updates only ->position of the return value, so
update the parents manually to speed up update_interval. */
while (!NULL_PARENT (i))
{
if (AM_RIGHT_CHILD (i))
INTERVAL_PARENT (i)->position = i->position
- LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
- TOTAL_LENGTH (INTERVAL_PARENT (i))
+ LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
else
INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
+ TOTAL_LENGTH (i);
i = INTERVAL_PARENT (i);
}
i = gl_state.forward_i;
gl_state.b_property = i->position - gl_state.offset;
gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
goto update;
}
i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
/* We are guaranteed to be called with CHARPOS either in i,
or further off. */
if (NULL_INTERVAL_P (i))
error ("Error in syntax_table logic for to-the-end intervals");
else if (charpos < i->position) /* Move left. */
{
if (count > 0)
error ("Error in syntax_table logic for intervals <-");
/* Update the interval. */
i = update_interval (i, charpos);
if (INTERVAL_LAST_POS (i) != gl_state.b_property)
{
invalidate = 0;
gl_state.forward_i = i;
gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
}
}
else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
{
if (count < 0)
error ("Error in syntax_table logic for intervals ->");
/* Update the interval. */
i = update_interval (i, charpos);
if (i->position != gl_state.e_property)
{
invalidate = 0;
gl_state.backward_i = i;
gl_state.b_property = i->position - gl_state.offset;
}
}
update:
tmp_table = textget (i->plist, Qsyntax_table);
if (invalidate)
invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
if (invalidate) /* Did not get to adjacent interval. */
{ /* with the same table => */
/* invalidate the old range. */
if (count > 0)
{
gl_state.backward_i = i;
gl_state.b_property = i->position - gl_state.offset;
}
else
{
gl_state.forward_i = i;
gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
}
}
if (!EQ (tmp_table, gl_state.old_prop))
{
gl_state.current_syntax_table = tmp_table;
gl_state.old_prop = tmp_table;
if (EQ (Fsyntax_table_p (tmp_table), Qt))
{
gl_state.use_global = 0;
}
else if (CONSP (tmp_table))
{
gl_state.use_global = 1;
gl_state.global_code = tmp_table;
}
else
{
gl_state.use_global = 0;
gl_state.current_syntax_table = current_buffer->syntax_table;
}
}
while (!NULL_INTERVAL_P (i))
{
if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
{
if (count > 0)
{
gl_state.e_property = i->position - gl_state.offset;
gl_state.forward_i = i;
}
else
{
gl_state.b_property
= i->position + LENGTH (i) - gl_state.offset;
gl_state.backward_i = i;
}
return;
}
else if (cnt == INTERVALS_AT_ONCE)
{
if (count > 0)
{
gl_state.e_property
= i->position + LENGTH (i) - gl_state.offset
/* e_property at EOB is not set to ZV but to ZV+1, so that
we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
having to check eob between the two. */
+ (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
gl_state.forward_i = i;
}
else
{
gl_state.b_property = i->position - gl_state.offset;
gl_state.backward_i = i;
}
return;
}
cnt++;
i = count > 0 ? next_interval (i) : previous_interval (i);
}
eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
if (count > 0)
gl_state.e_property = gl_state.stop;
else
gl_state.b_property = gl_state.start;
}
/* Returns TRUE if char at CHARPOS is quoted.
Global syntax-table data should be set up already to be good at CHARPOS
or after. On return global syntax data is good for lookup at CHARPOS. */
static int
char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
{
register enum syntaxcode code;
register EMACS_INT beg = BEGV;
register int quoted = 0;
EMACS_INT orig = charpos;
while (charpos > beg)
{
int c;
DEC_BOTH (charpos, bytepos);
UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
code = SYNTAX (c);
if (! (code == Scharquote || code == Sescape))
break;
quoted = !quoted;
}
UPDATE_SYNTAX_TABLE (orig);
return quoted;
}
/* Return the bytepos one character after BYTEPOS.
We assume that BYTEPOS is not at the end of the buffer. */
INLINE EMACS_INT
inc_bytepos (bytepos)
EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos + 1;
INC_POS (bytepos);
return bytepos;
}
/* Return the bytepos one character before BYTEPOS.
We assume that BYTEPOS is not at the start of the buffer. */
INLINE EMACS_INT
dec_bytepos (bytepos)
EMACS_INT bytepos;
{
if (NILP (current_buffer->enable_multibyte_characters))
return bytepos - 1;
DEC_POS (bytepos);
return bytepos;
}
/* Return a defun-start position before POS and not too far before.
It should be the last one before POS, or nearly the last.
When open_paren_in_column_0_is_defun_start is nonzero,
only the beginning of the buffer is treated as a defun-start.
We record the information about where the scan started
and what its result was, so that another call in the same area
can return the same value very quickly.
There is no promise at which position the global syntax data is
valid on return from the subroutine, so the caller should explicitly
update the global data. */
static EMACS_INT
find_defun_start (pos, pos_byte)
EMACS_INT pos, pos_byte;
{
EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
if (!open_paren_in_column_0_is_defun_start)
{
find_start_value_byte = BEGV_BYTE;
return BEGV;
}
/* Use previous finding, if it's valid and applies to this inquiry. */
if (current_buffer == find_start_buffer
/* Reuse the defun-start even if POS is a little farther on.
POS might be in the next defun, but that's ok.
Our value may not be the best possible, but will still be usable. */
&& pos <= find_start_pos + 1000
&& pos >= find_start_value
&& BEGV == find_start_begv
&& MODIFF == find_start_modiff)
return find_start_value;
/* Back up to start of line. */
scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
/* We optimize syntax-table lookup for rare updates. Thus we accept
only those `^\s(' which are good in global _and_ text-property
syntax-tables. */
gl_state.current_syntax_table = current_buffer->syntax_table;
gl_state.use_global = 0;
while (PT > BEGV)
{
int c;
/* Open-paren at start of line means we may have found our
defun-start. */
c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
if (SYNTAX (c) == Sopen)
{
SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
if (SYNTAX (c) == Sopen)
break;
/* Now fallback to the default value. */
gl_state.current_syntax_table = current_buffer->syntax_table;
gl_state.use_global = 0;
}
/* Move to beg of previous line. */
scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
}
/* Record what we found, for the next try. */
find_start_value = PT;
find_start_value_byte = PT_BYTE;
find_start_buffer = current_buffer;
find_start_modiff = MODIFF;
find_start_begv = BEGV;
find_start_pos = pos;
TEMP_SET_PT_BOTH (opoint, opoint_byte);
return find_start_value;
}
/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
static int
prev_char_comend_first (pos, pos_byte)
int pos, pos_byte;
{
int c, val;
DEC_BOTH (pos, pos_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (pos);
c = FETCH_CHAR (pos_byte);
val = SYNTAX_COMEND_FIRST (c);
UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
return val;
}
/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
/* static int
* prev_char_comstart_first (pos, pos_byte)
* int pos, pos_byte;
* {
* int c, val;
*
* DEC_BOTH (pos, pos_byte);
* UPDATE_SYNTAX_TABLE_BACKWARD (pos);
* c = FETCH_CHAR (pos_byte);
* val = SYNTAX_COMSTART_FIRST (c);
* UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
* return val;
* } */
/* Checks whether charpos FROM is at the end of a comment.
FROM_BYTE is the bytepos corresponding to FROM.
Do not move back before STOP.
Return a positive value if we find a comment ending at FROM/FROM_BYTE;
return -1 otherwise.
If successful, store the charpos of the comment's beginning
into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
Global syntax data remains valid for backward search starting at
the returned value (or at FROM, if the search was not successful). */
static int
back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
EMACS_INT from, from_byte, stop;
int comnested, comstyle;
EMACS_INT *charpos_ptr, *bytepos_ptr;
{
/* Look back, counting the parity of string-quotes,
and recording the comment-starters seen.
When we reach a safe place, assume that's not in a string;
then step the main scan to the earliest comment-starter seen
an even number of string quotes away from the safe place.
OFROM[I] is position of the earliest comment-starter seen
which is I+2X quotes from the comment-end.
PARITY is current parity of quotes from the comment end. */
int string_style = -1; /* Presumed outside of any string. */
int string_lossage = 0;
/* Not a real lossage: indicates that we have passed a matching comment
starter plus a non-matching comment-ender, meaning that any matching
comment-starter we might see later could be a false positive (hidden
inside another comment).
Test case: { a (* b } c (* d *) */
int comment_lossage = 0;
EMACS_INT comment_end = from;
EMACS_INT comment_end_byte = from_byte;
EMACS_INT comstart_pos = 0;
EMACS_INT comstart_byte;
/* Place where the containing defun starts,
or 0 if we didn't come across it yet. */
EMACS_INT defun_start = 0;
EMACS_INT defun_start_byte = 0;
register enum syntaxcode code;
int nesting = 1; /* current comment nesting */
int c;
int syntax = 0;
/* FIXME: A }} comment-ender style leads to incorrect behavior
in the case of {{ c }}} because we ignore the last two chars which are
assumed to be comment-enders although they aren't. */
/* At beginning of range to scan, we're outside of strings;
that determines quote parity to the comment-end. */
while (from != stop)
{
int temp_byte, prev_syntax;
int com2start, com2end;
/* Move back and examine a character. */
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
prev_syntax = syntax;
c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
syntax = SYNTAX_WITH_FLAGS (c);
code = SYNTAX (c);
/* Check for 2-char comment markers. */
com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
&& SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
&& comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
&& (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
|| SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
&& SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
/* Nasty cases with overlapping 2-char comment markers:
- snmp-mode: -- c -- foo -- c --
--- c --
------ c --
- c-mode: *||*
|* *|* *|
|*| |* |*|
/// */
/* If a 2-char comment sequence partly overlaps with another,
we don't try to be clever. */
if (from > stop && (com2end || com2start))
{
int next = from, next_byte = from_byte, next_c, next_syntax;
DEC_BOTH (next, next_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (next);
next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
next_syntax = SYNTAX_WITH_FLAGS (next_c);
if (((com2start || comnested)
&& SYNTAX_FLAGS_COMEND_SECOND (syntax)
&& SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
|| ((com2end || comnested)
&& SYNTAX_FLAGS_COMSTART_SECOND (syntax)
&& comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
&& SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
goto lossage;
/* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
}
if (com2start && comstart_pos == 0)
/* We're looking at a comment starter. But it might be a comment
ender as well (see snmp-mode). The first time we see one, we
need to consider it as a comment starter,
and the subsequent times as a comment ender. */
com2end = 0;
/* Turn a 2-char comment sequences into the appropriate syntax. */
if (com2end)
code = Sendcomment;
else if (com2start)
code = Scomment;
/* Ignore comment starters of a different style. */
else if (code == Scomment
&& (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
|| SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
continue;
/* Ignore escaped characters, except comment-enders. */
if (code != Sendcomment && char_quoted (from, from_byte))
continue;
switch (code)
{
case Sstring_fence:
case Scomment_fence:
c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
case Sstring:
/* Track parity of quotes. */
if (string_style == -1)
/* Entering a string. */
string_style = c;
else if (string_style == c)
/* Leaving the string. */
string_style = -1;
else
/* If we have two kinds of string delimiters.
There's no way to grok this scanning backwards. */
string_lossage = 1;
break;
case Scomment:
/* We've already checked that it is the relevant comstyle. */
if (string_style != -1 || comment_lossage || string_lossage)
/* There are odd string quotes involved, so let's be careful.
Test case in Pascal: " { " a { " } */
goto lossage;
if (!comnested)
{
/* Record best comment-starter so far. */
comstart_pos = from;
comstart_byte = from_byte;
}
else if (--nesting <= 0)
/* nested comments have to be balanced, so we don't need to
keep looking for earlier ones. We use here the same (slightly
incorrect) reasoning as below: since it is followed by uniform
paired string quotes, this comment-start has to be outside of
strings, else the comment-end itself would be inside a string. */
goto done;
break;
case Sendcomment:
if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
&& ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
|| SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
/* This is the same style of comment ender as ours. */
{
if (comnested)
nesting++;
else
/* Anything before that can't count because it would match
this comment-ender rather than ours. */
from = stop; /* Break out of the loop. */
}
else if (comstart_pos != 0 || c != '\n')
/* We're mixing comment styles here, so we'd better be careful.
The (comstart_pos != 0 || c != '\n') check is not quite correct
(we should just always set comment_lossage), but removing it
would imply that any multiline comment in C would go through
lossage, which seems overkill.
The failure should only happen in the rare cases such as
{ (* } *) */
comment_lossage = 1;
break;
case Sopen:
/* Assume a defun-start point is outside of strings. */
if (open_paren_in_column_0_is_defun_start
&& (from == stop
|| (temp_byte = dec_bytepos (from_byte),
FETCH_CHAR (temp_byte) == '\n')))
{
defun_start = from;
defun_start_byte = from_byte;
from = stop; /* Break out of the loop. */
}
break;
default:
break;
}
}
if (comstart_pos == 0)
{
from = comment_end;
from_byte = comment_end_byte;
UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
}
/* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
or `done'), then we've found the beginning of the non-nested comment. */
else if (1) /* !comnested */
{
from = comstart_pos;
from_byte = comstart_byte;
UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
}
else
{
struct lisp_parse_state state;
lossage:
/* We had two kinds of string delimiters mixed up
together. Decode this going forwards.
Scan fwd from a known safe place (beginning-of-defun)
to the one in question; this records where we
last passed a comment starter. */
/* If we did not already find the defun start, find it now. */
if (defun_start == 0)
{
defun_start = find_defun_start (comment_end, comment_end_byte);
defun_start_byte = find_start_value_byte;
}
do
{
scan_sexps_forward (&state,
defun_start, defun_start_byte,
comment_end, -10000, 0, Qnil, 0);
defun_start = comment_end;
if (state.incomment == (comnested ? 1 : -1)
&& state.comstyle == comstyle)
from = state.comstr_start;
else
{
from = comment_end;
if (state.incomment)
/* If comment_end is inside some other comment, maybe ours
is nested, so we need to try again from within the
surrounding comment. Example: { a (* " *) */
{
/* FIXME: We should advance by one or two chars. */
defun_start = state.comstr_start + 2;
defun_start_byte = CHAR_TO_BYTE (defun_start);
}
}
} while (defun_start < comment_end);
from_byte = CHAR_TO_BYTE (from);
UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
}
done:
*charpos_ptr = from;
*bytepos_ptr = from_byte;
return (from == comment_end) ? -1 : from;
}
DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
doc: /* Return t if OBJECT is a syntax table.
Currently, any char-table counts as a syntax table. */)
(object)
Lisp_Object object;
{
if (CHAR_TABLE_P (object)
&& EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
return Qt;
return Qnil;
}
static void
check_syntax_table (obj)
Lisp_Object obj;
{
CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
Qsyntax_table_p, obj);
}
DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
doc: /* Return the current syntax table.
This is the one specified by the current buffer. */)
()
{
return current_buffer->syntax_table;
}
DEFUN ("standard-syntax-table", Fstandard_syntax_table,
Sstandard_syntax_table, 0, 0, 0,
doc: /* Return the standard syntax table.
This is the one used for new buffers. */)
()
{
return Vstandard_syntax_table;
}
DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
doc: /* Construct a new syntax table and return it.
It is a copy of the TABLE, which defaults to the standard syntax table. */)
(table)
Lisp_Object table;
{
Lisp_Object copy;
if (!NILP (table))
check_syntax_table (table);
else
table = Vstandard_syntax_table;
copy = Fcopy_sequence (table);
/* Only the standard syntax table should have a default element.
Other syntax tables should inherit from parents instead. */
XCHAR_TABLE (copy)->defalt = Qnil;
/* Copied syntax tables should all have parents.
If we copied one with no parent, such as the standard syntax table,
use the standard syntax table as the copy's parent. */
if (NILP (XCHAR_TABLE (copy)->parent))
Fset_char_table_parent (copy, Vstandard_syntax_table);
return copy;
}
DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
doc: /* Select a new syntax table for the current buffer.
One argument, a syntax table. */)
(table)
Lisp_Object table;
{
int idx;
check_syntax_table (table);
current_buffer->syntax_table = table;
/* Indicate that this buffer now has a specified syntax table. */
idx = PER_BUFFER_VAR_IDX (syntax_table);
SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
return table;
}
/* Convert a letter which signifies a syntax code
into the code it signifies.
This is used by modify-syntax-entry, and other things. */
unsigned char syntax_spec_code[0400] =
{ 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
(char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
(char) Smath, 0377, 0377, (char) Squote,
(char) Sopen, (char) Sclose, 0377, 0377,
0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377,
(char) Scomment, 0377, (char) Sendcomment, 0377,
(char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
};
/* Indexed by syntax code, give the letter that describes it. */
char syntax_code_spec[16] =
{
' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
'!', '|'
};
/* Indexed by syntax code, give the object (cons of syntax code and
nil) to be stored in syntax table. Since these objects can be
shared among syntax tables, we generate them in advance. By
sharing objects, the function `describe-syntax' can give a more
compact listing. */
static Lisp_Object Vsyntax_code_object;
DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
doc: /* Return the syntax code of CHARACTER, described by a character.
For example, if CHARACTER is a word constituent, the
character `w' (119) is returned.
The characters that correspond to various syntax codes
are listed in the documentation of `modify-syntax-entry'. */)
(character)
Lisp_Object character;
{
int char_int;
gl_state.current_syntax_table = current_buffer->syntax_table;
gl_state.use_global = 0;
CHECK_NUMBER (character);
char_int = XINT (character);
return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
}
DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
(character)
Lisp_Object character;
{
int char_int, code;
gl_state.current_syntax_table = current_buffer->syntax_table;
gl_state.use_global = 0;
CHECK_NUMBER (character);
char_int = XINT (character);
code = SYNTAX (char_int);
if (code == Sopen || code == Sclose)
return SYNTAX_MATCH (char_int);
return Qnil;
}
DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
doc: /* Convert a syntax specification STRING into syntax cell form.
STRING should be a string as it is allowed as argument of
`modify-syntax-entry'. Value is the equivalent cons cell
\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
text property. */)
(string)
Lisp_Object string;
{
register const unsigned char *p;
register enum syntaxcode code;
int val;
Lisp_Object match;
CHECK_STRING (string);
p = SDATA (string);
code = (enum syntaxcode) syntax_spec_code[*p++];
if (((int) code & 0377) == 0377)
error ("Invalid syntax description letter: %c", p[-1]);
if (code == Sinherit)
return Qnil;
if (*p)
{
int len;
int character = (STRING_CHAR_AND_LENGTH
(p, SBYTES (string) - 1, len));
XSETINT (match, character);
if (XFASTINT (match) == ' ')
match = Qnil;
p += len;
}
else
match = Qnil;
val = (int) code;
while (*p)
switch (*p++)
{
case '1':
val |= 1 << 16;
break;
case '2':
val |= 1 << 17;
break;
case '3':
val |= 1 << 18;
break;
case '4':
val |= 1 << 19;
break;
case 'p':
val |= 1 << 20;
break;
case 'b':
val |= 1 << 21;
break;
case 'n':
val |= 1 << 22;
break;
}
if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
return XVECTOR (Vsyntax_code_object)->contents[val];
else
/* Since we can't use a shared object, let's make a new one. */
return Fcons (make_number (val), match);
}
/* I really don't know why this is interactive
help-form should at least be made useful whilst reading the second arg. */
DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
"cSet syntax for character: \nsSet syntax for %s to: ",
doc: /* Set syntax for character CHAR according to string NEWENTRY.
The syntax is changed only for table SYNTAX-TABLE, which defaults to
the current buffer's syntax table.
CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
in the range MIN to MAX are changed.
The first character of NEWENTRY should be one of the following:
Space or - whitespace syntax. w word constituent.
_ symbol constituent. . punctuation.
( open-parenthesis. ) close-parenthesis.
" string quote. \\ escape.
$ paired delimiter. ' expression quote or prefix operator.
< comment starter. > comment ender.
/ character-quote. @ inherit from `standard-syntax-table'.
| generic string fence. ! generic comment fence.
Only single-character comment start and end sequences are represented thus.
Two-character sequences are represented as described below.
The second character of NEWENTRY is the matching parenthesis,
used only if the first character is `(' or `)'.
Any additional characters are flags.
Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1 means CHAR is the start of a two-char comment start sequence.
2 means CHAR is the second character of such a sequence.
3 means CHAR is the start of a two-char comment end sequence.
4 means CHAR is the second character of such a sequence.
There can be up to two orthogonal comment sequences. This is to support
language modes such as C++. By default, all comment sequences are of style
a, but you can set the comment sequence style to b (on the second character
of a comment-start, or the first character of a comment-end sequence) using
this flag:
b means CHAR is part of comment sequence b.
n means CHAR is part of a nestable comment sequence.