Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tag: oniguruma_2_2_4
Fetching contributors…

Cannot retrieve contributors at this time

file 277 lines (232 sloc) 8.738 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
/**********************************************************************

regparse.h - Oniguruma (regular expression library)

Copyright (C) 2003-2004 K.Kosako (kosako@sofnec.co.jp)

**********************************************************************/
#ifndef REGPARSE_H
#define REGPARSE_H

#include "regint.h"

/* Node type tags.  Every tag occupies its own bit, so several tags can be
   OR-ed together into a mask (IS_NODE_TYPE_SIMPLE below does exactly that). */
#define N_STRING      (1 << 0)
#define N_CCLASS      (1 << 1)
#define N_CTYPE       (1 << 2)
#define N_ANYCHAR     (1 << 3)
#define N_BACKREF     (1 << 4)
#define N_QUALIFIER   (1 << 5)
#define N_EFFECT      (1 << 6)
#define N_ANCHOR      (1 << 7)
#define N_LIST        (1 << 8)
#define N_ALT         (1 << 9)
#define N_CALL        (1 << 10)

/* Evaluates to 1 when `type` carries any of the string, char-class, ctype,
   anychar or backref bits, and to 0 otherwise. */
#define IS_NODE_TYPE_SIMPLE(type) \
  (0 != ((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)))

/* Field accessors taking a pointer to a node: NTYPE() yields the `type`
   member, every other macro selects one member of the node's union `u`. */
#define NTYPE(node)        ((node)->type)
#define NCONS(node)        ((node)->u.cons)
#define NSTRING(node)      ((node)->u.str)
#define NCCLASS(node)      ((node)->u.cclass)
#define NCTYPE(node)       ((node)->u.ctype)
#define NQUALIFIER(node)   ((node)->u.qualifier)
#define NANCHOR(node)      ((node)->u.anchor)
#define NBACKREF(node)     ((node)->u.backref)
#define NEFFECT(node)      ((node)->u.effect)
#define NCALL(node)        ((node)->u.call)

/* Character-type flags, one bit each; every class has a positive and a
   NOT_ variant. */
#define CTYPE_WORD              (1 << 0)
#define CTYPE_NOT_WORD          (1 << 1)
#define CTYPE_WHITE_SPACE       (1 << 2)
#define CTYPE_NOT_WHITE_SPACE   (1 << 3)
#define CTYPE_DIGIT             (1 << 4)
#define CTYPE_NOT_DIGIT         (1 << 5)


/* Combined anchor masks.  The individual ANCHOR_* bits are not defined in
   this header; they must be in scope wherever these masks are expanded. */
#define ANCHOR_ANYCHAR_STAR_MASK \
  (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
#define ANCHOR_END_BUF_MASK \
  (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)

/* Effect kinds, one bit each. */
#define EFFECT_MEMORY           (1 << 0)
#define EFFECT_OPTION           (1 << 1)
#define EFFECT_STOP_BACKTRACK   (1 << 2)

/* Sentinel repeat count meaning "no upper limit".  The replacement list is
   parenthesized so the minus sign cannot merge with neighbouring tokens at
   the point of use. */
#define REPEAT_INFINITE         (-1)
/* Non-zero when `n` equals the REPEAT_INFINITE sentinel. */
#define IS_REPEAT_INFINITE(n)   ((n) == REPEAT_INFINITE)

/* Sizing constants.  NODE_STR_BUF_SIZE is the length of StrNode.buf and
   NODE_BACKREFS_SIZE the length of BackrefNode.back_static;
   NODE_STR_MARGIN is not referenced inside this header. */
#define NODE_STR_MARGIN      16
#define NODE_STR_BUF_SIZE    24  /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE    7

/* Bits stored in the `flag` member of a string node. */
#define NSTR_RAW          (1 << 0)  /* by backslashed number */
#define NSTR_CASE_AMBIG   (1 << 1)

/* Byte count between the `s` and `end` pointers of a string node. */
#define NSTRING_LEN(node)   ((node)->u.str.end - (node)->u.str.s)

/* Flag setters / clearer.  Each expansion is wrapped in parentheses so that
   an operator written next to the macro call applies to the result of the
   assignment instead of being absorbed into its right-hand side. */
#define NSTRING_SET_RAW(node)          ((node)->u.str.flag |= NSTR_RAW)
#define NSTRING_CLEAR_RAW(node)        ((node)->u.str.flag &= ~NSTR_RAW)
#define NSTRING_SET_CASE_AMBIG(node)   ((node)->u.str.flag |= NSTR_CASE_AMBIG)

/* Flag predicates; each evaluates to 0 or 1. */
#define NSTRING_IS_RAW(node) \
  (((node)->u.str.flag & NSTR_RAW) != 0)
#define NSTRING_IS_CASE_AMBIG(node) \
  (((node)->u.str.flag & NSTR_CASE_AMBIG) != 0)

/* Yields the active back-reference array of `br`: back_dynamic when it is
   non-NULL, otherwise the embedded back_static array.
   The definition deliberately has no trailing ';' so the macro is a plain
   expression: it can appear inside larger expressions, and a caller's own
   ';' no longer produces a stray empty statement.
   Note: `br` is evaluated more than once. */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)

/* Sets the `not` member of the char-class pointed to by `cc` to 1.
   Parenthesized so an operator next to the macro call cannot be absorbed
   into the assignment's right-hand side. */
#define CCLASS_SET_NOT(cc)   ((cc)->not = 1)

/* Target-emptiness codes (presumably the values stored in
   QualifierNode.target_empty_info -- confirm against the users). */
#define NQ_TARGET_ISNOT_EMPTY    0
#define NQ_TARGET_IS_EMPTY       1
#define NQ_TARGET_IS_EMPTY_MEM   2
#define NQ_TARGET_IS_EMPTY_REC   3


/* Payload of a string node. */
typedef struct {
  UChar *s;                      /* first byte; NSTRING_LEN() is end - s */
  UChar *end;
  unsigned int flag;             /* NSTR_* bits */
  int capa;                      /* (allocated size - 1) or 0: use buf[] */
  UChar buf[NODE_STR_BUF_SIZE];  /* embedded storage */
} StrNode;

/* Payload of a character-class node. */
typedef struct {
  int not;      /* set to 1 by CCLASS_SET_NOT() */
  BitSet bs;
  BBuf *mbuf;   /* multi-byte info or NULL */
} CClassNode;

/* Payload of a qualifier node. */
typedef struct {
  struct _Node *target;
  int lower;
  int upper;
  int greedy;
  int by_number;                   /* {n,m} */
  int target_empty_info;
  struct _Node *head_exact;
  struct _Node *next_head_exact;
  int is_refered;                  /* include called node. don't eliminate even if {0} */
} QualifierNode;

/* Status bits kept in the `state` member of effect, call and backref
   nodes (see the IS_EFFECT_*, IS_CALL_* and IS_BACKREF_* predicates). */
#define NST_MIN_FIXED       (1 << 0)
#define NST_MAX_FIXED       (1 << 1)
#define NST_CLEN_FIXED      (1 << 2)
#define NST_MARK1           (1 << 3)
#define NST_MARK2           (1 << 4)
#define NST_MEM_BACKREFED   (1 << 5)
#define NST_SIMPLE_REPEAT   (1 << 6)  /* for stop backtrack optimization */

#define NST_RECURSION       (1 << 7)
#define NST_CALLED          (1 << 8)
#define NST_ADDR_FIXED      (1 << 9)
#define NST_NAMED_GROUP     (1 << 10)
#define NST_NAME_REF        (1 << 11)

/* Set / clear status bits `f` in the effect payload of `node` (a node
   pointer).  The expansions are fully parenthesized so that an operator
   written next to the macro call cannot be absorbed into the assignment. */
#define SET_EFFECT_STATUS(node,f)     ((node)->u.effect.state |= (f))
#define CLEAR_EFFECT_STATUS(node,f)   ((node)->u.effect.state &= ~(f))

/* Status predicates.  `en` points at an effect payload (anything with a
   `state` member); each macro evaluates to 0 or 1. */
#define IS_EFFECT_CALLED(en)          (((en)->state & NST_CALLED) != 0)
#define IS_EFFECT_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED) != 0)
#define IS_EFFECT_RECURSION(en)       (((en)->state & NST_RECURSION) != 0)
#define IS_EFFECT_MARK1(en)           (((en)->state & NST_MARK1) != 0)
#define IS_EFFECT_MARK2(en)           (((en)->state & NST_MARK2) != 0)
#define IS_EFFECT_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED) != 0)
#define IS_EFFECT_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED) != 0)
#define IS_EFFECT_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED) != 0)
#define IS_EFFECT_SIMPLE_REPEAT(en)   (((en)->state & NST_SIMPLE_REPEAT) != 0)
#define IS_EFFECT_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP) != 0)

/* Same idea for call and backref payloads.  SET_CALL_RECURSION takes a node
   pointer; the IS_* forms take a pointer to the payload itself. */
#define SET_CALL_RECURSION(node)      ((node)->u.call.state |= NST_RECURSION)
#define IS_CALL_RECURSION(cn)         (((cn)->state & NST_RECURSION) != 0)
#define IS_CALL_NAME_REF(cn)          (((cn)->state & NST_NAME_REF) != 0)
#define IS_BACKREF_NAME_REF(bn)       (((bn)->state & NST_NAME_REF) != 0)

/* Payload of an effect node. */
typedef struct {
  int state;                /* NST_* status bits */
  int type;
  int regnum;
  OnigOptionType option;
  struct _Node *target;
  AbsAddrType call_addr;
  /* for multiple call reference */
  OnigDistance min_len;     /* min length (byte) */
  OnigDistance max_len;     /* max length (byte) */
  int char_len;             /* character length */
  int opt_count;            /* referenced count in optimize_node_left() */
} EffectNode;

/* Sentinel reference number (presumably for CallNode.ref_num -- confirm).
   Parenthesized so the minus sign cannot merge with neighbouring tokens
   where the macro is used. */
#define CALLNODE_REFNUM_UNDEF   (-1)

#ifdef USE_SUBEXP_CALL

/* One (offset, target node) pair; elements of UnsetAddrList.us. */
typedef struct {
  int offset;
  struct _Node *target;
} UnsetAddr;

/* Array of UnsetAddr entries together with two counters
   (presumably used / allocated -- confirm against the implementation). */
typedef struct {
  int num;
  int alloc;
  UnsetAddr *us;
} UnsetAddrList;

/* Payload of a call node (only compiled with USE_SUBEXP_CALL). */
typedef struct {
  int state;                        /* NST_* status bits */
  int ref_num;
  UChar *name;
  UChar *name_end;
  struct _Node *target;             /* EffectNode : EFFECT_MEMORY */
  UnsetAddrList *unset_addr_list;
} CallNode;

#endif

/* Payload of a back-reference node.  BACKREFS_P() selects back_dynamic
   when it is non-NULL and back_static otherwise. */
typedef struct {
  int state;                             /* NST_* status bits */
  int back_num;
  int back_static[NODE_BACKREFS_SIZE];
  int *back_dynamic;
} BackrefNode;

/* Payload of an anchor node. */
typedef struct {
  int type;
  struct _Node *target;
  int char_len;
} AnchorNode;

/* Parse-tree node: a `type` tag followed by a union holding the payload.
   The N*() accessor macros select the individual union members. */
typedef struct _Node {
  int type;
  union {
    StrNode        str;
    CClassNode     cclass;
    QualifierNode  qualifier;
    EffectNode     effect;
#ifdef USE_SUBEXP_CALL
    CallNode       call;
#endif
    BackrefNode    backref;
    AnchorNode     anchor;
    struct {
      struct _Node *left;
      struct _Node *right;
    } cons;
    struct {
      int type;
    } ctype;
  } u;
} Node;

/* Null pointer constant typed as Node*. */
#define NULL_NODE   ((Node *) 0)

/* Length of ScanEnv.mem_nodes_static. */
#define SCANENV_MEMNODES_SIZE   8

/* Yields senv->mem_nodes_dynamic when it is non-NULL and the embedded
   mem_nodes_static array otherwise.  `senv` is evaluated more than once. */
#define SCANENV_MEM_NODES(senv)                   \
  (IS_NOT_NULL((senv)->mem_nodes_dynamic)         \
     ? (senv)->mem_nodes_dynamic                  \
     : (senv)->mem_nodes_static)

typedef struct {
  OnigOptionType option;
  OnigEncoding enc;
  OnigSyntaxType* syntax;
  BitStatusType capture_history;
  BitStatusType bt_mem_start;
  BitStatusType bt_mem_end;
  BitStatusType backrefed_mem;
  UChar* pattern;
  UChar* pattern_end;
  UChar* error;
  UChar* error_end;
  regex_t* reg; /* for reg->names only */
  int num_call;
#ifdef USE_SUBEXP_CALL
  UnsetAddrList* unset_addr_list;
#endif
  int num_mem;
#ifdef USE_NAMED_GROUP
  int num_named;
#endif
  int mem_alloc;
  Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
  Node** mem_nodes_dynamic;
} ScanEnv;


/* Test mask bits in the `op`, `op2` and `behavior` members of the syntax
   pointed to by `syn`; each macro evaluates to 0 or 1. */
#define IS_SYNTAX_OP(syn, opm)    (0 != ((syn)->op & (opm)))
#define IS_SYNTAX_OP2(syn, opm)   (0 != ((syn)->op2 & (opm)))
#define IS_SYNTAX_BV(syn, bvm)    (0 != ((syn)->behavior & (bvm)))

/* Functions declared for use by other translation units.
   Every parameter list is wrapped in P_((...)); P_ is defined outside this
   header (presumably to drop prototypes for pre-ANSI compilers -- confirm).
   onig_free_node_list is declared with an explicit (void) list, like its
   siblings, so that calls passing arguments are diagnosed instead of being
   accepted through an unspecified "()" parameter list. */
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
extern int onig_scan_unsigned_number P_((UChar** src, UChar* end, OnigEncoding enc));
extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, UChar* s, UChar* end));
extern void onig_node_free P_((Node* node));
extern Node* onig_node_new_effect P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, UChar* pattern, UChar* end, regex_t* reg, ScanEnv* env));

/* Declared only when both ONIG_DEBUG and USE_NAMED_GROUP are defined. */
#if defined(ONIG_DEBUG) && defined(USE_NAMED_GROUP)
extern int onig_print_names(FILE *fp, regex_t *reg);
#endif

#endif /* REGPARSE_H */
Something went wrong with that request. Please try again.