[BACK]Return to b.c CVS log [TXT][DIR] Up to [local] / src / usr.bin / awk

Diff for /src/usr.bin/awk/b.c between version 1.48 and 1.49

version 1.48, 2023/11/22 01:01:21 version 1.49, 2023/11/25 16:31:33
Line 97 
Line 97 
    mechanism of the goto table used 8-bit byte indices into the     mechanism of the goto table used 8-bit byte indices into the
    gototab entries to compute the next state.  Unicode is a lot     gototab entries to compute the next state.  Unicode is a lot
    bigger, so the gototab entries are now structs with a character     bigger, so the gototab entries are now structs with a character
    and a next state, and there is a linear search of the characters     and a next state. These are sorted by code point and binary
    to find the state.  (Yes, this is slower, by a significant     searched.
    amount.  Tough.)  
   
    Throughout the RE mechanism in b.c, utf-8 characters are     Throughout the RE mechanism in b.c, utf-8 characters are
    converted to their utf-32 value.  This mostly shows up in     converted to their utf-32 value.  This mostly shows up in
Line 114 
Line 113 
   
  */   */
   
   static int entry_cmp(const void *l, const void *r);
 static int get_gototab(fa*, int, int);  static int get_gototab(fa*, int, int);
 static int set_gototab(fa*, int, int, int);  static int set_gototab(fa*, int, int, int);
 static void reset_gototab(fa*, int);  static void clear_gototab(fa*, int);
 extern int u8_rune(int *, const uschar *);  extern int u8_rune(int *, const uschar *);
   
 static int *  static int *
Line 151 
Line 151 
 static void  static void
 resize_state(fa *f, int state)  resize_state(fa *f, int state)
 {  {
         gtt **p;          gtt *p;
         uschar *p2;          uschar *p2;
         int **p3;          int **p3;
         int i, new_count;          int i, new_count;
Line 161 
Line 161 
   
         new_count = state + 10; /* needs to be tuned */          new_count = state + 10; /* needs to be tuned */
   
         p = (gtt **) reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));          p = (gtt *) reallocarray(f->gototab, new_count, sizeof(gtt));
         if (p == NULL)          if (p == NULL)
                 goto out;                  goto out;
         f->gototab = p;          f->gototab = p;
Line 177 
Line 177 
         f->posns = p3;          f->posns = p3;
   
         for (i = f->state_count; i < new_count; ++i) {          for (i = f->state_count; i < new_count; ++i) {
                 f->gototab[i] = (gtt *) calloc(NCHARS, sizeof(**f->gototab));                  f->gototab[i].entries = (gtte *) calloc(NCHARS, sizeof(gtte));
                 if (f->gototab[i] == NULL)                  if (f->gototab[i].entries == NULL)
                         goto out;                          goto out;
                 f->out[i]  = 0;                  f->gototab[i].allocated = NCHARS;
                   f->gototab[i].inuse = 0;
                   f->out[i] = 0;
                 f->posns[i] = NULL;                  f->posns[i] = NULL;
         }          }
         f->gototab_len = NCHARS; /* should be variable, growable */  
         f->state_count = new_count;          f->state_count = new_count;
         return;          return;
 out:  out:
Line 277 
Line 278 
         }          }
         if ((f->posns[2])[1] == f->accept)          if ((f->posns[2])[1] == f->accept)
                 f->out[2] = 1;                  f->out[2] = 1;
         reset_gototab(f, 2);          clear_gototab(f, 2);
         f->curstat = cgoto(f, 2, HAT);          f->curstat = cgoto(f, 2, HAT);
         if (anchor) {          if (anchor) {
                 *f->posns[2] = k-1;     /* leave out position 0 */                  *f->posns[2] = k-1;     /* leave out position 0 */
Line 601 
Line 602 
         return(0);          return(0);
 }  }
   
   static void resize_gototab(fa *f, int state)
   {
           size_t new_size = f->gototab[state].allocated * 2;
           gtte *p = (gtte *) realloc(f->gototab[state].entries, new_size * sizeof(gtte));
           if (p == NULL)
                   overflo(__func__);
   
           // need to initialized the new memory to zero
           size_t orig_size = f->gototab[state].allocated;         // 2nd half of new mem is this size
           memset(p + orig_size, 0, orig_size * sizeof(gtte));     // clean it out
   
           f->gototab[state].allocated = new_size;                 // update gotottab info
           f->gototab[state].entries = p;
   }
   
 static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation */  static int get_gototab(fa *f, int state, int ch) /* hide gototab inplementation */
 {  {
         int i;          gtte key;
         for (i = 0; i < f->gototab_len; i++) {          gtte *item;
                 if (f->gototab[state][i].ch == 0)  
                         break;          key.ch = ch;
                 if (f->gototab[state][i].ch == ch)          key.state = 0;  /* irrelevant */
                         return f->gototab[state][i].state;          item = bsearch(& key, f->gototab[state].entries,
         }                          f->gototab[state].inuse, sizeof(gtte),
         return 0;                          entry_cmp);
   
           if (item == NULL)
                   return 0;
           else
                   return item->state;
 }  }
   
 static void reset_gototab(fa *f, int state) /* hide gototab inplementation */  static int entry_cmp(const void *l, const void *r)
 {  {
         memset(f->gototab[state], 0, f->gototab_len * sizeof(**f->gototab));          const gtte *left, *right;
   
           left = (const gtte *) l;
           right = (const gtte *) r;
   
           return left->ch - right->ch;
 }  }
   
 static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplementation */  static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab inplementation */
 {  {
         int i;          if (f->gototab[state].inuse == 0) {
         for (i = 0; i < f->gototab_len; i++) {                  f->gototab[state].entries[0].ch = ch;
                 if (f->gototab[state][i].ch == 0 || f->gototab[state][i].ch == ch) {                  f->gototab[state].entries[0].state = val;
                         f->gototab[state][i].ch = ch;                  f->gototab[state].inuse++;
                         f->gototab[state][i].state = val;                  return val;
                         return val;          } else if (ch > f->gototab[state].entries[f->gototab[state].inuse-1].ch) {
                   // not seen yet, insert and return
                   gtt *tab = & f->gototab[state];
                   if (tab->inuse + 1 >= tab->allocated)
                           resize_gototab(f, state);
   
                   f->gototab[state].entries[f->gototab[state].inuse-1].ch = ch;
                   f->gototab[state].entries[f->gototab[state].inuse-1].state = val;
                   f->gototab[state].inuse++;
                   return val;
           } else {
                   // maybe we have it, maybe we don't
                   gtte key;
                   gtte *item;
   
                   key.ch = ch;
                   key.state = 0;  /* irrelevant */
                   item = bsearch(& key, f->gototab[state].entries,
                                   f->gototab[state].inuse, sizeof(gtte),
                                   entry_cmp);
   
                   if (item != NULL) {
                           // we have it, update state and return
                           item->state = val;
                           return item->state;
                 }                  }
                   // otherwise, fall through to insert and reallocate.
         }          }
         overflo(__func__);  
           gtt *tab = & f->gototab[state];
           if (tab->inuse + 1 >= tab->allocated)
                   resize_gototab(f, state);
           ++tab->inuse;
           f->gototab[state].entries[tab->inuse].ch = ch;
           f->gototab[state].entries[tab->inuse].state = val;
   
           qsort(f->gototab[state].entries,
                   f->gototab[state].inuse, sizeof(gtte), entry_cmp);
   
         return val; /* not used anywhere at the moment */          return val; /* not used anywhere at the moment */
 }  }
   
   static void clear_gototab(fa *f, int state)
   {
           memset(f->gototab[state].entries, 0,
                   f->gototab[state].allocated * sizeof(gtte));
           f->gototab[state].inuse = 0;
   }
   
 int match(fa *f, const char *p0)        /* shortest match ? */  int match(fa *f, const char *p0)        /* shortest match ? */
 {  {
         int s, ns;          int s, ns;
Line 1460 
Line 1528 
         /* add tmpset to current set of states */          /* add tmpset to current set of states */
         ++(f->curstat);          ++(f->curstat);
         resize_state(f, f->curstat);          resize_state(f, f->curstat);
           clear_gototab(f, f->curstat);
         xfree(f->posns[f->curstat]);          xfree(f->posns[f->curstat]);
         p = intalloc(setcnt + 1, __func__);          p = intalloc(setcnt + 1, __func__);
   
Line 1483 
Line 1552 
         if (f == NULL)          if (f == NULL)
                 return;                  return;
         for (i = 0; i < f->state_count; i++)          for (i = 0; i < f->state_count; i++)
                 xfree(f->gototab[i])                  xfree(f->gototab[i].entries);
           xfree(f->gototab);
         for (i = 0; i <= f->curstat; i++)          for (i = 0; i <= f->curstat; i++)
                 xfree(f->posns[i]);                  xfree(f->posns[i]);
         for (i = 0; i <= f->accept; i++) {          for (i = 0; i <= f->accept; i++) {

Legend:
Removed from v.1.48  
changed lines
  Added in v.1.49