00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101 #include <stdio.h>
00102 #include <stdlib.h>
00103 #include <string.h>
00104 #include <ctype.h>
00105
00106 #include "acsmx2.h"
00107
00108
00109
00110
00111 #define MEMASSERT(p,s) if(!p){printf("ACSM-No Memory: %s!\n",s);exit(0);}
00112
00113
00114
00115
00116 static int max_memory = 0;
00117
00118
00119
00120
00121 static int s_verbose=0;
00122
00123
00124
00125
00126 typedef struct acsm_summary_s
00127 {
00128 unsigned num_states;
00129 unsigned num_transitions;
00130 ACSM_STRUCT2 acsm;
00131
00132 }acsm_summary_t;
00133
00134
00135
00136
00137 static acsm_summary_t summary={0,0};
00138
00139
00140
00141
00142 static unsigned char xlatcase[256];
00143
00144
00145
00146 static
00147 void
00148 init_xlatcase()
00149 {
00150 int i;
00151 for (i = 0; i < 256; i++)
00152 {
00153 xlatcase[i] = toupper(i);
00154 }
00155 }
00156
00157
00158
00159 static
00160 inline
00161 void
00162 ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
00163 {
00164 int i;
00165 #ifdef XXXX
00166 int n;
00167 n = m & 3;
00168 m >>= 2;
00169
00170 for (i = 0; i < m; i++ )
00171 {
00172 d[0] = xlatcase[ s[0] ];
00173 d[2] = xlatcase[ s[2] ];
00174 d[1] = xlatcase[ s[1] ];
00175 d[3] = xlatcase[ s[3] ];
00176 d+=4;
00177 s+=4;
00178 }
00179
00180 for (i=0; i < n; i++)
00181 {
00182 d[i] = xlatcase[ s[i] ];
00183 }
00184 #else
00185 for (i=0; i < m; i++)
00186 {
00187 d[i] = xlatcase[ s[i] ];
00188 }
00189
00190 #endif
00191 }
00192
00193
00194
00195
00196
00197 void acsmSetVerbose2(int n)
00198 {
00199 s_verbose = 1;
00200 }
00201
00202
00203
00204
00205 static void *
00206 AC_MALLOC (int n)
00207 {
00208 void *p;
00209 p = malloc (n);
00210 if (p)
00211 max_memory += n;
00212 return p;
00213 }
00214
00215
00216
00217
00218
00219 static void
00220 AC_FREE (void *p)
00221 {
00222 if (p)
00223 free (p);
00224 }
00225
00226
00227
00228
00229
00230 typedef struct _qnode
00231 {
00232 int state;
00233 struct _qnode *next;
00234 }
00235 QNODE;
00236
00237
00238
00239
00240 typedef struct _queue
00241 {
00242 QNODE * head, *tail;
00243 int count;
00244 }
00245 QUEUE;
00246
00247
00248
00249
00250 static void
00251 queue_init (QUEUE * s)
00252 {
00253 s->head = s->tail = 0;
00254 s->count= 0;
00255 }
00256
00257
00258
00259
00260 static int
00261 queue_find (QUEUE * s, int state)
00262 {
00263 QNODE * q;
00264 q = s->head;
00265 while( q )
00266 {
00267 if( q->state == state ) return 1;
00268 q = q->next;
00269 }
00270 return 0;
00271 }
00272
00273
00274
00275
00276 static void
00277 queue_add (QUEUE * s, int state)
00278 {
00279 QNODE * q;
00280
00281 if( queue_find( s, state ) ) return;
00282
00283 if (!s->head)
00284 {
00285 q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
00286 MEMASSERT (q, "queue_add");
00287 q->state = state;
00288 q->next = 0;
00289 }
00290 else
00291 {
00292 q = (QNODE *) AC_MALLOC (sizeof (QNODE));
00293 q->state = state;
00294 q->next = 0;
00295 s->tail->next = q;
00296 s->tail = q;
00297 }
00298 s->count++;
00299 }
00300
00301
00302
00303
00304
00305 static int
00306 queue_remove (QUEUE * s)
00307 {
00308 int state = 0;
00309 QNODE * q;
00310 if (s->head)
00311 {
00312 q = s->head;
00313 state = q->state;
00314 s->head = s->head->next;
00315 s->count--;
00316
00317 if( !s->head )
00318 {
00319 s->tail = 0;
00320 s->count = 0;
00321 }
00322 AC_FREE (q);
00323 }
00324 return state;
00325 }
00326
00327
00328
00329
00330
00331 static int
00332 queue_count (QUEUE * s)
00333 {
00334 return s->count;
00335 }
00336
00337
00338
00339
00340
00341 static void
00342 queue_free (QUEUE * s)
00343 {
00344 while (queue_count (s))
00345 {
00346 queue_remove (s);
00347 }
00348 }
00349
00350
00351
00352
00353 static
00354 int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
00355 {
00356 trans_node_t * t = acsm->acsmTransTable[state];
00357
00358 while( t )
00359 {
00360 if( t->key == input )
00361 {
00362 return t->next_state;
00363 }
00364 t=t->next;
00365 }
00366
00367 if( state == 0 ) return 0;
00368
00369 return ACSM_FAIL_STATE2;
00370 }
00371
00372
00373
00374
00375 static
00376 int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
00377 {
00378 trans_node_t * t = acsm->acsmTransTable[state];
00379
00380 while( t )
00381 {
00382 if( t->key == input )
00383 {
00384 return t->next_state;
00385 }
00386 t = t->next;
00387 }
00388
00389 return 0;
00390 }
00391
00392
00393
00394 static
00395 int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
00396 {
00397 trans_node_t * p;
00398 trans_node_t * tnew;
00399
00400
00401
00402
00403
00404 p = acsm->acsmTransTable[state];
00405 while( p )
00406 {
00407 if( p->key == input )
00408 {
00409 p->next_state = next_state;
00410 return 0;
00411 }
00412 p=p->next;
00413 }
00414
00415
00416 tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
00417 if( !tnew ) return -1;
00418
00419 tnew->key = input;
00420 tnew->next_state = next_state;
00421 tnew->next = 0;
00422
00423 tnew->next = acsm->acsmTransTable[state];
00424 acsm->acsmTransTable[state] = tnew;
00425
00426 acsm->acsmNumTrans++;
00427
00428 return 0;
00429 }
00430
00431
00432
00433 static
00434 int List_FreeTransTable( ACSM_STRUCT2 * acsm )
00435 {
00436 int i;
00437 trans_node_t * t, *p;
00438
00439 if( !acsm->acsmTransTable ) return 0;
00440
00441 for(i=0;i< acsm->acsmMaxStates;i++)
00442 {
00443 t = acsm->acsmTransTable[i];
00444
00445 while( t )
00446 {
00447 p = t->next;
00448 free(t);
00449 t = p;
00450 max_memory -= sizeof(trans_node_t);
00451 }
00452 }
00453
00454 free(acsm->acsmTransTable);
00455
00456 max_memory -= sizeof(void*) * acsm->acsmMaxStates;
00457
00458 acsm->acsmTransTable = 0;
00459
00460 return 0;
00461 }
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490 static
00491 int List_PrintTransTable( ACSM_STRUCT2 * acsm )
00492 {
00493 int i;
00494 trans_node_t * t;
00495 ACSM_PATTERN2 * patrn;
00496
00497 if( !acsm->acsmTransTable ) return 0;
00498
00499 printf("Print Transition Table- %d active states\n",acsm->acsmNumStates);
00500
00501 for(i=0;i< acsm->acsmNumStates;i++)
00502 {
00503 t = acsm->acsmTransTable[i];
00504
00505 printf("state %3d: ",i);
00506
00507 while( t )
00508 {
00509 if( isprint(t->key) )
00510 printf("%3c->%-5d\t",t->key,t->next_state);
00511 else
00512 printf("%3d->%-5d\t",t->key,t->next_state);
00513
00514 t = t->next;
00515 }
00516
00517 patrn =acsm->acsmMatchList[i];
00518
00519 while( patrn )
00520 {
00521 printf("%.*s ",patrn->n,patrn->patrn);
00522
00523 patrn = patrn->next;
00524 }
00525
00526 printf("\n");
00527 }
00528 return 0;
00529 }
00530
00531
00532
00533
00534
00535 static
00536 int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
00537 {
00538 int tcnt = 0;
00539 trans_node_t * t = acsm->acsmTransTable[ state ];
00540
00541 memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
00542
00543 if( !t ) return 0;
00544
00545 while(t)
00546 {
00547 full[ t->key ] = t->next_state;
00548 tcnt++;
00549 t = t->next;
00550 }
00551 return tcnt;
00552 }
00553
00554
00555
00556
00557 static ACSM_PATTERN2*
00558 CopyMatchListEntry (ACSM_PATTERN2 * px)
00559 {
00560 ACSM_PATTERN2 * p;
00561
00562 p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
00563 MEMASSERT (p, "CopyMatchListEntry");
00564
00565 memcpy (p, px, sizeof (ACSM_PATTERN2));
00566
00567 p->next = 0;
00568
00569 return p;
00570 }
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599 static void
00600 AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
00601 {
00602 ACSM_PATTERN2 * p;
00603
00604 p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
00605
00606 MEMASSERT (p, "AddMatchListEntry");
00607
00608 memcpy (p, px, sizeof (ACSM_PATTERN2));
00609
00610 p->next = acsm->acsmMatchList[state];
00611
00612 acsm->acsmMatchList[state] = p;
00613 }
00614
00615
00616 static void
00617 AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
00618 {
00619 int state, next, n;
00620 unsigned char *pattern;
00621
00622 n = p->n;
00623 pattern = p->patrn;
00624 state = 0;
00625
00626 if(s_verbose)printf(" Begin AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates);
00627 if(s_verbose)printf(" adding '%.*s', nocase=%d\n", n,p->patrn, p->nocase );
00628
00629
00630
00631
00632 for (; n > 0; pattern++, n--)
00633 {
00634 if(s_verbose)printf(" find char='%c'\n", *pattern );
00635
00636 next = List_GetNextState(acsm,state,*pattern);
00637 if (next == ACSM_FAIL_STATE2 || next == 0)
00638 {
00639 break;
00640 }
00641 state = next;
00642 }
00643
00644
00645
00646
00647 for (; n > 0; pattern++, n--)
00648 {
00649 if(s_verbose)printf(" add char='%c' state=%d NumStates=%d\n", *pattern, state, acsm->acsmNumStates );
00650
00651 acsm->acsmNumStates++;
00652 List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
00653 state = acsm->acsmNumStates;
00654 }
00655
00656 AddMatchListEntry (acsm, state, p );
00657
00658 if(s_verbose)printf(" End AddPatternStates: acsmNumStates=%d\n",acsm->acsmNumStates);
00659 }
00660
00661
00662
00663
00664
00665 static void
00666 Build_NFA (ACSM_STRUCT2 * acsm)
00667 {
00668 int r, s, i;
00669 QUEUE q, *queue = &q;
00670 acstate_t * FailState = acsm->acsmFailState;
00671 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
00672 ACSM_PATTERN2 * mlist,* px;
00673
00674
00675 queue_init (queue);
00676
00677
00678
00679 for (i = 0; i < acsm->acsmAlphabetSize; i++)
00680 {
00681 s = List_GetNextState2(acsm,0,i);
00682 if( s )
00683 {
00684 queue_add (queue, s);
00685 FailState[s] = 0;
00686 }
00687 }
00688
00689
00690 while (queue_count (queue) > 0)
00691 {
00692 r = queue_remove (queue);
00693
00694
00695 for (i = 0; i < acsm->acsmAlphabetSize; i++)
00696 {
00697 int fs, next;
00698
00699 s = List_GetNextState(acsm,r,i);
00700
00701 if( s != ACSM_FAIL_STATE2 )
00702 {
00703 queue_add (queue, s);
00704
00705 fs = FailState[r];
00706
00707
00708
00709
00710 while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
00711 {
00712 fs = FailState[fs];
00713 }
00714
00715
00716
00717
00718 FailState[s] = next;
00719
00720
00721
00722
00723
00724
00725 for( mlist = MatchList[next];
00726 mlist;
00727 mlist = mlist->next)
00728 {
00729 px = CopyMatchListEntry (mlist);
00730
00731
00732 px->next = MatchList[s];
00733 MatchList[s] = px;
00734 }
00735 }
00736 }
00737 }
00738
00739
00740 queue_free (queue);
00741
00742 if( s_verbose)printf("End Build_NFA: NumStates=%d\n",acsm->acsmNumStates);
00743 }
00744
00745
00746
00747
00748 static void
00749 Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
00750 {
00751 int i, r, s, cFailState;
00752 QUEUE q, *queue = &q;
00753 acstate_t * FailState = acsm->acsmFailState;
00754
00755
00756 queue_init (queue);
00757
00758
00759 for(i=0; i<acsm->acsmAlphabetSize; i++)
00760 {
00761 s = List_GetNextState(acsm,0,i);
00762 if ( s != 0 )
00763 {
00764 queue_add (queue, s);
00765 }
00766 }
00767
00768
00769 while( queue_count(queue) > 0 )
00770 {
00771 r = queue_remove(queue);
00772
00773
00774 for (i = 0; i < acsm->acsmAlphabetSize; i++)
00775 {
00776 s = List_GetNextState(acsm,r,i);
00777
00778 if( s != ACSM_FAIL_STATE2 && s!= 0)
00779 {
00780 queue_add (queue, s);
00781 }
00782 else
00783 {
00784 cFailState = List_GetNextState(acsm,FailState[r],i);
00785
00786 if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
00787 {
00788 List_PutNextState(acsm,r,i,cFailState);
00789 }
00790 }
00791 }
00792 }
00793
00794
00795 queue_free (queue);
00796
00797 if(s_verbose)printf("End Convert_NFA_To_DFA: NumStates=%d\n",acsm->acsmNumStates);
00798
00799 }
00800
00801
00802
00803
00804
00805
00806 static int
00807 Conv_List_To_Full(ACSM_STRUCT2 * acsm)
00808 {
00809 int tcnt, k;
00810 acstate_t * p;
00811 acstate_t ** NextState = acsm->acsmNextState;
00812
00813 for(k=0;k<acsm->acsmMaxStates;k++)
00814 {
00815 p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
00816 if(!p) return -1;
00817
00818 tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
00819
00820 p[0] = ACF_FULL;
00821 p[1] = 0;
00822
00823 NextState[k] = p;
00824 }
00825
00826 return 0;
00827 }
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853
00854
00855
00856
00857 static int
00858 Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
00859 {
00860 int cnt, m, k, i;
00861 acstate_t * p, state, maxstates=0;
00862 acstate_t ** NextState = acsm->acsmNextState;
00863 acstate_t full[MAX_ALPHABET_SIZE];
00864
00865 for(k=0;k<acsm->acsmMaxStates;k++)
00866 {
00867 cnt=0;
00868
00869 List_ConvToFull(acsm, (acstate_t)k, full );
00870
00871 for (i = 0; i < acsm->acsmAlphabetSize; i++)
00872 {
00873 state = full[i];
00874 if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
00875 }
00876
00877 if( cnt > 0 ) maxstates++;
00878
00879 if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
00880 {
00881 p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
00882 if(!p) return -1;
00883
00884 p[0] = ACF_FULL;
00885 p[1] = 0;
00886 memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
00887 }
00888 else
00889 {
00890 p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
00891 if(!p) return -1;
00892
00893 m = 0;
00894 p[m++] = ACF_SPARSE;
00895 p[m++] = 0;
00896 p[m++] = cnt;
00897
00898 for(i = 0; i < acsm->acsmAlphabetSize ; i++)
00899 {
00900 state = full[i];
00901 if( state != 0 && state != ACSM_FAIL_STATE2 )
00902 {
00903 p[m++] = i;
00904 p[m++] = state;
00905 }
00906 }
00907 }
00908
00909 NextState[k] = p;
00910 }
00911
00912 return 0;
00913 }
00914
00915
00916
00917
00918
00919
00920
00921
00922
00923
00924 static int
00925 Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
00926 {
00927 int first = -1, last;
00928 acstate_t * p, state, full[MAX_ALPHABET_SIZE];
00929 acstate_t ** NextState = acsm->acsmNextState;
00930 int cnt,m,k,i;
00931
00932 for(k=0;k<acsm->acsmMaxStates;k++)
00933 {
00934 cnt=0;
00935
00936 List_ConvToFull(acsm, (acstate_t)k, full );
00937
00938 first=-1;
00939 last =-2;
00940
00941 for (i = 0; i < acsm->acsmAlphabetSize; i++)
00942 {
00943 state = full[i];
00944
00945 if( state !=0 && state != ACSM_FAIL_STATE2 )
00946 {
00947 if( first < 0 ) first = i;
00948 last = i;
00949 }
00950 }
00951
00952
00953 cnt= last - first + 1;
00954
00955 p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
00956
00957 if(!p) return -1;
00958
00959 m = 0;
00960 p[m++] = ACF_BANDED;
00961 p[m++] = 0;
00962 p[m++] = cnt;
00963 p[m++] = first;
00964
00965 for(i = first; i <= last; i++)
00966 {
00967 p[m++] = full[i];
00968 }
00969
00970 NextState[k] = p;
00971 }
00972
00973 return 0;
00974 }
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
00990 static
00991 int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
00992 {
00993 int i, nbands,zcnt,last=0;
00994 acstate_t state;
00995
00996 nbands=0;
00997 for( i=0; i<asize; i++ )
00998 {
00999 state = next[i];
01000
01001 if( state !=0 && state != ACSM_FAIL_STATE2 )
01002 {
01003 begin[nbands] = i;
01004 zcnt=0;
01005
01006 for( ; i< asize; i++ )
01007 {
01008 state = next[i];
01009 if( state ==0 || state == ACSM_FAIL_STATE2 )
01010 {
01011 zcnt++;
01012 if( zcnt > zmax ) break;
01013 }
01014 else
01015 {
01016 zcnt=0;
01017 last = i;
01018 }
01019 }
01020
01021 end[nbands++] = last;
01022
01023 }
01024 }
01025
01026 return nbands;
01027 }
01028
01029
01030
01031
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041
01042
01043
01044
01045
01046 static int
01047 Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
01048 {
01049 acstate_t * p;
01050 acstate_t ** NextState = acsm->acsmNextState;
01051 int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
01052
01053 int band_begin[MAX_ALPHABET_SIZE];
01054 int band_end[MAX_ALPHABET_SIZE];
01055 int nbands,j;
01056 acstate_t full[MAX_ALPHABET_SIZE];
01057
01058 for(k=0;k<acsm->acsmMaxStates;k++)
01059 {
01060 cnt=0;
01061
01062 List_ConvToFull(acsm, (acstate_t)k, full );
01063
01064 nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
01065
01066
01067 cnt = 3;
01068 for(i=0;i<nbands;i++)
01069 {
01070 cnt += 2;
01071 cnt += band_end[i] - band_begin[i] + 1;
01072
01073
01074 }
01075
01076 p = AC_MALLOC(sizeof(acstate_t)*(cnt));
01077
01078 if(!p) return -1;
01079
01080 m = 0;
01081 p[m++] = ACF_SPARSEBANDS;
01082 p[m++] = 0;
01083 p[m++] = nbands;
01084
01085 for( i=0;i<nbands;i++ )
01086 {
01087 p[m++] = band_end[i] - band_begin[i] + 1;
01088 p[m++] = band_begin[i];
01089
01090 for( j=band_begin[i]; j<=band_end[i]; j++ )
01091 {
01092 p[m++] = full[j];
01093 }
01094 }
01095
01096 NextState[k] = p;
01097 }
01098
01099 return 0;
01100 }
01101
01102 void
01103 Print_DFA_MatchList( ACSM_STRUCT2 * acsm, int state )
01104 {
01105 ACSM_PATTERN2 * mlist;
01106
01107 for (mlist = acsm->acsmMatchList[state];
01108 mlist;
01109 mlist = mlist->next)
01110 {
01111 printf("%.*s ", mlist->n, mlist->patrn);
01112 }
01113 }
01114
01115
01116
01117 static void
01118 Print_DFA(ACSM_STRUCT2 * acsm)
01119 {
01120 int k,i;
01121 acstate_t * p, state, n, fmt, index, nb, bmatch;
01122 acstate_t ** NextState = acsm->acsmNextState;
01123
01124 printf("Print DFA - %d active states\n",acsm->acsmNumStates);
01125
01126 for(k=0;k<acsm->acsmNumStates;k++)
01127 {
01128 p = NextState[k];
01129
01130 if( !p ) continue;
01131
01132 fmt = *p++;
01133
01134 bmatch = *p++;
01135
01136 printf("state %3d, fmt=%d: ",k,fmt);
01137
01138 if( fmt ==ACF_SPARSE )
01139 {
01140 n = *p++;
01141 for( ; n>0; n--, p+=2 )
01142 {
01143 if( isprint(p[0]) )
01144 printf("%3c->%-5d\t",p[0],p[1]);
01145 else
01146 printf("%3d->%-5d\t",p[0],p[1]);
01147 }
01148 }
01149 else if( fmt ==ACF_BANDED )
01150 {
01151
01152 n = *p++;
01153 index = *p++;
01154
01155 for( ; n>0; n--, p++ )
01156 {
01157 if( isprint(p[0]) )
01158 printf("%3c->%-5d\t",index++,p[0]);
01159 else
01160 printf("%3d->%-5d\t",index++,p[0]);
01161 }
01162 }
01163 else if( fmt ==ACF_SPARSEBANDS )
01164 {
01165 nb = *p++;
01166 for(i=0;i<nb;i++)
01167 {
01168 n = *p++;
01169 index = *p++;
01170 for( ; n>0; n--, p++ )
01171 {
01172 if( isprint(index) )
01173 printf("%3c->%-5d\t",index++,p[0]);
01174 else
01175 printf("%3d->%-5d\t",index++,p[0]);
01176 }
01177 }
01178 }
01179 else if( fmt == ACF_FULL )
01180 {
01181
01182 for( i=0; i<acsm->acsmAlphabetSize; i++ )
01183 {
01184 state = p[i];
01185
01186 if( state != 0 && state != ACSM_FAIL_STATE2 )
01187 {
01188 if( isprint(i) )
01189 printf("%3c->%-5d\t",i,state);
01190 else
01191 printf("%3d->%-5d\t",i,state);
01192 }
01193 }
01194 }
01195
01196 Print_DFA_MatchList( acsm, k);
01197
01198 printf("\n");
01199 }
01200 }
01201
01202
01203
01204
01205
01206
01207
01208
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232
01233
01234
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262
01263
01264
01265
01266
01267
01268
01269
01270
01271
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
01302
01303
01304
01305
01306
01307
01308
01309
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351 int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
01352 {
01353 switch( m )
01354 {
01355 case ACF_FULL:
01356 case ACF_SPARSE:
01357 case ACF_BANDED:
01358 case ACF_SPARSEBANDS:
01359 acsm->acsmFormat = m;
01360 break;
01361 default:
01362 return -1;
01363 }
01364
01365 return 0;
01366 }
01367
01368
01369
01370 void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
01371 {
01372 acsm->acsmSparseMaxZcnt = n;
01373 }
01374
01375
01376
01377 void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
01378 {
01379 acsm->acsmSparseMaxRowNodes = n;
01380 }
01381
01382
01383
01384 int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
01385 {
01386 switch( m )
01387 {
01388 case FSA_TRIE:
01389 case FSA_NFA:
01390 case FSA_DFA:
01391 acsm->acsmFSA = m;
01392 default:
01393 return -1;
01394 }
01395 }
01396
01397
01398
01399 int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
01400 {
01401 if( n <= MAX_ALPHABET_SIZE )
01402 {
01403 acsm->acsmAlphabetSize = n;
01404 }
01405 else
01406 {
01407 return -1;
01408 }
01409 return 0;
01410 }
01411
01412
01413
01414 ACSM_STRUCT2 * acsmNew2 ()
01415 {
01416 ACSM_STRUCT2 * p;
01417
01418 init_xlatcase ();
01419
01420 p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
01421 MEMASSERT (p, "acsmNew");
01422
01423 if (p)
01424 {
01425 memset (p, 0, sizeof (ACSM_STRUCT2));
01426
01427
01428 p->acsmFSA = FSA_DFA;
01429 p->acsmFormat = ACF_FULL;
01430 p->acsmAlphabetSize = 256;
01431 p->acsmSparseMaxRowNodes = 256;
01432 p->acsmSparseMaxZcnt = 10;
01433 }
01434
01435 return p;
01436 }
01437
01438
01439
01440
01441 int
01442 acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
01443 int offset, int depth, void * id, int iid)
01444 {
01445 ACSM_PATTERN2 * plist;
01446
01447 plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
01448 MEMASSERT (plist, "acsmAddPattern");
01449
01450 plist->patrn = (unsigned char *) AC_MALLOC ( n );
01451 MEMASSERT (plist->patrn, "acsmAddPattern");
01452
01453 ConvertCaseEx(plist->patrn, pat, n);
01454
01455 plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
01456 MEMASSERT (plist->casepatrn, "acsmAddPattern");
01457
01458 memcpy (plist->casepatrn, pat, n);
01459
01460 plist->n = n;
01461 plist->nocase = nocase;
01462 plist->offset = offset;
01463 plist->depth = depth;
01464 plist->id = id;
01465 plist->iid = iid;
01466
01467 plist->next = p->acsmPatterns;
01468 p->acsmPatterns = plist;
01469
01470 return 0;
01471 }
01472
01473
01474
01475 int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
01476 {
01477 ACSM_PATTERN2 * plist;
01478
01479 plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
01480 MEMASSERT (plist, "acsmAddPattern");
01481
01482 plist->patrn = (unsigned char *) AC_MALLOC (klen);
01483 memcpy (plist->patrn, key, klen);
01484
01485 plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
01486 memcpy (plist->casepatrn, key, klen);
01487
01488 plist->n = klen;
01489 plist->nocase = nocase;
01490 plist->offset = 0;
01491 plist->depth = 0;
01492 plist->id = 0;
01493 plist->iid = 0;
01494
01495 plist->next = p->acsmPatterns;
01496 p->acsmPatterns = plist;
01497
01498 return 0;
01499 }
01500
01501
01502
01503
01504 static
01505 void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
01506 {
01507 acstate_t state;
01508 acstate_t ** NextState = acsm->acsmNextState;
01509 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
01510
01511 for( state=0; state<acsm->acsmNumStates; state++ )
01512 {
01513 if( MatchList[state] )
01514 {
01515 NextState[state][1] = 1;
01516 }
01517 else
01518 {
01519 NextState[state][1] = 0;
01520 }
01521 }
01522 }
01523
01524
01525
01526
01527 int
01528 acsmCompile2 (ACSM_STRUCT2 * acsm)
01529 {
01530 int k;
01531 ACSM_PATTERN2 * plist;
01532
01533
01534 for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
01535 {
01536 acsm->acsmMaxStates += plist->n;
01537
01538 }
01539 acsm->acsmMaxStates++;
01540
01541
01542 acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
01543 MEMASSERT (acsm->acsmTransTable, "acsmCompile");
01544
01545 memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
01546
01547 if(s_verbose)printf ("ACSMX-Max Memory-TransTable Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01548
01549
01550 acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
01551 MEMASSERT (acsm->acsmFailState, "acsmCompile");
01552
01553 memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
01554
01555
01556 acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
01557 MEMASSERT (acsm->acsmMatchList, "acsmCompile");
01558
01559 memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
01560
01561 if(s_verbose)printf ("ACSMX-Max Memory- MatchList Table Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01562
01563
01564 acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
01565 MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
01566
01567 for (k = 0; k < acsm->acsmMaxStates; k++)
01568 {
01569 acsm->acsmNextState[k]=(acstate_t*)0;
01570 }
01571
01572 if(s_verbose)printf ("ACSMX-Max Memory-Table Setup: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01573
01574
01575 acsm->acsmNumStates = 0;
01576
01577
01578
01579
01580
01581 for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
01582 {
01583 AddPatternStates (acsm, plist);
01584 }
01585
01586 acsm->acsmNumStates++;
01587
01588 if(s_verbose)printf ("ACSMX-Max Trie List Memory : %d bytes, %d states, %d active states\n",
01589 max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01590
01591 if(s_verbose)List_PrintTransTable( acsm );
01592
01593 if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
01594 {
01595
01596 if(s_verbose)printf("Build_NFA\n");
01597
01598 Build_NFA (acsm);
01599
01600 if(s_verbose)printf("NFA-Trans-Nodes: %d\n",acsm->acsmNumTrans);
01601 if(s_verbose)printf("ACSMX-Max NFA List Memory : %d bytes, %d states / %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01602
01603 if(s_verbose)List_PrintTransTable( acsm );
01604 }
01605
01606 if( acsm->acsmFSA == FSA_DFA )
01607 {
01608
01609 if(s_verbose)printf("Convert_NFA_To_DFA\n");
01610
01611 Convert_NFA_To_DFA (acsm);
01612
01613 if(s_verbose)printf("DFA-Trans-Nodes: %d\n",acsm->acsmNumTrans);
01614 if(s_verbose)printf("ACSMX-Max NFA-DFA List Memory : %d bytes, %d states / %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01615
01616 if(s_verbose)List_PrintTransTable( acsm );
01617 }
01618
01619
01620
01621
01622
01623
01624
01625 if(s_verbose)printf("Converting Transition Lists -> Transition table, fmt=%d\n",acsm->acsmFormat);
01626
01627 if( acsm->acsmFormat == ACF_SPARSE )
01628 {
01629
01630 if( Conv_Full_DFA_To_Sparse(acsm) )
01631 return -1;
01632
01633 if(s_verbose)printf ("ACSMX-Max Memory-Sparse: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01634 if(s_verbose)Print_DFA(acsm);
01635 }
01636
01637 else if( acsm->acsmFormat == ACF_BANDED )
01638 {
01639
01640 if( Conv_Full_DFA_To_Banded(acsm) )
01641 return -1;
01642
01643 if(s_verbose)printf ("ACSMX-Max Memory-banded: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01644 if(s_verbose)Print_DFA(acsm);
01645 }
01646
01647 else if( acsm->acsmFormat == ACF_SPARSEBANDS )
01648 {
01649
01650 if( Conv_Full_DFA_To_SparseBands(acsm) )
01651 return -1;
01652
01653 if(s_verbose)printf ("ACSMX-Max Memory-sparse-bands: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01654 if(s_verbose)Print_DFA(acsm);
01655 }
01656 else if( acsm->acsmFormat == ACF_FULL )
01657 {
01658 if( Conv_List_To_Full( acsm ) )
01659 return -1;
01660
01661 if(s_verbose)printf ("ACSMX-Max Memory-Full: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01662 if(s_verbose)Print_DFA(acsm);
01663 }
01664
01665 acsmUpdateMatchStates( acsm );
01666
01667
01668 List_FreeTransTable( acsm );
01669
01670 if(s_verbose)printf ("ACSMX-Max Memory-Final: %d bytes, %d states, %d active states\n", max_memory,acsm->acsmMaxStates,acsm->acsmNumStates);
01671
01672
01673
01674
01675
01676
01677
01678
01679 summary.num_states += acsm->acsmNumStates;
01680 summary.num_transitions += acsm->acsmNumTrans;
01681
01682 memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
01683
01684 return 0;
01685 }
01686
01687
01688
01689
01690 inline
01691 acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
01692 {
01693 acstate_t fmt;
01694 acstate_t n;
01695 int index;
01696 int nb;
01697
01698 fmt = *ps++;
01699
01700 ps++;
01701
01702 switch( fmt )
01703 {
01704 case ACF_BANDED:
01705 {
01706 n = ps[0];
01707 index = ps[1];
01708
01709 if( input < index )
01710 {
01711 if(state==0)
01712 {
01713 return 0;
01714 }
01715 else
01716 {
01717 return (acstate_t)ACSM_FAIL_STATE2;
01718 }
01719 }
01720 if( input >= index + n )
01721 {
01722 if(state==0)
01723 {
01724 return 0;
01725 }
01726 else
01727 {
01728 return (acstate_t)ACSM_FAIL_STATE2;
01729 }
01730 }
01731 if( ps[input-index] == 0 )
01732 {
01733 if( state != 0 )
01734 {
01735 return ACSM_FAIL_STATE2;
01736 }
01737 }
01738
01739 return (acstate_t) ps[input-index];
01740 }
01741
01742 case ACF_SPARSE:
01743 {
01744 n = *ps++;
01745
01746 for( ; n>0 ; n-- )
01747 {
01748 if( ps[0] > input )
01749 {
01750 return (acstate_t)ACSM_FAIL_STATE2;
01751 }
01752 else if( ps[0] == input )
01753 {
01754 return ps[1];
01755 }
01756 ps+=2;
01757 }
01758 if( state == 0 )
01759 {
01760 return 0;
01761 }
01762 return ACSM_FAIL_STATE2;
01763 }
01764
01765 case ACF_SPARSEBANDS:
01766 {
01767 nb = *ps++;
01768
01769 while( nb > 0 )
01770 {
01771 n = *ps++;
01772 index = *ps++;
01773
01774 if( input < index )
01775 {
01776 if( state != 0 )
01777 {
01778 return (acstate_t)ACSM_FAIL_STATE2;
01779 }
01780 return (acstate_t)0;
01781 }
01782 if( (input >= index) && (input < (index + n)) )
01783 {
01784 if( ps[input-index] == 0 )
01785 {
01786 if( state != 0 )
01787 {
01788 return ACSM_FAIL_STATE2;
01789 }
01790 }
01791 return (acstate_t) ps[input-index];
01792 }
01793 nb--;
01794 ps += n;
01795 }
01796 if( state != 0 )
01797 {
01798 return (acstate_t)ACSM_FAIL_STATE2;
01799 }
01800 return (acstate_t)0;
01801 }
01802
01803 case ACF_FULL:
01804 {
01805 if( ps[input] == 0 )
01806 {
01807 if( state != 0 )
01808 {
01809 return ACSM_FAIL_STATE2;
01810 }
01811 }
01812 return ps[input];
01813 }
01814 }
01815
01816 return 0;
01817 }
01818
01819
01820
01821
01822
01823
01824
01825
01826 inline
01827 acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
01828 {
01829 acstate_t n, nb;
01830 int index;
01831
01832 switch( ps[0] )
01833 {
01834
01835 case ACF_BANDED:
01836 {
01837
01838
01839
01840 if( input < ps[3] ) return 0;
01841 if( input >= (ps[3]+ps[2]) ) return 0;
01842
01843 return ps[4+input-ps[3]];
01844 }
01845
01846
01847 case ACF_FULL:
01848 {
01849 return ps[2+input];
01850 }
01851
01852
01853 case ACF_SPARSE:
01854 {
01855 n = ps[2];
01856
01857 ps += 3;
01858
01859 for( ; n>0 ; n-- )
01860 {
01861 if( input < ps[0] )
01862 {
01863 return (acstate_t)0;
01864 }
01865 else if( ps[0] == input )
01866 {
01867 return ps[1];
01868 }
01869 ps += 2;
01870 }
01871 return (acstate_t)0;
01872 }
01873
01874
01875
01876 case ACF_SPARSEBANDS:
01877 {
01878 nb = ps[2];
01879
01880 ps += 3;
01881
01882 while( nb > 0 )
01883 {
01884 n = ps[0];
01885 index = ps[1];
01886 if( input < index )
01887 {
01888 return (acstate_t)0;
01889 }
01890 if( (input < (index + n)) )
01891 {
01892 return (acstate_t) ps[2+input-index];
01893 }
01894 nb--;
01895 ps += n;
01896 }
01897 return (acstate_t)0;
01898 }
01899 }
01900
01901 return 0;
01902 }
01903
01904
01905
01906
01907
01908 static
01909 inline
01910 int
01911 acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
01912 int (*Match) (void * id, int index, void *data),
01913 void *data)
01914 {
01915 acstate_t state;
01916 ACSM_PATTERN2 * mlist;
01917 unsigned char * Tend;
01918 int nfound = 0;
01919 unsigned char * T, * Tc;
01920 int index;
01921 acstate_t ** NextState = acsm->acsmNextState;
01922 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
01923
01924 Tc = Tx;
01925 T = Tx;
01926 Tend = T + n;
01927
01928 for( state = 0; T < Tend; T++ )
01929 {
01930 state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
01931
01932
01933 if( NextState[state][1] )
01934 {
01935 for( mlist = MatchList[state];
01936 mlist!= NULL;
01937 mlist = mlist->next )
01938 {
01939 index = T - mlist->n - Tc;
01940 if( mlist->nocase )
01941 {
01942 nfound++;
01943 if (Match (mlist->id, index, data))
01944 return nfound;
01945 }
01946 else
01947 {
01948 if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
01949 {
01950 nfound++;
01951 if (Match (mlist->id, index, data))
01952 return nfound;
01953 }
01954 }
01955 }
01956 }
01957 }
01958 return nfound;
01959 }
01960
01961
01962
01963
01964
01965
01966
01967
01968
01969
01970 static
01971 inline
01972 int
01973 acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
01974 int (*Match) (void * id, int index, void *data),
01975 void *data)
01976 {
01977 ACSM_PATTERN2 * mlist;
01978 unsigned char * Tend;
01979 unsigned char * T;
01980 int index;
01981 acstate_t state;
01982 acstate_t * ps;
01983 acstate_t sindex;
01984 acstate_t ** NextState = acsm->acsmNextState;
01985 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
01986 int nfound = 0;
01987
01988 T = Tx;
01989 Tend = Tx + n;
01990
01991 for( state = 0; T < Tend; T++ )
01992 {
01993 ps = NextState[ state ];
01994
01995 sindex = xlatcase[ T[0] ];
01996
01997
01998 if( ps[1] )
01999 {
02000 for( mlist = MatchList[state];
02001 mlist!= NULL;
02002 mlist = mlist->next )
02003 {
02004 index = T - mlist->n - Tx;
02005
02006
02007 if( mlist->nocase )
02008 {
02009 nfound++;
02010 if (Match (mlist->id, index, data))
02011 return nfound;
02012 }
02013 else
02014 {
02015 if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
02016 {
02017 nfound++;
02018 if (Match (mlist->id, index, data))
02019 return nfound;
02020 }
02021 }
02022
02023 }
02024 }
02025
02026 state = ps[ 2u + sindex ];
02027 }
02028
02029
02030 for( mlist = MatchList[state];
02031 mlist!= NULL;
02032 mlist = mlist->next )
02033 {
02034 index = T - mlist->n - Tx;
02035
02036 if( mlist->nocase )
02037 {
02038 nfound++;
02039 if (Match (mlist->id, index, data))
02040 return nfound;
02041 }
02042 else
02043 {
02044 if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02045 {
02046 nfound++;
02047 if (Match (mlist->id, index, data))
02048 return nfound;
02049 }
02050 }
02051 }
02052
02053 return nfound;
02054 }
02055
02056
02057
02058
02059
02060
02061
02062
02063
02064
02065 static
02066 inline
02067 int
02068 acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02069 int (*Match) (void * id, int index, void *data),
02070 void *data)
02071 {
02072 acstate_t state;
02073 unsigned char * Tend;
02074 unsigned char * T;
02075 int sindex;
02076 int index;
02077 acstate_t ** NextState = acsm->acsmNextState;
02078 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
02079 ACSM_PATTERN2 * mlist;
02080 acstate_t * ps;
02081 int nfound = 0;
02082
02083 T = Tx;
02084 Tend = T + n;
02085
02086 for( state = 0; T < Tend; T++ )
02087 {
02088 ps = NextState[state];
02089
02090 sindex = xlatcase[ T[0] ];
02091
02092
02093 if( ps[1] )
02094 {
02095 for( mlist = MatchList[state];
02096 mlist!= NULL;
02097 mlist = mlist->next )
02098 {
02099 index = T - mlist->n - Tx;
02100
02101 if( mlist->nocase )
02102 {
02103 nfound++;
02104 if (Match (mlist->id, index, data))
02105 return nfound;
02106 }
02107 else
02108 {
02109 if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02110 {
02111 nfound++;
02112 if (Match (mlist->id, index, data))
02113 return nfound;
02114 }
02115 }
02116 }
02117 }
02118
02119 if( sindex < ps[3] ) state = 0;
02120 else if( sindex >= (ps[3] + ps[2]) ) state = 0;
02121 else state = ps[ 4u + sindex - ps[3] ];
02122 }
02123
02124
02125 for( mlist = MatchList[state];
02126 mlist!= NULL;
02127 mlist = mlist->next )
02128 {
02129 index = T - mlist->n - Tx;
02130
02131 if( mlist->nocase )
02132 {
02133 nfound++;
02134 if (Match (mlist->id, index, data))
02135 return nfound;
02136 }
02137 else
02138 {
02139 if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02140 {
02141 nfound++;
02142 if (Match (mlist->id, index, data))
02143 return nfound;
02144 }
02145 }
02146 }
02147
02148 return nfound;
02149 }
02150
02151
02152
02153
02154
02155
02156
02157
02158 static
02159 inline
02160 int
02161 acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02162 int (*Match) (void * id, int index, void *data),
02163 void *data)
02164 {
02165 acstate_t state;
02166 ACSM_PATTERN2 * mlist;
02167 unsigned char * Tend;
02168 int nfound = 0;
02169 unsigned char * T, *Tc;
02170 int index;
02171 acstate_t ** NextState= acsm->acsmNextState;
02172 acstate_t * FailState= acsm->acsmFailState;
02173 ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
02174 unsigned char Tchar;
02175
02176 Tc = Tx;
02177 T = Tx;
02178 Tend = T + n;
02179
02180 for( state = 0; T < Tend; T++ )
02181 {
02182 acstate_t nstate;
02183
02184 Tchar = xlatcase[ *T ];
02185
02186 while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
02187 state = FailState[state];
02188
02189 state = nstate;
02190
02191 for( mlist = MatchList[state];
02192 mlist!= NULL;
02193 mlist = mlist->next )
02194 {
02195 index = T - mlist->n - Tx;
02196 if( mlist->nocase )
02197 {
02198 nfound++;
02199 if (Match (mlist->id, index, data))
02200 return nfound;
02201 }
02202 else
02203 {
02204 if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
02205 {
02206 nfound++;
02207 if (Match (mlist->id, index, data))
02208 return nfound;
02209 }
02210 }
02211 }
02212 }
02213
02214 return nfound;
02215 }
02216
02217
02218
02219
02220 int
02221 acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
02222 int (*Match) (void * id, int index, void *data),
02223 void *data)
02224 {
02225
02226 switch( acsm->acsmFSA )
02227 {
02228 case FSA_DFA:
02229
02230 if( acsm->acsmFormat == ACF_FULL )
02231 {
02232 return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
02233 }
02234 else if( acsm->acsmFormat == ACF_BANDED )
02235 {
02236 return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
02237 }
02238 else
02239 {
02240 return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
02241 }
02242
02243 case FSA_NFA:
02244
02245 return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
02246
02247 case FSA_TRIE:
02248
02249 return 0;
02250 }
02251 return 0;
02252 }
02253
02254
02255
02256
02257
02258 void
02259 acsmFree2 (ACSM_STRUCT2 * acsm)
02260 {
02261 int i;
02262 ACSM_PATTERN2 * mlist, *ilist;
02263 for (i = 0; i < acsm->acsmMaxStates; i++)
02264 {
02265 mlist = acsm->acsmMatchList[i];
02266
02267 while (mlist)
02268 {
02269 ilist = mlist;
02270 mlist = mlist->next;
02271 AC_FREE (ilist);
02272 }
02273 AC_FREE(acsm->acsmNextState[i]);
02274 }
02275 AC_FREE(acsm->acsmFailState);
02276 AC_FREE(acsm->acsmMatchList);
02277 }
02278
02279
02280
02281
02282 void acsmPrintInfo2( ACSM_STRUCT2 * p)
02283 {
02284 char * sf[]={
02285 "Full Matrix",
02286 "Sparse Matrix",
02287 "Banded Matrix",
02288 "Sparse Banded Matrix",
02289 };
02290 char * fsa[]={
02291 "TRIE",
02292 "NFA",
02293 "DFA",
02294 };
02295
02296
02297 printf("+--[Pattern Matcher:Aho-Corasick]-----------------------------\n");
02298 printf("| Alphabet Size : %d Chars\n",p->acsmAlphabetSize);
02299 printf("| Sizeof State : %d bytes\n",(int)(sizeof(acstate_t)));
02300 printf("| Storage Format : %s \n",sf[ p->acsmFormat ]);
02301 printf("| Sparse Row Nodes : %d Max\n",p->acsmSparseMaxRowNodes);
02302 printf("| Sparse Band Zeros: %d Max\n",p->acsmSparseMaxZcnt);
02303 printf("| Num States : %d\n",p->acsmNumStates);
02304 printf("| Num Transitions : %d\n",p->acsmNumTrans);
02305 printf("| State Density : %.1f%%\n",100.0*(double)p->acsmNumTrans/(p->acsmNumStates*p->acsmAlphabetSize));
02306 printf("| Finite Automatum : %s\n", fsa[p->acsmFSA]);
02307 if( max_memory < 1024*1024 )
02308 printf("| Memory : %.2fKbytes\n", (float)max_memory/1024 );
02309 else
02310 printf("| Memory : %.2fMbytes\n", (float)max_memory/(1024*1024) );
02311 printf("+-------------------------------------------------------------\n");
02312
02313
02314
02315 }
02316
02317
02318
02319
02320 int acsmPrintDetailInfo2( ACSM_STRUCT2 * p )
02321 {
02322
02323 return 0;
02324 }
02325
02326
02327
02328
02329
02330
02331
02332 int acsmPrintSummaryInfo2()
02333 {
02334 char * sf[]={
02335 "Full",
02336 "Sparse",
02337 "Banded",
02338 "Sparse",
02339 };
02340
02341 char * fsa[]={
02342 "TRIE",
02343 "NFA",
02344 "DFA",
02345 };
02346
02347 ACSM_STRUCT2 * p = &summary.acsm;
02348
02349 if( !summary.num_states )
02350 return 0;
02351
02352 printf("+--[Pattern Matcher:Aho-Corasick Summary]----------------------\n");
02353 printf("| Alphabet Size : %d Chars\n",p->acsmAlphabetSize);
02354 printf("| Sizeof State : %d bytes\n",(int)(sizeof(acstate_t)));
02355 printf("| Storage Format : %s \n",sf[ p->acsmFormat ]);
02356 printf("| Num States : %d\n",summary.num_states);
02357 printf("| Num Transitions : %d\n",summary.num_transitions);
02358 printf("| State Density : %.1f%%\n",100.0*(double)summary.num_transitions/(summary.num_states*p->acsmAlphabetSize));
02359 printf("| Finite Automatum : %s\n", fsa[p->acsmFSA]);
02360 if( max_memory < 1024*1024 )
02361 printf("| Memory : %.2fKbytes\n", (float)max_memory/1024 );
02362 else
02363 printf("| Memory : %.2fMbytes\n", (float)max_memory/(1024*1024) );
02364 printf("+-------------------------------------------------------------\n");
02365
02366
02367 return 0;
02368 }
02369
02370
02371
02372
02373 #ifdef ACSMX2S_MAIN
02374
02375
02376
02377
02378 unsigned char text[512];
02379
02380
02381
02382
02383 int
02384 MatchFound (void* id, int index, void *data)
02385 {
02386 fprintf (stdout, "%s\n", (char *) id);
02387 return 0;
02388 }
02389
02390
02391
02392
02393 int
02394 main (int argc, char **argv)
02395 {
02396 int i, nc, nocase = 0;
02397 ACSM_STRUCT2 * acsm;
02398 char * p;
02399
02400 if (argc < 3)
02401
02402 {
02403 fprintf (stderr,"Usage: %s search-text pattern +pattern... [flags]\n",argv[0]);
02404 fprintf (stderr," flags: -nfa -nocase -full -sparse -bands -sparsebands -z zcnt (sparsebands) -sparsetree -v\n");
02405 exit (0);
02406 }
02407
02408 acsm = acsmNew2 ();
02409 if( !acsm )
02410 {
02411 printf("acsm-no memory\n");
02412 exit(0);
02413 }
02414
02415 strcpy (text, argv[1]);
02416
02417 acsm->acsmFormat = ACF_FULL;
02418
02419 for (i = 1; i < argc; i++)
02420 {
02421 if (strcmp (argv[i], "-nocase") == 0){
02422 nocase = 1;
02423 }
02424 if (strcmp (argv[i], "-v") == 0){
02425 s_verbose=1;
02426 }
02427
02428 if (strcmp (argv[i], "-full") == 0){
02429 acsm->acsmFormat = ACF_FULL;
02430 }
02431 if (strcmp (argv[i], "-sparse") == 0){
02432 acsm->acsmFormat = ACF_SPARSE;
02433 acsm->acsmSparseMaxRowNodes = 10;
02434 }
02435 if (strcmp (argv[i], "-bands") == 0){
02436 acsm->acsmFormat = ACF_BANDED;
02437 }
02438
02439 if (strcmp (argv[i], "-sparsebands") == 0){
02440 acsm->acsmFormat = ACF_SPARSEBANDS;
02441 acsm->acsmSparseMaxZcnt = 10;
02442 }
02443 if (strcmp (argv[i], "-z") == 0){
02444 acsm->acsmSparseMaxZcnt = atoi(argv[++i]);
02445 }
02446
02447 if (strcmp (argv[i], "-nfa") == 0){
02448 acsm->acsmFSA = FSA_NFA;
02449 }
02450 if (strcmp (argv[i], "-dfa") == 0){
02451 acsm->acsmFSA = FSA_DFA;
02452 }
02453 if (strcmp (argv[i], "-trie") == 0){
02454 acsm->acsmFSA = FSA_TRIE;
02455 }
02456 }
02457
02458 for (i = 2; i < argc; i++)
02459 {
02460 if (argv[i][0] == '-')
02461 continue;
02462
02463 p = argv[i];
02464
02465 if ( *p == '+')
02466 {
02467 nc=1;
02468 p++;
02469 }
02470 else
02471 {
02472 nc = nocase;
02473 }
02474
02475 acsmAddPattern2 (acsm, p, strlen(p), nc, 0, 0,(void*)p, i - 2);
02476 }
02477
02478 if(s_verbose)printf("Patterns added\n");
02479
02480 Print_DFA (acsm);
02481
02482 acsmCompile2 (acsm);
02483
02484 Write_DFA(acsm, "acsmx2-snort.dfa") ;
02485
02486 if(s_verbose) printf("Patterns compiled--written to file.\n");
02487
02488 acsmPrintInfo2 ( acsm );
02489
02490 acsmSearch2 (acsm, text, strlen (text), MatchFound, (void *)0 );
02491
02492 acsmFree2 (acsm);
02493
02494 printf ("normal pgm end\n");
02495
02496 return (0);
02497 }
02498 #endif
02499