00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182 #ifdef HAVE_CONFIG_H
00183 #include "config.h"
00184 #endif
00185
00186 #include <stdio.h>
00187 #include <string.h>
00188 #include <stdlib.h>
00189 #include <ctype.h>
00190
00191 #include "mwm.h"
00192
00193 int FatalError( char *, ... );
00194
00195
00196
00197
00198
00199
00200 static UINT64 iPatCount=0;
00201
00202 UINT64 mwmGetPatByteCount()
00203 {
00204 return iPatCount;
00205 }
00206
00207 void mwmResetByteCount()
00208 {
00209 iPatCount=0;
00210 }
00211
00212
00213
00214
00215 static unsigned char xlatcase[256];
00216
00217
00218
00219
00220
00221
00222 static unsigned char S[65536];
00223
00224
00225
00226
00227 static void init_xlatcase()
00228 {
00229 int i;
00230 for(i=0;i<256;i++)
00231 {
00232 xlatcase[ i ] = toupper(i);
00233 }
00234 }
00235
00236
00237
00238
00239
00240 static INLINE void ConvCaseToUpper( unsigned char *s, int m )
00241 {
00242 int i;
00243 for( i=0; i < m; i++ )
00244 {
00245 s[i] = xlatcase[ s[i] ];
00246 }
00247 }
00248
00249
00250
00251
00252 static INLINE void ConvCaseToUpperEx( unsigned char * d, unsigned char *s, int m )
00253 {
00254 int i;
00255 for( i=0; i < m; i++ )
00256 {
00257 d[i] = xlatcase[ s[i] ];
00258 }
00259 }
00260
00261
00262
00263
00264
00265
00266
00267 #undef COPY_PATTERNS
00268 HBM_STRUCT * hbm_prepx(HBM_STRUCT *p, unsigned char * pat, int m)
00269 {
00270 int k;
00271
00272 if( !m ) return 0;
00273 if( !p ) return 0;
00274
00275 #ifdef COPYPATTERN
00276 p->P = (unsigned char*)malloc( m + 1 )
00277 if( !p->P ) return 0;
00278 memcpy(p->P,pat,m);
00279 #else
00280 p->P = pat;
00281 #endif
00282
00283 p->M = m;
00284
00285
00286 for(k = 0; k < 256; k++) p->bcShift[k] = m;
00287 for(k = 0; k < m; k++) p->bcShift[pat[k]] = m - k - 1;
00288
00289 return p;
00290 }
00291
00292
00293
00294
00295 HBM_STRUCT * hbm_prep(unsigned char * pat, int m)
00296 {
00297 HBM_STRUCT *p;
00298
00299 p = (HBM_STRUCT*)malloc( sizeof(HBM_STRUCT) );
00300 if( !p ) return 0;
00301
00302 return hbm_prepx( p, pat, m );
00303 }
00304
00305 #ifdef XXX_NOT_USED
00306
00307
00308
00309 static void hbm_free( HBM_STRUCT *p )
00310 {
00311 if(p)
00312 {
00313 #ifdef COPYPATTERN
00314 if( p->P )free(p->P);
00315 #endif
00316 free(p);
00317 }
00318 }
00319 #endif
00320
00321
00322
00323
00324
00325
00326
00327
00328 static INLINE unsigned char * hbm_match(HBM_STRUCT * px, unsigned char * text, int n)
00329 {
00330 unsigned char *pat, *t, *et, *q;
00331 int m1, k;
00332 short *bcShift;
00333
00334 m1 = px->M-1;
00335 pat = px->P;
00336 bcShift= px->bcShift;
00337
00338 t = text + m1;
00339 et = text + n;
00340
00341
00342 if( !m1 )
00343 {
00344 for( ;t<et; t++ )
00345 if( *t == *pat ) return t;
00346 return 0;
00347 }
00348
00349
00350 while( t < et )
00351 {
00352
00353 do
00354 {
00355 t += bcShift[*t];
00356 if( t >= et )return 0;;
00357
00358 t += (k=bcShift[*t]);
00359 if( t >= et )return 0;
00360
00361 } while( k );
00362
00363
00364 k = m1;
00365 q = t - m1;
00366 while( k >= 4 )
00367 {
00368 if( pat[k] != q[k] )goto NoMatch; k--;
00369 if( pat[k] != q[k] )goto NoMatch; k--;
00370 if( pat[k] != q[k] )goto NoMatch; k--;
00371 if( pat[k] != q[k] )goto NoMatch; k--;
00372 }
00373
00374 while( k >= 0 )
00375 {
00376 if( pat[k] != q[k] )goto NoMatch; k--;
00377 }
00378
00379 return q;
00380
00381 NoMatch:
00382
00383
00384 t++;
00385 }
00386
00387 return 0;
00388 }
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400 void * mwmNew()
00401 {
00402 MWM_STRUCT * p = (MWM_STRUCT * )calloc( sizeof(MWM_STRUCT),1 );
00403 if( !p )
00404 {
00405 return 0;
00406 }
00407
00408 init_xlatcase();
00409
00410 p->msSmallest = 32000;
00411
00412 return (void*)p;
00413 }
00414
00415
00416
00417
00418
00419 void mwmFree( void * pv )
00420 {
00421 MWM_STRUCT * p = (MWM_STRUCT * )pv;
00422 if( p )
00423 {
00424 if( p->msPatArray ) free( p->msPatArray );
00425 if( p->msNumArray ) free( p->msNumArray );
00426 if( p->msHash ) free( p->msHash );
00427 if( p->msShift2 ) free( p->msShift2 );
00428 free( p );
00429 }
00430 }
00431
00432
00433
00434
00435
00436
00437
00438
00439 int mwmAddPatternEx( void *pv, unsigned char * P, int m,
00440 unsigned noCase, unsigned offset, unsigned depth , void * id, int iid )
00441 {
00442 MWM_STRUCT *ps = (MWM_STRUCT*)pv;
00443 MWM_PATTERN_STRUCT *plist=0;
00444
00445 MWM_PATTERN_STRUCT *p = (MWM_PATTERN_STRUCT*)calloc(sizeof(MWM_PATTERN_STRUCT),1);
00446
00447 if( !p ) return -1;
00448
00449 #ifdef REQUIRE_UNIQUE_PATTERNS
00450 for( plist=ps->plist; plist!=NULL; plist=plist->next )
00451 {
00452 if( plist->psLen == m )
00453 {
00454 if( memcmp(P,plist->psPat,m) == 0 )
00455 {
00456 return 0;
00457 }
00458 }
00459 }
00460 #endif
00461
00462 if( ps->plist )
00463 {
00464 for( plist=ps->plist; plist->next!=NULL; plist=plist->next )
00465 ;
00466 plist->next = p;
00467 }
00468 else
00469 ps->plist = p;
00470
00471
00472
00473 p->psPat = (unsigned char*)malloc( m );
00474 if( !p->psPat ) return -1;
00475
00476 memcpy(p->psPat, P, m );
00477
00478 ConvCaseToUpper( p->psPat, m );
00479
00480
00481 p->psPatCase = (unsigned char*)malloc( m );
00482 if( !p->psPatCase ) return -1;
00483
00484 memcpy( p->psPatCase, P, m );
00485
00486 p->psLen = m;
00487 p->psID = id;
00488 p->psIID = iid;
00489 p->psNoCase = noCase;
00490 p->psOffset = offset;
00491 p->psDepth = depth;
00492
00493 ps->msNoCase += noCase;
00494
00495 ps->msNumPatterns++;
00496
00497 if( p->psLen < (unsigned)ps->msSmallest ) ps->msSmallest= p->psLen;
00498 if( p->psLen > (unsigned)ps->msLargest ) ps->msLargest = p->psLen;
00499
00500 ps->msTotal += p->psLen;
00501 ps->msAvg = ps->msTotal / ps->msNumPatterns;
00502
00503 return 1;
00504 }
00505
00506
00507
00508 #ifdef OLDSHIT
00509
00510
00511
00512
00513
00514
00515
00516 int mwmAddPatternExOrig( MWM_STRUCT *ps, unsigned char * P, int m,
00517 unsigned noCase, unsigned offset, unsigned depth ,unsigned id, int iid )
00518 {
00519 int kk;
00520
00521 MWM_PATTERN_STRUCT *p;
00522
00523 if( ps->msNumPatterns >= ps->msMaxPatterns )
00524 {
00525 return -1;
00526 }
00527
00528 #ifdef REQUIRE_UNIQUE_PATTERNS
00529 for(i=0;i<ps->msNumPatterns;i++)
00530 {
00531 if( ps->msPatArray[i].psLen == m )
00532 {
00533 if( memcmp(P,ps->msPatArray[i].psPat,m) == 0 )
00534 {
00535 return 0;
00536 }
00537 }
00538 }
00539 #endif
00540
00541 p = &ps->msPatArray[ ps->msNumPatterns ];
00542
00543
00544 p->psPat = (unsigned char*)malloc( m );
00545 memcpy(p->psPat, P, m );
00546
00547 ConvCaseToUpper( p->psPat, m );
00548
00549
00550 p->psPatCase = (unsigned char*)malloc( m );
00551 memcpy( p->psPatCase, P, m );
00552
00553 p->psLen = m;
00554 p->psID = id;
00555 p->psIID = iid;
00556 p->psNoCase = noCase;
00557 p->psOffset = offset;
00558 p->psDepth = depth;
00559
00560 ps->msNoCase += noCase;
00561
00562 kk = ps->msNumPatterns;
00563
00564 ps->msNumPatterns++;
00565
00566 if( ps->msPatArray[kk].psLen < (unsigned)ps->msSmallest ) ps->msSmallest= ps->msPatArray[kk].psLen;
00567 if( ps->msPatArray[kk].psLen > (unsigned)ps->msLargest ) ps->msLargest = ps->msPatArray[kk].psLen;
00568
00569 ps->msTotal += ps->msPatArray[kk].psLen;
00570 ps->msAvg = ps->msTotal / ps->msNumPatterns;
00571
00572 return 1;
00573 }
00574 #endif
00575
00576
00577
00578
00579
00580 int mwmAddPattern( void * pv, unsigned char * P, int m, unsigned id )
00581 {
00582
00583 return mwmAddPatternEx( pv, P, m, 0, id, 0, pv, 0 );
00584 }
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598 static int bcompare( unsigned char *a, int alen, unsigned char * b, int blen )
00599 {
00600 int stat;
00601 if( alen == blen )
00602 {
00603 return memcmp(a,b,alen);
00604 }
00605 else if( alen < blen )
00606 {
00607 if( (stat=memcmp(a,b,alen)) != 0 )
00608 return stat;
00609 return -1;
00610 }
00611 else
00612 {
00613 if( (stat=memcmp(a,b,blen)) != 0 )
00614 return stat;
00615 return +1;
00616 }
00617 }
00618
00619
00620
00621
00622 static int CDECL sortcmp( const void * e1, const void * e2 )
00623 {
00624 MWM_PATTERN_STRUCT *r1= (MWM_PATTERN_STRUCT*)e1;
00625 MWM_PATTERN_STRUCT *r2= (MWM_PATTERN_STRUCT*)e2;
00626 return bcompare( r1->psPat, r1->psLen, r2->psPat, r2->psLen );
00627 }
00628
00629
00630
00631
00632 static unsigned HASH16( unsigned char * T )
00633 {
00634 return (unsigned short) (((*T)<<8) | *(T+1));
00635 }
00636
00637
00638
00639
00640 static void mwmPrepHashedPatternGroups(MWM_STRUCT * ps)
00641 {
00642 unsigned sindex,hindex,ningroup;
00643 int i;
00644
00645
00646
00647
00648 ps->msNumHashEntries = HASHTABLESIZE;
00649 ps->msHash = (HASH_TYPE*)malloc( sizeof(HASH_TYPE) * ps->msNumHashEntries );
00650 if( !ps->msHash )
00651 {
00652 FatalError("No memory in mwmPrephashedPatternGroups()\n"
00653 "Try uncommenting the \"config detection: search-method\""
00654 "in snort.conf\n");
00655 }
00656
00657
00658 for(i=0;i<(int)ps->msNumHashEntries;i++)
00659 {
00660 ps->msHash[i] = (HASH_TYPE)-1;
00661 }
00662
00663
00664 for(i=0;i<256;i++)
00665 {
00666 ps->msHash1[i] = (HASH_TYPE)-1;
00667 }
00668
00669
00670
00671
00672 for(i=0;i<ps->msNumPatterns;i++)
00673 {
00674 if( ps->msPatArray[i].psLen > 1 )
00675 {
00676 hindex = HASH16(ps->msPatArray[i].psPat);
00677 sindex = ps->msHash[ hindex ] = i;
00678 ningroup = 1;
00679 while( (++i < ps->msNumPatterns) && (ps->msPatArray[i].psLen > 1) && (hindex==HASH16(ps->msPatArray[i].psPat)) )
00680 ningroup++;
00681 ps->msNumArray[ sindex ] = ningroup;
00682 i--;
00683 }
00684 else if( ps->msPatArray[i].psLen == 1 )
00685 {
00686 hindex = ps->msPatArray[i].psPat[0];
00687 sindex = ps->msHash1[ hindex ] = i;
00688 ningroup = 1;
00689
00690 while((++i < ps->msNumPatterns) && (hindex == ps->msPatArray[i].psPat[0]) && (ps->msPatArray[i].psLen == 1))
00691 ningroup++;
00692
00693 ps->msNumArray[ sindex ] = ningroup;
00694 i--;
00695 }
00696 }
00697
00698
00699 }
00700
00701
00702
00703
00704
00705 static void mwmPrepBadCharTable(MWM_STRUCT * ps)
00706 {
00707 unsigned short i, k, m, cindex, shift;
00708 unsigned small_value=32000, large_value=0;
00709
00710
00711 for(i=0;i<ps->msNumPatterns;i++)
00712 {
00713 if( ps->msPatArray[i].psLen < small_value ) small_value = ps->msPatArray[i].psLen;
00714 if( ps->msPatArray[i].psLen > large_value ) large_value = ps->msPatArray[i].psLen;
00715 }
00716
00717 m = (unsigned short) small_value;
00718
00719 if( m > 255 ) m = 255;
00720
00721 ps->msShiftLen = m;
00722
00723
00724 for(i=0;i<256;i++)
00725 {
00726 ps->msShift[i] = m;
00727 }
00728
00729
00730 for(i=0;i<ps->msNumPatterns;i++)
00731 {
00732 for(k=0;k<m;k++)
00733 {
00734 shift = (unsigned short)(m - 1 - k);
00735
00736 if( shift > 255 ) shift = 255;
00737
00738 cindex = ps->msPatArray[ i ].psPat[ k ];
00739
00740 if( shift < ps->msShift[ cindex ] )
00741 ps->msShift[ cindex ] = shift;
00742 }
00743 }
00744 }
00745
00746
00747
00748
00749 static void mbmPrepBadWordTable(MWM_STRUCT * ps)
00750 {
00751 int i;
00752 unsigned short k, m, cindex;
00753 unsigned small_value=32000, large_value=0;
00754 unsigned shift;
00755
00756 ps->msShift2 = (unsigned char *)malloc(BWSHIFTABLESIZE*sizeof(char));
00757 if( !ps->msShift2 )
00758 return;
00759
00760
00761 for(i=0;i<ps->msNumPatterns;i++)
00762 {
00763 if( ps->msPatArray[i].psLen < small_value ) small_value = ps->msPatArray[i].psLen;
00764 if( ps->msPatArray[i].psLen > large_value ) large_value = ps->msPatArray[i].psLen;
00765 }
00766
00767 m = (unsigned short) small_value;
00768
00769
00770 if( m > 255 ) m = 255;
00771
00772 ps->msShiftLen = m;
00773
00774
00775 for(i=0;i<BWSHIFTABLESIZE;i++)
00776 {
00777 ps->msShift2[i] = (unsigned)(m-1);
00778 }
00779
00780
00781
00782 for(i=0;i<ps->msNumPatterns;i++)
00783 {
00784 for(k=0;k<m-1;k++)
00785 {
00786 shift = (unsigned short)(m - 2 - k);
00787
00788 if( shift > 255 ) shift = 255;
00789
00790 cindex = ( ps->msPatArray[i].psPat[ k ] | (ps->msPatArray[i].psPat[k+1]<<8) );
00791
00792 if( shift < ps->msShift2[ cindex ] )
00793 ps->msShift2[ cindex ] = shift;
00794 }
00795 }
00796 }
00797
00798
00799
00800
00801
00802
00803
00804 void mwmShowStats( void * pv )
00805 {
00806 MWM_STRUCT * ps = (MWM_STRUCT*)pv;
00807
00808 int i;
00809 printf("Pattern Stats\n");
00810 printf("-------------\n");
00811 printf("Patterns : %d\n" , ps->msNumPatterns);
00812 printf("Average : %d chars\n", ps->msAvg);
00813 printf("Smallest : %d chars\n", ps->msSmallest);
00814 printf("Largest : %d chars\n", ps->msLargest);
00815 printf("Total chars: %d\n" , ps->msTotal);
00816
00817 for(i=0;i<ps->msLargest+1;i++)
00818 {
00819 if( ps->msLengths[i] )
00820 printf("Len[%d] : %d patterns\n", i, ps->msLengths[i] );
00821 }
00822
00823 printf("\n");
00824
00825 }
00826
00827
00828
00829
00830 static void mwmAnalyzePattens( MWM_STRUCT * ps )
00831 {
00832 int i;
00833
00834 ps->msLengths= (int*) malloc( sizeof(int) * (ps->msLargest+1) );
00835
00836 if( ps->msLengths )
00837 {
00838 memset( ps->msLengths, 0, sizeof(int) * (ps->msLargest+1) );
00839
00840 for(i=0;i<ps->msNumPatterns;i++)
00841 {
00842 ps->msLengths[ ps->msPatArray[i].psLen ]++;
00843 }
00844 }
00845 }
00846
00847
00848
00849
00850
00851
00852
00853 void mwmLargeShifts( void * pv, int flag)
00854 {
00855 MWM_STRUCT * ps = (MWM_STRUCT*)pv;
00856 ps->msLargeShifts = flag;
00857 }
00858
00859
00860
00861
00862 int mwmGetNumPatterns( void * pv )
00863 {
00864 MWM_STRUCT *p = (MWM_STRUCT*)pv;
00865 return p->msNumPatterns;
00866 }
00867
00868
00869 #ifdef BITOP_TEST
00870
00871
00872
00873 void mwmSetRuleMask( void * pv, BITOP * rm )
00874 {
00875 MWM_STRUCT * ps = (MWM_STRUCT*) pv;
00876 ps->RuleMask = rm;
00877 }
00878 #endif
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888 static
00889 INLINE
00890 int mwmGroupMatch2( MWM_STRUCT * ps,
00891 int index,
00892 unsigned char * Tx,
00893 unsigned char * T,
00894 unsigned char * Tc,
00895 int Tleft,
00896 void * data,
00897 int (*match)(void*,int,void*) )
00898 {
00899 int k, nfound=0;
00900 MWM_PATTERN_STRUCT * patrn;
00901 MWM_PATTERN_STRUCT * patrnEnd;
00902
00903
00904 patrn = &ps->msPatArray[index];
00905 patrnEnd = patrn + ps->msNumArray[index];
00906
00907
00908 for( ;patrn < patrnEnd; patrn++ )
00909 {
00910 unsigned char *p, *q;
00911
00912
00913 if( (unsigned)Tleft < patrn->psLen )
00914 continue;
00915
00916 #ifdef BITOP_TEST
00917
00918 if( ps->RuleMask )
00919 {
00920 if( boIsBitSet( ps->RuleMask, patrn->psIID ) )
00921 continue;
00922 }
00923 #endif
00924
00925
00926 k = patrn->psLen - HASHBYTES16 - 1;
00927 q = patrn->psPat + HASHBYTES16;
00928 p = T + HASHBYTES16;
00929
00930
00931 while( k >= 0 && (q[k] == p[k]) ) k--;
00932
00933
00934 if( k < 0 )
00935 {
00936 if( Tc && !patrn->psNoCase )
00937 {
00938
00939 if( !memcmp(patrn->psPatCase,&Tc[T-Tx],patrn->psLen) )
00940 {
00941 nfound++;
00942 if( match( patrn->psID, (int)(T-Tx), data ) )
00943 return -(nfound+1);
00944 }
00945
00946 }else{
00947 nfound++;
00948 if( match( patrn->psID, (int)(T-Tx), data ) )
00949 return -(nfound+1);
00950 }
00951 }
00952 }
00953
00954 return nfound;
00955 }
00956
00957
00958
00959
00960
00961
00962
00963
00964
00965 static
00966
00967 int mwmSearchExNoBC( MWM_STRUCT *ps,
00968 unsigned char * Tx, int n, unsigned char * Tc,
00969 int(*match)( void * id, int index,void* data ),
00970 void * data )
00971 {
00972 int Tleft, index, nfound, ng;
00973 unsigned char *T, *Tend, *B;
00974 MWM_PATTERN_STRUCT *patrn, *patrnEnd;
00975
00976 nfound = 0;
00977
00978 Tleft = n;
00979 Tend = Tx + n;
00980
00981
00982 if( (unsigned)n < ps->msShiftLen )
00983 return 0;
00984
00985
00986 for(T = Tx, B = Tx + ps->msShiftLen - 1; B < Tend; T++, B++, Tleft--)
00987 {
00988
00989 if( (index = ps->msHash1[*T]) != (HASH_TYPE)-1 )
00990 {
00991 patrn = &ps->msPatArray[index];
00992 patrnEnd = patrn + ps->msNumArray[index];
00993
00994 for( ;patrn < patrnEnd; patrn++ )
00995 {
00996 if( Tc && !patrn->psNoCase )
00997 {
00998 if( patrn->psPatCase[0] == Tc[T-Tx] )
00999 {
01000 nfound++;
01001 if(match(patrn->psID, (int)(T-Tx), data))
01002 return nfound;
01003 }
01004 }
01005 else
01006 {
01007 nfound++;
01008 if(match(patrn->psID, (int)(T-Tx), data))
01009 return nfound;
01010 }
01011 }
01012 }
01013
01014
01015
01016
01017
01018 if( Tleft == 1 )
01019 return nfound;
01020
01021
01022
01023
01024
01025 if( (index = ps->msHash [ ( (*T)<<8 ) | *(T+1) ] ) == (HASH_TYPE)-1 )
01026 continue;
01027
01028
01029 ng = mwmGroupMatch2( ps, index,Tx, T, Tc, Tleft, data, match );
01030 if( ng < 0 )
01031 {
01032 ng = -ng;
01033 ng--;
01034 nfound += ng;
01035
01036 return nfound;
01037 }
01038 else
01039 {
01040 nfound += ng;
01041 }
01042 }
01043
01044 return nfound;
01045 }
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055 static
01056
01057 int mwmSearchExBC( MWM_STRUCT *ps,
01058 unsigned char * Tx, int n, unsigned char * Tc,
01059 int(*match)( void * id, int index, void * data ),
01060 void * data )
01061 {
01062 int Tleft, index, nfound, tshift,ng;
01063 unsigned char *T, *Tend, *B;
01064
01065
01066 nfound = 0;
01067
01068 Tleft = n;
01069 Tend = Tx + n;
01070
01071
01072 if( (unsigned)n < ps->msShiftLen )
01073 return 0;
01074
01075
01076
01077 for( T = Tx, B = Tx + ps->msShiftLen - 1; B < Tend; T++, B++, Tleft-- )
01078 {
01079
01080 while( (tshift=ps->msShift[*B]) > 0 )
01081 {
01082 B += tshift; T += tshift; Tleft -= tshift;
01083 if( B >= Tend ) return nfound;
01084
01085 tshift=ps->msShift[*B];
01086 B += tshift; T += tshift; Tleft -= tshift;
01087 if( B >= Tend ) return nfound;
01088 }
01089
01090
01091
01092 if( Tleft == 1 )
01093 return nfound;
01094
01095
01096 if( (index = ps->msHash [ ( (*T)<<8 ) | *(T+1) ] ) == (HASH_TYPE)-1 )
01097 continue;
01098
01099
01100 ng = mwmGroupMatch2( ps, index,Tx, T, Tc, Tleft, data, match );
01101 if( ng < 0 )
01102 {
01103 ng = -ng;
01104 ng--;
01105 nfound += ng;
01106 return nfound;
01107 }
01108 else
01109 {
01110 nfound += ng;
01111 }
01112
01113
01114
01115 }
01116
01117 return nfound;
01118 }
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128 static
01129 int mwmSearchExBW( MWM_STRUCT *ps,
01130 unsigned char * Tx, int n, unsigned char * Tc,
01131 int(*match) ( void * id, int index,void* data ),
01132 void * data )
01133
01134 {
01135 int Tleft, index, nfound, tshift, ng;
01136 unsigned char *T, *Tend, *B;
01137
01138 nfound = 0;
01139
01140 Tleft = n;
01141 Tend = Tx + n;
01142
01143
01144 if( (unsigned)n < ps->msShiftLen )
01145 return 0;
01146
01147
01148 for( T = Tx, B = Tx + ps->msShiftLen - 1; B < Tend; T++, B++, Tleft-- )
01149 {
01150
01151 tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ];
01152 while( tshift )
01153 {
01154 B += tshift; T += tshift; Tleft -= tshift;
01155 if( B >= Tend ) return nfound;
01156 tshift = ps->msShift2[ ((*B)<<8) | *(B-1) ];
01157 }
01158
01159
01160 if( Tleft == 1 ) return nfound;
01161
01162
01163
01164 if( (index = ps->msHash [ ( (*T)<<8 ) | *(T+1) ] ) == (HASH_TYPE)-1 )
01165 continue;
01166
01167
01168
01169 ng = mwmGroupMatch2( ps, index,Tx, T, Tc, Tleft, data, match );
01170 if( ng < 0 )
01171 {
01172 ng = -ng;
01173 ng--;
01174 nfound += ng;
01175 return nfound;
01176 }
01177 else
01178 {
01179 nfound += ng;
01180 }
01181
01182 }
01183
01184 return nfound;
01185 }
01186
01187
01188
01189 static void show_bytes(unsigned n, unsigned char *p)
01190 {
01191 int i;
01192 for(i=0;i<(int)n;i++)
01193 {
01194 if( p[i] >=32 && p[i]<=127 )printf("%c",p[i]);
01195 else printf("\\x%2.2X",p[i]);
01196 }
01197
01198 }
01199
01200
01201
01202
01203 void mwmGroupDetails( void * pv )
01204 {
01205 MWM_STRUCT * ps = (MWM_STRUCT*)pv;
01206 int index,i, m, gmax=0, total=0,gavg=0,subgroups;
01207 static int k=0;
01208 MWM_PATTERN_STRUCT *patrn, *patrnEnd;
01209
01210 printf("*** MWM-Pattern-Group: %d\n",k++);
01211
01212 subgroups=0;
01213 for(i=0;i<65536;i++)
01214 {
01215 if( (index = ps->msHash [i]) == (HASH_TYPE)-1 )
01216 continue;
01217
01218 patrn = &ps->msPatArray[index];
01219 patrnEnd = patrn + ps->msNumArray[index];
01220
01221 printf(" Sub-Pattern-Group: %d-%d\n",subgroups,i);
01222
01223 subgroups++;
01224
01225 for( m=0; patrn < patrnEnd; m++, patrn++ )
01226 {
01227 printf(" Pattern[%d] : ",m);
01228
01229 show_bytes(patrn->psLen,patrn->psPat);
01230
01231 printf("\n");
01232 }
01233
01234 if( m > gmax ) gmax = m;
01235
01236 total+=m;
01237
01238 gavg = total / subgroups;
01239 }
01240
01241 printf("Total Group Patterns : %d\n",total);
01242 printf(" Number of Sub-Groups : %d\n",subgroups);
01243 printf(" Sub-Group Max Patterns: %d\n",gmax);
01244 printf(" Sub-Group Avg Patterns: %d\n",gavg);
01245
01246 }
01247
01248
01249
01250
01251
01252
01253 int mwmPrepPatterns( void * pv )
01254 {
01255 MWM_STRUCT * ps = (MWM_STRUCT *) pv;
01256 int kk;
01257 MWM_PATTERN_STRUCT * plist;
01258
01259
01260 ps->msPatArray = (MWM_PATTERN_STRUCT*)calloc( sizeof(MWM_PATTERN_STRUCT), ps->msNumPatterns );
01261 if( !ps->msPatArray )
01262 {
01263 return -1;
01264 }
01265 ps->msNumArray = (unsigned short *)calloc( sizeof(short), ps->msNumPatterns );
01266 if( !ps->msNumArray )
01267 {
01268 return -1;
01269 }
01270
01271
01272 for( kk=0, plist = ps->plist; plist!=NULL && kk < ps->msNumPatterns; plist=plist->next )
01273 {
01274 memcpy( &ps->msPatArray[kk++], plist, sizeof(MWM_PATTERN_STRUCT) );
01275 }
01276
01277 mwmAnalyzePattens( ps );
01278
01279
01280 qsort( ps->msPatArray, ps->msNumPatterns, sizeof(MWM_PATTERN_STRUCT), sortcmp );
01281
01282
01283 mwmPrepHashedPatternGroups(ps);
01284
01285
01286 if( ps->msNumPatterns < 5 )
01287 {
01288 ps->msMethod = MTH_BM;
01289 }
01290 else
01291 {
01292 ps->msMethod = MTH_MWM;
01293 }
01294
01295
01296
01297 if( ps->msMethod == MTH_MWM )
01298 {
01299
01300 mwmPrepBadCharTable(ps);
01301
01302
01303 if( (ps->msShiftLen > 1) && ps->msLargeShifts )
01304 {
01305 mbmPrepBadWordTable( ps );
01306 }
01307
01308
01309 if( ps->msShiftLen == 1 )
01310 {
01311 ps->search = mwmSearchExNoBC;
01312 }
01313
01314 else if( (ps->msShiftLen > 1) && !ps->msLargeShifts )
01315 {
01316 ps->search = mwmSearchExBC;
01317 }
01318
01319 else if( (ps->msShiftLen > 1) && ps->msLargeShifts && ps->msShift2 )
01320 {
01321 ps->search = mwmSearchExBW;
01322 }
01323
01324 else
01325 {
01326 ps->search = mwmSearchExBC;
01327 }
01328 #ifdef XXXX
01329
01330
01331 #endif
01332 }
01333
01334
01335 if( ps->msMethod == MTH_BM )
01336 {
01337 int i;
01338
01339
01340 for(i=0;i<ps->msNumPatterns;i++)
01341 {
01342 ps->msPatArray[ i ].psBmh = hbm_prep( ps->msPatArray[ i ].psPat, ps->msPatArray[ i ].psLen );
01343 }
01344 }
01345
01346 return 0;
01347 }
01348
01349
01350
01351
01352 int mwmSearch( void * pv,
01353 unsigned char * T, int n,
01354 int(*match)( void * id, int index, void * data ),
01355 void * data )
01356 {
01357
01358 MWM_STRUCT * ps = (MWM_STRUCT*)pv;
01359 unsigned char *s;
01360 iPatCount += n;
01361
01362
01363 ConvCaseToUpperEx( S, T, n );
01364
01365
01366 if( ps->msMethod == MTH_BM )
01367 {
01368
01369
01370 int i,nfound=0;
01371 unsigned char * Tx = NULL;
01372
01373 for( i=0; i<ps->msNumPatterns; i++ )
01374 {
01375 s = &S[0];
01376 do
01377 {
01378 Tx = hbm_match( ps->msPatArray[i].psBmh, s, n );
01379
01380 if( Tx )
01381 {
01382
01383 if( !ps->msPatArray[i].psNoCase )
01384 {
01385 if( memcmp(ps->msPatArray[i].psPatCase,&T[Tx-S],ps->msPatArray[i].psLen) )
01386 {
01387 if (++Tx < s + n)
01388 {
01389 n--;
01390 s++;
01391 }
01392 else
01393 {
01394 n = 0;
01395 }
01396 continue;
01397
01398 }
01399 }
01400
01401 nfound++;
01402
01403 if( match(ps->msPatArray[i].psID, (int)(Tx-S),data) )
01404 return nfound;
01405 }
01406
01407 break;
01408 } while (n >= 0);
01409 }
01410
01411 return nfound;
01412
01413 }
01414 else
01415 {
01416
01417
01418 return ps->search( ps, S, n, T, match, data );
01419 }
01420 }
01421
01422
01423
01424
01425
01426 void mwmFeatures(void)
01427 {
01428 printf("%s\n",MWM_FEATURES);
01429 }
01430
01431 #ifdef MWM_MAIN
01432 int FatalError( char * s )
01433 {
01434 printf("FatalError: %s\n",s);
01435 exit(0);
01436 }
01437
01438
01439
01440 char * patArray[10000];
01441
01442
01443
01444
01445 static int match ( void* id, int index, void * data )
01446 {
01447 printf(" pattern matched: index= %d, id=%d, %s \n", index, id, patArray[(int)id] );
01448 return 0;
01449 }
01450
01451
01452
01453
01454 typedef struct
01455 {
01456 unsigned char * b;
01457 int blen;
01458 }BINARY;
01459
01460
01461
01462 int gethex( int c )
01463 {
01464
01465 if( c >= 'A' && c <= 'F' ) return c -'A' + 10;
01466 if( c >= 'a' && c <= 'f' ) return c -'a' + 10;
01467 if( c >= '0' && c <= '9' ) return c -'0';
01468
01469 return 0;
01470 }
01471
01472
01473 BINARY * converthexbytes( unsigned char * s)
01474 {
01475 int val, k=0, m;
01476 BINARY * p;
01477 int len = strlen(s);
01478
01479 printf("--input hex: %s\n",s);
01480
01481 p = malloc( sizeof(BINARY) );
01482
01483 p->b = malloc( len / 2 );
01484 p->blen= len / 2;
01485
01486 while( *s )
01487 {
01488 val = gethex(*s);
01489 s++;
01490 val <<= 4;
01491
01492 if( !*s ) break;
01493
01494 val |= gethex(*s);
01495 s++;
01496
01497 p->b[k++] = val;
01498 }
01499
01500 if( k != p->blen )
01501 {
01502 printf("hex length mismatch\n");
01503 }
01504
01505 printf("--output hex[%d]: ",p->blen); for(m=0;m<p->blen;m++) printf("%2.2x", p->b[m]);
01506
01507 printf(" \n");
01508
01509 return p;
01510 }
01511
01512
01513
01514
01515 BINARY * syndata( int nbytes, int irand, int repchar )
01516 {
01517 BINARY * p =(BINARY*)malloc( sizeof(BINARY) );
01518 if( ! p ) return 0;
01519
01520 p->b = (unsigned char *)malloc( nbytes );
01521 if( ! p->b ) return 0;
01522 p->blen = nbytes;
01523
01524 if( irand )
01525 {
01526 int i;
01527
01528 srand( time(0) );
01529
01530 for(i=0;i<nbytes;i++)
01531 {
01532 p->b[i] = (unsigned)( rand() & 0xff );
01533 }
01534
01535 }
01536 else
01537 {
01538 memset(p->b,repchar,nbytes);
01539 }
01540 return p;
01541 }
01542
01543
01544 int randpat( unsigned char * s, int imin )
01545 {
01546 int i,len;
01547
01548 static int first=1;
01549
01550 if( first )
01551 {
01552 first=0;
01553 srand(time(0));
01554 }
01555
01556 while( 1 )
01557 {
01558 len = rand() & 0xf;
01559 if( len >= imin ) break;
01560 }
01561
01562
01563 for(i=0;i<len;i++)
01564 {
01565 s[i] = 'a' + ( rand() % 26 );
01566 }
01567
01568 s[len]=0;
01569
01570 printf("--%s\n",s);
01571 return len;
01572 }
01573
01574
01575
01576
01577 int CDECL main ( int argc, char ** argv )
01578 {
01579 unsigned char *T, *text;
01580 int n,textlen;
01581 int nmatches, i, bm = 0;
01582 MWM_STRUCT *ps;
01583 int npats=0, len, stat, nocase=0;
01584 BINARY *p;
01585 int irep=0,irand=0,isyn=0,nrep=1024,repchar=0;
01586
01587 if( argc < 5 )
01588 {
01589 printf("usage: %s [-rand|-rep -n bytes -c|ch repchar -pr npats minlen ] -t|th TextToSearch -nocase [-pr numpats] -p|ph pat1 -p|ph pat2 -p|ph pat3\n",argv[0]);
01590 exit(1);
01591 }
01592
01593
01594 ps = mwmNew();
01595
01596 for(i=1;i<argc;i++)
01597 {
01598 if( strcmp(argv[i],"-nocase")==0 )
01599 {
01600 nocase = 1;
01601 }
01602 if( strcmp(argv[i],"-p")==0 )
01603 {
01604 i++;
01605 npats++;
01606 patArray[npats] = argv[i];
01607 len = strlen( argv[i] );
01608
01609 mwmAddPatternEx( ps, (unsigned char*)argv[i], len, nocase, 0, 0, (void*)npats , 3000 );
01610 }
01611 if( strcmp(argv[i],"-ph")==0 )
01612 {
01613 i++;
01614 npats++;
01615 patArray[npats] = argv[i];
01616 p = converthexbytes( argv[i] );
01617
01618 mwmAddPatternEx( ps, p->b, p->blen, 1 , 0, 0, (void*)npats , 3000 );
01619 }
01620 if( strcmp(argv[i],"-pr")==0 )
01621 {
01622 int m = atoi( argv[++i] );
01623 int imin = atoi( argv[++i] );
01624 int k;
01625 npats = 0;
01626 for(k=0;k<m;k++)
01627 {
01628 unsigned char px[256];
01629 int len = randpat( px, imin );
01630 npats++;
01631 patArray[npats] = (unsigned char *)malloc( len+1 );
01632 sprintf(patArray[npats],"%s",px);
01633 mwmAddPatternEx( ps, px, len, 0 , 0, 0, (void*)npats , 3000 );
01634 }
01635 }
01636 if( strcmp(argv[i],"-rand")==0 )
01637 {
01638 irand = 1;
01639 isyn = 1;
01640 }
01641 if( strcmp(argv[i],"-rep")==0 )
01642 {
01643 irep = 1;
01644 isyn = 1;
01645 }
01646 if( strcmp(argv[i],"-n")==0 )
01647 {
01648 nrep = atoi( argv[++i] );
01649 isyn = 1;
01650 }
01651 if( strcmp(argv[i],"-c")==0 )
01652 {
01653 repchar = argv[++i][0];
01654 isyn = 1;
01655 }
01656 if( strcmp(argv[i],"-ch")==0 )
01657 {
01658 BINARY * px = converthexbytes( argv[++i] );
01659 repchar = px->b[0];
01660 isyn = 1;
01661 }
01662 if( strcmp(argv[i],"-t")==0 )
01663 {
01664 i++;
01665 text = argv[i];
01666 textlen=strlen(text);
01667 }
01668 if( strcmp(argv[i],"-th")==0 )
01669 {
01670 i++;
01671 p = converthexbytes( argv[i] );
01672 text = p->b;
01673 textlen = p->blen;
01674 }
01675 }
01676
01677
01678 if( isyn )
01679 {
01680
01681 p = syndata( nrep, irand, repchar );
01682 text = p->b;
01683 textlen = p->blen;
01684 }
01685
01686
01687 mwmPrepPatterns( ps );
01688
01689
01690
01691 stat = mwmSearch( (void*)ps, (unsigned char*)text, textlen, match, 0 );
01692
01693
01694 if( stat == 0 )
01695 {
01696 printf("no pattern matches\n");
01697 }else{
01698 printf("%d pattern matches in list\n",stat);
01699 }
01700
01701 return 0;
01702 }
01703 #endif
01704
01705