Mercurial > hg > Applications > Grep
annotate regex/bmsearch.cc @ 41:e1c5ecbf8836
add bmsearch.cc
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 02 Mar 2015 23:35:10 +0900 |
parents | |
children | ead0a307449e |
rev | line source |
---|---|
41
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2 #include <stdlib.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 #include <unistd.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <math.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 #include <string.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 #include <sys/mman.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 #include <sys/stat.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 #include <sys/types.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 #include <fcntl.h> |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 #include "regex.h" |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 //Boyer Moore法に使用するテーブルを作成 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 int * |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 createBMskiptable(BMDataPtr bmdata) |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 { |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 int searchWordLen = bmdata->searchWordLen; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
17 char *searchWord = bmdata->searchWord; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 int* skipTable = (int*)malloc(sizeof(int)*256); |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 for(int i = 0; i < 256; ++i){ |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 bmdata->skipTable[i] = searchWordLen; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 for(int j = 0; j < searchWordLen - 1; ++j){ |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 return skipTable; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 //Boyer Moore法による文字列検索アルゴリズム |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 void* BMmethod(BMDataPtr bmdata,ResultPtr result) |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 { |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 int i = bmdata->searchWordLen - 1; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 int matchCounter = 0; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 while ( i < bmdata->readTextLen){ |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 int j = bmdata->searchWordLen - 1; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 while (bmdata->readText[i] == bmdata->searchWord[j]){ |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 if (j == 0){ |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 matchCounter++; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42 --i; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 --j; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j); |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 result->matchNum = matchCounter; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 return result; |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 } |
e1c5ecbf8836
add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 |