annotate regex/bmsearch.cc @ 41:e1c5ecbf8836

add bmsearch.cc
author Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
date Mon, 02 Mar 2015 23:35:10 +0900
parents
children ead0a307449e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
41
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include <stdio.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <stdlib.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 #include <unistd.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 #include <math.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <string.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <sys/mman.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include <sys/stat.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include <sys/types.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 #include <fcntl.h>
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 #include "regex.h"
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 //Boyer Moore法に使用するテーブルを作成
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 int *
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 createBMskiptable(BMDataPtr bmdata)
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 {
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 int searchWordLen = bmdata->searchWordLen;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 char *searchWord = bmdata->searchWord;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 int* skipTable = (int*)malloc(sizeof(int)*256);
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 for(int i = 0; i < 256; ++i){
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 bmdata->skipTable[i] = searchWordLen;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 for(int j = 0; j < searchWordLen - 1; ++j){
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 return skipTable;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 //Boyer Moore法による文字列検索アルゴリズム
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 void* BMmethod(BMDataPtr bmdata,ResultPtr result)
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 {
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 int i = bmdata->searchWordLen - 1;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 int matchCounter = 0;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36 while ( i < bmdata->readTextLen){
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 int j = bmdata->searchWordLen - 1;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 while (bmdata->readText[i] == bmdata->searchWord[j]){
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 if (j == 0){
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 matchCounter++;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 --i;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 --j;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j);
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 result->matchNum = matchCounter;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 return result;
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 }
e1c5ecbf8836 add bmsearch.cc
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51