Mercurial > hg > Members > masakoha > testcode
annotate regex/main.cc @ 40:c25b75f764a7
add Regex.h
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 02 Mar 2015 22:18:37 +0900 |
parents | 120c8116e831 |
children | e1c5ecbf8836 |
rev | line source |
---|---|
38 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <unistd.h> | |
4 #include <math.h> | |
5 #include <string.h> | |
6 #include <sys/mman.h> | |
7 #include <sys/stat.h> | |
8 #include <sys/types.h> | |
9 #include <fcntl.h> | |
40
c25b75f764a7
add Regex.h
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
39
diff
changeset
|
10 #include "Regex.h" |
38 | 11 |
12 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; | |
13 | |
14 //Boyer Moore法に使用するテーブルを作成 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
15 int * |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
16 createBMskiptable(BMDataPtr bmdata) |
38 | 17 { |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
18 int searchWordLen = bmdata->searchWordLen; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
19 char *searchWord = bmdata->searchWord; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
20 int* skipTable = (int*)malloc(sizeof(int)*256); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
21 |
38 | 22 for(int i = 0; i < 256; ++i){ |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
23 bmdata->skipTable[i] = searchWordLen; |
38 | 24 } |
25 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
26 for(int j = 0; j < searchWordLen - 1; ++j){ |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
27 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1; |
38 | 28 } |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
29 return skipTable; |
38 | 30 } |
31 | |
32 //Boyer Moore法による文字列検索アルゴリズム | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
33 static void* BMmethod(BMDataPtr bmdata,ResultPtr result) |
38 | 34 { |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
35 int i = bmdata->searchWordLen - 1; |
38 | 36 int matchCounter = 0; |
37 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
38 while ( i < bmdata->readTextLen){ |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
39 int j = bmdata->searchWordLen - 1; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
40 while (bmdata->readText[i] == bmdata->searchWord[j]){ |
38 | 41 if (j == 0){ |
42 matchCounter++; | |
43 } | |
44 --i; | |
45 --j; | |
46 } | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
47 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j); |
38 | 48 } |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
49 |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
50 result->matchNum = matchCounter; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
51 return result; |
38 | 52 } |
53 | |
54 int main(int argc, char* argv[]) { | |
55 | |
56 char *filename = 0; | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
57 char *searchWord = 0; |
38 | 58 |
59 // check argument | |
60 for (int i = 0; argv[i]; i++) { | |
61 if (strcmp(argv[i], "-file") == 0) { | |
62 filename = (char*)argv[i+1]; i++; | |
63 }else if (strcmp(argv[i], "-sw") == 0) { | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
64 searchWord = (char*)argv[i+1]; i++; |
38 | 65 } |
66 } | |
67 | |
68 // prepare file read | |
69 if (filename == 0) { | |
70 puts(usr_help_str); | |
71 exit(1); | |
72 } | |
73 | |
74 struct stat sb; | |
75 long fd = 0; | |
76 char *textfile; | |
77 | |
78 if ((fd=open(filename,O_RDONLY,0666))==0) { | |
79 fprintf(stderr,"can't open %s\n",filename); | |
80 } | |
81 | |
82 if (fstat(fd,&sb)) { | |
83 fprintf(stderr,"can't fstat %s\n",filename); | |
84 } | |
85 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
86 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData)); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
87 |
38 | 88 textfile = (char*)malloc(sb.st_size); |
89 read(fd,textfile,sb.st_size); | |
90 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
91 bmdata->readText = textfile; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
92 bmdata->readTextLen = sb.st_size; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
93 bmdata->skipTable = (int*)malloc(sizeof(int)*256); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
94 |
38 | 95 if (textfile == (char*)-1) { |
96 fprintf(stderr,"Can't mmap file\n"); | |
97 perror(NULL); | |
98 exit(0); | |
99 } | |
100 | |
101 // prepare Boyer Moore Search | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
102 bmdata->searchWord = searchWord; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
103 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
104 bmdata->skipTable = createBMskiptable(bmdata); |
38 | 105 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
106 ResultPtr result = (ResultPtr)malloc(sizeof(Result)); |
38 | 107 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
108 BMmethod(bmdata,result); |
38 | 109 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
110 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
111 printf("Match : %d\n",result->matchNum); |
38 | 112 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
113 free(result); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
114 free(bmdata); |
38 | 115 return 0; |
116 } |