Mercurial > hg > Applications > Grep
annotate regex/main.cc @ 39:120c8116e831
refactoring
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 02 Mar 2015 22:15:55 +0900 |
parents | d15b9d342421 |
children | c25b75f764a7 |
rev | line source |
---|---|
38 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <unistd.h> | |
4 #include <math.h> | |
5 #include <string.h> | |
6 #include <sys/mman.h> | |
7 #include <sys/stat.h> | |
8 #include <sys/types.h> | |
9 #include <fcntl.h> | |
10 | |
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; | |
12 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
13 typedef struct result { |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
14 int matchNum; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
15 int matchLineNum; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
16 char* matchLine; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
17 } Result, *ResultPtr; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
18 |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
19 typedef struct bmData { |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
20 int* skipTable; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
21 char* readText; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
22 int readTextLen; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
23 char* searchWord; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
24 int searchWordLen; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
25 } BMData, *BMDataPtr; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
26 |
38 | 27 //Boyer Moore法に使用するテーブルを作成 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
28 int * |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
29 createBMskiptable(BMDataPtr bmdata) |
38 | 30 { |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
31 int searchWordLen = bmdata->searchWordLen; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
32 char *searchWord = bmdata->searchWord; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
33 int* skipTable = (int*)malloc(sizeof(int)*256); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
34 |
38 | 35 for(int i = 0; i < 256; ++i){ |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
36 bmdata->skipTable[i] = searchWordLen; |
38 | 37 } |
38 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
39 for(int j = 0; j < searchWordLen - 1; ++j){ |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
40 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1; |
38 | 41 } |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
42 return skipTable; |
38 | 43 } |
44 | |
45 //Boyer Moore法による文字列検索アルゴリズム | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
46 static void* BMmethod(BMDataPtr bmdata,ResultPtr result) |
38 | 47 { |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
48 int i = bmdata->searchWordLen - 1; |
38 | 49 int matchCounter = 0; |
50 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
51 while ( i < bmdata->readTextLen){ |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
52 int j = bmdata->searchWordLen - 1; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
53 while (bmdata->readText[i] == bmdata->searchWord[j]){ |
38 | 54 if (j == 0){ |
55 matchCounter++; | |
56 } | |
57 --i; | |
58 --j; | |
59 } | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
60 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j); |
38 | 61 } |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
62 |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
63 result->matchNum = matchCounter; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
64 return result; |
38 | 65 } |
66 | |
67 int main(int argc, char* argv[]) { | |
68 | |
69 char *filename = 0; | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
70 char *searchWord = 0; |
38 | 71 |
72 // check argument | |
73 for (int i = 0; argv[i]; i++) { | |
74 if (strcmp(argv[i], "-file") == 0) { | |
75 filename = (char*)argv[i+1]; i++; | |
76 }else if (strcmp(argv[i], "-sw") == 0) { | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
77 searchWord = (char*)argv[i+1]; i++; |
38 | 78 } |
79 } | |
80 | |
81 // prepare file read | |
82 if (filename == 0) { | |
83 puts(usr_help_str); | |
84 exit(1); | |
85 } | |
86 | |
87 struct stat sb; | |
88 long fd = 0; | |
89 char *textfile; | |
90 | |
91 if ((fd=open(filename,O_RDONLY,0666))==0) { | |
92 fprintf(stderr,"can't open %s\n",filename); | |
93 } | |
94 | |
95 if (fstat(fd,&sb)) { | |
96 fprintf(stderr,"can't fstat %s\n",filename); | |
97 } | |
98 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
99 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData)); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
100 |
38 | 101 textfile = (char*)malloc(sb.st_size); |
102 read(fd,textfile,sb.st_size); | |
103 | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
104 bmdata->readText = textfile; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
105 bmdata->readTextLen = sb.st_size; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
106 bmdata->skipTable = (int*)malloc(sizeof(int)*256); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
107 |
38 | 108 if (textfile == (char*)-1) { |
109 fprintf(stderr,"Can't mmap file\n"); | |
110 perror(NULL); | |
111 exit(0); | |
112 } | |
113 | |
114 // prepare Boyer Moore Search | |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
115 bmdata->searchWord = searchWord; |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
116 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
117 bmdata->skipTable = createBMskiptable(bmdata); |
38 | 118 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
119 ResultPtr result = (ResultPtr)malloc(sizeof(Result)); |
38 | 120 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
121 BMmethod(bmdata,result); |
38 | 122 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
123 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
124 printf("Match : %d\n",result->matchNum); |
38 | 125 |
39
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
126 free(result); |
120c8116e831
refactoring
Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp>
parents:
38
diff
changeset
|
127 free(bmdata); |
38 | 128 return 0; |
129 } |