Mercurial > hg > Members > masakoha > testcode
comparison regex/main.cc @ 39:120c8116e831
refactoring
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 02 Mar 2015 22:15:55 +0900 |
parents | d15b9d342421 |
children | c25b75f764a7 |
comparison
equal
deleted
inserted
replaced
38:d15b9d342421 | 39:120c8116e831 |
---|---|
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include <fcntl.h> | 9 #include <fcntl.h> |
10 | 10 |
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; | 11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; |
12 | 12 |
13 typedef struct result { | |
14 int matchNum; | |
15 int matchLineNum; | |
16 char* matchLine; | |
17 } Result, *ResultPtr; | |
18 | |
19 typedef struct bmData { | |
20 int* skipTable; | |
21 char* readText; | |
22 int readTextLen; | |
23 char* searchWord; | |
24 int searchWordLen; | |
25 } BMData, *BMDataPtr; | |
26 | |
13 //Boyer Moore法に使用するテーブルを作成 | 27 //Boyer Moore法に使用するテーブルを作成 |
14 int* | 28 int * |
15 createBMskiptable(u_char *search_word,int search_word_len,int *skip_table) | 29 createBMskiptable(BMDataPtr bmdata) |
16 { | 30 { |
31 int searchWordLen = bmdata->searchWordLen; | |
32 char *searchWord = bmdata->searchWord; | |
33 int* skipTable = (int*)malloc(sizeof(int)*256); | |
34 | |
17 for(int i = 0; i < 256; ++i){ | 35 for(int i = 0; i < 256; ++i){ |
18 skip_table[i] = search_word_len; | 36 bmdata->skipTable[i] = searchWordLen; |
19 } | 37 } |
20 | 38 |
21 for(int j = 0; j < search_word_len - 1; ++j){ | 39 for(int j = 0; j < searchWordLen - 1; ++j){ |
22 skip_table[search_word[j]] = search_word_len - j - 1; | 40 bmdata->skipTable[(u_char)searchWord[j]] = searchWordLen - j - 1; |
23 } | 41 } |
24 return skip_table; | 42 return skipTable; |
25 } | 43 } |
26 | 44 |
27 //Boyer Moore法による文字列検索アルゴリズム | 45 //Boyer Moore法による文字列検索アルゴリズム |
28 static int BMmethod(u_char *text,int textLen, | 46 static void* BMmethod(BMDataPtr bmdata,ResultPtr result) |
29 u_char *pattern,int swLen,int *skipTable) | |
30 { | 47 { |
31 int i = swLen - 1; | 48 int i = bmdata->searchWordLen - 1; |
32 int matchCounter = 0; | 49 int matchCounter = 0; |
33 | 50 |
34 while ( i < textLen){ | 51 while ( i < bmdata->readTextLen){ |
35 int j = swLen - 1; | 52 int j = bmdata->searchWordLen - 1; |
36 while (text[i] == pattern[j]){ | 53 while (bmdata->readText[i] == bmdata->searchWord[j]){ |
37 if (j == 0){ | 54 if (j == 0){ |
38 matchCounter++; | 55 matchCounter++; |
39 } | 56 } |
40 --i; | 57 --i; |
41 --j; | 58 --j; |
42 } | 59 } |
43 i = i + fmax(skipTable[text[i]],swLen - j); | 60 i = i + fmax(bmdata->skipTable[(u_char)bmdata->readText[i]],bmdata->searchWordLen - j); |
44 } | 61 } |
45 return matchCounter; | 62 |
63 result->matchNum = matchCounter; | |
64 return result; | |
46 } | 65 } |
47 | 66 |
48 int main(int argc, char* argv[]) { | 67 int main(int argc, char* argv[]) { |
49 | 68 |
50 char *filename = 0; | 69 char *filename = 0; |
51 // "u_char" is "unsigned char" in sys/types.h | 70 char *searchWord = 0; |
52 u_char *searchWord = 0; | |
53 | 71 |
54 // check argument | 72 // check argument |
55 for (int i = 0; argv[i]; i++) { | 73 for (int i = 0; argv[i]; i++) { |
56 if (strcmp(argv[i], "-file") == 0) { | 74 if (strcmp(argv[i], "-file") == 0) { |
57 filename = (char*)argv[i+1]; i++; | 75 filename = (char*)argv[i+1]; i++; |
58 }else if (strcmp(argv[i], "-sw") == 0) { | 76 }else if (strcmp(argv[i], "-sw") == 0) { |
59 searchWord = (u_char*)argv[i+1]; i++; | 77 searchWord = (char*)argv[i+1]; i++; |
60 } | 78 } |
61 } | 79 } |
62 | 80 |
63 // prepare file read | 81 // prepare file read |
64 if (filename == 0) { | 82 if (filename == 0) { |
76 | 94 |
77 if (fstat(fd,&sb)) { | 95 if (fstat(fd,&sb)) { |
78 fprintf(stderr,"can't fstat %s\n",filename); | 96 fprintf(stderr,"can't fstat %s\n",filename); |
79 } | 97 } |
80 | 98 |
99 BMDataPtr bmdata = (BMDataPtr)malloc(sizeof(BMData)); | |
100 | |
81 textfile = (char*)malloc(sb.st_size); | 101 textfile = (char*)malloc(sb.st_size); |
82 read(fd,textfile,sb.st_size); | 102 read(fd,textfile,sb.st_size); |
103 | |
104 bmdata->readText = textfile; | |
105 bmdata->readTextLen = sb.st_size; | |
106 bmdata->skipTable = (int*)malloc(sizeof(int)*256); | |
83 | 107 |
84 if (textfile == (char*)-1) { | 108 if (textfile == (char*)-1) { |
85 fprintf(stderr,"Can't mmap file\n"); | 109 fprintf(stderr,"Can't mmap file\n"); |
86 perror(NULL); | 110 perror(NULL); |
87 exit(0); | 111 exit(0); |
88 } | 112 } |
89 | 113 |
90 // prepare Boyer Moore Search | 114 // prepare Boyer Moore Search |
115 bmdata->searchWord = searchWord; | |
116 bmdata->searchWordLen = strlen((const char*)bmdata->searchWord); | |
117 bmdata->skipTable = createBMskiptable(bmdata); | |
91 | 118 |
92 int *skipTable = (int*)malloc(256 * sizeof(int)); // 文字列に対応した table を用意 | 119 ResultPtr result = (ResultPtr)malloc(sizeof(Result)); |
93 int searchWordLen = strlen((const char*)searchWord); | |
94 int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable); | |
95 | 120 |
96 int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table); | 121 BMmethod(bmdata,result); |
97 | 122 |
98 printf("sword: %s len: %d\n",searchWord,searchWordLen); | 123 printf("sword: %s len: %d\n",bmdata->searchWord,bmdata->searchWordLen); |
99 printf("Match : %d\n",matchNum); | 124 printf("Match : %d\n",result->matchNum); |
100 | 125 |
101 free(textfile); | 126 free(result); |
102 free(skipTable); | 127 free(bmdata); |
103 return 0; | 128 return 0; |
104 } | 129 } |