Mercurial > hg > Members > masakoha > testcode
comparison regex/main.cc @ 38:d15b9d342421
add regex
author | Masataka Kohagura <kohagura@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 01 Mar 2015 14:10:25 +0900 |
parents | |
children | 120c8116e831 |
comparison
equal
deleted
inserted
replaced
37:0433a15ea8d2 | 38:d15b9d342421 |
---|---|
1 #include <stdio.h> | |
2 #include <stdlib.h> | |
3 #include <unistd.h> | |
4 #include <math.h> | |
5 #include <string.h> | |
6 #include <sys/mman.h> | |
7 #include <sys/stat.h> | |
8 #include <sys/types.h> | |
9 #include <fcntl.h> | |
10 | |
11 const char *usr_help_str = "Usage: ./regex [-file filename] [-sw SearchWord]\n"; | |
12 | |
13 //Boyer Moore法に使用するテーブルを作成 | |
14 int* | |
15 createBMskiptable(u_char *search_word,int search_word_len,int *skip_table) | |
16 { | |
17 for(int i = 0; i < 256; ++i){ | |
18 skip_table[i] = search_word_len; | |
19 } | |
20 | |
21 for(int j = 0; j < search_word_len - 1; ++j){ | |
22 skip_table[search_word[j]] = search_word_len - j - 1; | |
23 } | |
24 return skip_table; | |
25 } | |
26 | |
27 //Boyer Moore法による文字列検索アルゴリズム | |
28 static int BMmethod(u_char *text,int textLen, | |
29 u_char *pattern,int swLen,int *skipTable) | |
30 { | |
31 int i = swLen - 1; | |
32 int matchCounter = 0; | |
33 | |
34 while ( i < textLen){ | |
35 int j = swLen - 1; | |
36 while (text[i] == pattern[j]){ | |
37 if (j == 0){ | |
38 matchCounter++; | |
39 } | |
40 --i; | |
41 --j; | |
42 } | |
43 i = i + fmax(skipTable[text[i]],swLen - j); | |
44 } | |
45 return matchCounter; | |
46 } | |
47 | |
48 int main(int argc, char* argv[]) { | |
49 | |
50 char *filename = 0; | |
51 // "u_char" is "unsigned char" in sys/types.h | |
52 u_char *searchWord = 0; | |
53 | |
54 // check argument | |
55 for (int i = 0; argv[i]; i++) { | |
56 if (strcmp(argv[i], "-file") == 0) { | |
57 filename = (char*)argv[i+1]; i++; | |
58 }else if (strcmp(argv[i], "-sw") == 0) { | |
59 searchWord = (u_char*)argv[i+1]; i++; | |
60 } | |
61 } | |
62 | |
63 // prepare file read | |
64 if (filename == 0) { | |
65 puts(usr_help_str); | |
66 exit(1); | |
67 } | |
68 | |
69 struct stat sb; | |
70 long fd = 0; | |
71 char *textfile; | |
72 | |
73 if ((fd=open(filename,O_RDONLY,0666))==0) { | |
74 fprintf(stderr,"can't open %s\n",filename); | |
75 } | |
76 | |
77 if (fstat(fd,&sb)) { | |
78 fprintf(stderr,"can't fstat %s\n",filename); | |
79 } | |
80 | |
81 textfile = (char*)malloc(sb.st_size); | |
82 read(fd,textfile,sb.st_size); | |
83 | |
84 if (textfile == (char*)-1) { | |
85 fprintf(stderr,"Can't mmap file\n"); | |
86 perror(NULL); | |
87 exit(0); | |
88 } | |
89 | |
90 // prepare Boyer Moore Search | |
91 | |
92 int *skipTable = (int*)malloc(256 * sizeof(int)); // 文字列に対応した table を用意 | |
93 int searchWordLen = strlen((const char*)searchWord); | |
94 int *BMskip_table = createBMskiptable(searchWord, searchWordLen, skipTable); | |
95 | |
96 int matchNum = BMmethod((u_char*)textfile,sb.st_size,searchWord,searchWordLen,BMskip_table); | |
97 | |
98 printf("sword: %s len: %d\n",searchWord,searchWordLen); | |
99 printf("Match : %d\n",matchNum); | |
100 | |
101 free(textfile); | |
102 free(skipTable); | |
103 return 0; | |
104 } |