Mercurial > hg > Members > masakoha > testcode
changeset 266:e51cac73e42a
CeriumGrep start
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/CeriumGrep.cc Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,78 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "CeriumGrep.h" +#include "subsetConstruction.h" +#include "node.h" +#include "grepWalk.h" +#include "fileread.h" +#include "threadedSearch.h" + +Search grep(int argc,char **argv,bool parallel) +{ + bool generate = true; + bool subset = false; + bool generateSequentialSearch = false; + bool ts = false; + char *filename; + Search s; + s.filename = ""; + s.tg = NULL; + + RegexInfo ri; + ri.stateNumber = 1; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i],"-regex") == 0) { + ri.ptr = (unsigned char*)argv[i+1]; i++; + } else if (strcmp(argv[i],"-noGeneration") == 0) { + generate = false; + } else if (strcmp(argv[i],"-subset") == 0) { + subset = true; + } else if (strcmp(argv[i],"-seq") == 0) { + generateSequentialSearch = true; + } else if (strcmp(argv[i],"-file") == 0) { + s.filename = filename = argv[i+1]; i++; + } else if (strcmp(argv[i],"-ts") == 0) { + ts = true; + } + } + if (!ri.ptr) return s; + + printf("regex : %s\n",ri.ptr); + NodePtr n = regex(&ri); // parse only + printTree(n); + + TGValue tgv; + if (generate && !subset) { // NFA generation + tgv = generateTransitionList(n); + printTree(n); + printState(tgv.tg); + if (generateSequentialSearch) { + exportState(tgv.tg); + } + } else if (subset) { + tgv = generateTransitionList(n); + subsetConstruction(tgv.tg); // Determinization + printState(tgv.tg); + if (generateSequentialSearch) { + exportState(tgv.tg); + } + } + + if (filename != NULL && !parallel) { + int fd = 0; + st_mmap_t st_mmap = createSt_mmap(filename,fd); + Buffer buff = createBuffer(st_mmap); + if (ts) threadedSearch(tgv.tg,buff); + else grepWalk(tgv.tg,buff); + close(fd); + } + + if (parallel) { + createAllPossibleState(tgv.tg); + } + + s.tg = tgv.tg; + return s; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/CeriumGrep.h Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,2 @@ +#include "regexParser.h" +extern Search grep(int argc,char **argv,bool parallel);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/CeriumMain.cc Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,49 @@ +#include <stdio.h> +#include <sys/time.h> +#include "TaskManager.h" +#include "SchedTask.h" +#include "Func.h" +#include "FileMapReduce.h" +#include "CeriumGrep.h" + +extern void task_init(); +void TMend(TaskManager *); +static double st_time; +static double ed_time; +const char* usr_help_str = ""; + +static double +getTime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + +int +TMmain(TaskManager *manager, int argc, char *argv[]) +{ + char *filename = 0; + Search s = grep(argc,argv,true); + + FileMapReduce *fmp = new FileMapReduce(manager,TASK_EXEC,TASK_EXEC_DATA_PARALLEL,TASK_PRINT); + filename = fmp->init(argc, argv); + fmp->w->global = (void*)s.tg; + if (filename < 0) { + return -1; + } + fmp->division_out_size = sizeof(void*)*3; // *Result,*blockBegin,*blockEnd + task_init(); + st_time = getTime(); + fmp->run_start(manager, filename); + manager->set_TMend(TMend); + return 0; +} + +void +TMend(TaskManager *manager) +{ + ed_time = getTime(); + printf("Time: %0.6f\n",ed_time-st_time); +} + +/* end */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/Func.h Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,6 @@ +enum { +#include "SysTasks.h" + TASK_EXEC, + TASK_EXEC_DATA_PARALLEL, + TASK_PRINT, +};
# File: regexParser/cerium/Makefile
# Front-end makefile: each target forwards to the matching per-platform
# makefile. The default platform is Mac OS X.

# None of these targets produce a file of the same name.
.PHONY: default macosx linux cell gpu cuda test parallel-test gpu-test clean FORCE

default: macosx

macosx: FORCE
	@echo "Make for Mac OS X"
	@$(MAKE) -f Makefile.macosx

linux: FORCE
	@echo "Make for Linux"
	@$(MAKE) -f Makefile.linux

cell: FORCE
	@echo "Make for CELL (Cell)"
	@$(MAKE) -f Makefile.cell

gpu: FORCE
	@echo "Make for OpenCL"
	@$(MAKE) -f Makefile.gpu

cuda: FORCE
	@echo "Make for Cuda"
	@$(MAKE) -f Makefile.cuda

test:
	./word_count -file c.txt

parallel-test: macosx
	@$(MAKE) -f Makefile.macosx test

gpu-test: FORCE
	@echo "Make for OpenCL"
	@$(MAKE) -f Makefile.gpu test


FORCE:

# Clean every platform, including the OpenCL build (Makefile.gpu was
# previously left out, so its gpu/ leftovers were never removed).
clean:
	@$(MAKE) -f Makefile.macosx clean
	@$(MAKE) -f Makefile.linux clean
	@$(MAKE) -f Makefile.cell clean
	@$(MAKE) -f Makefile.gpu clean
	@$(MAKE) -f Makefile.cuda clean
# File: regexParser/cerium/Makefile.cell
include ./Makefile.def

SRCS_TMP = $(wildcard *.cc)
# List files to exclude from the build here.
SRCS_EXCLUDE =
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

TASK_DIR = ppe
TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
TASK_SRCS_EXCLUDE =
# addprefix puts the directory in front of every excluded name, so the filter
# is right for zero, one or several entries (plain concatenation only
# prefixed the first one).
TASK_SRCS = $(filter-out $(addprefix $(TASK_DIR)/,$(TASK_SRCS_EXCLUDE)),$(TASK_SRCS_TMP))
TASK_OBJS = $(TASK_SRCS:.cc=.o)

LIBS += -lCellManager -lspe2 -lpthread -Wl,--gc-sections

.SUFFIXES: .cc .o
.PHONY: all speobject link debug clean

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

all: $(TARGET) speobject

$(TARGET): $(OBJS) $(TASK_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

# -C fails cleanly when spe/ is missing; "cd spe; $(MAKE)" would instead run
# make again in the current directory.
speobject:
	$(MAKE) -C spe

link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

debug: $(TARGET)
	sudo ppu-gdb ./$(TARGET)

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
	rm -f *~ \#*
	rm -f ppe/*~ ppe/\#*
	test ! -d spe || $(MAKE) -C spe clean
# File: regexParser/cerium/Makefile.cuda
include ./Makefile.def

SRCS_TMP = $(wildcard *.cc)
# List files to exclude from the build here.
SRCS_EXCLUDE =
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

TASK_DIR = ppe
CUDA_TASK_DIR = cuda

TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
TASK_SRCS_EXCLUDE =
# addprefix keeps the filter correct for zero, one or several excluded names.
TASK_SRCS = $(filter-out $(addprefix $(TASK_DIR)/,$(TASK_SRCS_EXCLUDE)),$(TASK_SRCS_TMP)) $(wildcard $(CUDA_TASK_DIR)/*.cc)
TASK_OBJS = $(TASK_SRCS:.cc=.o)

CUDA_SRCS_TMP = $(wildcard $(CUDA_TASK_DIR)/*.cu)
# List CUDA sources to exclude from the build here.
CUDA_SRCS_EXCLUDE =
CUDA_SRCS = $(filter-out $(addprefix $(CUDA_TASK_DIR)/,$(CUDA_SRCS_EXCLUDE)),$(CUDA_SRCS_TMP))
CUDA_OBJS = $(CUDA_SRCS:.cu=.ptx)

CFLAGS += -D__CERIUM_CUDA__
LIBS += `sdl-config --libs` -lCudaManager -F/Library/Frameworks -framework CUDA

INCLUDE += -I$(CUDA_PATH)

NVCC = nvcc
NVCCFLAGS = -ptx -arch=sm_20

.SUFFIXES: .cc .o .cu .ptx
.PHONY: all link debug clean

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

.cu.ptx:
	$(NVCC) $(NVCCFLAGS) $< -o $@

all: $(TARGET) $(CUDA_OBJS)

$(TARGET): $(OBJS) $(TASK_OBJS) $(CUDA_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

# NOTE(review): ppu-gdb is the debugger Makefile.cell uses; it looks like a
# copy-paste leftover here — confirm which debugger the CUDA build should use.
debug: $(TARGET)
	sudo ppu-gdb ./$(TARGET)

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(CUDA_OBJS)
	rm -f *~ \#*
	rm -f cuda/*~ cuda/\#*
# File: regexParser/cerium/Makefile.def
# Settings shared by every per-platform makefile (each one includes this file).

TARGET = word_count

# include/library path
# example for macosx:
#CERIUM = /Users/gongo/Source/Cerium
ABIBIT = 64

# example for linux/ps3:
CERIUM = ../../../Cerium


OPT = -g -O0

CC     = clang++
CFLAGS = -m64 -Wall $(OPT)

INCLUDE = -I$(CERIUM)/include/TaskManager -I. -I..
LIBS    = -L$(CERIUM)/TaskManager
# File: regexParser/cerium/Makefile.gpu
include ./Makefile.def

SRCS_TMP = $(wildcard *.cc)
# List files to exclude from the build here.
SRCS_EXCLUDE =
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

TASK_DIR1 = ppe
TASK_DIR2 = gpu
TASK_SRCS_TMP = $(wildcard $(TASK_DIR2)/*.cc $(TASK_DIR1)/*.cc)
# Names (relative to $(TASK_DIR1)) to leave out, e.g. Exec.cc
TASK_SRCS_EXCLUDE =
# addprefix keeps the filter correct for zero, one or several excluded names.
TASK_SRCS = $(filter-out $(addprefix $(TASK_DIR1)/,$(TASK_SRCS_EXCLUDE)),$(TASK_SRCS_TMP))
TASK_OBJS = $(TASK_SRCS:.cc=.o)

# NOTE(review): Makefile.def defines ABIBIT, not ABI, so this adds nothing
# unless ABI comes from the command line or environment — confirm the intent.
CC += $(ABI)
CFLAGS += -D__CERIUM_GPU__

INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
# NOTE(review): -DUSE_SIMPLE_TASK is a preprocessor option but sits on the
# link line; left in place because moving it to CFLAGS would change what
# gets compiled.
LIBS = -L${CERIUM}/TaskManager -DUSE_SIMPLE_TASK -lGpuManager -framework opencl `sdl-config --libs`

.SUFFIXES: .cc .o
.PHONY: all link debug test clean

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

all: $(TARGET)

$(TARGET): $(OBJS) $(TASK_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

debug: $(TARGET)
	sudo lldb -- ./$(TARGET) -file c.txt -gpu -g

test:
	./$(TARGET) -file c.txt -gpu -g

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
	rm -f *~ \#*
	rm -f ppe/*~ ppe/\#*
	rm -f spe/*~ spe/\#*
	rm -f gpu/*~ gpu/\#*
# File: regexParser/cerium/Makefile.linux
include ./Makefile.def

SRCS_TMP = $(wildcard *.cc)
# List files to exclude from the build here.
SRCS_EXCLUDE =
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

TASK_DIR = ppe
TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
TASK_SRCS_EXCLUDE =
# addprefix keeps the filter correct for zero, one or several excluded names.
TASK_SRCS = $(filter-out $(addprefix $(TASK_DIR)/,$(TASK_SRCS_EXCLUDE)),$(TASK_SRCS_TMP))
TASK_OBJS = $(TASK_SRCS:.cc=.o)

LIBS += -lFifoManager -lrt

.SUFFIXES: .cc .o
.PHONY: all link debug clean

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

all: $(TARGET)

$(TARGET): $(OBJS) $(TASK_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

debug: $(TARGET)
	sudo gdb ./$(TARGET)

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
	rm -f *~ \#*
	rm -f ppe/*~ ppe/\#*
	rm -f spe/*~ spe/\#*
# File: regexParser/cerium/Makefile.macosx
include ./Makefile.def

SRCS_TMP = $(wildcard *.cc)
# List files to exclude from the build here.
SRCS_EXCLUDE =
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

TASK_DIR = ppe
TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
TASK_SRCS_EXCLUDE =
# addprefix keeps the filter correct for zero, one or several excluded names.
TASK_SRCS = $(filter-out $(addprefix $(TASK_DIR)/,$(TASK_SRCS_EXCLUDE)),$(TASK_SRCS_TMP))
TASK_OBJS = $(TASK_SRCS:.cc=.o)

LIBS += -lFifoManager `sdl-config --libs`
CC += -m$(ABIBIT) -g

.SUFFIXES: .cc .o
.PHONY: all link debug test clean

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

all: $(TARGET)

$(TARGET): $(OBJS) $(TASK_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

debug: $(TARGET)
	sudo gdb ./$(TARGET)

test:
	./$(TARGET) -file c.txt -cpu 1
	./$(TARGET) -file c.txt -cpu 4
	./$(TARGET) -file c.txt -cpu 4 -i

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
	rm -f *~ \#*
	rm -f ppe/*~ ppe/\#*
	rm -f spe/*~ spe/\#*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/ppe/Exec.cc Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,87 @@ +#include <stdio.h> +#include <string.h> +#include "Exec.h" +#include "Func.h" + +/* これは必須 */ +SchedDefineTask1(Exec,blockedGrep); + +TSValue stateNothing(TSValue tsv) { + return tsv; +} + +TSValue stateSkip(TSValue tsv) { + tsv.buff.matchBegin = tsv.buff.buffptr; + return tsv; +} + +TSValue stateMatch(TSValue tsv) { + ResultPtr r = NEW(Result); + r->begin = tsv.buff.matchBegin; + r->end = tsv.buff.buffptr; + *tsv.resultEnd = r; + r->next = NULL; + tsv.resultEnd = &r->next; + tsv.current = tsv.tg->stateList->tState; + tsv.buff.buffptr--; + tsv = stateSkip(tsv); + return tsv; +} + +TSValue threadedSearch(TransitionGeneratorPtr tg,Buffer buff) { + TSValue tsv; + tsv.buff = buff; + tsv.tg = tg; + tsv.result = NULL; + tsv.resultEnd = &tsv.result; + tsv.current = generateTState(tg->anyState); + tsv.tg->stateSkip = stateSkip; + tsv.tg->stateMatch = stateMatch; + tsv.tg->stateNothing = stateNothing; + unsigned char *end = buff.buffend; + buff.buffend = buff.buff+1; + tSearch(tsv); + tsv.blockBegin = tsv.current; + buff.buffend = end; + tSearch(tsv); + tsv.blockEnd = tsv.current; + if (tsv.blockEnd->bi.bitContainer != 1) { + ResultPtr r = NEW(Result); + r->begin = tsv.buff.matchBegin; + r->end = NULL; + *tsv.resultEnd = r; + r->next = NULL; + tsv.resultEnd = &r->next; + } + return tsv; +} + +static int +blockedGrep(SchedTask *s, void *rbuf, void *wbuf) +{ + long task_spwaned = (long)s->get_param(0); + long division_size = (long)s->get_param(1); + long length = (long)s->get_param(2); + long out_size = (long)s->get_param(3); + MapReduce *w = (MapReduce*)s->get_param(4); + long allocation = task_spwaned + (long)s->x; + char* i_data; + unsigned long long* o_data; + if (division_size) { + i_data = (char*)s->get_input(rbuf,0) + allocation*division_size; + o_data = (unsigned long long*)s->get_output(wbuf,1) + allocation*out_size; + } else { + i_data = 
(char*)s->get_input(0); + o_data = (unsigned long long*)s->get_output(0); + } + TransitionGeneratorPtr tg = (TransitionGeneratorPtr)w->global; + StatePtr startState = tg->anyState; + Buffer buff; + buff.buff = buff.buffptr = buff.matchBegin = i_data; + buff.buffend = buff.buff + division_size; + TSValue tsv = threadedSearch(tg,buff); + o_data[0] = (void*)tsv.result; + o_data[1] = (void*)tsv.blockBegin; + o_data[2] = (void*)tsv.blockEnd; + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/ppe/Exec.h Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,9 @@ +#ifndef INCLUDED_TASK_HELLO +#define INCLUDED_TASK_HELLO + +#ifndef INCLUDED_SCHED_TASK +# include "SchedTask.h" +#endif + + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/ppe/Print.cc Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,42 @@ +#include <stdio.h> +#include <string.h> +#include "Print.h" +#include "Func.h" +#include "FileMapReduce.h" + +#define STATUS_NUM 2 +/* これは必須 */ +SchedDefineTask1(Print,run_print); + +static int +run_print(SchedTask *s, void *rbuf, void *wbuf) +{ + MapReduce *w = (MapReduce*)s->get_input(0); + + unsigned char* buff = (unsigned char*)w->i_data; + int out_size = w->division_out_size / sizeof(unsigned long long); + ResultPtr prev = NULL; + for (int i = 0; i < out_task_num ; i++) { + ResultPtr *idata = (ResultPtr*)w->o_data[i*3+0]; + StatePtr *blockEnd = (StatePtr)w->o_data[i*3+2]; + StatePtr *blockBegin = (StatePtr)w->o_data[i*3+4]; // next Block's blockBegin. + ResultPtr r = idata[i*out_size]; + if (prev && i != out_task_num-1) { + // 最後のブロックでなく、前の blockEnd が state 1 でない場合) + if (blockBegin->bi.bitContainer & blockEnd->bi.bitContainer) { + // 前のブロックの matchBegin から最初 result の end までがマッチ + fwrite(prev->result,r->end - prev->begin,1,stdout); + } + r = r->next; + } + for (;r;r = r->next) { + if (r->end == NULL) { + prev = r; + break; + } + fwrite(r->begin,r->end - r->begin,1,stdout); + puts("\n"); + } + } + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/ppe/Print.h Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,9 @@ +#ifndef INCLUDED_TASK_PRINT +#define INCLUDED_TASK_PRINT + +#ifndef INCLUDED_SCHED_TASK +# include "SchedTask.h" +#endif + + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/regexParser/cerium/task_init.cc Thu Jan 28 21:14:34 2016 +0900 @@ -0,0 +1,35 @@ +#include "Func.h" +#include "Scheduler.h" +#ifdef __CERIUM_GPU__ +#include "GpuScheduler.h" +#endif +#ifdef __CERIUM_CUDA__ +#include "CudaScheduler.h" +#endif + +/* 必ずこの位置に書いて */ +SchedExternTask(Exec); +SchedExternTask(Exec_Data_Parallel); +SchedExternTask(Print); + +/** + * この関数は ../spe/spe-main と違って + * 自分で呼び出せばいい関数なので + * 好きな関数名でおk (SchedRegisterTask は必須) + */ +void +task_init(void) +{ +#ifdef __CERIUM_GPU__ + GpuSchedRegister(TASK_EXEC, "gpu/Exec.cl", "wordcount"); + GpuSchedRegister(TASK_EXEC_DATA_PARALLEL, "gpu/Exec_Data_Parallel.cl","wordcount_parallel"); +#endif +#ifdef __CERIUM_CUDA__ + CudaSchedRegister(TASK_EXEC, "cuda/Exec.ptx", "wordcount"); + CudaSchedRegister(TASK_EXEC_DATA_PARALLEL, "cuda/Exec_Data_Parallel.ptx","wordcount_parallel"); +#endif + + SchedRegisterTask(TASK_EXEC, Exec); + SchedRegisterTask(TASK_EXEC_DATA_PARALLEL, Exec); + SchedRegisterTask(TASK_PRINT, Print); +}
--- a/regexParser/grepWalk.cc Wed Jan 27 18:00:25 2016 +0900 +++ b/regexParser/grepWalk.cc Thu Jan 28 21:14:34 2016 +0900 @@ -8,7 +8,7 @@ void grepSkip(TransitionGeneratorPtr tg,Buffer buff); void grepMatch(TransitionGeneratorPtr tg,Buffer buff) { - fwrite(buff.matchBegin,buff.buffptr-buff.matchBegin,1,stdout); + fwrite(buff.matchBegin,buff.buffptr-buff.matchBegin-1,1,stdout); puts("\n"); grepSkip(tg,buff); }
--- a/regexParser/main.cc Wed Jan 27 18:00:25 2016 +0900 +++ b/regexParser/main.cc Thu Jan 28 21:14:34 2016 +0900 @@ -1,69 +1,7 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include "regexParser.h" -#include "subsetConstruction.h" -#include "node.h" -#include "grepWalk.h" -#include "fileread.h" -#include "threadedSearch.h" +#include "CeriumGrep.h" int main(int argc, char **argv) { - bool generate = true; - bool subset = false; - bool generateSequentialSearch = false; - bool ts = false; - char *filename; - - RegexInfo ri; - ri.stateNumber = 1; - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i],"-regex") == 0) { - ri.ptr = (unsigned char*)argv[i+1]; i++; - } else if (strcmp(argv[i],"-noGeneration") == 0) { - generate = false; - } else if (strcmp(argv[i],"-subset") == 0) { - subset = true; - } else if (strcmp(argv[i],"-seq") == 0) { - generateSequentialSearch = true; - } else if (strcmp(argv[i],"-file") == 0) { - filename = argv[i+1]; i++; - } else if (strcmp(argv[i],"-ts") == 0) { - ts = true; - } - } - if (!ri.ptr) return 0; - - printf("regex : %s\n",ri.ptr); - NodePtr n = regex(&ri); // parse only - printTree(n); - - TGValue tgv; - if (generate && !subset) { // NFA generation - tgv = generateTransitionList(n); - printTree(n); - printState(tgv.tg); - if (generateSequentialSearch) { - exportState(tgv.tg); - } - } else if (subset) { - tgv = generateTransitionList(n); - subsetConstruction(tgv.tg); // Determinization - printState(tgv.tg); - if (generateSequentialSearch) { - exportState(tgv.tg); - } - } - - if (filename != NULL) { - int fd = 0; - st_mmap_t st_mmap = createSt_mmap(filename,fd); - Buffer buff = createBuffer(st_mmap); - if (ts) threadedSearch(tgv.tg,buff); - else grepWalk(tgv.tg,buff); - close(fd); - } + Search s = grep(argc,argv,false); return 0; }
--- a/regexParser/regexParser.h Wed Jan 27 18:00:25 2016 +0900 +++ b/regexParser/regexParser.h Thu Jan 28 21:14:34 2016 +0900 @@ -67,8 +67,8 @@ } TState, *TStatePtr; typedef struct result { - unsigned char begin; - unsigned char end; + unsigned char *begin; + unsigned char *end; struct result *next; } Result, *ResultPtr; @@ -94,6 +94,10 @@ StatePtr stateEnd; StatePtr *stateArray; StatePtr stateList; + StatePtr anyState; + TSValue stateSkip(TSValue tsv); + TSValue stateMatch(TSValue tsv); + TSValue stateNothing(TSValue tsv); } TransitionGenerator, *TransitionGeneratorPtr; typedef struct buffer { @@ -106,6 +110,7 @@ typedef struct tsValue { Buffer buff; ResultPtr result; + ResultPtr resultEnd; TransitionGeneratorPtr tg; TState *current; TState *blockBegin; @@ -149,6 +154,11 @@ unsigned char* file_mmap; off_t size; } st_mmap_t; + +typedef struct search { + TransitionGeneratorPtr tg; + const char* filename; +} Search, *SearchPtr; #endif extern NodePtr createNode(RegexInfoPtr ri,unsigned char type,CharClassPtr cc, NodePtr left, NodePtr right);
--- a/regexParser/subsetConstruction.cc Wed Jan 27 18:00:25 2016 +0900 +++ b/regexParser/subsetConstruction.cc Thu Jan 28 21:14:34 2016 +0900 @@ -355,6 +355,9 @@ tgv.tg->stateArray = (StatePtr*)calloc(bi.bitContainer*2,sizeof(StatePtr*)); tgv.tg->stateArray[startState->bitState.bitContainer] = startState; tgv.tg->stateArray[endState->bitState.bitContainer] = endState; + BitVector anyBi; + anyBi.bitContainer = bi.bitContainer * 2 - 1; // all bit 1 state + tgv.tg->anyState = createState(tgv,anyBi); tgv.startState = startState; tgv.endState = endState; tgv = generateTransition(n,tgv,2);
--- a/regexParser/threadedSearch.cc Wed Jan 27 18:00:25 2016 +0900 +++ b/regexParser/threadedSearch.cc Thu Jan 28 21:14:34 2016 +0900 @@ -25,7 +25,7 @@ return tsv; } -TStatePtr generateTState(StatePtr state) { +TStatePtr generateTState(StatePtr state, TransitionGeneratorPtr tg) { TStatePtr tState = NEW(TState); tState->state = state; state->tState = tState; @@ -53,11 +53,11 @@ } free(ccw); if (state->accept) { - tState->stateSkip = stateMatch; - tState->stateContinue = stateNothing; + tState->stateSkip = tg->stateMatch; + tState->stateContinue = tg->stateNothing; } else { - tState->stateSkip = stateSkip; - tState->stateContinue = stateNothing; + tState->stateSkip = tg->stateSkip; + tState->stateContinue = tg->stateNothing; } return tState; } @@ -106,6 +106,10 @@ TSValue tsv; tsv.buff = buff; tsv.tg = tg; + tsv.result = NULL; tsv.current = generateTState(tg->stateList); + tsv.tg->stateSkip = stateSkip; + tsv.tg->stateMatch = stateMatch; + tsv.tg->stateNothing = stateNothing; tSearch(tsv); }