view regexParser/CeriumGrep.cc @ 314:a4484c02cba5

add wordMode in regexParser
author mir3636
date Sat, 07 May 2016 18:38:54 +0900
parents c9ac6f06e706
children a1b65d39b947
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "CeriumGrep.h"
#include "subsetConstruction.h"
#include "node.h"
#include "grepWalk.h"
#include "fileread.h"
#include "threadedSearch.h"
#include "generateSequentialSearch.h"

/**
 * Command-line driver: parse a regular expression, optionally build its
 * transition generator, and optionally search a file with it.
 *
 * Recognized arguments (scanned from argv[1]):
 *   -regex <pattern>   regular expression to parse (required)
 *   -file <path>       file to search; also stored in the returned Search
 *   -noGeneration      parse only, do not build the transition list
 *   -subset            build the transition list and run subsetConstruction
 *   -seq/-loop/-cbc    select exportState / exportStateLoop / exportStateCbC
 *                      as the sequential-search generator
 *   -ts                search with threadedSearch instead of grepWalk
 *   +word              set RegexInfo::wordMode to false (it defaults to true)
 * Unrecognized arguments are ignored.
 *
 * @param argc      number of entries in argv
 * @param argv      argument vector; argv[0] is skipped
 * @param parallel  when true, the file search is not run here even if
 *                  -file was given
 * @return Search whose filename is the -file argument (or 0) and whose tg is
 *         the transition generator that was built, or NULL when no regex
 *         was supplied or generation was disabled.
 */
Search grep(int argc,char **argv,bool parallel)
{
    bool generate = true;
    bool subset = false;
    void  (*generateSequentialSearch)(TransitionGeneratorPtr) = NULL;
    bool ts = false;
    char *filename = NULL;
    Search s;
    s.filename = 0;
    s.tg = NULL;

    RegexInfo ri;
    // Must start as NULL: the "no -regex given" check below reads it.
    ri.ptr = NULL;
    ri.stateNumber = 1;
    ri.wordMode = true;
    for (int i = 1; i < argc; i++) {
        // Options that take a value only consume it when one is present,
        // so argv is never indexed past argc - 1.
        const bool hasValue = (i + 1 < argc);
        if (strcmp(argv[i],"-regex") == 0) {
            if (hasValue) {
                ri.ptr = (unsigned char*)argv[++i];
            }
        } else if (strcmp(argv[i],"-noGeneration") == 0) {
            generate = false;
        } else if (strcmp(argv[i],"-subset") == 0) {
            subset = true;
        } else if (strcmp(argv[i],"-seq") == 0) {
            generateSequentialSearch = exportState;
        } else if (strcmp(argv[i],"-loop") == 0) {
            generateSequentialSearch = exportStateLoop;
        } else if (strcmp(argv[i],"-cbc") == 0) {
            generateSequentialSearch = exportStateCbC;
        } else if (strcmp(argv[i],"-file") == 0) {
            if (hasValue) {
                s.filename = filename = argv[++i];
            }
        } else if (strcmp(argv[i],"-ts") == 0) {
            ts = true;
        } else if (strcmp(argv[i],"+word") == 0) {
            ri.wordMode = false;
        }
    }
    if (!ri.ptr) return s;   // nothing to do without a pattern

    printf("regex : %s\n",ri.ptr);
    NodePtr n = regex(&ri);   // parse only
    printTree(n);

    TGValue tgv;
    // Stays NULL when neither branch below runs (-noGeneration without -subset).
    tgv.tg = NULL;
    if (generate && !subset)  {  // NFA generation
        tgv = generateTransitionList(n);
        printTree(n);
        printState(tgv.tg);
        if (generateSequentialSearch) {
            // NOTE(review): this branch always calls exportState, whichever of
            // -seq/-loop/-cbc was selected - confirm that this is intended.
            exportState(tgv.tg);
        }
    } else if (subset)  {
        tgv = generateTransitionList(n);
        subsetConstruction(tgv.tg);   // Determinization
        printState(tgv.tg);
        if (generateSequentialSearch) {
            generateSequentialSearch(tgv.tg);
        }
    }

    // Search only when a transition generator was actually built.
    if (filename != NULL && !parallel && tgv.tg != NULL) {
        int fd = 0;
        // NOTE(review): close(fd) below is only correct if createSt_mmap takes
        // fd by reference and stores the opened descriptor - confirm in fileread.h.
        st_mmap_t st_mmap = createSt_mmap(filename,fd);
        Buffer buff = createBuffer(st_mmap);
        if (ts) threadedSearch(tgv.tg,buff);
        else grepWalk(tgv.tg,buff);
        close(fd);
    }

    s.tg = tgv.tg;
    return s;
}