Mercurial > hg > Members > masakoha > masa

<!DOCTYPE html>

<!--
  Google HTML5 slide template

  Authors: Luke Mahé (code)
           Marcin Wichary (code and design)

           Dominic Mazzoni (browser compatibility)
           Charles Chen (ChromeVox support)

  URL: http://code.google.com/p/html5slides/
-->

<html>
  <head>
    <title>2013-06-18</title>

    <meta charset='utf-8'>
    <script
      src='http://html5slides.googlecode.com/svn/trunk/slides.js'></script>
  </head>

  <style>
    /* Your individual styles here, or just use inline styles if that’s
       what you want. */
  .slides article { background-image: none !important; background-color: white; }


  </style>

  <body style='display: none'>

    <section class='slides layout-regular template-default'>

      <!-- Your slides (<article>s) go here. Delete or comment out the
           slides below.-->

      <article>
        <h1>
          Ceriumによる
          <br>
          正規表現マッチャの実装
        </h1>
        <p>
          Masataka Kohagura
          <br>
          18th June, 2013
        </p>
      </article>

      <article>
        <h3>
        研究目的
        </h3>
        <p>
        本研究室では、Cell用に作られたCeriumにて並列プログラミングを行なっている。様々な例題を実装することにより、どのような問題でも並列処理ができることを証明する。
        </p>
        <p>
        現在は文字列サーチを実装している段階で、ボイヤームーア法を実装している。
        セミグループという、分割したファイルに対して並列処理をさせるような手法によって、既存の文字列サーチと処理速度を比較し、どれだけ速く、どれだけ効率よくなるのかを測定する。
        </p>
      </article>

      <article>
      <h3>
      今週までにしたこと
      </h3>
      <p>
      ・word_countのソース読み<br>
      (タスクが複数読み込まれた場合どうなるかを重点に)
      </p>
      <p>
      ・検索文字列の途中でデータが分割されたときの処理が正しく動くようにした。<br>
      (ただし、タスクが複数存在するときのCPU数の問題は未解決)
      </p>
      <p>
      ・出力結果にpositionの追加
      </p>
      <p>
      ・Ceriumのバージョンを過去のものに戻して動作することを確認
      </p>
      </article>

      <article class='smaller'>
      <h3>実行結果</h3>
      <section><pre>
[Masa]~%  ./regex -file d.txt -cpu 2
in Exec.cc
in Exec.cc
task num : 2
position
2 a
192 a
388 a
390 a
16389 a
      </pre></section>
      <p>
      出力結果の数字はマッチしたキーワードの先頭ポジション、アルファベットはマッチした先頭の文字を出力させている。<br>
      </p>
      <p>
      out_dataを1つのタスク当たり256個(position 128個、先頭文字128個)出力している。(固定)
      </p>
      </article>

      <article class='smaller'>
        <h3>
        Print.cc
        </h3>
        <section>
        <pre>
// Print the number of spawned tasks, then one "position character" line
// for every recorded entry whose character is 'a'.
//   s    - task handle; only its printf is used here
//   rbuf - points to a WordCount* that holds the result buffer and sizes
//   wbuf - not used
// Always returns 0.
static int
run_print(SchedTask *s, void *rbuf, void *wbuf)
{
    WordCount *w = *(WordCount**)rbuf;
    unsigned long long *idata = w->o_data;
    unsigned int idata_task_num = w->out_size * w->out_task_num;

    s->printf("task num : %d\n",w->task_spwaned);

    s->printf("position\n");
    // Entries are read as pairs: idata[2*i] is a character code and
    // idata[2*i+1] is the value printed under the "position" heading.
    for (int i = 0;i < idata_task_num ;i++) {

        // 0x61 is ASCII 'a'; entries with any other character are skipped.
        if(idata[2*i] == 0x61){
            s->printf("%d ",(int)idata[2*i+1]);
            s->printf("%c\n",(unsigned char)idata[2*i]);
        }
    }
    return 0;
}
        </pre>
        </section>
      </article>

      <article class='smaller'>
      <h3>
      Exec.cc 一部
      </h3>
      <section>
      <pre>
// Boyer-Moore style scan of text for pattern (excerpt).
// Every match is stored in match_string as a pair:
//   match_string[2*k]   = first character of the match
//   match_string[2*k+1] = offset + index of that character + 1
// offset is cast to an integer and never dereferenced.
// Always returns 0.
// NOTE: the declarations of i, k, text_len, pattern_len and skip are
// not shown in this excerpt.
// NOTE(review): k is not checked against the capacity of match_string
// here - confirm the caller's buffer is large enough.
int BM_method(unsigned char *text,int *offset,int text_length,
              unsigned char *pattern,unsigned long long *match_string)
{

    while ( i < text_len){
        int j = pattern_len - 1;
        // Compare backwards, starting at the last pattern character.
        while (text[i] == pattern[j]){
            if (j == 0){
                match_string[2*k] = text[i];
                int position = (long int)offset + i + 1;
                match_string[2*k+1] = position;

                k++;
                // The whole pattern matched, so stop comparing. Going on
                // would read pattern[-1] (and text[-1] when the match
                // starts at index 0), which is out of bounds.
                break;
            }
            --i;
            --j;
        }
        // Advance by the larger of the skip-table entry for the current
        // text character and the number of characters already compared.
        i = i + max((int)skip[(int)text[i]],pattern_len - j);
    }
    return 0;
}
      </pre>
      </section>
      </article>

      <article class='smaller'>
      <h3>
      Exec.cc 一部
      </h3>
      <section>
      <pre>
// Task entry point: search the input buffer for the fixed keyword "aba"
// and write the matches to the output buffer via BM_method.
//   s    - task handle, used for input size, parameter 1 and printf
//   rbuf - input bytes to search
//   wbuf - output buffer handed to BM_method
// Always returns 0.
static int
run(SchedTask *s, void *rbuf, void *wbuf)
{
    unsigned char *input = (unsigned char *)rbuf;
    unsigned long long *output = (unsigned long long*)wbuf;

    // Size of input 0 and parameter 1, as supplied by the task handle.
    int input_len = (int)s->get_inputSize(0);
    int *offset_param = (int*)s->get_param(1);

    unsigned char keyword[] = "aba";

    BM_method(input, offset_param, input_len, keyword, output);

    s->printf("in Exec.cc\n");
    return 0;
}
      </pre>
      </section>
      </article>
    </section>
  </body>
</html>
author	Masataka Kohagura <e085726@ie.u-ryukyu.ac.jp>
date	Fri, 27 Dec 2013 19:04:32 +0900
parents	c9b2998eb516
children