Mercurial > hg > Papers > 2015 > yuhi-master
view paper/master_paper.aux @ 33:417431560eed
benchmark data parallel
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 15 Feb 2015 11:59:06 +0900 |
parents | 9b7ce931cad9 |
children | 7956856211c5 |
line wrap: on
line source
\relax \@writefile{toc}{\contentsline {chapter}{\numberline {第1章}マルチプラットフォームなフレームワークにおける並列プログラミング}{1}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {chapter}{\numberline {第2章}既存のマルチプラットフォームフレームワーク}{3}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {2.1}GPU/Cell の Architecture}{3}} \newlabel{sec:shared_memory}{{2.1}{3}} \@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces GPU Architecture}}{4}} \newlabel{fig:gpuarch}{{2.1}{4}} \@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces CPU Architecture}}{4}} \newlabel{fig:cpuarch}{{2.2}{4}} \@writefile{lof}{\contentsline {figure}{\numberline {2.3}{\ignorespaces Cell Architecture}}{5}} \newlabel{fig:cell_arch}{{2.3}{5}} \@writefile{toc}{\contentsline {section}{\numberline {2.2}OpenCL}{5}} \@writefile{toc}{\contentsline {section}{\numberline {2.3}CUDA}{6}} \@writefile{toc}{\contentsline {section}{\numberline {2.4}StarPU}{6}} \newlabel{src:codelet}{{2.1}{7}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {2.1}codeletの例}{7}} \@writefile{toc}{\contentsline {section}{\numberline {2.5}Cell Broadband Engine}{7}} \@writefile{toc}{\contentsline {chapter}{\numberline {第3章}並列プログラミングフレームワーク Cerium}{8}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {3.1}Cerium の概要}{8}} \@writefile{toc}{\contentsline {section}{\numberline {3.2}Cerium TaskManager}{8}} \newlabel{src:createTask}{{3.1}{8}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.1}Task の生成}{8}} \@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces Task 生成おける API}}{9}} \newlabel{table:task_create_api}{{3.1}{9}} \newlabel{src:task}{{3.2}{9}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {3.2}Task}{9}} \@writefile{lot}{\contentsline {table}{\numberline {3.2}{\ignorespaces Task 側で使用する API}}{9}} \newlabel{table:task_api}{{3.2}{9}} \@writefile{toc}{\contentsline {section}{\numberline {3.3}Cerium における Task}{10}} \@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces Task Manager}}{10}} \newlabel{fig:taskmanager}{{3.1}{10}} \@writefile{toc}{\contentsline {section}{\numberline {3.4}Task の Scheduling}{11}} \@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces Scheduler}}{11}} \newlabel{fig:scheduler}{{3.2}{11}} \@writefile{toc}{\contentsline {chapter}{\numberline {第4章}Ceriumを用いた例題}{12}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {4.1}Bitonic Sort}{12}} \@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Bitonic Sort の例}}{13}} \newlabel{fig:sort}{{4.1}{13}} \@writefile{toc}{\contentsline {section}{\numberline {4.2}Word Count}{14}} \@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces WordCountのフロー}}{15}} \newlabel{fig:wordcount}{{4.2}{15}} \@writefile{toc}{\contentsline {section}{\numberline {4.3}FFT}{16}} \@writefile{toc}{\contentsline {chapter}{\numberline {第5章}マルチコアへの対応}{17}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {5.1}マルチコア上での実行の機構}{17}} \@writefile{toc}{\contentsline {section}{\numberline {5.2}DMA}{17}} \@writefile{toc}{\contentsline {chapter}{\numberline {第6章}GPGPU への対応}{18}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {6.1}OpenCL および CUDA による実装}{18}} \@writefile{toc}{\contentsline {section}{\numberline {6.2}データ並列}{19}} \newlabel{src:multiply_opencl}{{6.1}{19}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.1}Multiply(OpenCL)}{19}} \newlabel{src:multiply_cuda}{{6.2}{19}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.2}Multiply(CUDA)}{19}} \newlabel{src:multiply_cpu}{{6.3}{20}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Multiply(CPU)}{20}} \@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces データ並列実行時の index の割り当て}}{20}} \newlabel{table:dataparallel_index}{{6.1}{20}} \@writefile{toc}{\contentsline {chapter}{\numberline {第7章}並列処理向けI/O}{22}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {7.1}mmap}{22}} \@writefile{lof}{\contentsline {figure}{\numberline {7.1}{\ignorespaces mmap の Model}}{22}} \newlabel{fig:mmap}{{7.1}{22}} \@writefile{toc}{\contentsline {section}{\numberline {7.2}Blocked Read による I/O の並列化}{23}} \@writefile{lof}{\contentsline {figure}{\numberline {7.2}{\ignorespaces BlockedRead による WordCount}}{23}} \newlabel{fig:blockedread}{{7.2}{23}} \newlabel{src:blockedread_create}{{7.1}{24}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {7.1}BlockedRead を行う Task の生成}{24}} \newlabel{src:blockedread_task}{{7.2}{24}} \@writefile{lol}{\contentsline {lstlisting}{\numberline {7.2}BlockedRead Task}{24}} \@writefile{toc}{\contentsline {section}{\numberline {7.3}I/O 専用 Thread の実装}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {7.3}{\ignorespaces BlockedRead と Task を同じ thread で動かした場合}}{25}} \newlabel{fig:spe_any_blockedread}{{7.3}{25}} \@writefile{lof}{\contentsline {figure}{\numberline {7.4}{\ignorespaces IO Thread による BlockedRead}}{25}} \newlabel{fig:iothread__blockedread}{{7.4}{25}} \@writefile{toc}{\contentsline {chapter}{\numberline {第8章}ベンチマーク}{26}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {8.1}実験環境}{26}} \@writefile{lot}{\contentsline {table}{\numberline {8.1}{\ignorespaces Ceriumを実行する実験環境1}}{26}} \newlabel{tab:firefly_spec}{{8.1}{26}} \@writefile{lot}{\contentsline {table}{\numberline {8.2}{\ignorespaces Ceriumを実行する実験環境2}}{26}} \newlabel{tab:dragonfly_spec}{{8.2}{26}} \@writefile{toc}{\contentsline {section}{\numberline {8.2}マルチコア}{27}} \@writefile{lof}{\contentsline {figure}{\numberline {8.1}{\ignorespaces マルチコア CPU における Sort}}{27}} \newlabel{fig:sort_on_multicore}{{8.1}{27}} \@writefile{lof}{\contentsline {figure}{\numberline {8.2}{\ignorespaces マルチコア CPU における WordCount}}{28}} \newlabel{fig:wordcount_on_multicore}{{8.2}{28}} \@writefile{lof}{\contentsline {figure}{\numberline {8.3}{\ignorespaces Word Count による prefetch機能のベンチマーク}}{28}} \newlabel{fig:prefetch_bench}{{8.3}{28}} \@writefile{toc}{\contentsline {section}{\numberline {8.3}GPGPU}{29}} \@writefile{lof}{\contentsline {figure}{\numberline {8.4}{\ignorespaces Word Count によるデータ並列実行のベンチマーク}}{29}} \newlabel{fig:dataparallel}{{8.4}{29}} \@writefile{lof}{\contentsline {figure}{\numberline {8.5}{\ignorespaces マルチコア CPU、OpenCL、CUDA における FFT}}{30}} \newlabel{fig:fft_bench}{{8.5}{30}} \@writefile{lof}{\contentsline {figure}{\numberline {8.6}{\ignorespaces MacPro 2013 における FFT}}{31}} \newlabel{fig:fft_bench_dragonfly}{{8.6}{31}} \@writefile{toc}{\contentsline {section}{\numberline {8.4}並列 I/O}{31}} \@writefile{toc}{\contentsline {chapter}{\numberline {第9章}既存のプログラミングフレームワークとの比較}{32}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \@writefile{toc}{\contentsline {section}{\numberline {9.1}OpenCL}{32}} \@writefile{lof}{\contentsline {figure}{\numberline {9.1}{\ignorespaces WorkItem ID}}{33}} \newlabel{fig:workitem_id}{{9.1}{33}} \@writefile{lot}{\contentsline {table}{\numberline {9.1}{\ignorespaces kernel で使用する ID 取得の API}}{33}} \newlabel{table:kernel_id_api}{{9.1}{33}} \@writefile{toc}{\contentsline {section}{\numberline {9.2}CUDA}{33}} \@writefile{lof}{\contentsline {figure}{\numberline {9.2}{\ignorespaces Calculate Index example}}{34}} \newlabel{fig:calculateIndex}{{9.2}{34}} \@writefile{toc}{\contentsline {section}{\numberline {9.3}StarPU}{34}} \@writefile{lof}{\contentsline {figure}{\numberline {9.3}{\ignorespaces StarPUにおけるデータ分割}}{35}} \newlabel{fig:data_partition}{{9.3}{35}} \@writefile{toc}{\contentsline {chapter}{\numberline {第10章}結論}{36}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} \newlabel{chapter:conclusion}{{10}{36}} \@writefile{toc}{\contentsline {section}{\numberline {10.1}まとめ}{36}} \@writefile{toc}{\contentsline {section}{\numberline {10.2}今後の課題}{36}} \citation{*} \bibstyle{junsrt} \bibdata{master_paper} \@writefile{toc}{\contentsline {chapter}{謝辞}{37}} \bibcite{nobuyasu:2013a}{1} \bibcite{shoshi:2011a}{2} \bibcite{shoshi:2011b}{3} \bibcite{cassandra}{4} \bibcite{bigtable}{5} \@writefile{toc}{\contentsline {chapter}{参考文献}{38}} \@writefile{toc}{\contentsline {chapter}{発表文献}{39}}