Mercurial > hg > Papers > 2015 > yuhi-master
changeset 33:417431560eed
benchmark data parallel
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 15 Feb 2015 11:59:06 +0900 |
parents | 9b7ce931cad9 |
children | 9b181351ca4d |
files | paper/chapter1.tex paper/chapter4.tex paper/figures/GPU/.DS_Store paper/figures/GPU/fft/bar_plot.sh paper/graffle/wordcount.graffle paper/images/cell_arch.xbb paper/images/cpu_arch.pdf paper/images/cpu_arch.xbb paper/images/wordcount.pdf paper/master_paper.aux paper/master_paper.dvi paper/master_paper.lof paper/master_paper.log paper/master_paper.lot paper/master_paper.pdf paper/master_paper.toc |
diffstat | 16 files changed, 314 insertions(+), 492 deletions(-) [+] |
line wrap: on
line diff
--- a/paper/chapter1.tex Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/chapter1.tex Sun Feb 15 11:59:06 2015 +0900 @@ -3,7 +3,7 @@ マルチプラットフォームにはマルチコア CPU 、 GPU や Cell といったヘテロジニアスマルチコアのような 様々な構成がある。 -\section{GPU の Architecture} +\section{GPU/Cell の Architecture} \label{sec:shared_memory} 本研究では、 CPU の他に GPU 上でのプログラミング (GPGPU) にも対応する。 @@ -39,6 +39,23 @@ \caption{CPU Architecture} \label{fig:cpuarch} \end{figure} + +Shared Memory でないプロセッサとして、Cell が挙げられる。 + +Cell は1基の制御系プロセッサコア PPE ( PowerPC Processor Element ) と +8基の演算系プロセッサコア SPE ( Synergistic Processor Element ) で構成される。 +各プロセッサコアはEIB (Element Interconnect Bus ) と呼ばれる高速なバスで接続されている。 +また、 EIB はメインメモリや外部入出力デバイスと接続されており、 +各プロセッサコアは EIB を経由してデータアクセスを行う。 +PPE、SPE、メインメモリ、EIB の構成図を図:\ref{fig:cell_arch}に示す。 +\begin{figure}[htpb] + \begin{center} + \includegraphics[scale=0.8]{./images/cell_arch.pdf} + \end{center} + \caption{Cell Architecture} + \label{fig:cell_arch} +\end{figure} + \newpage %-------- % OpenCL @@ -126,3 +143,37 @@ }; \end{lstlisting} +\section{Cell Broadband Engine} +Cell Broadband Engine は、 +ソニー・コンピュータエンタテインメント、ソニー、IBM 、東芝によって開発されたプロセッサである。 +Cell は PPE と SPE によって構成されており、これらは OpenCL や CUDA で言うところの Device にあたる。 +PPE は Cell Broadband Engine のメインプロセッサで、 +複数の SPE をコアプロセッサとして使用できる汎用プロセッサである。 +メインメモリや外部デバイスへの入出力、SPE を制御する役割を担っている。 + +SPE は PPE によって制御される演算系のプロセッサである。 +\ref{sec:shared_memory}節でも述べた通り、 SPE からメインメモリへ直接アクセスすることはできず、 + DMA (Direct Memory Access) 転送によってアクセスを行う。 +DMA 転送とは CPU を介さずに周辺装置とメモリとの間でデータ転送を行う事で、 +SPE が持っているコントローラが DMA Controller と通信することで行われる。手順としては以下のようになる。 + +\begin{enumerate} + \item SPE で起動しているプログラムが、コントローラに対して DMA 転送命令を発行 + \item SPE の持つコントローラが DMA Controller を介して DMA 転送を開始。 + この間 SPE で起動しているプログラムは停止しない + \item 転送が開始したら、SPE プログラムが転送の完了を待つ + +\end{enumerate} + +Cell の PPE には主に2つの仕様がある。 + +OpenCL には主に2つの仕様がある。 + +\begin{itemize} +\item SPU 拡張 C/C++ +\item libSPE2 +\end{itemize} + +SPE 上で動作する、拡張された C/C++ の言語を用いる。 +通常の C/C++ 言語との違いは DMA 転送、SIMD 演算(加算、減算、乗算)に対応していることが挙げられる。 +一方で libSPE2 は PPE が SPE 
を制御するためのライブラリ群である。
--- a/paper/chapter4.tex Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/chapter4.tex Sun Feb 15 11:59:06 2015 +0900 @@ -21,3 +21,7 @@ しかしマルチコア CPU 上で実行する場合は各 CPU で同じメモリ空間を利用できる。 よって DMA 転送を用いていた部分をポインタ渡しを行うように修正し、 メモリに直接アクセスさせることで速度の向上が見込める。 + +更に、DMA には prefetch 機能がある。 +DMA の転送効率を向上させるため、 DMA で転送する送信データを予め取り込むことができる。 +Cerium では DMA によるprefetch、DMA を用いないポインタ渡し、更に明示的なコピーによるデータ転送をサポートする。
--- a/paper/figures/GPU/fft/bar_plot.sh Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/figures/GPU/fft/bar_plot.sh Sun Feb 15 11:59:06 2015 +0900 @@ -9,8 +9,8 @@ set terminal postscript eps color enhanced # "sample.eps"として出力 -set output "${fsname}.eps" -#set output "| epstopdf -f -o=${fsname}.pdf" +#set output "${fsname}.eps" +set output "| epstopdf -f -o=${fsname}.pdf" # x軸名を設定 set xlabel "Architecture"
--- a/paper/graffle/wordcount.graffle Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/graffle/wordcount.graffle Sun Feb 15 11:59:06 2015 +0900 @@ -46,7 +46,7 @@ <key>Creator</key> <string>yuhi</string> <key>DisplayScale</key> - <string>1 0/72 in = 1 0/72 in</string> + <string>1 0/72 in = 1.0000 in</string> <key>GraphDocumentVersion</key> <integer>8</integer> <key>GraphicsList</key> @@ -349,7 +349,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -393,7 +393,7 @@ <key>Text</key> <dict> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -437,7 +437,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -455,7 +455,7 @@ <key>Head</key> <dict> <key>ID</key> - <integer>78</integer> + <integer>74</integer> <key>Info</key> <integer>4</integer> </dict> @@ -464,158 +464,6 @@ <key>Points</key> <array> <string>{373, 225.49999999999994}</string> - <string>{457.35714285714289, 282.64285714285671}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - 
<key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - <key>Width</key> - <real>2</real> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>43</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> - <integer>77</integer> - <key>Info</key> - <integer>4</integer> - </dict> - <key>ID</key> - <integer>85</integer> - <key>Points</key> - <array> - <string>{373, 157.49999999999994}</string> - <string>{457.35714285714289, 253.42857142857181}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - <key>Width</key> - <real>2</real> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>42</integer> - <key>Info</key> - <integer>3</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> - <integer>76</integer> - <key>Info</key> - <integer>4</integer> - </dict> - <key>ID</key> - <integer>84</integer> - <key>Points</key> - <array> - <string>{373, 89.499999999999943}</string> - <string>{457.35714285714289, 224.21428571428569}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - <key>Width</key> - <real>2</real> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>4</integer> - <key>Info</key> - <integer>3</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> - <integer>75</integer> - <key>Info</key> - <integer>4</integer> - </dict> - <key>ID</key> - <integer>83</integer> - <key>Points</key> - <array> - <string>{373, 293.49999999999994}</string> - 
<string>{457.35714285714289, 194.99999999999994}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - <key>Width</key> - <real>2</real> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>44</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> - <integer>74</integer> - <key>Info</key> - <integer>4</integer> - </dict> - <key>ID</key> - <integer>82</integer> - <key>Points</key> - <array> - <string>{373, 225.49999999999994}</string> <string>{457.35714285714289, 165.78571428571419}</string> </array> <key>Style</key> @@ -645,11 +493,9 @@ <dict> <key>ID</key> <integer>73</integer> - <key>Info</key> - <integer>4</integer> </dict> <key>ID</key> - <integer>81</integer> + <integer>85</integer> <key>Points</key> <array> <string>{373, 157.49999999999994}</string> @@ -986,7 +832,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1034,7 +880,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1052,110 +898,6 @@ <key>Head</key> <dict> <key>ID</key> - <integer>43</integer> - </dict> - <key>ID</key> - <integer>51</integer> - <key>Points</key> - <array> - <string>{230.5, 
323.99999999999994}</string> - <string>{319, 225.49999999999994}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>91</integer> - <key>Info</key> - <integer>3</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>ID</key> - <integer>50</integer> - <key>Points</key> - <array> - <string>{230.5, 280.99999999999994}</string> - <string>{315, 160.99999999999994}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>92</integer> - <key>Info</key> - <integer>3</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> - <integer>4</integer> - <key>Info</key> - <integer>4</integer> - </dict> - <key>ID</key> - <integer>49</integer> - <key>Points</key> - <array> - <string>{230.5, 237.99999999999994}</string> - <string>{319, 89.499999999999943}</string> - </array> - <key>Style</key> - <dict> - <key>stroke</key> - <dict> - <key>HeadArrow</key> - <string>FilledArrow</string> - <key>Legacy</key> - <true/> - <key>TailArrow</key> - <string>0</string> - <key>Width</key> - <real>2</real> - </dict> - </dict> - <key>Tail</key> - <dict> - <key>ID</key> - <integer>93</integer> - <key>Info</key> - <integer>3</integer> - </dict> - </dict> - <dict> - <key>Class</key> - <string>LineGraphic</string> - <key>Head</key> - <dict> - <key>ID</key> <integer>44</integer> <key>Info</key> <integer>4</integer> @@ -1334,7 +1076,7 @@ <key>Text</key> <dict> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + 
<string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1374,7 +1116,7 @@ <key>Text</key> <dict> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1414,7 +1156,7 @@ <key>Text</key> <dict> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1454,7 +1196,7 @@ <key>Text</key> <dict> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1543,7 +1285,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1798,7 +1540,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + 
<string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -1929,7 +1671,7 @@ <key>Pad</key> <integer>0</integer> <key>Text</key> - <string>{\rtf1\ansi\ansicpg1252\cocoartf1343\cocoasubrtf160 + <string>{\rtf1\ansi\ansicpg1252\cocoartf1344\cocoasubrtf720 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -2018,7 +1760,7 @@ <key>MasterSheets</key> <array/> <key>ModificationDate</key> - <string>2015-01-22 10:34:57 +0000</string> + <string>2015-02-14 06:55:45 +0000</string> <key>Modifier</key> <string>yuhi</string> <key>NotesVisible</key>
--- a/paper/images/cell_arch.xbb Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/images/cell_arch.xbb Sun Feb 15 11:59:06 2015 +0900 @@ -1,8 +1,8 @@ -%%Title: ./images/cell_arch.pdf +%%Title: ./cell_arch.pdf %%Creator: extractbb 20130405 %%BoundingBox: 0 0 273 289 %%HiResBoundingBox: 0.000000 0.000000 273.000000 289.000000 %%PDFVersion: 1.3 %%Pages: 1 -%%CreationDate: Tue Oct 22 15:47:16 2013 +%%CreationDate: Sat Feb 14 16:49:28 2015
--- a/paper/images/cpu_arch.xbb Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/images/cpu_arch.xbb Sun Feb 15 11:59:06 2015 +0900 @@ -1,8 +1,8 @@ -%%Title: ./images/cpu_arch.pdf +%%Title: ./cpu_arch.pdf %%Creator: extractbb 20130405 -%%BoundingBox: 0 0 273 225 -%%HiResBoundingBox: 0.000000 0.000000 273.000000 225.000000 +%%BoundingBox: 0 0 442 263 +%%HiResBoundingBox: 0.000000 0.000000 442.000000 263.000000 %%PDFVersion: 1.3 %%Pages: 1 -%%CreationDate: Tue Oct 22 15:47:17 2013 +%%CreationDate: Sat Feb 14 17:20:46 2015
--- a/paper/master_paper.aux Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/master_paper.aux Sun Feb 15 11:59:06 2015 +0900 @@ -5,129 +5,136 @@ \@writefile{toc}{\contentsline {chapter}{\numberline {第2章}既存のマルチプラットフォームフレームワーク}{3}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {2.1}GPU の Architecture}{3}} +\@writefile{toc}{\contentsline {section}{\numberline {2.1}GPU/Cell の Architecture}{3}} \newlabel{sec:shared_memory}{{2.1}{3}} \@writefile{lof}{\contentsline {figure}{\numberline {2.1}{\ignorespaces GPU Architecture}}{4}} \newlabel{fig:gpuarch}{{2.1}{4}} \@writefile{lof}{\contentsline {figure}{\numberline {2.2}{\ignorespaces CPU Architecture}}{4}} \newlabel{fig:cpuarch}{{2.2}{4}} +\@writefile{lof}{\contentsline {figure}{\numberline {2.3}{\ignorespaces Cell Architecture}}{5}} +\newlabel{fig:cell_arch}{{2.3}{5}} \@writefile{toc}{\contentsline {section}{\numberline {2.2}OpenCL}{5}} -\@writefile{toc}{\contentsline {section}{\numberline {2.3}CUDA}{5}} +\@writefile{toc}{\contentsline {section}{\numberline {2.3}CUDA}{6}} \@writefile{toc}{\contentsline {section}{\numberline {2.4}StarPU}{6}} -\newlabel{src:codelet}{{2.1}{6}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {2.1}codeletの例}{6}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第3章}並列プログラミングフレームワーク Cerium}{7}} -\@writefile{lof}{\addvspace {10\p@ }} -\@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {3.1}Cerium の概要}{7}} -\@writefile{toc}{\contentsline {section}{\numberline {3.2}Cerium TaskManager}{7}} -\newlabel{src:createTask}{{3.1}{7}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.1}Task の生成}{7}} -\@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces Task 生成おける API}}{8}} -\newlabel{table:task_create_api}{{3.1}{8}} -\newlabel{src:task}{{3.2}{8}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.2}Task}{8}} -\@writefile{lot}{\contentsline 
{table}{\numberline {3.2}{\ignorespaces Task 側で使用する API}}{8}} -\newlabel{table:task_api}{{3.2}{8}} -\@writefile{toc}{\contentsline {section}{\numberline {3.3}Cerium における Task}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces Task Manager}}{9}} -\newlabel{fig:taskmanager}{{3.1}{9}} -\@writefile{toc}{\contentsline {section}{\numberline {3.4}Task の Scheduling}{10}} -\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces Scheduler}}{10}} -\newlabel{fig:scheduler}{{3.2}{10}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第4章}Ceriumを用いた例題}{11}} +\newlabel{src:codelet}{{2.1}{7}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {2.1}codeletの例}{7}} +\@writefile{toc}{\contentsline {section}{\numberline {2.5}Cell Broadband Engine}{7}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第3章}並列プログラミングフレームワーク Cerium}{8}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {4.1}Bitonic Sort}{11}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Bitonic Sort の例}}{12}} -\newlabel{fig:sort}{{4.1}{12}} -\@writefile{toc}{\contentsline {section}{\numberline {4.2}Word Count}{13}} -\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces WordCountのフロー}}{14}} -\newlabel{fig:wordcount}{{4.2}{14}} -\@writefile{toc}{\contentsline {section}{\numberline {4.3}FFT}{15}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第5章}マルチコアへの対応}{16}} +\@writefile{toc}{\contentsline {section}{\numberline {3.1}Cerium の概要}{8}} +\@writefile{toc}{\contentsline {section}{\numberline {3.2}Cerium TaskManager}{8}} +\newlabel{src:createTask}{{3.1}{8}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {3.1}Task の生成}{8}} +\@writefile{lot}{\contentsline {table}{\numberline {3.1}{\ignorespaces Task 生成おける API}}{9}} +\newlabel{table:task_create_api}{{3.1}{9}} +\newlabel{src:task}{{3.2}{9}} +\@writefile{lol}{\contentsline 
{lstlisting}{\numberline {3.2}Task}{9}} +\@writefile{lot}{\contentsline {table}{\numberline {3.2}{\ignorespaces Task 側で使用する API}}{9}} +\newlabel{table:task_api}{{3.2}{9}} +\@writefile{toc}{\contentsline {section}{\numberline {3.3}Cerium における Task}{10}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.1}{\ignorespaces Task Manager}}{10}} +\newlabel{fig:taskmanager}{{3.1}{10}} +\@writefile{toc}{\contentsline {section}{\numberline {3.4}Task の Scheduling}{11}} +\@writefile{lof}{\contentsline {figure}{\numberline {3.2}{\ignorespaces Scheduler}}{11}} +\newlabel{fig:scheduler}{{3.2}{11}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第4章}Ceriumを用いた例題}{12}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {5.1}マルチコア上での実行の機構}{16}} -\@writefile{toc}{\contentsline {section}{\numberline {5.2}DMA}{16}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第6章}GPGPU への対応}{17}} +\@writefile{toc}{\contentsline {section}{\numberline {4.1}Bitonic Sort}{12}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.1}{\ignorespaces Bitonic Sort の例}}{13}} +\newlabel{fig:sort}{{4.1}{13}} +\@writefile{toc}{\contentsline {section}{\numberline {4.2}Word Count}{14}} +\@writefile{lof}{\contentsline {figure}{\numberline {4.2}{\ignorespaces WordCountのフロー}}{15}} +\newlabel{fig:wordcount}{{4.2}{15}} +\@writefile{toc}{\contentsline {section}{\numberline {4.3}FFT}{16}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第5章}マルチコアへの対応}{17}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {6.1}OpenCL および CUDA による実装}{17}} -\@writefile{toc}{\contentsline {section}{\numberline {6.2}データ並列}{18}} -\newlabel{src:multiply_opencl}{{6.1}{18}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.1}Multiply(OpenCL)}{18}} -\newlabel{src:multiply_cuda}{{6.2}{18}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline 
{6.2}Multiply(CUDA)}{18}} -\newlabel{src:multiply_cpu}{{6.3}{19}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Multiply(CPU)}{19}} -\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces データ並列実行時の index の割り当て}}{19}} -\newlabel{table:dataparallel_index}{{6.1}{19}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第7章}並列処理向けI/O}{21}} +\@writefile{toc}{\contentsline {section}{\numberline {5.1}マルチコア上での実行の機構}{17}} +\@writefile{toc}{\contentsline {section}{\numberline {5.2}DMA}{17}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第6章}GPGPU への対応}{18}} +\@writefile{lof}{\addvspace {10\p@ }} +\@writefile{lot}{\addvspace {10\p@ }} +\@writefile{toc}{\contentsline {section}{\numberline {6.1}OpenCL および CUDA による実装}{18}} +\@writefile{toc}{\contentsline {section}{\numberline {6.2}データ並列}{19}} +\newlabel{src:multiply_opencl}{{6.1}{19}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.1}Multiply(OpenCL)}{19}} +\newlabel{src:multiply_cuda}{{6.2}{19}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.2}Multiply(CUDA)}{19}} +\newlabel{src:multiply_cpu}{{6.3}{20}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {6.3}Multiply(CPU)}{20}} +\@writefile{lot}{\contentsline {table}{\numberline {6.1}{\ignorespaces データ並列実行時の index の割り当て}}{20}} +\newlabel{table:dataparallel_index}{{6.1}{20}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第7章}並列処理向けI/O}{22}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {7.1}mmap}{21}} -\@writefile{lof}{\contentsline {figure}{\numberline {7.1}{\ignorespaces mmap の Model}}{21}} -\newlabel{fig:mmap}{{7.1}{21}} -\@writefile{toc}{\contentsline {section}{\numberline {7.2}Blocked Read による I/O の並列化}{22}} -\@writefile{lof}{\contentsline {figure}{\numberline {7.2}{\ignorespaces BlockedRead による WordCount}}{22}} -\newlabel{fig:blockedread}{{7.2}{22}} -\newlabel{src:blockedread_create}{{7.1}{23}} 
-\@writefile{lol}{\contentsline {lstlisting}{\numberline {7.1}BlockedRead を行う Task の生成}{23}} -\newlabel{src:blockedread_task}{{7.2}{23}} -\@writefile{lol}{\contentsline {lstlisting}{\numberline {7.2}BlockedRead Task}{23}} -\@writefile{toc}{\contentsline {section}{\numberline {7.3}I/O 専用 Thread の実装}{24}} -\@writefile{lof}{\contentsline {figure}{\numberline {7.3}{\ignorespaces BlockedRead と Task を同じ thread で動かした場合}}{24}} -\newlabel{fig:spe_any_blockedread}{{7.3}{24}} -\@writefile{lof}{\contentsline {figure}{\numberline {7.4}{\ignorespaces IO Thread による BlockedRead}}{24}} -\newlabel{fig:iothread__blockedread}{{7.4}{24}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第8章}ベンチマーク}{25}} +\@writefile{toc}{\contentsline {section}{\numberline {7.1}mmap}{22}} +\@writefile{lof}{\contentsline {figure}{\numberline {7.1}{\ignorespaces mmap の Model}}{22}} +\newlabel{fig:mmap}{{7.1}{22}} +\@writefile{toc}{\contentsline {section}{\numberline {7.2}Blocked Read による I/O の並列化}{23}} +\@writefile{lof}{\contentsline {figure}{\numberline {7.2}{\ignorespaces BlockedRead による WordCount}}{23}} +\newlabel{fig:blockedread}{{7.2}{23}} +\newlabel{src:blockedread_create}{{7.1}{24}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {7.1}BlockedRead を行う Task の生成}{24}} +\newlabel{src:blockedread_task}{{7.2}{24}} +\@writefile{lol}{\contentsline {lstlisting}{\numberline {7.2}BlockedRead Task}{24}} +\@writefile{toc}{\contentsline {section}{\numberline {7.3}I/O 専用 Thread の実装}{25}} +\@writefile{lof}{\contentsline {figure}{\numberline {7.3}{\ignorespaces BlockedRead と Task を同じ thread で動かした場合}}{25}} +\newlabel{fig:spe_any_blockedread}{{7.3}{25}} +\@writefile{lof}{\contentsline {figure}{\numberline {7.4}{\ignorespaces IO Thread による BlockedRead}}{25}} +\newlabel{fig:iothread__blockedread}{{7.4}{25}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第8章}ベンチマーク}{26}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline 
{section}{\numberline {8.1}実験環境}{25}} -\@writefile{lot}{\contentsline {table}{\numberline {8.1}{\ignorespaces Ceriumを実行する実験環境1}}{25}} -\newlabel{tab:firefly_spec}{{8.1}{25}} -\@writefile{lot}{\contentsline {table}{\numberline {8.2}{\ignorespaces Ceriumを実行する実験環境2}}{25}} -\newlabel{tab:dragonfly_spec}{{8.2}{25}} -\@writefile{toc}{\contentsline {section}{\numberline {8.2}マルチコア}{26}} -\@writefile{lof}{\contentsline {figure}{\numberline {8.1}{\ignorespaces マルチコア CPU における Sort}}{26}} -\newlabel{fig:sort_on_multicore}{{8.1}{26}} -\@writefile{lof}{\contentsline {figure}{\numberline {8.2}{\ignorespaces マルチコア CPU における WordCount}}{26}} -\newlabel{fig:wordcount_on_multicore}{{8.2}{26}} -\@writefile{toc}{\contentsline {section}{\numberline {8.3}GPGPU}{27}} -\@writefile{lof}{\contentsline {figure}{\numberline {8.3}{\ignorespaces マルチコア CPU、OpenCL、CUDA における FFT}}{27}} -\newlabel{fig:fft_bench}{{8.3}{27}} -\@writefile{lof}{\contentsline {figure}{\numberline {8.4}{\ignorespaces MacPro 2013 Late Model における FFT}}{28}} -\newlabel{fig:fft_bench_dragonfly}{{8.4}{28}} -\@writefile{toc}{\contentsline {section}{\numberline {8.4}並列 I/O}{28}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第9章}既存のプログラミングフレームワークとの比較}{29}} +\@writefile{toc}{\contentsline {section}{\numberline {8.1}実験環境}{26}} +\@writefile{lot}{\contentsline {table}{\numberline {8.1}{\ignorespaces Ceriumを実行する実験環境1}}{26}} +\newlabel{tab:firefly_spec}{{8.1}{26}} +\@writefile{lot}{\contentsline {table}{\numberline {8.2}{\ignorespaces Ceriumを実行する実験環境2}}{26}} +\newlabel{tab:dragonfly_spec}{{8.2}{26}} +\@writefile{toc}{\contentsline {section}{\numberline {8.2}マルチコア}{27}} +\@writefile{lof}{\contentsline {figure}{\numberline {8.1}{\ignorespaces マルチコア CPU における Sort}}{27}} +\newlabel{fig:sort_on_multicore}{{8.1}{27}} +\@writefile{lof}{\contentsline {figure}{\numberline {8.2}{\ignorespaces マルチコア CPU における WordCount}}{28}} +\newlabel{fig:wordcount_on_multicore}{{8.2}{28}} +\@writefile{lof}{\contentsline {figure}{\numberline 
{8.3}{\ignorespaces Word Count による prefetch機能のベンチマーク}}{28}} +\newlabel{fig:prefetch_bench}{{8.3}{28}} +\@writefile{toc}{\contentsline {section}{\numberline {8.3}GPGPU}{29}} +\@writefile{lof}{\contentsline {figure}{\numberline {8.4}{\ignorespaces Word Count によるデータ並列実行のベンチマーク}}{29}} +\newlabel{fig:dataparallel}{{8.4}{29}} +\@writefile{lof}{\contentsline {figure}{\numberline {8.5}{\ignorespaces マルチコア CPU、OpenCL、CUDA における FFT}}{30}} +\newlabel{fig:fft_bench}{{8.5}{30}} +\@writefile{lof}{\contentsline {figure}{\numberline {8.6}{\ignorespaces MacPro 2013 における FFT}}{31}} +\newlabel{fig:fft_bench_dragonfly}{{8.6}{31}} +\@writefile{toc}{\contentsline {section}{\numberline {8.4}並列 I/O}{31}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第9章}既存のプログラミングフレームワークとの比較}{32}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\@writefile{toc}{\contentsline {section}{\numberline {9.1}OpenCL}{29}} -\@writefile{lof}{\contentsline {figure}{\numberline {9.1}{\ignorespaces WorkItem ID}}{30}} -\newlabel{fig:workitem_id}{{9.1}{30}} -\@writefile{lot}{\contentsline {table}{\numberline {9.1}{\ignorespaces kernel で使用する ID 取得の API}}{30}} -\newlabel{table:kernel_id_api}{{9.1}{30}} -\@writefile{toc}{\contentsline {section}{\numberline {9.2}CUDA}{30}} -\@writefile{lof}{\contentsline {figure}{\numberline {9.2}{\ignorespaces Calculate Index example}}{31}} -\newlabel{fig:calculateIndex}{{9.2}{31}} -\@writefile{toc}{\contentsline {section}{\numberline {9.3}StarPU}{31}} -\@writefile{lof}{\contentsline {figure}{\numberline {9.3}{\ignorespaces StarPUにおけるデータ分割}}{32}} -\newlabel{fig:data_partition}{{9.3}{32}} -\@writefile{toc}{\contentsline {chapter}{\numberline {第10章}結論}{33}} +\@writefile{toc}{\contentsline {section}{\numberline {9.1}OpenCL}{32}} +\@writefile{lof}{\contentsline {figure}{\numberline {9.1}{\ignorespaces WorkItem ID}}{33}} +\newlabel{fig:workitem_id}{{9.1}{33}} +\@writefile{lot}{\contentsline {table}{\numberline {9.1}{\ignorespaces kernel で使用する ID 取得の 
API}}{33}} +\newlabel{table:kernel_id_api}{{9.1}{33}} +\@writefile{toc}{\contentsline {section}{\numberline {9.2}CUDA}{33}} +\@writefile{lof}{\contentsline {figure}{\numberline {9.2}{\ignorespaces Calculate Index example}}{34}} +\newlabel{fig:calculateIndex}{{9.2}{34}} +\@writefile{toc}{\contentsline {section}{\numberline {9.3}StarPU}{34}} +\@writefile{lof}{\contentsline {figure}{\numberline {9.3}{\ignorespaces StarPUにおけるデータ分割}}{35}} +\newlabel{fig:data_partition}{{9.3}{35}} +\@writefile{toc}{\contentsline {chapter}{\numberline {第10章}結論}{36}} \@writefile{lof}{\addvspace {10\p@ }} \@writefile{lot}{\addvspace {10\p@ }} -\newlabel{chapter:conclusion}{{10}{33}} -\@writefile{toc}{\contentsline {section}{\numberline {10.1}まとめ}{33}} -\@writefile{toc}{\contentsline {section}{\numberline {10.2}今後の課題}{33}} +\newlabel{chapter:conclusion}{{10}{36}} +\@writefile{toc}{\contentsline {section}{\numberline {10.1}まとめ}{36}} +\@writefile{toc}{\contentsline {section}{\numberline {10.2}今後の課題}{36}} \citation{*} \bibstyle{junsrt} \bibdata{master_paper} -\@writefile{toc}{\contentsline {chapter}{謝辞}{34}} +\@writefile{toc}{\contentsline {chapter}{謝辞}{37}} \bibcite{nobuyasu:2013a}{1} \bibcite{shoshi:2011a}{2} \bibcite{shoshi:2011b}{3} \bibcite{cassandra}{4} \bibcite{bigtable}{5} -\@writefile{toc}{\contentsline {chapter}{参考文献}{35}} -\@writefile{toc}{\contentsline {chapter}{発表文献}{36}} +\@writefile{toc}{\contentsline {chapter}{参考文献}{38}} +\@writefile{toc}{\contentsline {chapter}{発表文献}{39}}
--- a/paper/master_paper.lof Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/master_paper.lof Sun Feb 15 11:59:06 2015 +0900 @@ -2,26 +2,29 @@ \addvspace {10\p@ } \contentsline {figure}{\numberline {2.1}{\ignorespaces GPU Architecture}}{4} \contentsline {figure}{\numberline {2.2}{\ignorespaces CPU Architecture}}{4} +\contentsline {figure}{\numberline {2.3}{\ignorespaces Cell Architecture}}{5} \addvspace {10\p@ } -\contentsline {figure}{\numberline {3.1}{\ignorespaces Task Manager}}{9} -\contentsline {figure}{\numberline {3.2}{\ignorespaces Scheduler}}{10} +\contentsline {figure}{\numberline {3.1}{\ignorespaces Task Manager}}{10} +\contentsline {figure}{\numberline {3.2}{\ignorespaces Scheduler}}{11} \addvspace {10\p@ } -\contentsline {figure}{\numberline {4.1}{\ignorespaces Bitonic Sort の例}}{12} -\contentsline {figure}{\numberline {4.2}{\ignorespaces WordCountのフロー}}{14} +\contentsline {figure}{\numberline {4.1}{\ignorespaces Bitonic Sort の例}}{13} +\contentsline {figure}{\numberline {4.2}{\ignorespaces WordCountのフロー}}{15} \addvspace {10\p@ } \addvspace {10\p@ } \addvspace {10\p@ } -\contentsline {figure}{\numberline {7.1}{\ignorespaces mmap の Model}}{21} -\contentsline {figure}{\numberline {7.2}{\ignorespaces BlockedRead による WordCount}}{22} -\contentsline {figure}{\numberline {7.3}{\ignorespaces BlockedRead と Task を同じ thread で動かした場合}}{24} -\contentsline {figure}{\numberline {7.4}{\ignorespaces IO Thread による BlockedRead}}{24} +\contentsline {figure}{\numberline {7.1}{\ignorespaces mmap の Model}}{22} +\contentsline {figure}{\numberline {7.2}{\ignorespaces BlockedRead による WordCount}}{23} +\contentsline {figure}{\numberline {7.3}{\ignorespaces BlockedRead と Task を同じ thread で動かした場合}}{25} +\contentsline {figure}{\numberline {7.4}{\ignorespaces IO Thread による BlockedRead}}{25} \addvspace {10\p@ } -\contentsline {figure}{\numberline {8.1}{\ignorespaces マルチコア CPU における Sort}}{26} -\contentsline {figure}{\numberline {8.2}{\ignorespaces マルチコア CPU における WordCount}}{26} -\contentsline 
{figure}{\numberline {8.3}{\ignorespaces マルチコア CPU、OpenCL、CUDA における FFT}}{27} -\contentsline {figure}{\numberline {8.4}{\ignorespaces MacPro 2013 Late Model における FFT}}{28} +\contentsline {figure}{\numberline {8.1}{\ignorespaces マルチコア CPU における Sort}}{27} +\contentsline {figure}{\numberline {8.2}{\ignorespaces マルチコア CPU における WordCount}}{28} +\contentsline {figure}{\numberline {8.3}{\ignorespaces Word Count による prefetch機能のベンチマーク}}{28} +\contentsline {figure}{\numberline {8.4}{\ignorespaces Word Count によるデータ並列実行のベンチマーク}}{29} +\contentsline {figure}{\numberline {8.5}{\ignorespaces マルチコア CPU、OpenCL、CUDA における FFT}}{30} +\contentsline {figure}{\numberline {8.6}{\ignorespaces MacPro 2013 における FFT}}{31} \addvspace {10\p@ } -\contentsline {figure}{\numberline {9.1}{\ignorespaces WorkItem ID}}{30} -\contentsline {figure}{\numberline {9.2}{\ignorespaces Calculate Index example}}{31} -\contentsline {figure}{\numberline {9.3}{\ignorespaces StarPUにおけるデータ分割}}{32} +\contentsline {figure}{\numberline {9.1}{\ignorespaces WorkItem ID}}{33} +\contentsline {figure}{\numberline {9.2}{\ignorespaces Calculate Index example}}{34} +\contentsline {figure}{\numberline {9.3}{\ignorespaces StarPUにおけるデータ分割}}{35} \addvspace {10\p@ }
--- a/paper/master_paper.log Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/master_paper.log Sun Feb 15 11:59:06 2015 +0900 @@ -1,4 +1,4 @@ -This is e-pTeX, Version 3.1415926-p3.4-110825-2.6 (utf8.euc) (TeX Live 2013) (format=platex 2013.5.30) 14 FEB 2015 15:34 +This is e-pTeX, Version 3.1415926-p3.4-110825-2.6 (utf8.euc) (TeX Live 2013) (format=platex 2013.5.30) 15 FEB 2015 00:42 entering extended mode \write18 enabled. %&-line parsing enabled. @@ -272,28 +272,32 @@ File: ./images/gpu_arch.pdf Graphic file (type pdf) <./images/gpu_arch.pdf> File: ./images/cpu_arch.pdf Graphic file (type pdf) - <./images/cpu_arch.pdf> [3 + <./images/cpu_arch.pdf> +File: ./images/cell_arch.pdf Graphic file (type pdf) + <./images/cell_arch.pdf> +[3 ] File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> -[4] -LaTeX Font Info: Try loading font information for OMS+cmr on input line 65. - (/usr/local/share/texlive/2013/texmf-dist/tex/latex/base/omscmr.fd + <images/emblem-bitmap.eps> [4] +LaTeX Font Info: Try loading font information for OMS+cmr on input line 82. + +(/usr/local/share/texlive/2013/texmf-dist/tex/latex/base/omscmr.fd File: omscmr.fd 1999/05/25 v2.5h Standard LaTeX font definitions ) LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <12> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 65. +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 82. File: images/emblem-bitmap.eps Graphic file (type eps) <images/emblem-bitmap.eps> [5] LaTeX Font Info: Font shape `OMS/cmr/m/n' in size <10> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 122. -) (./chapter2.tex +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 139. 
+File: images/emblem-bitmap.eps Graphic file (type eps) + <images/emblem-bitmap.eps> [6]) (./chapter2.tex File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> -[6] -第 3 章(7ページ) + +<images/emblem-bitmap.eps> [7] +第 3 章(8ページ) LaTeX Font Warning: Font shape `JT1/mc/m/it' undefined (Font) using `JT1/mc/m/n' instead on input line 24. @@ -312,30 +316,30 @@ ) LaTeX Font Info: Font shape `OML/cmr/m/n' in size <10> not available (Font) Font shape `OML/cmm/m/it' tried instead on input line 25. - [7 + [8 ] File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [8] +<images/emblem-bitmap.eps> [9] File: ./images/createTask.pdf Graphic file (type pdf) <./images/createTask.pdf> File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [9] +<images/emblem-bitmap.eps> [10] File: ./images/scheduler.pdf Graphic file (type pdf) <./images/scheduler.pdf>) (./chapter3.tex File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [10] -第 4 章(11ページ) -[11 +<images/emblem-bitmap.eps> [11] +第 4 章(12ページ) +[12 ] File: ./images/sort_benchmark.pdf Graphic file (type pdf) <./images/sort_benchmark.pdf> File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [12] + <images/emblem-bitmap.eps> [13] File: ./images/wordcount.pdf Graphic file (type pdf) <./images/wordcount.pdf> @@ -344,41 +348,41 @@ [] File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [13] +<images/emblem-bitmap.eps> [14] File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [14]) + <images/emblem-bitmap.eps> [15]) (./chapter4.tex File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [15 + <images/emblem-bitmap.eps> [16 ] -第 5 章(16ページ) -) (./chapter5.tex [16 +第 5 章(17ページ) +) (./chapter5.tex [17 ] -第 6 章(17ページ) -[17 +第 6 章(18ページ) +[18 ] File: images/emblem-bitmap.eps Graphic file (type eps) - 
<images/emblem-bitmap.eps> [18]) (./chapter6.tex + <images/emblem-bitmap.eps> [19]) (./chapter6.tex File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [19] +<images/emblem-bitmap.eps> [20] File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [20] -第 7 章(21ページ) + <images/emblem-bitmap.eps> [21] +第 7 章(22ページ) File: ./images/mmap.pdf Graphic file (type pdf) -<./images/mmap.pdf> [21 +<./images/mmap.pdf> [22 ] File: ./images/blockedread.pdf Graphic file (type pdf) <./images/blockedread.pdf> File: images/emblem-bitmap.eps Graphic file (type eps) <images/emblem-bitmap.eps> -[22] +[23] File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [23] + <images/emblem-bitmap.eps> [24] File: ./images/speblockedread.pdf Graphic file (type pdf) <./images/speblockedread.pdf> Overfull \hbox (2.16278pt too wide) in paragraph at lines 118--119 @@ -386,7 +390,7 @@ [] -LaTeX Warning: Reference `fig:iothread_blockedread' on page 24 undefined on inp +LaTeX Warning: Reference `fig:iothread_blockedread' on page 25 undefined on inp ut line 128. 
File: ./images/iothread.pdf Graphic file (type pdf) @@ -397,34 +401,44 @@ ) (./chapter8.tex File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [24] -第 8 章(25ページ) + <images/emblem-bitmap.eps> [25] +第 8 章(26ページ) File: ./figures/multicore/sort.pdf Graphic file (type pdf) -<./figures/multicore/sort.pdf> [25 +<./figures/multicore/sort.pdf> [26 ] File: ./figures/multicore/word_count.pdf Graphic file (type pdf) <./figures/multicore/word_count.pdf> +File: ./figures/dma/dmabench.pdf Graphic file (type pdf) + +<./figures/dma/dmabench.pdf> +File: images/emblem-bitmap.eps Graphic file (type eps) + <images/emblem-bitmap.eps> [27] File: images/emblem-bitmap.eps Graphic file (type eps) -<images/emblem-bitmap.eps> [26] +<images/emblem-bitmap.eps> [28] +File: ./figures/GPU/wordcount_dataparallel.pdf Graphic file (type pdf) + <./figures/GPU/wordcount_dataparallel.pdf> +File: images/emblem-bitmap.eps Graphic file (type eps) + +<images/emblem-bitmap.eps> [29] File: ./figures/GPU/fft_firefly.pdf Graphic file (type pdf) <./figures/GPU/fft_firefly.pdf> File: ./figures/GPU/fft_dragonfly.pdf Graphic file (type pdf) <./figures/GPU/fft_dragonfly.pdf> File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [27]) + <images/emblem-bitmap.eps> [30]) (./chapter9.tex File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [28] -第 9 章(29ページ) + <images/emblem-bitmap.eps> [31] +第 9 章(32ページ) File: ./images/workitem.pdf Graphic file (type pdf) -<./images/workitem.pdf> [29 +<./images/workitem.pdf> [32 ] File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [30] + <images/emblem-bitmap.eps> [33] File: ./images/calculateIndex.pdf Graphic file (type pdf) <./images/calculateIndex.pdf> @@ -432,20 +446,20 @@ <./images/starpu_data_parallel.pdf>) (./conclusion.tex File: images/emblem-bitmap.eps Graphic file (type eps) - <images/emblem-bitmap.eps> [31] + <images/emblem-bitmap.eps> [34] File: 
images/emblem-bitmap.eps Graphic file (type eps) <images/emblem-bitmap.eps> -[32] -第 10 章(33ページ) -) (./thanx.tex [33 +[35] +第 10 章(36ページ) +) (./thanx.tex [36 -]) (./master_paper.bbl [34 +]) (./master_paper.bbl [37 -]) (./appendix.tex [35 +]) (./appendix.tex [38 ]) No file master_paper.ind. -[36 +[39 ] (./master_paper.aux) @@ -456,12 +470,12 @@ ) Here is how much of TeX's memory you used: - 2526 strings out of 494008 - 35720 string characters out of 6154472 + 2536 strings out of 494008 + 35926 string characters out of 6154472 189780 words of memory out of 5000000 - 5896 multiletter control sequences out of 15000+600000 + 5902 multiletter control sequences out of 15000+600000 18223 words of font info for 71 fonts, out of 8000000 for 9000 745 hyphenation exceptions out of 8191 - 33i,12n,40p,207b,1888s stack positions out of 5000i,500n,10000p,200000b,80000s + 33i,12n,40p,207b,1954s stack positions out of 5000i,500n,10000p,200000b,80000s -Output written on master_paper.dvi (43 pages, 121412 bytes). +Output written on master_paper.dvi (46 pages, 132304 bytes).
--- a/paper/master_paper.lot Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/master_paper.lot Sun Feb 15 11:59:06 2015 +0900 @@ -1,16 +1,16 @@ \addvspace {10\p@ } \addvspace {10\p@ } \addvspace {10\p@ } -\contentsline {table}{\numberline {3.1}{\ignorespaces Task 生成おける API}}{8} -\contentsline {table}{\numberline {3.2}{\ignorespaces Task 側で使用する API}}{8} +\contentsline {table}{\numberline {3.1}{\ignorespaces Task 生成おける API}}{9} +\contentsline {table}{\numberline {3.2}{\ignorespaces Task 側で使用する API}}{9} \addvspace {10\p@ } \addvspace {10\p@ } \addvspace {10\p@ } -\contentsline {table}{\numberline {6.1}{\ignorespaces データ並列実行時の index の割り当て}}{19} +\contentsline {table}{\numberline {6.1}{\ignorespaces データ並列実行時の index の割り当て}}{20} \addvspace {10\p@ } \addvspace {10\p@ } -\contentsline {table}{\numberline {8.1}{\ignorespaces Ceriumを実行する実験環境1}}{25} -\contentsline {table}{\numberline {8.2}{\ignorespaces Ceriumを実行する実験環境2}}{25} +\contentsline {table}{\numberline {8.1}{\ignorespaces Ceriumを実行する実験環境1}}{26} +\contentsline {table}{\numberline {8.2}{\ignorespaces Ceriumを実行する実験環境2}}{26} \addvspace {10\p@ } -\contentsline {table}{\numberline {9.1}{\ignorespaces kernel で使用する ID 取得の API}}{30} +\contentsline {table}{\numberline {9.1}{\ignorespaces kernel で使用する ID 取得の API}}{33} \addvspace {10\p@ }
--- a/paper/master_paper.toc Sat Feb 14 15:50:18 2015 +0900 +++ b/paper/master_paper.toc Sun Feb 15 11:59:06 2015 +0900 @@ -1,40 +1,41 @@ \contentsline {chapter}{\numberline {第1章}マルチプラットフォームなフレームワークにおける並列プログラミング}{1} \contentsline {chapter}{\numberline {第2章}既存のマルチプラットフォームフレームワーク}{3} -\contentsline {section}{\numberline {2.1}GPU の Architecture}{3} +\contentsline {section}{\numberline {2.1}GPU/Cell の Architecture}{3} \contentsline {section}{\numberline {2.2}OpenCL}{5} -\contentsline {section}{\numberline {2.3}CUDA}{5} +\contentsline {section}{\numberline {2.3}CUDA}{6} \contentsline {section}{\numberline {2.4}StarPU}{6} -\contentsline {chapter}{\numberline {第3章}並列プログラミングフレームワーク Cerium}{7} -\contentsline {section}{\numberline {3.1}Cerium の概要}{7} -\contentsline {section}{\numberline {3.2}Cerium TaskManager}{7} -\contentsline {section}{\numberline {3.3}Cerium における Task}{9} -\contentsline {section}{\numberline {3.4}Task の Scheduling}{10} -\contentsline {chapter}{\numberline {第4章}Ceriumを用いた例題}{11} -\contentsline {section}{\numberline {4.1}Bitonic Sort}{11} -\contentsline {section}{\numberline {4.2}Word Count}{13} -\contentsline {section}{\numberline {4.3}FFT}{15} -\contentsline {chapter}{\numberline {第5章}マルチコアへの対応}{16} -\contentsline {section}{\numberline {5.1}マルチコア上での実行の機構}{16} -\contentsline {section}{\numberline {5.2}DMA}{16} -\contentsline {chapter}{\numberline {第6章}GPGPU への対応}{17} -\contentsline {section}{\numberline {6.1}OpenCL および CUDA による実装}{17} -\contentsline {section}{\numberline {6.2}データ並列}{18} -\contentsline {chapter}{\numberline {第7章}並列処理向けI/O}{21} -\contentsline {section}{\numberline {7.1}mmap}{21} -\contentsline {section}{\numberline {7.2}Blocked Read による I/O の並列化}{22} -\contentsline {section}{\numberline {7.3}I/O 専用 Thread の実装}{24} -\contentsline {chapter}{\numberline {第8章}ベンチマーク}{25} -\contentsline {section}{\numberline {8.1}実験環境}{25} -\contentsline {section}{\numberline {8.2}マルチコア}{26} -\contentsline {section}{\numberline {8.3}GPGPU}{27} -\contentsline 
{section}{\numberline {8.4}並列 I/O}{28} -\contentsline {chapter}{\numberline {第9章}既存のプログラミングフレームワークとの比較}{29} -\contentsline {section}{\numberline {9.1}OpenCL}{29} -\contentsline {section}{\numberline {9.2}CUDA}{30} -\contentsline {section}{\numberline {9.3}StarPU}{31} -\contentsline {chapter}{\numberline {第10章}結論}{33} -\contentsline {section}{\numberline {10.1}まとめ}{33} -\contentsline {section}{\numberline {10.2}今後の課題}{33} -\contentsline {chapter}{謝辞}{34} -\contentsline {chapter}{参考文献}{35} -\contentsline {chapter}{発表文献}{36} +\contentsline {section}{\numberline {2.5}Cell Broadband Engine}{7} +\contentsline {chapter}{\numberline {第3章}並列プログラミングフレームワーク Cerium}{8} +\contentsline {section}{\numberline {3.1}Cerium の概要}{8} +\contentsline {section}{\numberline {3.2}Cerium TaskManager}{8} +\contentsline {section}{\numberline {3.3}Cerium における Task}{10} +\contentsline {section}{\numberline {3.4}Task の Scheduling}{11} +\contentsline {chapter}{\numberline {第4章}Ceriumを用いた例題}{12} +\contentsline {section}{\numberline {4.1}Bitonic Sort}{12} +\contentsline {section}{\numberline {4.2}Word Count}{14} +\contentsline {section}{\numberline {4.3}FFT}{16} +\contentsline {chapter}{\numberline {第5章}マルチコアへの対応}{17} +\contentsline {section}{\numberline {5.1}マルチコア上での実行の機構}{17} +\contentsline {section}{\numberline {5.2}DMA}{17} +\contentsline {chapter}{\numberline {第6章}GPGPU への対応}{18} +\contentsline {section}{\numberline {6.1}OpenCL および CUDA による実装}{18} +\contentsline {section}{\numberline {6.2}データ並列}{19} +\contentsline {chapter}{\numberline {第7章}並列処理向けI/O}{22} +\contentsline {section}{\numberline {7.1}mmap}{22} +\contentsline {section}{\numberline {7.2}Blocked Read による I/O の並列化}{23} +\contentsline {section}{\numberline {7.3}I/O 専用 Thread の実装}{25} +\contentsline {chapter}{\numberline {第8章}ベンチマーク}{26} +\contentsline {section}{\numberline {8.1}実験環境}{26} +\contentsline {section}{\numberline {8.2}マルチコア}{27} +\contentsline {section}{\numberline {8.3}GPGPU}{29} +\contentsline {section}{\numberline {8.4}並列 
I/O}{31} +\contentsline {chapter}{\numberline {第9章}既存のプログラミングフレームワークとの比較}{32} +\contentsline {section}{\numberline {9.1}OpenCL}{32} +\contentsline {section}{\numberline {9.2}CUDA}{33} +\contentsline {section}{\numberline {9.3}StarPU}{34} +\contentsline {chapter}{\numberline {第10章}結論}{36} +\contentsline {section}{\numberline {10.1}まとめ}{36} +\contentsline {section}{\numberline {10.2}今後の課題}{36} +\contentsline {chapter}{謝辞}{37} +\contentsline {chapter}{参考文献}{38} +\contentsline {chapter}{発表文献}{39}