book.tex 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. \documentclass[10pt]{book}
  2. \usepackage[T1]{fontenc}
  3. \usepackage[utf8]{inputenc}
  4. \usepackage{lmodern}
  5. \usepackage{hyperref}
  6. \usepackage{graphicx}
  7. \usepackage[english]{babel}
  8. \usepackage{listings}
  9. \usepackage{amsmath}
  10. \usepackage{amsthm}
  11. \usepackage{amssymb}
  12. \usepackage{natbib}
  13. \usepackage{stmaryrd}
  14. \usepackage{xypic}
  15. \lstset{%
  16. basicstyle=\ttfamily%
  17. }
  18. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  19. % 'dedication' environment: To add a dedication paragraph at the start of book %
  20. % Source: http://www.tug.org/pipermail/texhax/2010-June/015184.html %
  21. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  22. \newenvironment{dedication}
  23. {
  24. \cleardoublepage
  25. \thispagestyle{empty}
  26. \vspace*{\stretch{1}}
  27. \hfill\begin{minipage}[t]{0.66\textwidth}
  28. \raggedright
  29. }
  30. {
  31. \end{minipage}
  32. \vspace*{\stretch{3}}
  33. \clearpage
  34. }
  35. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  36. % Chapter quote at the start of chapter %
  37. % Source: http://tex.stackexchange.com/a/53380 %
  38. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  39. \makeatletter
  40. \renewcommand{\@chapapp}{}% Not necessary...
  41. \newenvironment{chapquote}[2][2em]
  42. {\setlength{\@tempdima}{#1}%
  43. \def\chapquote@author{#2}%
  44. \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax%
  45. \itshape}
  46. {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip}
  47. \makeatother
  48. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  49. \newcommand{\itm}[1]{\ensuremath{\mathit{#1}}}
  50. \newcommand{\Atom}{\itm{atom}}
  51. \newcommand{\Stmt}{\itm{stmt}}
  52. \newcommand{\Exp}{\itm{exp}}
  53. \newcommand{\Ins}{\itm{instr}}
  54. \newcommand{\Prog}{\itm{prog}}
  55. \newcommand{\Arg}{\itm{arg}}
  56. \newcommand{\Int}{\itm{int}}
  57. \newcommand{\Var}{\itm{var}}
  58. \newcommand{\Op}{\itm{op}}
  59. \newcommand{\key}[1]{\texttt{#1}}
  60. \newcommand{\READ}{(\key{read})}
  61. \newcommand{\UNIOP}[2]{(\key{#1}\,#2)}
  62. \newcommand{\BINOP}[3]{(\key{#1}\,#2\,#3)}
  63. \newcommand{\LET}[3]{(\key{let}\,([#1\;#2])\,#3)}
  64. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  65. \title{\Huge \textbf{Essentials of Compilation} \\
  66. \huge From Scheme to x86 Assembly}
  67. \author{\textsc{Jeremy G. Siek}
  68. \thanks{\url{http://homes.soic.indiana.edu/jsiek/}}
  69. }
  70. \begin{document}
  71. \frontmatter
  72. \maketitle
  73. \begin{dedication}
  74. This book is dedicated to the programming languages group at Indiana University.
  75. \end{dedication}
  76. \tableofcontents
  77. %\listoffigures
  78. %\listoftables
  79. \mainmatter
  80. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  81. \chapter*{Preface}
  82. \cite{Sarkar:2004fk}
  83. \cite{Keep:2012aa}
  84. \cite{Ghuloum:2006bh}
  85. %\section*{Structure of book}
  86. % You might want to add short description about each chapter in this book.
  87. %\section*{About the companion website}
  88. %The website\footnote{\url{https://github.com/amberj/latex-book-template}} for %this file contains:
  89. %\begin{itemize}
  90. % \item A link to (freely downlodable) latest version of this document.
  91. % \item Link to download LaTeX source for this document.
  92. % \item Miscellaneous material (e.g. suggested readings etc).
  93. %\end{itemize}
  94. \section*{Acknowledgements}
  95. Need to give thanks to
  96. \begin{itemize}
  97. \item Kent Dybvig
  98. \item Daniel P. Freidman
  99. \item Oscar Waddell
  100. \item Abdulaziz Ghuloum
  101. \item Dipanwita Sarkar
  102. \end{itemize}
  103. %\mbox{}\\
  104. %\noindent Amber Jain \\
  105. %\noindent \url{http://amberj.devio.us/}
  106. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  107. \chapter{Integers and Variables}
  108. %\begin{chapquote}{Author's name, \textit{Source of this quote}}
  109. %``This is a quote and I don't know who said this.''
  110. %\end{chapquote}
  111. The $S_0$ language includes integers, operations on integers,
  112. (arithmetic and input), and variable definitions. The syntax of the
  113. $S_0$ language is defined by the grammar in
  114. Figure~\ref{fig:s0-syntax}. This language is rich enough to exhibit
  115. several compilation techniques but simple enough so that we can
  116. implement a compiler for it in two weeks of hard work. To give the
  117. reader a feeling for the scale of this first compiler, the instructor
  118. solution for the $S_0$ compiler consists of 6 recursive functions and
  119. a few small helper functions that together span 256 lines of code.
  120. \begin{figure}[htbp]
  121. \centering
  122. \fbox{
  123. \begin{minipage}{0.85\textwidth}
  124. \[
  125. \begin{array}{lcl}
  126. \Op &::=& \key{+} \mid \key{-} \mid \key{*} \mid \key{read} \\
  127. \Exp &::=& \Int \mid (\Op \; \Exp^{+}) \mid \Var \mid \LET{\Var}{\Exp}{\Exp}
  128. \end{array}
  129. \]
  130. \end{minipage}
  131. }
  132. \caption{The syntax of the $S_0$ language. The abbreviation \Op{} is
  133. short for operator, \Exp{} is short for expression, \Int{} for integer,
  134. and \Var{} for variable.}
  135. \label{fig:s0-syntax}
  136. \end{figure}
  137. The result of evaluating an expression is a value. For $S_0$, values
  138. are integers. To make it straightforward to map these integers onto
  139. x86-64 assembly~\citep{Matz:2013aa}, we restrict the integers to just
  140. those representable with 64-bits, the range $-2^{63}$ to $2^{63}$.
  141. We will walk through some examples of $S_0$ programs, commenting on
  142. aspects of the language that will be relevant to compiling it. We
  143. start with one of the simplest $S_0$ programs; it adds two integers.
  144. \[
  145. \BINOP{+}{10}{32}
  146. \]
  147. The result is $42$, as you might expected.
  148. %
  149. The next example demonstrates that expressions may be nested within
  150. eachother, in this case nesting several additions and negations.
  151. \[
  152. \BINOP{+}{10}{ \UNIOP{-}{ \BINOP{+}{12}{20} } }
  153. \]
  154. What is the result of the above program?
  155. The \key{let} construct stores a value in a variable which can then be
  156. used within the body of the \key{let}. So the following program stores
  157. $32$ in $x$ and then computes $\BINOP{+}{10}{x}$, producing $42$.
  158. \[
  159. \LET{x}{ \BINOP{+}{12}{20} }{ \BINOP{+}{10}{x} }
  160. \]
  161. When there are multiple \key{let}'s for the same variable, the closest
  162. enclosing \key{let} is used. Consider the following program with two
  163. \key{let}'s that define variables named $x$.
  164. \[
  165. \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
  166. \]
  167. For the purposes of showing which variable uses correspond to which
  168. definitions, the following shows the $x$'s annotated with subscripts
  169. to distinguish them.
  170. \[
  171. \LET{x_1}{32}{ \BINOP{+}{ \LET{x_2}{10}{x_2} }{ x_1 } }
  172. \]
  173. The \key{read} operation prompts the user of the program for an
  174. integer. Given an input of $10$, the following program produces $42$.
  175. \[
  176. \BINOP{+}{(\key{read})}{32}
  177. \]
  178. We include the \key{read} operation in $S_0$ to demonstrate that order
  179. of evaluation can make a different. Given the input $52$ then $10$,
  180. the following produces $42$ (and not $-42$).
  181. \[
  182. \LET{x}{\READ}{ \LET{y}{\READ}{ \BINOP{-}{x}{y} } }
  183. \]
  184. The initializing expression of a \key{let} is always evaluated before
  185. the body of the \key{let}, so in the above, the \key{read} for $x$ is
  186. performed before the \key{read} for $y$.
  187. %
  188. The behavior of the following program is somewhat subtle because
  189. Scheme does not specify an evaluation order for arguments of an
  190. operator such as $-$.
  191. \[
  192. \BINOP{-}{\READ}{\READ}
  193. \]
  194. Given the input $42$ then $10$, the above program can result in either
  195. $42$ or $-42$, depending on the whims of the Scheme implementation.
  196. The goal for this chapter is to implement a compiler that translates
  197. any program $p \in S_0$ into a x86-64 assembly program $p'$ such that
  198. the assembly program exhibits the same behavior on Intel hardward as
  199. the $S_0$ program running in a Scheme implementation.
  200. \[
  201. \xymatrix{
  202. p \in S_0 \ar[rr]^{\text{compile}} \ar[drr]_{\text{run in Scheme}\quad} && p' \in \text{x86-64} \ar[d]^{\quad\text{run on an x86 machine}}\\
  203. & & n \in \mathbb{Z}
  204. }
  205. \]
  206. In the next section we introduce enough of the x86-64 assembly
  207. language to compile $S_0$.
  208. \section{x86-64 Assembly}
  209. An x86-64 program is a sequence of instructions. The instructions
  210. manipulate a fixed number of variables called \emph{registers} and can
  211. load and store values into \emph{memory}. Memory is a mapping of
  212. 64-bit addresses to 64-bit values. The syntax $n(r)$ is used to read
  213. the address $a$ stored in register $r$ and then offset it by $n$,
  214. producing the address $a + n$. The arithmetic instructions, such as
  215. $\key{addq}\,s\,d$, read from the source $s$ and destination argument
  216. $d$, apply the arithmetic operation, then stores the result in the
  217. destination $d$. In this case, computing $d \gets d + s$. The move
  218. instruction, $\key{movq}\,s\,d$ reads from $s$ and stores the result
  219. in $d$. The $\key{callq}\,\mathit{label}$ instruction executes the
  220. function specified by the label, which we shall use to implement
  221. \key{read}. Figure~\ref{fig:x86-a} defines the syntax for this
  222. subset of the x86-64 assembly language.
  223. \begin{figure}[tbp]
  224. \fbox{
  225. \begin{minipage}{0.96\textwidth}
  226. \[
  227. \begin{array}{lcl}
  228. \itm{register} &::=& \key{rax} \mid \key{rbx} \mid \key{rcx}
  229. \mid \key{rdx} \mid \key{rsi} \mid \key{rdi} \mid \\
  230. && \key{r8} \mid \key{r9} \mid \key{r10}
  231. \mid \key{r11} \mid \key{r12} \mid \key{r13}
  232. \mid \key{r14} \mid \key{r15} \\
  233. \Arg &::=& \Int \mid \key{\%}\itm{register} \mid \Int(\key{\%}\itm{register}) \\
  234. \Ins &::=& \key{addq} \; \Arg \; \Arg \mid
  235. \key{subq} \; \Arg \; \Arg \mid
  236. \key{imulq} \; \Arg \; \Arg \mid
  237. \key{negq} \; \Arg \mid \\
  238. && \key{movq} \; \Arg \; \Arg \mid
  239. \key{callq} \; \mathit{label} \\
  240. \Prog &::= & \Ins^{*}
  241. \end{array}
  242. \]
  243. \end{minipage}
  244. }
  245. \caption{A subset of the x86-64 assembly language.}
  246. \label{fig:x86-a}
  247. \end{figure}
  248. \begin{figure}[tbp]
  249. \begin{lstlisting}
  250. .globl _main
  251. _main:
  252. movq $10, %rax
  253. addq $32, %rax
  254. retq
  255. \end{lstlisting}
  256. \caption{A simple x86-64 program equivalent to $(+ \; 10 \; 32)$.}
  257. \label{fig:p0-x86}
  258. \end{figure}
  259. \section{An intermediate C-like language}
  260. \[
  261. \begin{array}{lcl}
  262. \Atom &::=& \Int \mid \Var \\
  263. \Exp &::=& \Atom \mid (\Op \; \Atom^{*})\\
  264. \Stmt &::=& (\key{assign} \; \Var \; \Exp) \mid (\key{return}\; \Exp)
  265. \end{array}
  266. \]
  267. \bibliographystyle{plainnat}
  268. \bibliography{all}
  269. \end{document}