book.tex 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913
  1. \documentclass[12pt]{book}
  2. \usepackage[T1]{fontenc}
  3. \usepackage[utf8]{inputenc}
  4. \usepackage{lmodern}
  5. \usepackage{hyperref}
  6. \usepackage{graphicx}
  7. \usepackage[english]{babel}
  8. \usepackage{listings}
  9. \usepackage{amsmath}
  10. \usepackage{amsthm}
  11. \usepackage{amssymb}
  12. \usepackage{natbib}
  13. \usepackage{stmaryrd}
  14. \usepackage{xypic}
  15. \lstset{%
  16. language=Lisp,
  17. basicstyle=\ttfamily\small
  18. }
  19. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  20. % 'dedication' environment: To add a dedication paragraph at the start of book %
  21. % Source: http://www.tug.org/pipermail/texhax/2010-June/015184.html %
  22. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  23. \newenvironment{dedication}
  24. {
  25. \cleardoublepage
  26. \thispagestyle{empty}
  27. \vspace*{\stretch{1}}
  28. \hfill\begin{minipage}[t]{0.66\textwidth}
  29. \raggedright
  30. }
  31. {
  32. \end{minipage}
  33. \vspace*{\stretch{3}}
  34. \clearpage
  35. }
  36. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  37. % Chapter quote at the start of chapter %
  38. % Source: http://tex.stackexchange.com/a/53380 %
  39. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  40. \makeatletter
  41. \renewcommand{\@chapapp}{}% Not necessary...
  42. \newenvironment{chapquote}[2][2em]
  43. {\setlength{\@tempdima}{#1}%
  44. \def\chapquote@author{#2}%
  45. \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax%
  46. \itshape}
  47. {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip}
  48. \makeatother
  49. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  50. \newcommand{\itm}[1]{\ensuremath{\mathit{#1}}}
  51. \newcommand{\Stmt}{\itm{stmt}}
  52. \newcommand{\Exp}{\itm{exp}}
  53. \newcommand{\Instr}{\itm{instr}}
  54. \newcommand{\Prog}{\itm{prog}}
  55. \newcommand{\Arg}{\itm{arg}}
  56. \newcommand{\Int}{\itm{int}}
  57. \newcommand{\Var}{\itm{var}}
  58. \newcommand{\Op}{\itm{op}}
  59. \newcommand{\key}[1]{\texttt{#1}}
  60. \newcommand{\READ}{(\key{read})}
  61. \newcommand{\UNIOP}[2]{(\key{#1}\,#2)}
  62. \newcommand{\BINOP}[3]{(\key{#1}\,#2\,#3)}
  63. \newcommand{\LET}[3]{(\key{let}\,([#1\;#2])\,#3)}
  64. \newcommand{\ASSIGN}[2]{(\key{assign}\,#1\;#2)}
  65. \newcommand{\RETURN}[1]{(\key{return}\,#1)}
  66. \newcommand{\INT}[1]{(\key{int}\;#1)}
  67. \newcommand{\REG}[1]{(\key{reg}\;#1)}
  68. \newcommand{\VAR}[1]{(\key{var}\;#1)}
  69. \newcommand{\STACKLOC}[1]{(\key{stack}\;#1)}
  70. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  71. \title{\Huge \textbf{Essentials of Compilation} \\
  72. \huge An Incremental Approach}
  73. \author{\textsc{Jeremy G. Siek}
  74. \thanks{\url{http://homes.soic.indiana.edu/jsiek/}}
  75. }
  76. \begin{document}
  77. \frontmatter
  78. \maketitle
  79. \begin{dedication}
  80. This book is dedicated to the programming language wonks at Indiana
  81. University.
  82. \end{dedication}
  83. \tableofcontents
  84. %\listoffigures
  85. %\listoftables
  86. \mainmatter
  87. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  88. \chapter*{Preface}
  89. Talk about nano-pass \citep{Sarkar:2004fk,Keep:2012aa} and incremental
  90. compilers \citep{Ghuloum:2006bh}.
  91. %\section*{Structure of book}
  92. % You might want to add short description about each chapter in this book.
  93. %\section*{About the companion website}
  94. %The website\footnote{\url{https://github.com/amberj/latex-book-template}} for %this file contains:
  95. %\begin{itemize}
  96. % \item A link to (freely downlodable) latest version of this document.
  97. % \item Link to download LaTeX source for this document.
  98. % \item Miscellaneous material (e.g. suggested readings etc).
  99. %\end{itemize}
  100. \section*{Acknowledgments}
  101. Need to give thanks to
  102. \begin{itemize}
  103. \item Kent Dybvig
  104. \item Daniel P. Friedman
  105. \item Abdulaziz Ghuloum
  106. \item Oscar Waddell
  107. \item Dipanwita Sarkar
  108. \item Ronald Garcia
  109. \item Bor-Yuh Evan Chang
  110. \end{itemize}
  111. %\mbox{}\\
  112. %\noindent Amber Jain \\
  113. %\noindent \url{http://amberj.devio.us/}
  114. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  115. \chapter{Integers and Variables}
  116. \label{ch:int-exp}
  117. %\begin{chapquote}{Author's name, \textit{Source of this quote}}
  118. %``This is a quote and I don't know who said this.''
  119. %\end{chapquote}
  120. \section{The $S_0$ Language}
  121. The $S_0$ language includes integers, operations on integers,
  122. (arithmetic and input), and variable definitions. The syntax of the
  123. $S_0$ language is defined by the grammar in
  124. Figure~\ref{fig:s0-syntax}. This language is rich enough to exhibit
  125. several compilation techniques but simple enough so that we can
  126. implement a compiler for it in two weeks of hard work. To give the
  127. reader a feeling for the scale of this first compiler, the instructor
  128. solution for the $S_0$ compiler consists of 6 recursive functions and
  129. a few small helper functions that together span 256 lines of code.
  130. \begin{figure}[htbp]
  131. \centering
  132. \fbox{
  133. \begin{minipage}{0.85\textwidth}
  134. \[
  135. \begin{array}{lcl}
  136. \Op &::=& \key{+} \mid \key{-} \mid \key{*} \mid \key{read} \\
  137. \Exp &::=& \Int \mid (\Op \; \Exp^{*}) \mid \Var \mid \LET{\Var}{\Exp}{\Exp}
  138. \end{array}
  139. \]
  140. \end{minipage}
  141. }
  142. \caption{The syntax of the $S_0$ language. The abbreviation \Op{} is
  143. short for operator, \Exp{} is short for expression, \Int{} for integer,
  144. and \Var{} for variable.}
  145. \label{fig:s0-syntax}
  146. \end{figure}
  147. The result of evaluating an expression is a value. For $S_0$, values
  148. are integers. To make it straightforward to map these integers onto
  149. x86-64 assembly~\citep{Matz:2013aa}, we restrict the integers to just
  150. those representable with 64-bits, the range $-2^{63}$ to $2^{63}$.
  151. We will walk through some examples of $S_0$ programs, commenting on
  152. aspects of the language that will be relevant to compiling it. We
  153. start with one of the simplest $S_0$ programs; it adds two integers.
  154. \[
  155. \BINOP{+}{10}{32}
  156. \]
  157. The result is $42$, as you might expected.
  158. %
  159. The next example demonstrates that expressions may be nested within
  160. each other, in this case nesting several additions and negations.
  161. \[
  162. \BINOP{+}{10}{ \UNIOP{-}{ \BINOP{+}{12}{20} } }
  163. \]
  164. What is the result of the above program?
  165. The \key{let} construct stores a value in a variable which can then be
  166. used within the body of the \key{let}. So the following program stores
  167. $32$ in $x$ and then computes $\BINOP{+}{10}{x}$, producing $42$.
  168. \[
  169. \LET{x}{ \BINOP{+}{12}{20} }{ \BINOP{+}{10}{x} }
  170. \]
  171. When there are multiple \key{let}'s for the same variable, the closest
  172. enclosing \key{let} is used. Consider the following program with two
  173. \key{let}'s that define variables named $x$.
  174. \[
  175. \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
  176. \]
  177. For the purposes of showing which variable uses correspond to which
  178. definitions, the following shows the $x$'s annotated with subscripts
  179. to distinguish them.
  180. \[
  181. \LET{x_1}{32}{ \BINOP{+}{ \LET{x_2}{10}{x_2} }{ x_1 } }
  182. \]
  183. The \key{read} operation prompts the user of the program for an
  184. integer. Given an input of $10$, the following program produces $42$.
  185. \[
  186. \BINOP{+}{(\key{read})}{32}
  187. \]
  188. We include the \key{read} operation in $S_0$ to demonstrate that order
  189. of evaluation can make a different. Given the input $52$ then $10$,
  190. the following produces $42$ (and not $-42$).
  191. \[
  192. \LET{x}{\READ}{ \LET{y}{\READ}{ \BINOP{-}{x}{y} } }
  193. \]
  194. The initializing expression is always evaluated before the body of the
  195. \key{let}, so in the above, the \key{read} for $x$ is performed before
  196. the \key{read} for $y$.
  197. %
  198. The behavior of the following program is somewhat subtle because
  199. Scheme does not specify an evaluation order for arguments of an
  200. operator such as $-$.
  201. \[
  202. \BINOP{-}{\READ}{\READ}
  203. \]
  204. Given the input $42$ then $10$, the above program can result in either
  205. $42$ or $-42$, depending on the whims of the Scheme implementation.
  206. The goal for this chapter is to implement a compiler that translates
  207. any program $p \in S_0$ into a x86-64 assembly program $p'$ such that
  208. the assembly program exhibits the same behavior on an x86 computer as
  209. the $S_0$ program running in a Scheme implementation.
  210. \[
  211. \xymatrix{
  212. p \in S_0 \ar[rr]^{\text{compile}} \ar[drr]_{\text{run in Scheme}\quad} && p' \in \text{x86-64} \ar[d]^{\quad\text{run on an x86 machine}}\\
  213. & & n \in \mathbb{Z}
  214. }
  215. \]
  216. In the next section we introduce enough of the x86-64 assembly
  217. language to compile $S_0$.
  218. \section{The x86-64 Assembly Language}
  219. An x86-64 program is a sequence of instructions. The instructions
  220. manipulate 16 variables called \emph{registers} and can also load and
  221. store values into \emph{memory}. Memory is a mapping of 64-bit
  222. addresses to 64-bit values. The syntax $n(r)$ is used to read the
  223. address $a$ stored in register $r$ and then offset it by $n$ bytes (8
  224. bits), producing the address $a + n$. The arithmetic instructions,
  225. such as $\key{addq}\,s\,d$, read from the source $s$ and destination
  226. argument $d$, apply the arithmetic operation, then stores the result
  227. in the destination $d$. In this case, computing $d \gets d + s$. The
  228. move instruction, $\key{movq}\,s\,d$ reads from $s$ and stores the
  229. result in $d$. The $\key{callq}\,\mathit{label}$ instruction executes
  230. the procedure specified by the label, which we shall use to implement
  231. \key{read}. Figure~\ref{fig:x86-a} defines the syntax for this subset
  232. of the x86-64 assembly language.
  233. \begin{figure}[tbp]
  234. \fbox{
  235. \begin{minipage}{0.96\textwidth}
  236. \[
  237. \begin{array}{lcl}
  238. \itm{register} &::=& \key{rsp} \mid \key{rbp} \mid \key{rax} \mid \key{rbx} \mid \key{rcx}
  239. \mid \key{rdx} \mid \key{rsi} \mid \key{rdi} \mid \\
  240. && \key{r8} \mid \key{r9} \mid \key{r10}
  241. \mid \key{r11} \mid \key{r12} \mid \key{r13}
  242. \mid \key{r14} \mid \key{r15} \\
  243. \Arg &::=& \key{\$}\Int \mid \key{\%}\itm{register} \mid \Int(\key{\%}\itm{register}) \\
  244. \Instr &::=& \key{addq} \; \Arg, \Arg \mid
  245. \key{subq} \; \Arg, \Arg \mid
  246. \key{imulq} \; \Arg,\Arg \mid
  247. \key{negq} \; \Arg \mid \\
  248. && \key{movq} \; \Arg, \Arg \mid
  249. \key{callq} \; \mathit{label} \mid
  250. \key{pushq}\;\Arg \mid \key{popq}\;\Arg \mid \key{retq} \\
  251. \Prog &::= & \key{.globl \_main}\\
  252. & & \key{\_main:} \; \Instr^{+}
  253. \end{array}
  254. \]
  255. \end{minipage}
  256. }
  257. \caption{A subset of the x86-64 assembly language.}
  258. \label{fig:x86-a}
  259. \end{figure}
  260. Figure~\ref{fig:p0-x86} depicts an x86-64 program that is equivalent
  261. to $\BINOP{+}{10}{32}$. The \key{globl} directive says that the
  262. \key{\_main} procedure is externally visible, which is necessary so
  263. that the operating system can call it. The label \key{\_main:}
  264. indicates the beginning of the \key{\_main} procedure. The
  265. instruction $\key{movq}\,\$10, \%\key{rax}$ puts $10$ into the
  266. register \key{rax}. The following instruction $\key{addq}\,\key{\$}32,
  267. \key{\%rax}$ adds $32$ to the $10$ in \key{rax} and puts the result,
  268. $42$, back into \key{rax}. The instruction \key{retq} finishes the
  269. \key{\_main} function by returning the integer in the \key{rax}
  270. register to the operating system.
  271. \begin{figure}[htbp]
  272. \centering
  273. \begin{minipage}{0.6\textwidth}
  274. \begin{lstlisting}
  275. .globl _main
  276. _main:
  277. movq $10, %rax
  278. addq $32, %rax
  279. retq
  280. \end{lstlisting}
  281. \end{minipage}
  282. \caption{A simple x86-64 program equivalent to $\BINOP{+}{10}{32}$.}
  283. \label{fig:p0-x86}
  284. \end{figure}
  285. The next example exhibits the use of memory. Figure~\ref{fig:p1-x86}
  286. lists an x86-64 program that is equivalent to $\BINOP{+}{52}{
  287. \UNIOP{-}{10} }$. To understand how this x86-64 program uses memory,
  288. we need to explain a region of memory called called the
  289. \emph{procedure call stack} (\emph{stack} for short). The stack
  290. consists of a separate \emph{frame} for each procedure call. The
  291. memory layout for an individual frame is shown in
  292. Figure~\ref{fig:frame}. The register \key{rsp} is called the
  293. \emph{stack pointer} and points to the item at the top of the
  294. stack. The stack grows downward in memory, so we increase the size of
  295. the stack by subtracting from the stack pointer. The frame size is
  296. required to be a multiple of 16 bytes. The register \key{rbp} is the
  297. \emph{base pointer} which serves two purposes: 1) it saves the
  298. location of the stack pointer for the procedure that called the
  299. current one and 2) it is used to access variables associated with the
  300. current procedure. We number the variables from $1$ to $n$. Variable
  301. $1$ is stored at address $-8\key{(\%rbp)}$, variable $2$ at
  302. $-16\key{(\%rbp)}$, etc.
  303. \begin{figure}
  304. \centering
  305. \begin{minipage}{0.6\textwidth}
  306. \begin{lstlisting}
  307. .globl _main
  308. _main:
  309. pushq %rbp
  310. movq %rsp, %rbp
  311. subq $16, %rsp
  312. movq $10, -8(%rbp)
  313. negq -8(%rbp)
  314. movq $52, %rax
  315. addq -8(%rbp), %rax
  316. addq $16, %rsp
  317. popq %rbp
  318. retq
  319. \end{lstlisting}
  320. \end{minipage}
  321. \caption{An x86-64 program equivalent to $\BINOP{+}{52}{\UNIOP{-}{10} }$.}
  322. \label{fig:p1-x86}
  323. \end{figure}
  324. \begin{figure}
  325. \centering
  326. \begin{tabular}{|r|l|} \hline
  327. Position & Contents \\ \hline
  328. 8(\key{\%rbp}) & return address \\
  329. 0(\key{\%rbp}) & old \key{rbp} \\
  330. -8(\key{\%rbp}) & variable $1$ \\
  331. -16(\key{\%rbp}) & variable $2$ \\
  332. \ldots & \ldots \\
  333. 0(\key{\%rsp}) & variable $n$\\ \hline
  334. \end{tabular}
  335. \caption{Memory layout of a frame.}
  336. \label{fig:frame}
  337. \end{figure}
  338. Getting back to the program in Figure~\ref{fig:p1-x86}, the first
  339. three instructions are the typical prelude for a procedure. The
  340. instruction \key{pushq \%rbp} saves the base pointer for the procedure
  341. that called the current one onto the stack and subtracts $8$ from the
  342. stack pointer. The second instruction \key{movq \%rsp, \%rbp} changes
  343. the base pointer to the top of the stack. The instruction \key{subq
  344. \$16, \%rsp} moves the stack pointer down to make enough room for
  345. storing variables. This program just needs one variable ($8$ bytes)
  346. but because the frame size is required to be a multiple of 16 bytes,
  347. it rounds to 16 bytes.
  348. The next four instructions carry out the work of computing
  349. $\BINOP{+}{52}{\UNIOP{-}{10} }$. The first instruction \key{movq \$10,
  350. -8(\%rbp)} stores $10$ in variable $1$. The instruction \key{negq
  351. -8(\%rbp)} changes variable $1$ to $-10$. The \key{movq \$52, \%rax}
  352. places $52$ in the register \key{rax} and \key{addq -8(\%rbp), \%rax}
  353. adds the contents of variable $1$ to \key{rax}, at which point
  354. \key{rax} contains $42$.
  355. The last three instructions are the typical \emph{conclusion} of a
  356. procedure. The \key{addq \$16, \%rsp} instruction moves the stack
  357. pointer back to point at the old base pointer. The amount added here
  358. needs to match the amount that was subtracted in the prelude of the
  359. procedure. Then \key{popq \%rbp} returns the old base pointer to
  360. \key{rbp} and adds $8$ to the stack pointer. The \key{retq}
  361. instruction jumps back to the procedure that called this one and
  362. subtracts 8 from the stack pointer.
  363. The compiler will need a convenient representation for manipulating
  364. x86 programs, so we define an abstract syntax for x86 in
  365. Figure~\ref{fig:x86-ast-a}. The \itm{info} field of the \key{program}
  366. AST node is for storing auxilliary information that needs to be
  367. communicated from one pass to the next. The function \key{print-x86}
  368. provided in the supplemental code converts an x86 abstract syntax tree
  369. into the text representation for x86 (Figure~\ref{fig:x86-a}).
  370. \begin{figure}[tbp]
  371. \fbox{
  372. \begin{minipage}{0.96\textwidth}
  373. \[
  374. \begin{array}{lcl}
  375. \Arg &::=& \INT{\Int} \mid \REG{\itm{register}}
  376. \mid \STACKLOC{\Int} \\
  377. \Instr &::=& (\key{add} \; \Arg\; \Arg) \mid
  378. (\key{sub} \; \Arg\; \Arg) \mid
  379. (\key{imul} \; \Arg\;\Arg) \mid
  380. (\key{neg} \; \Arg) \mid \\
  381. && (\key{mov} \; \Arg\; \Arg) \mid
  382. (\key{call} \; \mathit{label}) \mid
  383. (\key{push}\;\Arg) \mid (\key{pop}\;\Arg) \mid (\key{ret}) \\
  384. \Prog &::= & (\key{program} \;\itm{info} \; \Instr^{+})
  385. \end{array}
  386. \]
  387. \end{minipage}
  388. }
  389. \caption{Abstract syntax for x86-64 assembly.}
  390. \label{fig:x86-ast-a}
  391. \end{figure}
  392. \section{Planning the route from $S_0$ to x86-64}
  393. \label{sec:plan-s0-x86}
  394. To compile one language to another it helps to focus on the
  395. differences between the two languages. It is these differences that
  396. the compiler will need to bridge. What are the differences between
  397. $S_0$ and x86-64 assembly? Here we list some of the most important the
  398. differences.
  399. \begin{enumerate}
  400. \item x86-64 arithmetic instructions typically take two arguments and
  401. update the second argument in place. In contrast, $S_0$ arithmetic
  402. operations only read their arguments and produce a new value.
  403. \item An argument to an $S_0$ operator can be any expression, whereas
  404. x86-64 instructions restrict their arguments to integers, registers,
  405. and memory locations.
  406. \item An $S_0$ program can have any number of variables whereas x86-64
  407. has only 16 registers.
  408. \item Variables in $S_0$ can overshadow other variables with the same
  409. name. The registers and memory locations of x86-64 all have unique
  410. names.
  411. \end{enumerate}
  412. We ease the challenge of compiling from $S_0$ to x86 by breaking down
  413. the problem into several steps, dealing with the above differences one
  414. at a time. The main question then becomes: in what order to we tackle
  415. these differences? This is often one of the most challenging questions
  416. that a compiler writer must answer because some orderings may be much
  417. more difficult to implement than others. It is difficult to know ahead
  418. of time which orders will be better so often some trial-and-error is
  419. involved. However, we can try to plan ahead and choose the orderings
  420. based on what we find out.
  421. For example, to handle difference \#2 (nested expressions), we shall
  422. introduce new variables and pull apart the nested expressions into a
  423. sequence of assignment statements. To deal with difference \#3 we
  424. will be replacing variables with registers and/or stack
  425. locations. Thus, it makes sense to deal with \#2 before \#3 so that
  426. \#3 can replace both the original variables and the new ones. Next,
  427. consider where \#1 should fit in. Because it has to do with the format
  428. of x86 instructions, it makes more sense after we have flattened the
  429. nested expressions (\#2). Finally, when should we deal with \#4
  430. (variable overshadowing)? We shall be solving this problem by
  431. renaming variables to make sure they have unique names. Recall that
  432. our plan for \#2 involves moving nested expressions, which could be
  433. problematic if it changes the shadowing of variables. However, if we
  434. deal with \#4 first, then it will not be an issue. Thus, we arrive at
  435. the following ordering.
  436. \[
  437. \xymatrix{
  438. 4 \ar[r] & 2 \ar[r] & 1 \ar[r] & 3
  439. }
  440. \]
  441. We further simplify the translation from $S_0$ to x86 by identifying
  442. an intermediate language named $C_0$, roughly half-way between $S_0$
  443. and x86, to provide a rest stop along the way. The name $C_0$ comes
  444. from this language being vaguely similar to the $C$ language. The
  445. differences \#4 and \#1, regarding variables and nested expressions,
  446. are handled by the passes \textsf{uniquify} and \textsf{flatten} that
  447. bring us to $C_0$.
  448. \[\large
  449. \xymatrix@=50pt{
  450. S_0 \ar@/^/[r]^-{\textsf{uniquify}} &
  451. S_0 \ar@/^/[r]^-{\textsf{flatten}} &
  452. C_0
  453. }
  454. \]
  455. The syntax for $C_0$ is defined in Figure~\ref{fig:c0-syntax}. The
  456. $C_0$ language supports the same operators as $S_0$ but the arguments
  457. of operators are now restricted to just variables and integers. The
  458. \key{let} construct of $S_0$ is replaced by an assignment statement
  459. and there is a \key{return} construct to specify the return value of
  460. the program. A program consists of a sequence of statements that
  461. include at least one \key{return} statement.
  462. \begin{figure}[htbp]
  463. \[
  464. \begin{array}{lcl}
  465. \Arg &::=& \Int \mid \Var \\
  466. \Exp &::=& \Arg \mid (\Op \; \Arg^{*})\\
  467. \Stmt &::=& \ASSIGN{\Var}{\Exp} \mid \RETURN{\Arg} \\
  468. \Prog & ::= & (\key{program}\;\itm{info}\;\Stmt^{+})
  469. \end{array}
  470. \]
  471. \caption{The $C_0$ intermediate language.}
  472. \label{fig:c0-syntax}
  473. \end{figure}
  474. To get from $C_0$ to x86-64 assembly requires three more steps, which
  475. we discuss below.
  476. \[\large
  477. \xymatrix@=50pt{
  478. C_0 \ar@/^/[r]^-{\textsf{select\_instr.}}
  479. & \text{x86}^{*} \ar@/^/[r]^-{\textsf{assign\_homes}}
  480. & \text{x86}^{*} \ar@/^/[r]^-{\textsf{patch\_instr.}}
  481. & \text{x86}
  482. }
  483. \]
  484. We handle difference \#1, concerning the format of arithmetic
  485. instructions, in the \textsf{select\_instructions} pass. The result
  486. of this pass produces programs consisting of x86-64 instructions that
  487. use variables.
  488. %
  489. As there are only 16 registers, we cannot always map variables to
  490. registers (difference \#3). Fortunately, the stack can grow quite, so
  491. we can map variables to locations on the stack. This is handled in the
  492. \textsf{assign\_homes} pass. The topic of
  493. Chapter~\ref{ch:register-allocation} is implementing a smarter
  494. approach in which we make a best-effort to map variables to registers,
  495. resorting to the stack only when necessary.
  496. The final pass in our journey to x86 handles an indiosycracy of x86
  497. assembly. Many x86 instructions have two arguments but only one of the
  498. arguments may be a memory reference. Because we are mapping variables
  499. to stack locations, many of our generated instructions will violate
  500. this restriction. The purpose of the \textsf{patch\_instructions} pass
  501. is to fix this problem by replacing every bad instruction with a short
  502. sequence of instructions that use the \key{rax} register.
  503. \section{Uniquify Variables}
  504. The purpose of this pass is to make sure that each \key{let} uses a
  505. unique variable name. For example, the \textsf{uniquify} pass could
  506. translate
  507. \[
  508. \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
  509. \]
  510. to
  511. \[
  512. \LET{x.1}{32}{ \BINOP{+}{ \LET{x.2}{10}{x.2} }{ x.1 } }
  513. \]
  514. We recommend implementing \textsf{uniquify} as a recursive function
  515. that mostly just copies the input program. However, when encountering
  516. a \key{let}, it should generate a unique name for the variable (the
  517. Racket function \key{gensym} is handy for this) and associate the old
  518. name with the new unique name in an association list. The
  519. \textsf{uniquify} function will need to access this association list
  520. when it gets to a variable reference, so we add another paramter to
  521. \textsf{uniquify} for the association list.
  522. \section{Flatten Expressions}
  523. The purpose of the \textsf{flatten} pass is to get rid of nested
  524. expressions, such as the $\UNIOP{-}{10}$ in the following program,
  525. without changing the behavior of the program.
  526. \[
  527. \BINOP{+}{52}{ \UNIOP{-}{10} }
  528. \]
  529. This can be accomplished by introducing a new variable, assigning the
  530. nested expression to the new variable, and then using the new variable
  531. in place of the nested expressions. For example, the above program is
  532. translated to the following one.
  533. \[
  534. \begin{array}{l}
  535. \ASSIGN{ \itm{x} }{ \UNIOP{-}{10} } \\
  536. \RETURN{ \BINOP{+}{52}{ \itm{x} } }
  537. \end{array}
  538. \]
  539. We recommend implementing \textsf{flatten} as a recursive function
  540. that returns two things, 1) the newly flattened expression, and 2) a
  541. list of assignment statements, one for each of the new variables
  542. introduced while flattening the expression.
  543. Take special care for programs such as the following that initialize
  544. variables with integers or other variables.
  545. \[
  546. \LET{a}{42}{ \LET{b}{a}{ b }}
  547. \]
  548. This program should be translated to
  549. \[
  550. \ASSIGN{a}{42} \;
  551. \ASSIGN{b}{a} \;
  552. \RETURN{b}
  553. \]
  554. and not the following, which could result from a naive implementation
  555. of \textsf{flatten}.
  556. \[
  557. \ASSIGN{x.1}{42}\;
  558. \ASSIGN{a}{x.1}\;
  559. \ASSIGN{x.2}{a}\;
  560. \ASSIGN{b}{x.2}\;
  561. \RETURN{b}
  562. \]
  563. \section{Select Instructions}
  564. In the \textsf{select\_instructions} pass we begin the work of
  565. translating from $C_0$ to x86. The target language of this pass is a
  566. pseudo-x86 language that still uses variables, so we add an AST node
  567. of the form $\VAR{\itm{var}}$. The \textsf{select\_instructions} pass
  568. deals with the differing format of arithmetic operations. For example,
  569. in $C_0$ an addition operation could take the following form:
  570. \[
  571. \ASSIGN{x}{ \BINOP{+}{10}{32} }
  572. \]
  573. To translate to x86, we need to express this addition using the
  574. \key{add} instruction that does an inplace update. So we first move
  575. $10$ to $x$ then perform the \key{add}.
  576. \[
  577. (\key{mov}\,\INT{10}\, \VAR{x})\; (\key{add} \;\INT{32}\; \VAR{x})
  578. \]
  579. There are some cases that require special care to avoid generating
  580. needlessly complicated code. If one of the arguments is the same as
  581. the left-hand side of the assignment, then there is no need for the
  582. extra move instruction. For example, the following
  583. \[
  584. \ASSIGN{x}{ \BINOP{+}{10}{x} }
  585. \quad\text{should translate to}\quad
  586. (\key{add} \; \INT{10}\; \VAR{x})
  587. \]
  588. Regarding the \RETURN{e} statement of $C_0$, we recommend treating it
  589. as an assignment to the \key{rax} register and let the procedure
  590. conclusion handle the transfer of control back to the calling
  591. procedure.
  592. \section{Assign Homes}
  593. As discussed in Section~\ref{sec:plan-s0-x86}, the
  594. \textsf{assign\_homes} pass places all of the variables on the stack.
  595. Consider again the example $S_0$ program $\BINOP{+}{52}{ \UNIOP{-}{10} }$,
  596. which after \textsf{select\_instructions} looks like the following.
  597. \[
  598. \begin{array}{l}
  599. (\key{mov}\;\INT{10}\; \VAR{x})\\
  600. (\key{neg}\; \VAR{x})\\
  601. (\key{mov}\; \INT{52}\; \REG{\itm{rax}})\\
  602. (\key{add}\; \VAR{x} \REG{\itm{rax}})
  603. \end{array}
  604. \]
  605. The one and only variable $x$ is assigned to stack location
  606. \key{-8(\%rbp)}, so the \textsf{assign\_homes} pass translates the
  607. above to
  608. \[
  609. \begin{array}{l}
  610. (\key{mov}\;\INT{10}\; \STACKLOC{{-}8})\\
  611. (\key{neg}\; \STACKLOC{{-}8})\\
  612. (\key{mov}\; \INT{52}\; \REG{\itm{rax}})\\
  613. (\key{add}\; \STACKLOC{{-}8}\; \REG{\itm{rax}})
  614. \end{array}
  615. \]
  616. In the process of assigning stack locations to variables, it is
  617. convenient to compute and store the size of the frame which will be
  618. needed later to generate the procedure conclusion.
  619. \section{Patch Instructions}
  620. The purpose of this pass is to make sure that each instruction adheres
  621. to the restrictions regarding which arguments can be memory
  622. references. For most instructions, the rule is that at most one
  623. argument may be a memory reference.
  624. Consider again the following example.
  625. \[
  626. \LET{a}{42}{ \LET{b}{a}{ b }}
  627. \]
  628. After \textsf{assign\_homes} pass, the above has been translated to
  629. \[
  630. \begin{array}{l}
  631. (\key{mov} \;\INT{42}\; \STACKLOC{{-}8})\\
  632. (\key{mov}\;\STACKLOC{{-}8}\; \STACKLOC{{-}16})\\
  633. (\key{mov}\;\STACKLOC{{-}16}\; \REG{\itm{rax}})
  634. \end{array}
  635. \]
  636. The second \key{mov} instruction is problematic because both arguments
  637. are stack locations. We suggest fixing this problem by moving from the
  638. source to \key{rax} and then from \key{rax} to the destination, as
  639. follows.
  640. \[
  641. \begin{array}{l}
  642. (\key{mov} \;\INT{42}\; \STACKLOC{{-}8})\\
  643. (\key{mov}\;\STACKLOC{{-}8}\; \REG{\itm{rax}})\\
  644. (\key{mov}\;\REG{\itm{rax}}\; \STACKLOC{{-}16})\\
  645. (\key{mov}\;\STACKLOC{{-}16}\; \REG{\itm{rax}})
  646. \end{array}
  647. \]
  648. The \key{imul} instruction is a special case because the destination
  649. argument must be a register.
  650. \section{Testing with Interpreters}
  651. The typical way to test a compiler is to run the generated assembly
  652. code on a diverse set of programs and check whether they behave as
  653. expected. However, when a compiler is structured as our is, with many
  654. passes, when there is an error in the generated assembly code it can
  655. be hard to determine which pass contains the source of the error. A
  656. good way to isolate the error is to not only test the generated
  657. assembly code but to also test the output of every pass. This requires
  658. having interpreters for all the intermediate languages. Indeed, the
  659. file \key{interp.rkt} in the supplemental code provides interpreters
  660. for all the intermediate languages described in this book, starting
  661. with interpreters for $S_0$, $C_0$, and x86 (in abstract syntax).
  662. The file \key{run-tests.rkt} automates the process of running the
  663. interpreters on the output programs of each pass and checking their
  664. result.
  665. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  666. \chapter{Register Allocation}
  667. \label{ch:register-allocation}
  668. % three new passes between instruction selection and spill code
  669. % uncover-live
  670. % build-interference
  671. % allocate registers (uses assign-homes)
  672. \[
  673. \xymatrix{
  674. C_0 \ar@/^/[r]^-{\textsf{select\_instr.}}
  675. & \text{x86}^{*} \ar[d]^-{\textsf{uncover\_live}} \\
  676. & \text{x86}^{*} \ar[d]^-{\textsf{build\_interference}} \\
  677. & \text{x86}^{*} \ar[d]_-{\textsf{allocate\_register}} \\
  678. & \text{x86}^{*} \ar@/^/[r]^-{\textsf{patch\_instr.}}
  679. & \text{x86}
  680. }
  681. \]
  682. % example
  683. % some vars with disjoint live ranges: x y
  684. % some vars with overlapping live ranges: z
  685. \begin{lstlisting}
  686. (let ([x 30])
  687. (let ([z (+ x 4)])
  688. (let ([y 2])
  689. (let ([w (+ z 10)])
  690. (- w y)))))
  691. \end{lstlisting}
  692. after select instructions
  693. \begin{lstlisting}
  694. (program (x z y w)
  695. (mov (int 30) (var x))
  696. (mov (var x) (var z))
  697. (add (int 4) (var z))
  698. (mov (int 2) (var y))
  699. (mov (var z) (var w))
  700. (add (int 10) (var w))
  701. (mov (var w) (reg rax))
  702. (sub (var y) (reg rax)))
  703. \end{lstlisting}
  704. \section{Liveness Analysis}
  705. \begin{lstlisting}
  706. (program (x z y w)
  707. ; { }
  708. (mov (int 30) (var x))
  709. ; { x }
  710. (mov (var x) (var z))
  711. ; { z }
  712. (add (int 4) (var z))
  713. ; { z }
  714. (mov (int 2) (var y))
  715. ; { y, z }
  716. (mov (var z) (var w))
  717. ; { w, y }
  718. (add (int 10) (var w))
  719. ; { w, y }
  720. (mov (var w) (reg rax))
  721. ; { y, rax }
  722. (sub (var y) (reg rax)))
  723. \end{lstlisting}
  724. \section{Build Interference Graph}
  725. %% (hash
  726. %% 'z1498
  727. %% (set 'rax 'x1497 'y1499)
  728. %% 'x1497
  729. %% (set 'z1498)
  730. %% 'rax
  731. %% (set 'z1498 'y1499)
  732. %% 'y1499
  733. %% (set 'rax 'z1498)))
  734. \[
  735. \xymatrix{
  736. w \ar@{-}[d] \ar@{-}[dr] & x \ar@{-}[d] \\
  737. y \ar@{-}[r] & z
  738. }
  739. \]
  740. \section{Graph Coloring via Sudoku}
  741. Suppose only \key{rbx} is available for use by the register allocator.
  742. \[
  743. \xymatrix{
  744. w:\key{-8(\%rbp)} \ar@{-}[d] \ar@{-}[dr] & x:\itm{rbx} \ar@{-}[d] \\
  745. y:\itm{rbx} \ar@{-}[r] & z:\key{-16(\%rbp)}
  746. }
  747. \]
  748. \begin{lstlisting}
  749. movq $30, %rbx
  750. movq %rbx, -16(%rbp)
  751. addq $4, -16(%rbp)
  752. movq $2, %rbx
  753. movq -16(%rbp), -8(%rbp)
  754. addq $10, -8(%rbp)
  755. movq -8(%rbp), %rax
  756. subq %rbx, %rax
  757. \end{lstlisting}
  758. patch instructions fixes the move from
  759. \key{-16(\%rbp)} to \key{-8(\%rbp)}.
  760. \begin{lstlisting}
  761. movq -16(%rbp), %rax
  762. movq %rax, -8(%rbp)
  763. \end{lstlisting}
  764. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  765. \chapter{Booleans, Conditions, and Type Checking}
  766. \label{ch:bool-types}
  767. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  768. \chapter{Tuples and Heap Allocation}
  769. \label{ch:tuples}
  770. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  771. \chapter{Functions}
  772. \label{ch:functions}
  773. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  774. \chapter{Lexically Scoped Functions}
  775. \label{ch:lambdas}
  776. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  777. \chapter{Mutable Data}
  778. \label{ch:mutable-data}
  779. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  780. \chapter{The Dynamic Type}
  781. \label{ch:type-dynamic}
  782. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  783. \chapter{Parametric Polymorphism}
  784. \label{ch:parametric-polymorphism}
  785. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  786. \chapter{High-level Optimization}
  787. \label{ch:high-level-optimization}
  788. \bibliographystyle{plainnat}
  789. \bibliography{all}
  790. \end{document}
  791. %% LocalWords: Dybvig Waddell Abdulaziz Ghuloum Dipanwita
  792. %% LocalWords: Sarkar lcl Matz aa representable