123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446 |
- \documentclass[12pt]{book}
- \usepackage[T1]{fontenc}
- \usepackage[utf8]{inputenc}
- \usepackage{lmodern}
- \usepackage{hyperref}
- \usepackage{graphicx}
- \usepackage[english]{babel}
- \usepackage{listings}
- \usepackage{amsmath}
- \usepackage{amsthm}
- \usepackage{amssymb}
- \usepackage{natbib}
- \usepackage{stmaryrd}
- \usepackage{xypic}
- \usepackage{semantic}
- \lstset{%
- language=Lisp,
- basicstyle=\ttfamily\small,
- escapechar=@
- }
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % 'dedication' environment: To add a dedication paragraph at the start of book %
- % Source: http://www.tug.org/pipermail/texhax/2010-June/015184.html %
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \newenvironment{dedication}
- {
- \cleardoublepage
- \thispagestyle{empty}
- \vspace*{\stretch{1}}
- \hfill\begin{minipage}[t]{0.66\textwidth}
- \raggedright
- }
- {
- \end{minipage}
- \vspace*{\stretch{3}}
- \clearpage
- }
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % Chapter quote at the start of chapter %
- % Source: http://tex.stackexchange.com/a/53380 %
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \makeatletter
- \renewcommand{\@chapapp}{}% Not necessary...
- \newenvironment{chapquote}[2][2em]
- {\setlength{\@tempdima}{#1}%
- \def\chapquote@author{#2}%
- \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax%
- \itshape}
- {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip}
- \makeatother
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \newcommand{\itm}[1]{\ensuremath{\mathit{#1}}}
- \newcommand{\Stmt}{\itm{stmt}}
- \newcommand{\Exp}{\itm{exp}}
- \newcommand{\Instr}{\itm{instr}}
- \newcommand{\Prog}{\itm{prog}}
- \newcommand{\Arg}{\itm{arg}}
- \newcommand{\Int}{\itm{int}}
- \newcommand{\Var}{\itm{var}}
- \newcommand{\Op}{\itm{op}}
- \newcommand{\key}[1]{\texttt{#1}}
- \newcommand{\READ}{(\key{read})}
- \newcommand{\UNIOP}[2]{(\key{#1}\,#2)}
- \newcommand{\BINOP}[3]{(\key{#1}\,#2\,#3)}
- \newcommand{\LET}[3]{(\key{let}\,([#1\;#2])\,#3)}
- \newcommand{\ASSIGN}[2]{(\key{assign}\,#1\;#2)}
- \newcommand{\RETURN}[1]{(\key{return}\,#1)}
- \newcommand{\INT}[1]{(\key{int}\;#1)}
- \newcommand{\REG}[1]{(\key{reg}\;#1)}
- \newcommand{\VAR}[1]{(\key{var}\;#1)}
- \newcommand{\STACKLOC}[1]{(\key{stack}\;#1)}
- \newcommand{\IF}[3]{(\key{if}\,#1\;#2\;#3)}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \title{\Huge \textbf{Essentials of Compilation} \\
- \huge An Incremental Approach}
- \author{\textsc{Jeremy G. Siek} \\
- %\thanks{\url{http://homes.soic.indiana.edu/jsiek/}} \\
- Indiana University \\
- \\
- with contributions from: \\
- Carl Factora
- }
- \begin{document}
- \frontmatter
- \maketitle
- \begin{dedication}
- This book is dedicated to the programming language wonks at Indiana
- University.
- \end{dedication}
- \tableofcontents
- %\listoffigures
- %\listoftables
- \mainmatter
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter*{Preface}
- Talk about nano-pass \citep{Sarkar:2004fk,Keep:2012aa} and incremental
- compilers \citep{Ghuloum:2006bh}.
- %\section*{Structure of book}
- % You might want to add short description about each chapter in this book.
- %\section*{About the companion website}
- %The website\footnote{\url{https://github.com/amberj/latex-book-template}} for %this file contains:
- %\begin{itemize}
- % \item A link to (freely downlodable) latest version of this document.
- % \item Link to download LaTeX source for this document.
- % \item Miscellaneous material (e.g. suggested readings etc).
- %\end{itemize}
- \section*{Acknowledgments}
- Need to give thanks to
- \begin{itemize}
- \item Kent Dybvig
- \item Daniel P. Friedman
- \item Abdulaziz Ghuloum
- \item Oscar Waddell
- \item Dipanwita Sarkar
- \item Ronald Garcia
- \item Bor-Yuh Evan Chang
- \end{itemize}
- %\mbox{}\\
- %\noindent Amber Jain \\
- %\noindent \url{http://amberj.devio.us/}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Abstract Syntax Trees, Matching, and Recursion}
- \label{ch:trees-recur}
- \section{Abstract Syntax Trees}
- % \begin{enumerate}
- % \item language representation
- % \item reading grammars
- % \end{enumerate}
- Abstract syntax trees (AST) are used to represent and model the syntax of a
- language. In compiler implementation, we use them to represent intermediary
- languages (IL). Representing ILs with ASTs allow us to categorize expressions
- our language along with the restricting the context in which they can
- appear. A simple example is the representation of the untyped
- \mbox{\(\lambda\)-calculus} with simple arithmetic operators. For our
- purposes, we use Racket syntax.
- \begin{verbatim}
- op ::= + | - | *
- exp ::= n | (op exp*) | x | (lambda (x) exp) | (exp exp)
- \end{verbatim}
- With this specification, we can more easily perform \textit{syntax
- transformations} on any expression within the given language (i.e.,
- \(\lambda\)-calculus). In the above AST, the syntax {\tt exp*} signifies
- \textit{zero or more} {\tt exp}. Later on in this chapter, we show how
- to transform an arbitrary \(\lambda\)-term into the equivalent
- \textit{de-Bruijinized} \(\lambda\)-term.
- \section{Using Match}
- % \begin{enumerate}
- % \item Syntax transformation
- % \item Some Racket examples (factorial?)
- % \end{enumerate}
- Racket provides a built-in pattern-matcher, {\tt match}, that we can use to
- perform syntax transformations. As a preliminary example, we include a
- familiar definition of factorial, without using match.
- \begin{verbatim}
- (define (! n)
- (if (zero? n) 1
- (* n (! (sub1 n)))))
- \end{verbatim}
- In this form of factorial, we are simply conditioning on the inputted
- natural number, {\tt n}. If we rewrite factorial to use {\tt match}, we can
- match on the actual value of {\tt n}.
- \begin{verbatim}
- (define (! n)
- (match n
- (0 1)
- (n (* n (! (sub1 n))))))
- \end{verbatim}
- Of course, we can also use {\tt match} to pattern match on more complex
- expressions.
- If we were told to write a function that takes a \(\lambda\)-term as input,
- we can match on the values of \textit{syntax-expressions} ({\tt sexp}). We
- can then represent the language of Figure ?? with the following function
- that uses {\tt match}.
- \begin{verbatim}
- (lambda (exp)
- (match exp
- ((? number?) ...)
- ((? symbol?) ...)
- (`(,op exp* ...)
- #:when (memv op '(+ - *))
- ...)
- (`(lambda (,x) ,b) ...)
- (`(,e1 ,e2) ...)))
- \end{verbatim}
- It's easy to get lost in Racket's {\tt match} syntax. To understand this,
- we can represent the possible ways of writing \textit{left-hand side} (LHS)
- match expressions.
- \begin{verbatim}
- exp ::= val | (unquote val) | (exp exp*)
- lhs ::= val | (quote val*) | (quasi-quote exp) | (? Racket-pred)
- \end{verbatim}
- \section{Recursion}
- % \begin{enumerate}
- % \item \textit{What is a base case?}
- % \item Using on a language (lambda calculus ->
- % \end{enumerate}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Integers and Variables}
- \label{ch:int-exp}
- %\begin{chapquote}{Author's name, \textit{Source of this quote}}
- %``This is a quote and I don't know who said this.''
- %\end{chapquote}
- \section{The $S_0$ Language}
- The $S_0$ language includes integers, operations on integers,
- (arithmetic and input), and variable definitions. The syntax of the
- $S_0$ language is defined by the grammar in
- Figure~\ref{fig:s0-syntax}. This language is rich enough to exhibit
- several compilation techniques but simple enough so that we can
- implement a compiler for it in two weeks of hard work. To give the
- reader a feeling for the scale of this first compiler, the instructor
- solution for the $S_0$ compiler consists of 6 recursive functions and
- a few small helper functions that together span 256 lines of code.
- \begin{figure}[htbp]
- \centering
- \fbox{
- \begin{minipage}{0.85\textwidth}
- \[
- \begin{array}{lcl}
- \Op &::=& \key{+} \mid \key{-} \mid \key{*} \mid \key{read} \\
- \Exp &::=& \Int \mid (\Op \; \Exp^{*}) \mid \Var \mid \LET{\Var}{\Exp}{\Exp}
- \end{array}
- \]
- \end{minipage}
- }
- \caption{The syntax of the $S_0$ language. The abbreviation \Op{} is
- short for operator, \Exp{} is short for expression, \Int{} for integer,
- and \Var{} for variable.}
- \label{fig:s0-syntax}
- \end{figure}
- The result of evaluating an expression is a value. For $S_0$, values
- are integers. To make it straightforward to map these integers onto
- x86-64 assembly~\citep{Matz:2013aa}, we restrict the integers to just
- those representable with 64-bits, the range $-2^{63}$ to $2^{63}$.
- We will walk through some examples of $S_0$ programs, commenting on
- aspects of the language that will be relevant to compiling it. We
- start with one of the simplest $S_0$ programs; it adds two integers.
- \[
- \BINOP{+}{10}{32}
- \]
- The result is $42$, as you might expected.
- %
- The next example demonstrates that expressions may be nested within
- each other, in this case nesting several additions and negations.
- \[
- \BINOP{+}{10}{ \UNIOP{-}{ \BINOP{+}{12}{20} } }
- \]
- What is the result of the above program?
- The \key{let} construct stores a value in a variable which can then be
- used within the body of the \key{let}. So the following program stores
- $32$ in $x$ and then computes $\BINOP{+}{10}{x}$, producing $42$.
- \[
- \LET{x}{ \BINOP{+}{12}{20} }{ \BINOP{+}{10}{x} }
- \]
- When there are multiple \key{let}'s for the same variable, the closest
- enclosing \key{let} is used. Consider the following program with two
- \key{let}'s that define variables named $x$.
- \[
- \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
- \]
- For the purposes of showing which variable uses correspond to which
- definitions, the following shows the $x$'s annotated with subscripts
- to distinguish them.
- \[
- \LET{x_1}{32}{ \BINOP{+}{ \LET{x_2}{10}{x_2} }{ x_1 } }
- \]
- The \key{read} operation prompts the user of the program for an
- integer. Given an input of $10$, the following program produces $42$.
- \[
- \BINOP{+}{(\key{read})}{32}
- \]
- We include the \key{read} operation in $S_0$ to demonstrate that order
- of evaluation can make a different. Given the input $52$ then $10$,
- the following produces $42$ (and not $-42$).
- \[
- \LET{x}{\READ}{ \LET{y}{\READ}{ \BINOP{-}{x}{y} } }
- \]
- The initializing expression is always evaluated before the body of the
- \key{let}, so in the above, the \key{read} for $x$ is performed before
- the \key{read} for $y$.
- %
- The behavior of the following program is somewhat subtle because
- Scheme does not specify an evaluation order for arguments of an
- operator such as $-$.
- \[
- \BINOP{-}{\READ}{\READ}
- \]
- Given the input $42$ then $10$, the above program can result in either
- $42$ or $-42$, depending on the whims of the Scheme implementation.
- The goal for this chapter is to implement a compiler that translates
- any program $p \in S_0$ into a x86-64 assembly program $p'$ such that
- the assembly program exhibits the same behavior on an x86 computer as
- the $S_0$ program running in a Scheme implementation.
- \[
- \xymatrix{
- p \in S_0 \ar[rr]^{\text{compile}} \ar[drr]_{\text{run in Scheme}\quad} && p' \in \text{x86-64} \ar[d]^{\quad\text{run on an x86 machine}}\\
- & & n \in \mathbb{Z}
- }
- \]
- In the next section we introduce enough of the x86-64 assembly
- language to compile $S_0$.
- \section{The x86-64 Assembly Language}
- An x86-64 program is a sequence of instructions. The instructions
- manipulate 16 variables called \emph{registers} and can also load and
- store values into \emph{memory}. Memory is a mapping of 64-bit
- addresses to 64-bit values. The syntax $n(r)$ is used to read the
- address $a$ stored in register $r$ and then offset it by $n$ bytes (8
- bits), producing the address $a + n$. The arithmetic instructions,
- such as $\key{addq}\,s\,d$, read from the source $s$ and destination
- argument $d$, apply the arithmetic operation, then stores the result
- in the destination $d$. In this case, computing $d \gets d + s$. The
- move instruction, $\key{movq}\,s\,d$ reads from $s$ and stores the
- result in $d$. The $\key{callq}\,\mathit{label}$ instruction executes
- the procedure specified by the label, which we shall use to implement
- \key{read}. Figure~\ref{fig:x86-a} defines the syntax for this subset
- of the x86-64 assembly language.
- \begin{figure}[tbp]
- \fbox{
- \begin{minipage}{0.96\textwidth}
- \[
- \begin{array}{lcl}
- \itm{register} &::=& \key{rsp} \mid \key{rbp} \mid \key{rax} \mid \key{rbx} \mid \key{rcx}
- \mid \key{rdx} \mid \key{rsi} \mid \key{rdi} \mid \\
- && \key{r8} \mid \key{r9} \mid \key{r10}
- \mid \key{r11} \mid \key{r12} \mid \key{r13}
- \mid \key{r14} \mid \key{r15} \\
- \Arg &::=& \key{\$}\Int \mid \key{\%}\itm{register} \mid \Int(\key{\%}\itm{register}) \\
- \Instr &::=& \key{addq} \; \Arg, \Arg \mid
- \key{subq} \; \Arg, \Arg \mid
- \key{imulq} \; \Arg,\Arg \mid
- \key{negq} \; \Arg \mid \\
- && \key{movq} \; \Arg, \Arg \mid
- \key{callq} \; \mathit{label} \mid
- \key{pushq}\;\Arg \mid \key{popq}\;\Arg \mid \key{retq} \\
- \Prog &::= & \key{.globl \_main}\\
- & & \key{\_main:} \; \Instr^{+}
- \end{array}
- \]
- \end{minipage}
- }
- \caption{A subset of the x86-64 assembly language.}
- \label{fig:x86-a}
- \end{figure}
- Figure~\ref{fig:p0-x86} depicts an x86-64 program that is equivalent
- to $\BINOP{+}{10}{32}$. The \key{globl} directive says that the
- \key{\_main} procedure is externally visible, which is necessary so
- that the operating system can call it. The label \key{\_main:}
- indicates the beginning of the \key{\_main} procedure. The
- instruction $\key{movq}\,\$10, \%\key{rax}$ puts $10$ into the
- register \key{rax}. The following instruction $\key{addq}\,\key{\$}32,
- \key{\%rax}$ adds $32$ to the $10$ in \key{rax} and puts the result,
- $42$, back into \key{rax}. The instruction \key{retq} finishes the
- \key{\_main} function by returning the integer in the \key{rax}
- register to the operating system.
- \begin{figure}[htbp]
- \centering
- \begin{minipage}{0.6\textwidth}
- \begin{lstlisting}
- .globl _main
- _main:
- movq $10, %rax
- addq $32, %rax
- retq
- \end{lstlisting}
- \end{minipage}
- \caption{A simple x86-64 program equivalent to $\BINOP{+}{10}{32}$.}
- \label{fig:p0-x86}
- \end{figure}
- The next example exhibits the use of memory. Figure~\ref{fig:p1-x86}
- lists an x86-64 program that is equivalent to $\BINOP{+}{52}{
- \UNIOP{-}{10} }$. To understand how this x86-64 program uses memory,
- we need to explain a region of memory called called the
- \emph{procedure call stack} (\emph{stack} for short). The stack
- consists of a separate \emph{frame} for each procedure call. The
- memory layout for an individual frame is shown in
- Figure~\ref{fig:frame}. The register \key{rsp} is called the
- \emph{stack pointer} and points to the item at the top of the
- stack. The stack grows downward in memory, so we increase the size of
- the stack by subtracting from the stack pointer. The frame size is
- required to be a multiple of 16 bytes. The register \key{rbp} is the
- \emph{base pointer} which serves two purposes: 1) it saves the
- location of the stack pointer for the procedure that called the
- current one and 2) it is used to access variables associated with the
- current procedure. We number the variables from $1$ to $n$. Variable
- $1$ is stored at address $-8\key{(\%rbp)}$, variable $2$ at
- $-16\key{(\%rbp)}$, etc.
- \begin{figure}
- \centering
- \begin{minipage}{0.6\textwidth}
- \begin{lstlisting}
- .globl _main
- _main:
- pushq %rbp
- movq %rsp, %rbp
- subq $16, %rsp
- movq $10, -8(%rbp)
- negq -8(%rbp)
- movq $52, %rax
- addq -8(%rbp), %rax
- addq $16, %rsp
- popq %rbp
- retq
- \end{lstlisting}
- \end{minipage}
- \caption{An x86-64 program equivalent to $\BINOP{+}{52}{\UNIOP{-}{10} }$.}
- \label{fig:p1-x86}
- \end{figure}
- \begin{figure}
- \centering
- \begin{tabular}{|r|l|} \hline
- Position & Contents \\ \hline
- 8(\key{\%rbp}) & return address \\
- 0(\key{\%rbp}) & old \key{rbp} \\
- -8(\key{\%rbp}) & variable $1$ \\
- -16(\key{\%rbp}) & variable $2$ \\
- \ldots & \ldots \\
- 0(\key{\%rsp}) & variable $n$\\ \hline
- \end{tabular}
- \caption{Memory layout of a frame.}
- \label{fig:frame}
- \end{figure}
- Getting back to the program in Figure~\ref{fig:p1-x86}, the first
- three instructions are the typical prelude for a procedure. The
- instruction \key{pushq \%rbp} saves the base pointer for the procedure
- that called the current one onto the stack and subtracts $8$ from the
- stack pointer. The second instruction \key{movq \%rsp, \%rbp} changes
- the base pointer to the top of the stack. The instruction \key{subq
- \$16, \%rsp} moves the stack pointer down to make enough room for
- storing variables. This program just needs one variable ($8$ bytes)
- but because the frame size is required to be a multiple of 16 bytes,
- it rounds to 16 bytes.
- The next four instructions carry out the work of computing
- $\BINOP{+}{52}{\UNIOP{-}{10} }$. The first instruction \key{movq \$10,
- -8(\%rbp)} stores $10$ in variable $1$. The instruction \key{negq
- -8(\%rbp)} changes variable $1$ to $-10$. The \key{movq \$52, \%rax}
- places $52$ in the register \key{rax} and \key{addq -8(\%rbp), \%rax}
- adds the contents of variable $1$ to \key{rax}, at which point
- \key{rax} contains $42$.
- The last three instructions are the typical \emph{conclusion} of a
- procedure. The \key{addq \$16, \%rsp} instruction moves the stack
- pointer back to point at the old base pointer. The amount added here
- needs to match the amount that was subtracted in the prelude of the
- procedure. Then \key{popq \%rbp} returns the old base pointer to
- \key{rbp} and adds $8$ to the stack pointer. The \key{retq}
- instruction jumps back to the procedure that called this one and
- subtracts 8 from the stack pointer.
- The compiler will need a convenient representation for manipulating
- x86 programs, so we define an abstract syntax for x86 in
- Figure~\ref{fig:x86-ast-a}. The \itm{info} field of the \key{program}
- AST node is for storing auxilliary information that needs to be
- communicated from one pass to the next. The function \key{print-x86}
- provided in the supplemental code converts an x86 abstract syntax tree
- into the text representation for x86 (Figure~\ref{fig:x86-a}).
- \begin{figure}[tbp]
- \fbox{
- \begin{minipage}{0.96\textwidth}
- \[
- \begin{array}{lcl}
- \Arg &::=& \INT{\Int} \mid \REG{\itm{register}}
- \mid \STACKLOC{\Int} \\
- \Instr &::=& (\key{add} \; \Arg\; \Arg) \mid
- (\key{sub} \; \Arg\; \Arg) \mid
- (\key{imul} \; \Arg\;\Arg) \mid
- (\key{neg} \; \Arg) \mid \\
- && (\key{mov} \; \Arg\; \Arg) \mid
- (\key{call} \; \mathit{label}) \mid
- (\key{push}\;\Arg) \mid (\key{pop}\;\Arg) \mid (\key{ret}) \\
- \Prog &::= & (\key{program} \;\itm{info} \; \Instr^{+})
- \end{array}
- \]
- \end{minipage}
- }
- \caption{Abstract syntax for x86-64 assembly.}
- \label{fig:x86-ast-a}
- \end{figure}
- \section{From $S_0$ to x86-64 through $C_0$}
- \label{sec:plan-s0-x86}
- To compile one language to another it helps to focus on the
- differences between the two languages. It is these differences that
- the compiler will need to bridge. What are the differences between
- $S_0$ and x86-64 assembly? Here we list some of the most important the
- differences.
- \begin{enumerate}
- \item x86-64 arithmetic instructions typically take two arguments and
- update the second argument in place. In contrast, $S_0$ arithmetic
- operations only read their arguments and produce a new value.
- \item An argument to an $S_0$ operator can be any expression, whereas
- x86-64 instructions restrict their arguments to integers, registers,
- and memory locations.
- \item An $S_0$ program can have any number of variables whereas x86-64
- has only 16 registers.
- \item Variables in $S_0$ can overshadow other variables with the same
- name. The registers and memory locations of x86-64 all have unique
- names.
- \end{enumerate}
- We ease the challenge of compiling from $S_0$ to x86 by breaking down
- the problem into several steps, dealing with the above differences one
- at a time. The main question then becomes: in what order to we tackle
- these differences? This is often one of the most challenging questions
- that a compiler writer must answer because some orderings may be much
- more difficult to implement than others. It is difficult to know ahead
- of time which orders will be better so often some trial-and-error is
- involved. However, we can try to plan ahead and choose the orderings
- based on what we find out.
- For example, to handle difference \#2 (nested expressions), we shall
- introduce new variables and pull apart the nested expressions into a
- sequence of assignment statements. To deal with difference \#3 we
- will be replacing variables with registers and/or stack
- locations. Thus, it makes sense to deal with \#2 before \#3 so that
- \#3 can replace both the original variables and the new ones. Next,
- consider where \#1 should fit in. Because it has to do with the format
- of x86 instructions, it makes more sense after we have flattened the
- nested expressions (\#2). Finally, when should we deal with \#4
- (variable overshadowing)? We shall be solving this problem by
- renaming variables to make sure they have unique names. Recall that
- our plan for \#2 involves moving nested expressions, which could be
- problematic if it changes the shadowing of variables. However, if we
- deal with \#4 first, then it will not be an issue. Thus, we arrive at
- the following ordering.
- \[
- \xymatrix{
- 4 \ar[r] & 2 \ar[r] & 1 \ar[r] & 3
- }
- \]
- We further simplify the translation from $S_0$ to x86 by identifying
- an intermediate language named $C_0$, roughly half-way between $S_0$
- and x86, to provide a rest stop along the way. The name $C_0$ comes
- from this language being vaguely similar to the $C$ language. The
- differences \#4 and \#1, regarding variables and nested expressions,
- are handled by the passes \textsf{uniquify} and \textsf{flatten} that
- bring us to $C_0$.
- \[\large
- \xymatrix@=50pt{
- S_0 \ar@/^/[r]^-{\textsf{uniquify}} &
- S_0 \ar@/^/[r]^-{\textsf{flatten}} &
- C_0
- }
- \]
- The syntax for $C_0$ is defined in Figure~\ref{fig:c0-syntax}. The
- $C_0$ language supports the same operators as $S_0$ but the arguments
- of operators are now restricted to just variables and integers. The
- \key{let} construct of $S_0$ is replaced by an assignment statement
- and there is a \key{return} construct to specify the return value of
- the program. A program consists of a sequence of statements that
- include at least one \key{return} statement.
- \begin{figure}[htbp]
- \[
- \begin{array}{lcl}
- \Arg &::=& \Int \mid \Var \\
- \Exp &::=& \Arg \mid (\Op \; \Arg^{*})\\
- \Stmt &::=& \ASSIGN{\Var}{\Exp} \mid \RETURN{\Arg} \\
- \Prog & ::= & (\key{program}\;\itm{info}\;\Stmt^{+})
- \end{array}
- \]
- \caption{The $C_0$ intermediate language.}
- \label{fig:c0-syntax}
- \end{figure}
- To get from $C_0$ to x86-64 assembly requires three more steps, which
- we discuss below.
- \[\large
- \xymatrix@=50pt{
- C_0 \ar@/^/[r]^-{\textsf{select\_instr.}}
- & \text{x86}^{*} \ar@/^/[r]^-{\textsf{assign\_homes}}
- & \text{x86}^{*} \ar@/^/[r]^-{\textsf{patch\_instr.}}
- & \text{x86}
- }
- \]
- We handle difference \#1, concerning the format of arithmetic
- instructions, in the \textsf{select\_instructions} pass. The result
- of this pass produces programs consisting of x86-64 instructions that
- use variables.
- %
- As there are only 16 registers, we cannot always map variables to
- registers (difference \#3). Fortunately, the stack can grow quite, so
- we can map variables to locations on the stack. This is handled in the
- \textsf{assign\_homes} pass. The topic of
- Chapter~\ref{ch:register-allocation} is implementing a smarter
- approach in which we make a best-effort to map variables to registers,
- resorting to the stack only when necessary.
- The final pass in our journey to x86 handles an indiosycracy of x86
- assembly. Many x86 instructions have two arguments but only one of the
- arguments may be a memory reference. Because we are mapping variables
- to stack locations, many of our generated instructions will violate
- this restriction. The purpose of the \textsf{patch\_instructions} pass
- is to fix this problem by replacing every bad instruction with a short
- sequence of instructions that use the \key{rax} register.
- \section{Uniquify Variables}
- The purpose of this pass is to make sure that each \key{let} uses a
- unique variable name. For example, the \textsf{uniquify} pass could
- translate
- \[
- \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
- \]
- to
- \[
- \LET{x.1}{32}{ \BINOP{+}{ \LET{x.2}{10}{x.2} }{ x.1 } }
- \]
- We recommend implementing \textsf{uniquify} as a recursive function
- that mostly just copies the input program. However, when encountering
- a \key{let}, it should generate a unique name for the variable (the
- Racket function \key{gensym} is handy for this) and associate the old
- name with the new unique name in an association list. The
- \textsf{uniquify} function will need to access this association list
- when it gets to a variable reference, so we add another paramter to
- \textsf{uniquify} for the association list.
- \section{Flatten Expressions}
- The purpose of the \textsf{flatten} pass is to get rid of nested
- expressions, such as the $\UNIOP{-}{10}$ in the following program,
- without changing the behavior of the program.
- \[
- \BINOP{+}{52}{ \UNIOP{-}{10} }
- \]
- This can be accomplished by introducing a new variable, assigning the
- nested expression to the new variable, and then using the new variable
- in place of the nested expressions. For example, the above program is
- translated to the following one.
- \[
- \begin{array}{l}
- \ASSIGN{ \itm{x} }{ \UNIOP{-}{10} } \\
- \RETURN{ \BINOP{+}{52}{ \itm{x} } }
- \end{array}
- \]
- We recommend implementing \textsf{flatten} as a recursive function
- that returns two things, 1) the newly flattened expression, and 2) a
- list of assignment statements, one for each of the new variables
- introduced while flattening the expression.
- Take special care for programs such as the following that initialize
- variables with integers or other variables.
- \[
- \LET{a}{42}{ \LET{b}{a}{ b }}
- \]
- This program should be translated to
- \[
- \ASSIGN{a}{42} \;
- \ASSIGN{b}{a} \;
- \RETURN{b}
- \]
- and not the following, which could result from a naive implementation
- of \textsf{flatten}.
- \[
- \ASSIGN{x.1}{42}\;
- \ASSIGN{a}{x.1}\;
- \ASSIGN{x.2}{a}\;
- \ASSIGN{b}{x.2}\;
- \RETURN{b}
- \]
- \section{Select Instructions}
- In the \textsf{select\_instructions} pass we begin the work of
- translating from $C_0$ to x86. The target language of this pass is a
- pseudo-x86 language that still uses variables, so we add an AST node
- of the form $\VAR{\itm{var}}$. The \textsf{select\_instructions} pass
- deals with the differing format of arithmetic operations. For example,
- in $C_0$ an addition operation could take the following form:
- \[
- \ASSIGN{x}{ \BINOP{+}{10}{32} }
- \]
- To translate to x86, we need to express this addition using the
- \key{add} instruction that does an inplace update. So we first move
- $10$ to $x$ then perform the \key{add}.
- \[
- (\key{mov}\,\INT{10}\, \VAR{x})\; (\key{add} \;\INT{32}\; \VAR{x})
- \]
- There are some cases that require special care to avoid generating
- needlessly complicated code. If one of the arguments is the same as
- the left-hand side of the assignment, then there is no need for the
- extra move instruction. For example, the following
- \[
- \ASSIGN{x}{ \BINOP{+}{10}{x} }
- \quad\text{should translate to}\quad
- (\key{add} \; \INT{10}\; \VAR{x})
- \]
- Regarding the \RETURN{e} statement of $C_0$, we recommend treating it
- as an assignment to the \key{rax} register and let the procedure
- conclusion handle the transfer of control back to the calling
- procedure.
- \section{Assign Homes}
- As discussed in Section~\ref{sec:plan-s0-x86}, the
- \textsf{assign\_homes} pass places all of the variables on the stack.
- Consider again the example $S_0$ program $\BINOP{+}{52}{ \UNIOP{-}{10} }$,
- which after \textsf{select\_instructions} looks like the following.
- \[
- \begin{array}{l}
- (\key{mov}\;\INT{10}\; \VAR{x})\\
- (\key{neg}\; \VAR{x})\\
- (\key{mov}\; \INT{52}\; \REG{\itm{rax}})\\
- (\key{add}\; \VAR{x} \REG{\itm{rax}})
- \end{array}
- \]
- The one and only variable $x$ is assigned to stack location
- \key{-8(\%rbp)}, so the \textsf{assign\_homes} pass translates the
- above to
- \[
- \begin{array}{l}
- (\key{mov}\;\INT{10}\; \STACKLOC{{-}8})\\
- (\key{neg}\; \STACKLOC{{-}8})\\
- (\key{mov}\; \INT{52}\; \REG{\itm{rax}})\\
- (\key{add}\; \STACKLOC{{-}8}\; \REG{\itm{rax}})
- \end{array}
- \]
- In the process of assigning stack locations to variables, it is
- convenient to compute and store the size of the frame which will be
- needed later to generate the procedure conclusion.
- \section{Patch Instructions}
- The purpose of this pass is to make sure that each instruction adheres
- to the restrictions regarding which arguments can be memory
- references. For most instructions, the rule is that at most one
- argument may be a memory reference.
- Consider again the following example.
- \[
- \LET{a}{42}{ \LET{b}{a}{ b }}
- \]
- After \textsf{assign\_homes} pass, the above has been translated to
- \[
- \begin{array}{l}
- (\key{mov} \;\INT{42}\; \STACKLOC{{-}8})\\
- (\key{mov}\;\STACKLOC{{-}8}\; \STACKLOC{{-}16})\\
- (\key{mov}\;\STACKLOC{{-}16}\; \REG{\itm{rax}})
- \end{array}
- \]
- The second \key{mov} instruction is problematic because both arguments
- are stack locations. We suggest fixing this problem by moving from the
- source to \key{rax} and then from \key{rax} to the destination, as
- follows.
- \[
- \begin{array}{l}
- (\key{mov} \;\INT{42}\; \STACKLOC{{-}8})\\
- (\key{mov}\;\STACKLOC{{-}8}\; \REG{\itm{rax}})\\
- (\key{mov}\;\REG{\itm{rax}}\; \STACKLOC{{-}16})\\
- (\key{mov}\;\STACKLOC{{-}16}\; \REG{\itm{rax}})
- \end{array}
- \]
- The \key{imul} instruction is a special case because the destination
- argument must be a register.
- \section{Testing with Interpreters}
- The typical way to test a compiler is to run the generated assembly
- code on a diverse set of programs and check whether they behave as
- expected. However, when a compiler is structured as our is, with many
- passes, when there is an error in the generated assembly code it can
- be hard to determine which pass contains the source of the error. A
- good way to isolate the error is to not only test the generated
- assembly code but to also test the output of every pass. This requires
- having interpreters for all the intermediate languages. Indeed, the
- file \key{interp.rkt} in the supplemental code provides interpreters
- for all the intermediate languages described in this book, starting
- with interpreters for $S_0$, $C_0$, and x86 (in abstract syntax).
- The file \key{run-tests.rkt} automates the process of running the
- interpreters on the output programs of each pass and checking their
- result.
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Register Allocation}
- \label{ch:register-allocation}
- In Chapter~\ref{ch:int-exp} we simplified the generation of x86
- assembly by placing all variables on the stack. We can improve the
- performance of the generated code considerably if we instead try to
- place as many variables as possible into registers. The CPU can
- access a register in a single cycle, whereas accessing the stack can
- take from several cycles (to go to cache) to hundreds of cycles (to go
- to main memory). Figure~\ref{fig:reg-eg} shows a program with four
- variables that serves as a running example. We show the source program
- and also the output of instruction selection. At that point the
- program is almost x86 assembly but not quite; it still contains
- variables instead of stack locations or registers.
- \begin{figure}
- \begin{minipage}{0.45\textwidth}
- Source program:
- \begin{lstlisting}
- (let ([v 1])
- (let ([w 46])
- (let ([x (+ v 7)])
- (let ([y (+ 4 x)])
- (let ([z (+ x w)])
- (- z y))))))
- \end{lstlisting}
- \end{minipage}
- \begin{minipage}{0.45\textwidth}
- After instruction selection:
- \begin{lstlisting}
- (program (v w x y z)
- (mov (int 1) (var v))
- (mov (int 46) (var w))
- (mov (var v) (var x))
- (add (int 7) (var x))
- (mov (var x) (var y))
- (add (int 4) (var y))
- (mov (var x) (var z))
- (add (var w) (var z))
- (mov (var z) (reg rax))
- (sub (var y) (reg rax)))
- \end{lstlisting}
- \end{minipage}
- \caption{Running example for this chapter.}
- \label{fig:reg-eg}
- \end{figure}
- The goal of register allocation is to fit as many variables into
- registers as possible. It is often the case that we have more
- variables than registers, so we can't naively map each variable to a
- register. Fortunately, it is also common for different variables to be
- needed during different periods of time, and in such cases the
- variables can be mapped to the same register. Consider variables $x$
- and $y$ in Figure~\ref{fig:reg-eg}. After the variable $x$ is moved
- to $z$ it is no longer needed. Variable $y$, on the other hand, is
- used only after this point, so $x$ and $y$ could share the same
- register. The topic of the next section is how we compute where a
- variable is needed.
- \section{Liveness Analysis}
- A variable is \emph{live} if the variable is used at some later point
- in the program and there is not an intervening assignment to the
- variable.
- %
- To understand the latter condition, consider the following code
- fragment in which there are two writes to $b$. Are $a$ and
- $b$ both live at the same time?
- \begin{lstlisting}[numbers=left,numberstyle=\tiny]
- (mov (int 5) (var a)) ; @$a \gets 5$@
- (mov (int 30) (var b)) ; @$b \gets 30$@
- (mov (var a) (var c)) ; @$c \gets x$@
- (mov (int 10) (var b)) ; @$b \gets 10$@
- (add (var b) (var c)) ; @$c \gets c + b$@
- \end{lstlisting}
- The answer is no because the value $30$ written to $b$ on line 2 is
- never used. The variable $b$ is read on line 5 and there is an
- intervening write to $b$ on line 4, so the read on line 5 receives the
- value written on line 4, not line 2.
- The live variables can be computed by traversing the instruction
- sequence back to front (i.e., backwards in execution order). Let
- $I_1,\ldots, I_n$ be the instruction sequence. We write
- $L_{\mathsf{after}}(k)$ for the set of live variables after
- instruction $I_k$ and $L_{\mathsf{before}}(k)$ for the set of live
- variables before instruction $I_k$. The live variables after an
- instruction are always the same as the live variables before the next
- instruction.
- \begin{equation*}
- L_{\mathsf{after}}(k) = L_{\mathsf{before}}(k+1)
- \end{equation*}
- To start things off, there are no live variables after the last
- instruction, so
- \begin{equation*}
- L_{\mathsf{after}}(n) = \emptyset
- \end{equation*}
- We then apply the following rule repeatedly, traversing the
- instruction sequence back to front.
- \begin{equation*}
- L_{\mathtt{before}}(k) = (L_{\mathtt{after}}(k) - W(k)) \cup R(k),
- \end{equation*}
- where $W(k)$ are the variables written to by instruction $I_k$ and
- $R(k)$ are the variables read by instruction $I_k$.
- Figure~\ref{fig:live-eg} shows the results of live variables analysis
- for the running example. Next to each instruction we write its
- $L_{\mathtt{after}}$ set.
- \begin{figure}[tbp]
- \begin{lstlisting}
- (program (v w x y z)
- (mov (int 1) (var v)) @$\{ v \}$@
- (mov (int 46) (var w)) @$\{ v, w \}$@
- (mov (var v) (var x)) @$\{ w, x \}$@
- (add (int 7) (var x)) @$\{ w, x \}$@
- (mov (var x) (var y)) @$\{ w, x, y\}$@
- (add (int 4) (var y)) @$\{ w, x, y \}$@
- (mov (var x) (var z)) @$\{ w, y, z \}$@
- (add (var w) (var z)) @$\{ y, z \}$@
- (mov (var z) (reg rax)) @$\{ y \}$@
- (sub (var y) (reg rax))) @$\{\}$@
- \end{lstlisting}
- \caption{Running example program annotated with live-after sets.}
- \label{fig:live-eg}
- \end{figure}
- \section{Building the Interference Graph}
- Based on the liveness analysis, we know the program regions where each
- variable is needed. However, during register allocation, we need to
- answer questions of the specific form: are variables $u$ and $v$ ever
- live at the same time? (And therefore cannot be assigned to the same
- register.) To make this question easier to answer, we create an
- explicit data structure, an \emph{interference graph}. An
- interference graph is an undirected graph that has an edge between two
- variables if they are live at the same time, that is, if they
- interfere with each other.
- The most obvious way to compute the interference graph is to look at
- the set of live variables between each statement in the program, and
- add an edge to the graph for every pair of variables in the same set.
- This approach is less than ideal for two reasons. First, it can be
- rather expensive because it takes $O(n^2)$ time to look at every pair
- in a set of $n$ live variables. Second, there is a special case in
- which two variables that are live at the same time do not actually
- interfere with each other: when they both contain the same value
- because we have assigned one to the other.
- A better way to compute the edges of the intereference graph is given
- by the following rules.
- \begin{itemize}
- \item If instruction $I_k$ is a move: (\key{mov} $s$\, $d$), then add
- the edge $(d,v)$ for every $v \in L_{\mathsf{after}}(k)$ unless $v =
- d$ or $v = s$.
- \item If instruction $I_k$ is not a move but some other arithmetic
- instruction such as (\key{add} $s$\, $d$), then add the edge $(d,v)$
- for every $v \in L_{\mathsf{after}}(k)$ unless $v = d$.
-
- \item If instruction $I_k$ is of the form (\key{call}
- $\mathit{label}$), then add an edge $(r,v)$ for every caller-save
- register $r$ and every variable $v \in L_{\mathsf{after}}(k)$.
- \end{itemize}
- Working from the top to bottom of Figure~\ref{fig:live-eg}, $z$
- interferes with $x$, $y$ interferes with $z$, and $w$ interferes with
- $y$ and $z$. The resulting interference graph is shown in
- Figure~\ref{fig:interfere}.
- \begin{figure}[tbp]
- \large
- \[
- \xymatrix@=40pt{
- v \ar@{-}[r] & w \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x \ar@{-}[dl]\\
- & y \ar@{-}[r] & z
- }
- \]
- \caption{Interference graph for the running example.}
- \label{fig:interfere}
- \end{figure}
- \section{Graph Coloring via Sudoku}
- We now come to the main event, mapping variables to registers (or to
- stack locations in the event that we run out of registers). We need
- to make sure not to map two variables to the same register if the two
- variables interfere with each other. In terms of the interference
- graph, this means we cannot map adjacent nodes to the same register.
- If we think of registers as colors, the register allocation problem
- becomes the widely-studied graph coloring
- problem~\citep{Balakrishnan:1996ve,Rosen:2002bh}.
- The reader may be more familar with the graph coloring problem then he
- or she realizes; the popular game of Sudoku is an instance of the
- graph coloring problem. The following describes how to build a graph
- out of a Sudoku board.
- \begin{itemize}
- \item There is one node in the graph for each Sudoku square.
- \item There is an edge between two nodes if the corresponding squares
- are in the same row or column, or if the squares are in the same
- $3\times 3$ region.
- \item Choose nine colors to correspond to the numbers $1$ to $9$.
- \item Based on the initial assignment of numbers to squares in the
- Sudoku board, assign the corresponding colors to the corresponding
- nodes in the graph.
- \end{itemize}
- If you can color the remaining nodes in the graph with the nine
- colors, then you've also solved the corresponding game of Sudoku.
- Given that Sudoku is graph coloring, one can use Sudoku strategies to
- come up with an algorithm for allocating registers. For example, one
- of the basic techniques for Sudoku is Pencil Marks. The idea is that
- you use a process of elimination to determine what numbers still make
- sense for a square, and write down those numbers in the square
- (writing very small). At first, each number might be a
- possibility, but as the board fills up, more and more of the
- possibilities are crossed off (or erased). For example, if the number
- $1$ is assigned to a square, then by process of elimination, you can
- cross off the $1$ pencil mark from all the squares in the same row,
- column, and region. Many Sudoku computer games provide automatic
- support for Pencil Marks. This heuristic also reduces the degree of
- branching in the search tree.
- The Pencil Marks technique corresponds to the notion of color
- \emph{saturation} due to \cite{Brelaz:1979eu}. The
- saturation of a node, in Sudoku terms, is the number of possibilities
- that have been crossed off using the process of elimination mentioned
- above. In graph terminology, we have the following definition:
- \begin{equation*}
- \mathrm{saturation}(u) = |\{ c \;|\; \exists v. v \in \mathrm{Adj}(u)
- \text{ and } \mathrm{color}(v) = c \}|
- \end{equation*}
- where $\mathrm{Adj}(u)$ is the set of nodes adjacent to $u$ and
- the notation $|S|$ stands for the size of the set $S$.
- Using the Pencil Marks technique leads to a simple strategy for
- filling in numbers: if there is a square with only one possible number
- left, then write down that number! But what if there are no squares
- with only one possibility left? One brute-force approach is to just
- make a guess. If that guess ultimately leads to a solution, great. If
- not, backtrack to the guess and make a different guess. Of course,
- this is horribly time consuming. One standard way to reduce the amount
- of backtracking is to use the most-constrained-first heuristic. That
- is, when making a guess, always choose a square with the fewest
- possibilities left (the node with the highest saturation). The idea
- is that choosing highly constrained squares earlier rather than later
- is better because later there may not be any possibilities left.
- In some sense, register allocation is easier than Sudoku because we
- can always cheat and add more numbers by spilling variables to the
- stack. Also, we'd like to minimize the time needed to color the graph,
- and backtracking is expensive. Thus, it makes sense to keep the
- most-constrained-first heuristic but drop the backtracking in favor of
- greedy search (guess and just keep going).
- Figure~\ref{fig:satur-algo} gives the pseudo-code for this simple
- greedy algorithm for register allocation based on saturation and the
- most-constrained-first heuristic, which is roughly equivalent to the
- DSATUR algorithm of \cite{Brelaz:1979eu} (also known as
- saturation degree ordering
- (SDO)~\citep{Gebremedhin:1999fk,Omari:2006uq}). Just as in Sudoku,
- the algorithm represents colors with integers, with the first $k$
- colors corresponding to the $k$ registers in a given machine and the
- rest of the integers corresponding to stack locations.
- \begin{figure}[btp]
- \centering
- \begin{lstlisting}[basicstyle=\rmfamily,deletekeywords={for,from,with,is,not,in,find},morekeywords={while},columns=fullflexible]
- Algorithm: DSATUR
- Input: a graph @$G$@
- Output: an assignment @$\mathrm{color}[v]$@ for each node @$v \in G$@
- @$W \gets \mathit{vertices}(G)$@
- while @$W \neq \emptyset$@ do
- pick a node @$u$@ from @$W$@ with the highest saturation,
- breaking ties randomly
- find the lowest color @$c$@ that is not in @$\{ \mathrm{color}[v] \;|\; v \in \mathrm{Adj}(v)\}$@
- @$\mathrm{color}[u] \gets c$@
- @$W \gets W - \{u\}$@
- \end{lstlisting}
- \caption{Saturation-based greedy graph coloring algorithm.}
- \label{fig:satur-algo}
- \end{figure}
- With this algorithm in hand, let us return to the running example and
- consider how to color the interference graph in
- Figure~\ref{fig:interfere}. Initially, all of the nodes are not yet
- colored and they are unsaturated, so we annotate each of them with a
- dash for their color and an empty set for the saturation.
- \[
- \xymatrix{
- v:-,\{\} \ar@{-}[r] & w:-,\{\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:-,\{\} \ar@{-}[dl]\\
- & y:-,\{\} \ar@{-}[r] & z:-,\{\}
- }
- \]
- We select a maximally saturated node and color it $0$. In this case we
- have a 5-way tie, so we arbitrarily pick $y$. The color $0$ is no
- longer available for $w$, $x$, and $z$ because they interfere with
- $y$.
- \[
- \xymatrix{
- v:-,\{\} \ar@{-}[r] & w:-,\{0\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:-,\{0\} \ar@{-}[dl]\\
- & y:0,\{\} \ar@{-}[r] & z:-,\{0\}
- }
- \]
- Now we repeat the process, selecting another maximally saturated node.
- This time there is a three-way tie between $w$, $x$, and $z$. We color
- $w$ with $1$.
- \[
- \xymatrix{
- v:-,\{1\} \ar@{-}[r] & w:1,\{0\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:-,\{0,1\} \ar@{-}[dl]\\
- & y:0,\{1\} \ar@{-}[r] & z:-,\{0,1\}
- }
- \]
- The most saturated nodes are now $x$ and $z$. We color $x$ with the
- next avialable color which is $2$.
- \[
- \xymatrix{
- v:-,\{1\} \ar@{-}[r] & w:1,\{0,2\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:2,\{0,1\} \ar@{-}[dl]\\
- & y:0,\{1,2\} \ar@{-}[r] & z:-,\{0,1\}
- }
- \]
- We have only two nodes left to color, $v$ and $z$, but $z$ is
- more highly saturaded, so we color $z$ with $2$.
- \[
- \xymatrix{
- v:-,\{1\} \ar@{-}[r] & w:1,\{0,2\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:2,\{0,1\} \ar@{-}[dl]\\
- & y:0,\{1,2\} \ar@{-}[r] & z:2,\{0,1\}
- }
- \]
- The last iteration of the coloring algorithm assigns color $0$ to $v$.
- \[
- \xymatrix{
- v:0,\{1\} \ar@{-}[r] & w:1,\{0,2\} \ar@{-}[r]\ar@{-}[d]\ar@{-}[dr] & x:2,\{0,1\} \ar@{-}[dl]\\
- & y:0,\{1,2\} \ar@{-}[r] & z:2,\{0,1\}
- }
- \]
- With the coloring complete, we can finalize assignment of variables to
- registers and stack locations. Recall that if we have $k$ registers,
- we map the first $k$ colors to registers and the rest to stack
- lcoations. Suppose for the moment that we just have one extra register
- to use for register allocation, just \key{rbx}. Then the following is
- the mapping of colors to registers and stack allocations.
- \[
- \{ 0 \mapsto \key{\%rbx}, \; 1 \mapsto \key{-8(\%rbp)}, \; 2 \mapsto \key{-16(\%rbp)}, \ldots \}
- \]
- Putting this together with the above coloring of the variables, we
- arrive at the following assignment.
- \[
- \{ v \mapsto \key{\%rbx}, \;
- w \mapsto \key{-8(\%rbp)}, \;
- x \mapsto \key{-16(\%rbp)}, \;
- y \mapsto \key{\%rbx}, \;
- z\mapsto \key{-16(\%rbp)} \}
- \]
- Applying this assignment to our running example
- (Figure~\ref{fig:reg-eg}) yields the following program.
- % why frame size of 32? -JGS
- \begin{lstlisting}
- (program 32
- (mov (int 1) (reg rbx))
- (mov (int 46) (stack-loc -8))
- (mov (reg rbx) (stack-loc -16))
- (add (int 7) (stack-loc -16))
- (mov (stack-loc 16) (reg rbx))
- (add (int 4) (reg rbx))
- (mov (stack-loc -16) (stack-loc -16))
- (add (stack-loc -8) (stack-loc -16))
- (mov (stack-loc -16) (reg rax))
- (sub (reg rbx) (reg rax)))
- \end{lstlisting}
- This program is almost an x86 program. The remaining step is to apply
- the patch instructions pass. In this example, the trivial move of
- \key{-16(\%rbp)} to itself is deleted and the addition of
- \key{-8(\%rbp)} to \key{-16(\%rbp)} is fixed by going through
- \key{\%rax}. The following shows the portion of the program that
- changed.
- \begin{lstlisting}
- (add (int 4) (reg rbx))
- (mov (stack-loc -8) (reg rax)
- (add (reg rax) (stack-loc -16))
- \end{lstlisting}
- An overview of all of the passes involved in register allocation is
- shown in Figure~\ref{fig:reg-alloc-passes}.
- \begin{figure}[tbp]
- \[
- \xymatrix{
- C_0 \ar@/^/[r]^-{\textsf{select\_instr.}}
- & \text{x86}^{*} \ar[d]^-{\textsf{uncover\_live}} \\
- & \text{x86}^{*} \ar[d]^-{\textsf{build\_interference}} \\
- & \text{x86}^{*} \ar[d]_-{\textsf{allocate\_register}} \\
- & \text{x86}^{*} \ar@/^/[r]^-{\textsf{patch\_instr.}}
- & \text{x86}
- }
- \]
- \caption{Diagram of the passes for register allocation.}
- \label{fig:reg-alloc-passes}
- \end{figure}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Booleans, Type Checking, and Control Flow}
- \label{ch:bool-types}
- \section{The $S_1$ Language}
- \begin{figure}[htbp]
- \centering
- \fbox{
- \begin{minipage}{0.85\textwidth}
- \[
- \begin{array}{lcl}
- \Op &::=& \ldots \mid \key{and} \mid \key{or} \mid \key{not} \mid \key{eq?} \\
- \Exp &::=& \ldots \mid \key{\#t} \mid \key{\#f} \mid
- \IF{\Exp}{\Exp}{\Exp}
- \end{array}
- \]
- \end{minipage}
- }
- \caption{The $S_1$ language, an extension of $S_0$
- (Figure~\ref{fig:s0-syntax}).}
- \label{fig:s1-syntax}
- \end{figure}
- \section{Type Checking $S_1$ Programs}
- % T ::= Integer | Boolean
- It is common practice to specify a type system by writing rules for
- each kind of AST node. For example, the rule for \key{if} is:
- \begin{quote}
- For any expressions $e_1, e_2, e_3$ and any type $T$, if $e_1$ has
- type \key{bool}, $e_2$ has type $T$, and $e_3$ has type $T$, then
- $\IF{e_1}{e_2}{e_3}$ has type $T$.
- \end{quote}
- It is also common practice to write rules using a horizontal line,
- with the conditions written above the line and the conclusion written
- below the line.
- \begin{equation*}
- \inference{e_1 \text{ has type } \key{bool} &
- e_2 \text{ has type } T & e_3 \text{ has type } T}
- {\IF{e_1}{e_2}{e_3} \text{ has type } T}
- \end{equation*}
- Because the phrase ``has type'' is repeated so often in these type
- checking rules, it is abbreviated to just a colon. So the above rule
- is abbreviated to the following.
- \begin{equation*}
- \inference{e_1 : \key{bool} & e_2 : T & e_3 : T}
- {\IF{e_1}{e_2}{e_3} : T}
- \end{equation*}
- The $\LET{x}{e_1}{e_2}$ construct poses an interesting challenge. The
- variable $x$ is assigned the value of $e_1$ and then $x$ can be used
- inside $e_2$. When we get to an occurrence of $x$ inside $e_2$, how do
- we know what type the variable should be? The answer is that we need
- a way to map from variable names to types. Such a mapping is called a
- \emph{type environment} (aka. \emph{symbol table}). The capital Greek
- letter gamma, written $\Gamma$, is used for referring to type
- environments environments. The notation $\Gamma, x : T$ stands for
- making a copy of the environment $\Gamma$ and then associating $T$
- with the variable $x$ in the new environment. We write $\Gamma(x)$ to
- lookup the associated type for $x$. The type checking rules for
- \key{let} and variables are as follows.
- \begin{equation*}
- \inference{e_1 : T_1 \text{ in } \Gamma &
- e_2 : T_2 \text{ in } \Gamma,x:T_1}
- {\LET{x}{e_1}{e_2} : T_2 \text{ in } \Gamma}
- \qquad
- \inference{\Gamma(x) = T}
- {x : T \text{ in } \Gamma}
- \end{equation*}
- Type checking has roots in logic, and logicians have a tradition of
- writing the environment on the left-hand side and separating it from
- the expression with a turn-stile ($\vdash$). The turn-stile does not
- have any intrinsic meaning per se. It is punctuation that separates
- the environment $\Gamma$ from the expression $e$. So the above typing
- rules are written as follows.
- \begin{equation*}
- \inference{\Gamma \vdash e_1 : T_1 &
- \Gamma,x:T_1 \vdash e_2 : T_2}
- {\Gamma \vdash \LET{x}{e_1}{e_2} : T_2}
- \qquad
- \inference{\Gamma(x) = T}
- {\Gamma \vdash x : T}
- \end{equation*}
- Overall, the statement $\Gamma \vdash e : T$ is an example of what is
- called a \emph{judgment}. In particular, this judgment says, ``In
- environment $\Gamma$, expression $e$ has type $T$.''
- Figure~\ref{fig:S1-type-system} shows the type checking rules for
- $S_1$.
- \begin{figure}
- \begin{gather*}
- \inference{\Gamma(x) = T}
- {\Gamma \vdash x : T}
- \qquad
- \inference{\Gamma \vdash e_1 : T_1 &
- \Gamma,x:T_1 \vdash e_2 : T_2}
- {\Gamma \vdash \LET{x}{e_1}{e_2} : T_2}
- \\[2ex]
- \inference{}{\Gamma \vdash n : \key{Integer}}
- \quad
- \inference{\Gamma \vdash e_i : T_i \ ^{\forall i \in 1\ldots n} & \Delta(\Op,T_1,\ldots,T_n) = T}
- {\Gamma \vdash (\Op \; e_1 \ldots e_n) : T}
- \\[2ex]
- \inference{}{\Gamma \vdash \key{\#t} : \key{Boolean}}
- \quad
- \inference{}{\Gamma \vdash \key{\#f} : \key{Boolean}}
- \quad
- \inference{\Gamma \vdash e_1 : \key{bool} \\
- \Gamma \vdash e_2 : T &
- \Gamma \vdash e_3 : T}
- {\Gamma \vdash \IF{e_1}{e_2}{e_3} : T}
- \end{gather*}
- \caption{Type System for $S_1$.}
- \label{fig:S1-type-system}
- \end{figure}
- \begin{figure}
- \begin{align*}
- \Delta(\key{+},\key{Integer},\key{Integer}) &= \key{Integer} \\
- \Delta(\key{-},\key{Integer},\key{Integer}) &= \key{Integer} \\
- \Delta(\key{-},\key{Integer}) &= \key{Integer} \\
- \Delta(\key{*},\key{Integer},\key{Integer}) &= \key{Integer} \\
- \Delta(\key{read}) &= \key{Integer} \\
- \Delta(\key{and},\key{Boolean},\key{Boolean}) &= \key{Boolean} \\
- \Delta(\key{or},\key{Boolean},\key{Boolean}) &= \key{Boolean} \\
- \Delta(\key{not},\key{Boolean}) &= \key{Boolean} \\
- \Delta(\key{eq?},\key{Integer},\key{Integer}) &= \key{Boolean} \\
- \Delta(\key{eq?},\key{Boolean},\key{Boolean}) &= \key{Boolean}
- \end{align*}
- \caption{Types for the primitives operators.}
- \end{figure}
- \section{The $C_1$ Language}
- \begin{figure}[htbp]
- \[
- \begin{array}{lcl}
- \Arg &::=& \ldots \mid \key{\#t} \mid \key{\#f} \\
- \Stmt &::=& \ldots \mid \IF{\Exp}{\Stmt^{*}}{\Stmt^{*}}
- \end{array}
- \]
- \caption{The $C_1$ intermediate language, an extension of $C_0$
- (Figure~\ref{fig:c0-syntax}).}
- \label{fig:c1-syntax}
- \end{figure}
- \section{Flatten Expressions}
- \section{Select Instructions}
- \section{Register Allocation}
- \section{Patch Instructions}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Tuples and Heap Allocation}
- \label{ch:tuples}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Functions}
- \label{ch:functions}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Lexically Scoped Functions}
- \label{ch:lambdas}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Mutable Data}
- \label{ch:mutable-data}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{The Dynamic Type}
- \label{ch:type-dynamic}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Parametric Polymorphism}
- \label{ch:parametric-polymorphism}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{High-level Optimization}
- \label{ch:high-level-optimization}
- \bibliographystyle{plainnat}
- \bibliography{all}
- \end{document}
- %% LocalWords: Dybvig Waddell Abdulaziz Ghuloum Dipanwita
- %% LocalWords: Sarkar lcl Matz aa representable
|