123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307 |
- \documentclass[10pt]{book}
- \usepackage[T1]{fontenc}
- \usepackage[utf8]{inputenc}
- \usepackage{lmodern}
- \usepackage{hyperref}
- \usepackage{graphicx}
- \usepackage[english]{babel}
- \usepackage{listings}
- \usepackage{amsmath}
- \usepackage{amsthm}
- \usepackage{amssymb}
- \usepackage{natbib}
- \usepackage{stmaryrd}
- \usepackage{xypic}
- \lstset{%
- basicstyle=\ttfamily%
- }
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % 'dedication' environment: To add a dedication paragraph at the start of book %
- % Source: http://www.tug.org/pipermail/texhax/2010-June/015184.html %
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \newenvironment{dedication}
- {
- \cleardoublepage
- \thispagestyle{empty}
- \vspace*{\stretch{1}}
- \hfill\begin{minipage}[t]{0.66\textwidth}
- \raggedright
- }
- {
- \end{minipage}
- \vspace*{\stretch{3}}
- \clearpage
- }
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- % Chapter quote at the start of chapter %
- % Source: http://tex.stackexchange.com/a/53380 %
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \makeatletter
- \renewcommand{\@chapapp}{}% Not necessary...
- \newenvironment{chapquote}[2][2em]
- {\setlength{\@tempdima}{#1}%
- \def\chapquote@author{#2}%
- \parshape 1 \@tempdima \dimexpr\textwidth-2\@tempdima\relax%
- \itshape}
- {\par\normalfont\hfill--\ \chapquote@author\hspace*{\@tempdima}\par\bigskip}
- \makeatother
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \newcommand{\itm}[1]{\ensuremath{\mathit{#1}}}
- \newcommand{\Atom}{\itm{atom}}
- \newcommand{\Stmt}{\itm{stmt}}
- \newcommand{\Exp}{\itm{exp}}
- \newcommand{\Ins}{\itm{instr}}
- \newcommand{\Prog}{\itm{prog}}
- \newcommand{\Arg}{\itm{arg}}
- \newcommand{\Int}{\itm{int}}
- \newcommand{\Var}{\itm{var}}
- \newcommand{\Op}{\itm{op}}
- \newcommand{\key}[1]{\texttt{#1}}
- \newcommand{\READ}{(\key{read})}
- \newcommand{\UNIOP}[2]{(\key{#1}\,#2)}
- \newcommand{\BINOP}[3]{(\key{#1}\,#2\,#3)}
- \newcommand{\LET}[3]{(\key{let}\,([#1\;#2])\,#3)}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \title{\Huge \textbf{Essentials of Compilation} \\
- \huge From Scheme to x86 Assembly}
- \author{\textsc{Jeremy G. Siek}
- \thanks{\url{http://homes.soic.indiana.edu/jsiek/}}
- }
- \begin{document}
- \frontmatter
- \maketitle
- \begin{dedication}
- This book is dedicated to the programming languages group at Indiana University.
- \end{dedication}
- \tableofcontents
- %\listoffigures
- %\listoftables
- \mainmatter
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter*{Preface}
- \cite{Sarkar:2004fk}
- \cite{Keep:2012aa}
- \cite{Ghuloum:2006bh}
- %\section*{Structure of book}
- % You might want to add short description about each chapter in this book.
- %\section*{About the companion website}
- %The website\footnote{\url{https://github.com/amberj/latex-book-template}} for %this file contains:
- %\begin{itemize}
- % \item A link to (freely downlodable) latest version of this document.
- % \item Link to download LaTeX source for this document.
- % \item Miscellaneous material (e.g. suggested readings etc).
- %\end{itemize}
- \section*{Acknowledgements}
- Need to give thanks to
- \begin{itemize}
- \item Kent Dybvig
- \item Daniel P. Freidman
- \item Oscar Waddell
- \item Abdulaziz Ghuloum
- \item Dipanwita Sarkar
- \end{itemize}
- %\mbox{}\\
- %\noindent Amber Jain \\
- %\noindent \url{http://amberj.devio.us/}
- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
- \chapter{Integers and Variables}
- %\begin{chapquote}{Author's name, \textit{Source of this quote}}
- %``This is a quote and I don't know who said this.''
- %\end{chapquote}
- The $S_0$ language includes integers, operations on integers,
- (arithmetic and input), and variable definitions. The syntax of the
- $S_0$ language is defined by the grammar in
- Figure~\ref{fig:s0-syntax}. This language is rich enough to exhibit
- several compilation techniques but simple enough so that we can
- implement a compiler for it in two weeks of hard work. To give the
- reader a feeling for the scale of this first compiler, the instructor
- solution for the $S_0$ compiler consists of 6 recursive functions and
- a few small helper functions that together span 256 lines of code.
- \begin{figure}[htbp]
- \centering
- \fbox{
- \begin{minipage}{0.85\textwidth}
- \[
- \begin{array}{lcl}
- \Op &::=& \key{+} \mid \key{-} \mid \key{*} \mid \key{read} \\
- \Exp &::=& \Int \mid (\Op \; \Exp^{+}) \mid \Var \mid \LET{\Var}{\Exp}{\Exp}
- \end{array}
- \]
- \end{minipage}
- }
- \caption{The syntax of the $S_0$ language. The abbreviation \Op{} is
- short for operator, \Exp{} is short for expression, \Int{} for integer,
- and \Var{} for variable.}
- \label{fig:s0-syntax}
- \end{figure}
- The result of evaluating an expression is a value. For $S_0$, values
- are integers. To make it straightforward to map these integers onto
- x86-64 assembly~\citep{Matz:2013aa}, we restrict the integers to just
- those representable with 64-bits, the range $-2^{63}$ to $2^{63}$.
- We will walk through some examples of $S_0$ programs, commenting on
- aspects of the language that will be relevant to compiling it. We
- start with one of the simplest $S_0$ programs; it adds two integers.
- \[
- \BINOP{+}{10}{32}
- \]
- The result is $42$, as you might expected.
- %
- The next example demonstrates that expressions may be nested within
- eachother, in this case nesting several additions and negations.
- \[
- \BINOP{+}{10}{ \UNIOP{-}{ \BINOP{+}{12}{20} } }
- \]
- What is the result of the above program?
- The \key{let} construct stores a value in a variable which can then be
- used within the body of the \key{let}. So the following program stores
- $32$ in $x$ and then computes $\BINOP{+}{10}{x}$, producing $42$.
- \[
- \LET{x}{ \BINOP{+}{12}{20} }{ \BINOP{+}{10}{x} }
- \]
- When there are multiple \key{let}'s for the same variable, the closest
- enclosing \key{let} is used. Consider the following program with two
- \key{let}'s that define variables named $x$.
- \[
- \LET{x}{32}{ \BINOP{+}{ \LET{x}{10}{x} }{ x } }
- \]
- For the purposes of showing which variable uses correspond to which
- definitions, the following shows the $x$'s annotated with subscripts
- to distinguish them.
- \[
- \LET{x_1}{32}{ \BINOP{+}{ \LET{x_2}{10}{x_2} }{ x_1 } }
- \]
- The \key{read} operation prompts the user of the program for an
- integer. Given an input of $10$, the following program produces $42$.
- \[
- \BINOP{+}{(\key{read})}{32}
- \]
- We include the \key{read} operation in $S_0$ to demonstrate that order
- of evaluation can make a different. Given the input $52$ then $10$,
- the following produces $42$ (and not $-42$).
- \[
- \LET{x}{\READ}{ \LET{y}{\READ}{ \BINOP{-}{x}{y} } }
- \]
- The initializing expression of a \key{let} is always evaluated before
- the body of the \key{let}, so in the above, the \key{read} for $x$ is
- performed before the \key{read} for $y$.
- %
- The behavior of the following program is somewhat subtle because
- Scheme does not specify an evaluation order for arguments of an
- operator such as $-$.
- \[
- \BINOP{-}{\READ}{\READ}
- \]
- Given the input $42$ then $10$, the above program can result in either
- $42$ or $-42$, depending on the whims of the Scheme implementation.
- The goal for this chapter is to implement a compiler that translates
- any program $p \in S_0$ into a x86-64 assembly program $p'$ such that
- the assembly program exhibits the same behavior on Intel hardward as
- the $S_0$ program running in a Scheme implementation.
- \[
- \xymatrix{
- p \in S_0 \ar[rr]^{\text{compile}} \ar[drr]_{\text{run in Scheme}\quad} && p' \in \text{x86-64} \ar[d]^{\quad\text{run on an x86 machine}}\\
- & & n \in \mathbb{Z}
- }
- \]
- In the next section we introduce enough of the x86-64 assembly
- language to compile $S_0$.
- \section{x86-64 Assembly}
- An x86-64 program is a sequence of instructions. The instructions
- manipulate a fixed number of variables called \emph{registers} and can
- load and store values into \emph{memory}. Memory is a mapping of
- 64-bit addresses to 64-bit values. The syntax $n(r)$ is used to read
- the address $a$ stored in register $r$ and then offset it by $n$,
- producing the address $a + n$. The arithmetic instructions, such as
- $\key{addq}\,s\,d$, read from the source $s$ and destination argument
- $d$, apply the arithmetic operation, then stores the result in the
- destination $d$. In this case, computing $d \gets d + s$. The move
- instruction, $\key{movq}\,s\,d$ reads from $s$ and stores the result
- in $d$. The $\key{callq}\,\mathit{label}$ instruction executes the
- function specified by the label, which we shall use to implement
- \key{read}. Figure~\ref{fig:x86-a} defines the syntax for this
- subset of the x86-64 assembly language.
- \begin{figure}[tbp]
- \fbox{
- \begin{minipage}{0.96\textwidth}
- \[
- \begin{array}{lcl}
- \itm{register} &::=& \key{rax} \mid \key{rbx} \mid \key{rcx}
- \mid \key{rdx} \mid \key{rsi} \mid \key{rdi} \mid \\
- && \key{r8} \mid \key{r9} \mid \key{r10}
- \mid \key{r11} \mid \key{r12} \mid \key{r13}
- \mid \key{r14} \mid \key{r15} \\
- \Arg &::=& \Int \mid \key{\%}\itm{register} \mid \Int(\key{\%}\itm{register}) \\
- \Ins &::=& \key{addq} \; \Arg \; \Arg \mid
- \key{subq} \; \Arg \; \Arg \mid
- \key{imulq} \; \Arg \; \Arg \mid
- \key{negq} \; \Arg \mid \\
- && \key{movq} \; \Arg \; \Arg \mid
- \key{callq} \; \mathit{label} \\
- \Prog &::= & \Ins^{*}
- \end{array}
- \]
- \end{minipage}
- }
- \caption{A subset of the x86-64 assembly language.}
- \label{fig:x86-a}
- \end{figure}
- \begin{figure}[tbp]
- \begin{lstlisting}
- .globl _main
- _main:
- movq $10, %rax
- addq $32, %rax
- retq
- \end{lstlisting}
- \caption{A simple x86-64 program equivalent to $(+ \; 10 \; 32)$.}
- \label{fig:p0-x86}
- \end{figure}
- \section{An intermediate C-like language}
- \[
- \begin{array}{lcl}
- \Atom &::=& \Int \mid \Var \\
- \Exp &::=& \Atom \mid (\Op \; \Atom^{*})\\
- \Stmt &::=& (\key{assign} \; \Var \; \Exp) \mid (\key{return}\; \Exp)
- \end{array}
- \]
- \bibliographystyle{plainnat}
- \bibliography{all}
- \end{document}
|