6 年之前 · 0007ead16b
--- a/book.tex
+++ b/book.tex
@@ -3182,7 +3182,7 @@ programs to make sure that your move biasing is working properly.
 
															 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
														
 
															-\chapter{Booleans, Control Flow, and Type Checking}
														
 
															+\chapter{Booleans and Control Flow}
														
 
															 \label{ch:bool-types}
														
 
															 The $R_0$ and $R_1$ languages only had a single kind of value, the
														
@@ -3283,11 +3283,9 @@ With the addition of the comparison operations, there are quite a few
 
															 primitive operations and the interpreter code for them is somewhat
														
 
															 repetitive. In Figure~\ref{fig:interp-R2} we factor out the different
														
 
															 parts into the \code{interp-op} function and the similar parts into
														
 
															-the one match clause shown in Figure~\ref{fig:interp-R2}. It is
														
 
															-important for that match clause to come last because it matches
														
 
															-\emph{any} compound S-expression.  We do not use \code{interp-op} for
														
 
															-the \code{and} operation because of the short-circuiting behavior in
														
 
															-the order of evaluation of its arguments.
														
 
															+the one match clause shown in Figure~\ref{fig:interp-R2}. We do not
														
 
															+use \code{interp-op} for the \code{and} operation because of the
														
 
															+short-circuiting behavior in the order of evaluation of its arguments.
														
 
															 \begin{figure}[tbp]
														
@@ -3409,9 +3407,9 @@ association list.
 
															    (define (type-check-R2 env)
														
 
															      (lambda (e)
														
 
															        (match e
														
 
															-         [`(program ,body)
														
 
															+         [`(program ,info ,body)
														
 
															           (define ty ((type-check-exp '()) body))
														
 
															-          `(program (type ,ty) ,body)]
														
 
															+          `(program ,info ,body)]
														
 
															          )))
														
 
															 \end{lstlisting}
														
 
															 \caption{Skeleton of a type checker for the $R_2$ language.}
														
@@ -3450,16 +3448,31 @@ for a program, then interpreting that program should not encounter an
 
															 error.  If it does, there is something wrong with your type checker.
														
 
															 \end{exercise}
														
 
															-\section{Shrink}
														
 
															+\section{Shrink the $R_2$ Language}
														
 
															 \label{sec:shrink-r2}
														
 
															+The $R_2$ language includes several operators that are easily
														
 
															+expressible in terms of other operators. For example, subtraction is
														
 
															+expressible in terms of addition and negation
														
 
															+\[
														
 
															+ (\key{-}\; e_1 \; e_2) \quad \Rightarrow \quad (\key{+} \; e_1 \; (\key{-} \; e_2))
														
 
															+\]
														
 
															+and several of the comparison operations are expressible in terms of
														
 
															+less-than and logical negation.
														
 
															+\[
														
 
															+  (\key{<=}\; e_1 \; e_2) \quad \Rightarrow \quad (\key{not}\;(\key{<}\;e_2\;e_1))
														
 
															+\]
														
 
															+By performing these translations near the front-end of the compiler,
														
 
															+the later passes of the compiler will not need to deal with these
														
 
															+constructs, making those passes shorter.
														
 
															-UNDER CONSTRUCTION
														
 
															-
														
 
															-\section{Remove Complex Operators and Operands}
														
 
															-\label{sec:rco-r2}
														
 
															-
														
 
															-UNDER CONSTRUCTION
														
 
															+\begin{exercise}\normalfont
														
 
															+  Implement the pass \code{shrink} that removes subtraction,
														
 
															+  \key{and}, \key{or}, \key{<=}, \key{>}, and \key{>=} from the language
														
 
															+  by translating them to other constructs in $R_2$.  Create tests to
														
 
															+  make sure that the behavior of all of these constructs stays the
														
 
															+  same after translation.
														
 
															+\end{exercise}
														
 
															 \section{The $C_1$ Intermediate Language}
														
 
															 \label{sec:c1}
														
@@ -3469,12 +3482,12 @@ language, but we need to grow that intermediate language to handle the
 
															 new features in $R_2$: Booleans and conditional expressions.
														
 
															 Figure~\ref{fig:c1-syntax} shows the new features of $C_1$; we add
														
 
															 logic and comparison operators to the $\Exp$ non-terminal and the
														
 
															-literals \key{\#t} and \key{\#f} to the $\Arg$ non-terminal, and we
														
 
															-add an \key{if} statement. The \key{if} statement of $C_1$ includes a
														
 
															-built-in comparison (unlike the $C$ language), which is needed for
														
 
															-improving code generation in Section~\ref{sec:opt-if}.  We do not
														
 
															-include \key{and} in $C_1$ because it is not needed in the translation
														
 
															-of $R_2$'s \key{and} construct.
														
 
															+literals \key{\#t} and \key{\#f} to the $\Arg$ non-terminal.
														
 
															+Regarding control flow, $C_1$ differs considerably from $R_2$.
														
 
															+Instead of \key{if} expressions, it has goto's and conditional goto's
														
 
															+in the grammar for $\Tail$. This means that basic blocks may now end
														
 
															+with a goto (to another block), or a conditional goto, which jumps to
														
 
															+one of two other blocks depending on the outcome of the comparison.
														
 
															 \begin{figure}[tp]
														
 
															 \fbox{
														
@@ -3482,7 +3495,7 @@ of $R_2$'s \key{and} construct.
 
															 \[
														
 
															 \begin{array}{lcl}
														
 
															 \Arg &::=& \gray{\Int \mid \Var} \mid \key{\#t} \mid \key{\#f} \\
														
 
															-\itm{cmp} &::= & \key{eq?} \mid \key{<} \mid \key{<=} \mid \key{>} \mid \key{>=} \\
														
 
															+\itm{cmp} &::= & \key{eq?} \mid \key{<}  \\
														
 
															 \Exp &::= & \gray{\Arg \mid (\key{read}) \mid (\key{-}\;\Arg) \mid (\key{+} \; \Arg\;\Arg)}
														
 
															       \mid (\key{not}\;\Arg) \mid (\itm{cmp}\;\Arg\;\Arg) \\
														
 
															 \Stmt &::=& \gray{\ASSIGN{\Var}{\Exp} \mid \RETURN{\Arg}} \\
														
@@ -3596,11 +3609,6 @@ C_1 & ::= & (\key{program}\;\itm{info}\; ((\itm{label}\,\key{.}\,\Tail)^{+}))
 
															 %% (Appendix~\ref{appendix:interp}).
														
 
															 %% \end{exercise}
														
 
															-\section{Explicate Control}
														
 
															-\label{sec:explicate-control-r2}
														
 
															-
														
 
															-UNDER CONSTRUCTION
														
 
															-
														
 
															 \section{XOR, Comparisons, and Control Flow in x86}
														
 
															 \label{sec:x86-1}
														
@@ -3689,10 +3697,129 @@ counter to point to the instruction after the indicated label.  The
 
															 instruction after the indicated label depending on whether the result
														
 
															 in the EFLAGS register matches the condition code \itm{cc}, otherwise
														
 
															 the \key{jmp-if} instruction falls through to the next
														
 
															-instruction. Our abstract syntax for \key{jmp-if} differs from the
														
 
															-concrete syntax for x86 to separate the instruction name from the
														
 
															-condition code. For example, \code{(jmp-if le foo)} corresponds to
														
 
															-\code{jle foo}.
														
 
															+instruction. Because the \key{jmp-if} instruction relies on the EFLAGS
														
 
															+register, it is quite common for the \key{jmp-if} to be immediately
														
 
															+preceded by a \key{cmpq} instruction, to set the EFLAGS register.
														
 
															+Our abstract syntax for \key{jmp-if} differs from the concrete syntax
														
 
															+for x86 to separate the instruction name from the condition code. For
														
 
															+example, \code{(jmp-if le foo)} corresponds to \code{jle foo}.
														
 
															+
														
 
															+
														
 
															+\section{Explicate Control}
														
 
															+\label{sec:explicate-control-r2}
														
 
															+
														
 
															+Recall that the purpose of \code{explicate-control} is to make the
														
 
															+order of evaluation explicit in the syntax of the program.  With the
														
 
															+addition of \key{if} in $R_2$, things get more interesting.
														
 
															+
														
 
															+As a motivating example, consider the following program that has an
														
 
															+\key{if} expression nested in the predicate of another \key{if}.
														
 
															+% s1_38.rkt
														
 
															+\begin{lstlisting}
														
 
															+(program ()
														
 
															+  (if (if (eq? (read) 1)
														
 
															+          (eq? (read) 0)
														
 
															+          (eq? (read) 2))
														
 
															+      (+ 10 32)
														
 
															+      (+ 700 77)))
														
 
															+\end{lstlisting}
														
 
															+%
														
 
															+The naive way to compile \key{if} and \key{eq?} would be to handle
														
 
															+each of them in isolation, regardless of their context.  Each
														
 
															+\key{eq?} would be translated into a \key{cmpq} instruction (and a
														
 
															+couple more instructions, as we shall see in
														
 
															+Section~\ref{sec:select-r2}), and each \key{if} would be translated
														
 
															+into the combination of a \key{cmpq} and \key{jmp-if}.  However, if we
														
 
															+take context into account we can do better and reduce the use of
														
 
															+\key{cmpq} and other instructions.
														
 
															+
														
 
															+Another thought is to try to reorganize the code at the level of
														
 
															+$R_2$, pushing the outer \key{if} inside the inner one. This would
														
 
															+yield the following code.
														
 
															+\begin{lstlisting}
														
 
															+(if (eq? (read) 1)
														
 
															+    (if (eq? (read) 0)
														
 
															+            (+ 10 32)
														
 
															+            (+ 700 77))
														
 
															+    (if (eq? (read) 2)
														
 
															+            (+ 10 32)
														
 
															+            (+ 700 77)))
														
 
															+\end{lstlisting}
														
 
															+Unfortunately, this approach forced us to duplicate the two branches,
														
 
															+and a compiler must never duplicate code.
														
 
															+
														
 
															+We need a way to perform the above transformation, but without
														
 
															+duplicating code. The solution to this problem is straightforward if
														
 
															+we instead think at the level of x86 assembly: we just need to label
														
 
															+the code for the two branches and insert jumps to those labels. Put
														
 
															+another way, we need to move away from abstract syntax \emph{trees}
														
 
															+and instead use \emph{graphs}. In particular, we shall use a standard
														
 
															+program representation called a \emph{control flow graph} (CFG).  Each
														
 
															+vertex is a labeled sequence of code, called a \emph{basic block}, and
														
 
															+each edge represents a jump to a label. Now we are in a position to
														
 
															+appreciate the \key{program} form of $C_0$ and $C_1$, which includes
														
 
															+an association list mapping labels to basic blocks.
														
 
															+
														
 
															+Recall that in Section~\ref{sec:explicate-control-r1} we implemented
														
 
															+this pass for $R_1$ in terms of the mutually recursive
														
 
															+\code{explicate-control-tail} and \code{explicate-control-assign}
														
 
															+functions.  The former function translated expressions in tail
														
 
															+position whereas the latter function translated expressions on the
														
 
															+right-hand side of a \key{let}. With the addition of \key{if} we have
														
 
															+a new kind of context: the predicate position of the \key{if}. So we
														
 
															+shall need another function, \code{explicate-control-pred}, that takes
														
 
															+an $R_2$ expression and two pieces of $C_1$ code (two $\Tail$'s) for
														
 
															+the then-branch and else-branch. The output of
														
 
															+\code{explicate-control-pred} is a $C_1$ $\Tail$.  However, these
														
 
															+three functions also need to construct the control-flow graph, which we
														
 
															+recommend they do via updates to a global variable.
														
 
															+
														
 
															+
														
 
															+UNDER CONSTRUCTION
														
 
															+
														
 
															+\begin{tabular}{lll}
														
 
															+\begin{minipage}{0.35\textwidth}
														
 
															+\begin{lstlisting}
														
 
															+(program ()
														
 
															+  (if (if (eq? (read) 1)
														
 
															+          (eq? (read) 0)
														
 
															+          (eq? (read) 2))
														
 
															+      (+ 10 32)
														
 
															+      (+ 700 77)))  
														
 
															+\end{lstlisting}
														
 
															+\end{minipage}
														
 
															+&
														
 
															+$\Rightarrow$
														
 
															+&
														
 
															+\begin{minipage}{0.55\textwidth}
														
 
															+\begin{lstlisting}
														
 
															+(program ()
														
 
															+  ((block62 .
														
 
															+     (seq (assign tmp54 (read))
														
 
															+          (if (eq? tmp54 2)
														
 
															+              (goto block59)
														
 
															+              (goto block60))))
														
 
															+   (block61 .
														
 
															+     (seq (assign tmp53 (read))
														
 
															+          (if (eq? tmp53 0)
														
 
															+               (goto block57)
														
 
															+               (goto block58))))
														
 
															+   (block60 . (goto block56))
														
 
															+   (block59 . (goto block55))
														
 
															+   (block58 . (goto block56))
														
 
															+   (block57 . (goto block55))
														
 
															+   (block56 . (return (+ 700 77)))
														
 
															+   (block55 . (return (+ 10 32)))
														
 
															+   (start . 
														
 
															+     (seq (assign tmp52 (read))
														
 
															+          (if (eq? tmp52 1)
														
 
															+               (goto block61)
														
 
															+               (goto block62))))))
														
 
															+\end{lstlisting}
														
 
															+\end{minipage}
														
 
															+\end{tabular}
														
 
															+
														
 
															+
														
 
															 \section{Select Instructions}
														
 
															 \label{sec:select-r2}
														
@@ -4008,14 +4135,13 @@ if_end21289:
 
															 \node (x86-2) at (3,-2)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-3) at (6,-2)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-4) at (9,-2) {\large $\text{x86}^{*}$};
														
 
															-\node (x86-5) at (12,-2) {\large $\text{x86}$};
														
 
															-\node (x86-6) at (12,-4) {\large $\text{x86}^{\dagger}$};
														
 
															+\node (x86-5) at (12,-2) {\large $\text{x86}^{\dagger}$};
														
 
															 \node (x86-2-1) at (3,-4)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-2-2) at (6,-4)  {\large $\text{x86}^{*}$};
														
 
															 \path[->,bend left=15] (R2) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R2-2);
														
 
															-\path[->,bend left=15] (R2-2) edge [above] node {\ttfamily\footnotesize shrink} (R2-3);
														
 
															+\path[->,bend left=15] (R2-2) edge [above] node {\ttfamily\footnotesize\color{red} shrink} (R2-3);
														
 
															 \path[->,bend left=15] (R2-3) edge [above] node {\ttfamily\footnotesize uniquify} (R2-4);
														
 
															 \path[->,bend left=15] (R2-4) edge [above] node {\ttfamily\footnotesize remove-complex.} (R2-5);
														
 
															 \path[->,bend left=15] (R2-5) edge [right] node {\ttfamily\footnotesize\color{red} explicate-control} (C1-1);
														
@@ -4024,9 +4150,8 @@ if_end21289:
 
															 \path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize\color{red} uncover-live} (x86-2-1);
														
 
															 \path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize build-inter.} (x86-2-2);
														
 
															 \path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
														
 
															-\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize\color{red} lower-cond.} (x86-4);
														
 
															-\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize\color{red} patch-instr.} (x86-5);
														
 
															-\path[->,bend right=15] (x86-5) edge [left] node {\ttfamily\footnotesize print-x86} (x86-6);
														
 
															+\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize\color{red} patch-instr.} (x86-4);
														
 
															+\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize\color{red} print-x86 } (x86-5);
														
 
															 \end{tikzpicture}
														
 
															 \caption{Diagram of the passes for $R_2$, a language with conditionals.}
														
 
															  \label{fig:R2-passes}
														
@@ -4035,9 +4160,11 @@ if_end21289:
 
															 Figure~\ref{fig:R2-passes} gives an overview of all the passes needed
														
 
															 for the compilation of $R_2$.
														
 
															-\section{Challenge: Optimizing Jumps$^{*}$}
														
 
															+\section{Challenge: Optimize Jumps$^{*}$}
														
 
															 \label{sec:opt-jumps}
														
 
															+UNDER CONSTRUCTION
														
 
															+
														
 
															 %% \section{Challenge: Optimizing Conditions$^{*}$}
														
 
															 %% \label{sec:opt-if}