Explorar el Código

progress on chapter 4

Jeremy Siek hace 4 años
padre
commit
a67c20e5b4
Se han modificado 2 ficheros con 246 adiciones y 201 borrados
  1. 231 197
      book.tex
  2. 15 4
      defs.tex

+ 231 - 197
book.tex

@@ -77,7 +77,7 @@
 \lstset{%
 language=Lisp,
 basicstyle=\ttfamily\small,
-morekeywords={seq,assign,program,block,define,lambda,match},
+morekeywords={seq,assign,program,block,define,lambda,match,goto,if,else,then},
 deletekeywords={read},
 escapechar=|,
 columns=flexible,
@@ -1157,10 +1157,11 @@ simplicity, here we use an association list to represent the
 environment. The \code{interp-R1} function takes the current
 environment, \code{env}, as an extra parameter.  When the interpreter
 encounters a variable, it finds the corresponding value using the
-\code{lookup} function (Appendix~\ref{appendix:utilities}).  When the
-interpreter encounters a \key{Let}, it evaluates the initializing
+\code{dict-ref} function from the \code{racket/dict} package.  When
+the interpreter encounters a \key{Let}, it evaluates the initializing
 expression, extends the environment with the result value bound to the
-variable, then evaluates the body of the \key{Let}.
+variable (using \code{dict-set}), then evaluates the body of the
+\key{Let}.
 
 \begin{figure}[tbp]
 \begin{lstlisting}
@@ -1179,7 +1180,7 @@ variable, then evaluates the body of the \key{Let}.
        (define v1 ((interp-exp env) e1))
        (define v2 ((interp-exp env) e2))
        (fx+ v1 v2)]
-      [(Var x) (lookup x env)]
+      [(Var x) (dict-ref env x)]
       [(Let x e body)
        (define new-env (dict-set env x ((interp-exp env) e)))
        ((interp-exp new-env) body)]
@@ -1261,10 +1262,10 @@ the x86 instructions used in this book.
 \[
 \begin{array}{lcl}
 \Reg &::=& \allregisters{} \\
-\Arg &::=&  \key{\$}\Int \mid \key{\%}\Reg \mid \Int(\key{\%}\Reg) \\
-\Instr &::=& \key{addq} \; \Arg, \Arg \mid
-      \key{subq} \; \Arg, \Arg \mid
-      \key{negq} \; \Arg \mid \key{movq} \; \Arg, \Arg \mid \\
+\Arg &::=&  \key{\$}\Int \mid \key{\%}\Reg \mid \Int\key{(}\key{\%}\Reg\key{)}\\
+\Instr &::=& \key{addq} \; \Arg\key{,} \Arg \mid
+      \key{subq} \; \Arg\key{,} \Arg \mid
+      \key{negq} \; \Arg \mid \key{movq} \; \Arg\key{,} \Arg \mid \\
   &&  \key{callq} \; \mathit{label} \mid
       \key{pushq}\;\Arg \mid \key{popq}\;\Arg \mid \key{retq} \mid \itm{label}\key{:}\; \Instr \\
 \Prog &::= & \key{.globl main}\\
@@ -1289,12 +1290,12 @@ bytes to the address. The resulting address is used to either load or
 store to memory depending on whether it occurs as a source or
 destination argument of an instruction.
 
-An arithmetic instruction such as $\key{addq}\,s,\,d$ reads from the
+An arithmetic instruction such as $\key{addq}\,s\key{,}\,d$ reads from the
 source $s$ and destination $d$, applies the arithmetic operation, then
 writes the result back to the destination $d$.
 %
-The move instruction $\key{movq}\,s\,d$ reads from $s$ and stores the
-result in $d$.
+The move instruction $\key{movq}\,s\key{,}\,d$ reads from $s$ and
+stores the result in $d$.
 %
 The $\key{callq}\,\mathit{label}$ instruction executes the procedure
 specified by the label. We discuss procedure calls in more detail
@@ -1449,24 +1450,25 @@ we introduce conditional branching.
 \begin{figure}[tp]
 \fbox{
 \begin{minipage}{0.96\textwidth}
+\small    
 \[
 \begin{array}{lcl}
-\itm{reg} &::=& \allregisters{} \\
-\Arg &::=&  \IMM{\Int} \mid \REG{\itm{reg}}
-   \mid \DEREF{\itm{reg}}{\Int} \\
-\Instr &::=& \BININSTR{\code{'addq}}{\Arg}{\Arg} \\
-       &\mid& \BININSTR{\code{'subq}}{\Arg}{\Arg} \\
-       &\mid& \BININSTR{\code{'movq}}{\Arg}{\Arg}\\
-       &\mid& \UNIINSTR{\code{'negq}}{\Arg}\\
-       &\mid& \CALLQ{\itm{label}} \mid \RETQ{} \\
-       &\mid& \PUSHQ{\Arg} \mid \POPQ{\Arg} \\
+\Reg &::=& \allregisters{} \\
+\Arg &::=&  \IMM{\Int} \mid \REG{\code{'}\Reg}
+   \mid \DEREF{\Reg}{\Int} \\
+\Instr &::=& \BININSTR{\code{'addq}}{\Arg}{\Arg} 
+       \mid \BININSTR{\code{'subq}}{\Arg}{\Arg} \\
+       &\mid& \BININSTR{\code{'movq}}{\Arg}{\Arg}
+       \mid \UNIINSTR{\code{'negq}}{\Arg}\\
+       &\mid& \CALLQ{\itm{label}} \mid \RETQ{} 
+       \mid \PUSHQ{\Arg} \mid \POPQ{\Arg} \\
 \Block &::= & \BLOCK{\itm{info}}{\Instr^{+}} \\
 x86_0 &::= & \PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label} \,\key{.}\, \Block \key{)}^{+}}}
 \end{array}
 \]
 \end{minipage}
 }
-\caption{Abstract syntax for $x86_0$ assembly.}
+\caption{Abstract syntax of $x86_0$ assembly.}
 \label{fig:x86-ast-a}
 \end{figure}
 
@@ -1693,8 +1695,8 @@ assignment.
 \begin{minipage}{0.96\textwidth}
 \[
 \begin{array}{lcl}
-\Arg &::=& \Int \mid \Var \\
-\Exp &::=& \Arg \mid \key{(read)} \mid \key{(-}~\Arg\key{)} \mid \key{(+}~\Arg~\Arg\key{)}\\
+\Atm &::=& \Int \mid \Var \\
+\Exp &::=& \Atm \mid \key{(read)} \mid \key{(-}~\Atm\key{)} \mid \key{(+}~\Atm~\Atm\key{)}\\
 \Stmt &::=& \Var~\key{=}~\Exp\key{;} \\
 \Tail &::= & \key{return}~\Exp\key{;} \mid \Stmt~\Tail \\
 C_0 & ::= & (\itm{label}\key{:}~ \Tail)^{+}
@@ -1711,9 +1713,9 @@ C_0 & ::= & (\itm{label}\key{:}~ \Tail)^{+}
 \begin{minipage}{0.96\textwidth}
 \[
 \begin{array}{lcl}
-\Arg &::=& \INT{\Int} \mid \VAR{\Var} \\
-\Exp &::=& \Arg \mid \READ{} \mid \NEG{\Arg} \\
- &\mid& \ADD{\Arg}{\Arg}\\
+\Atm &::=& \INT{\Int} \mid \VAR{\Var} \\
+\Exp &::=& \Atm \mid \READ{} \mid \NEG{\Atm} \\
+ &\mid& \ADD{\Atm}{\Atm}\\
 \Stmt &::=& \ASSIGN{\Var}{\Exp} \\
 \Tail &::= & \RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail} \\
 C_0 & ::= & \PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label}\,\key{.}\,\Tail\key{)}^{+}}}
@@ -2079,9 +2081,9 @@ this pass is a variable of x86 that still uses variables, so we add an
 AST node of the form $\VAR{\itm{var}}$ to the $\text{x86}_0$ abstract
 syntax of Figure~\ref{fig:x86-ast-a}.  We recommend implementing the
 \code{select-instructions} in terms of three auxiliary functions, one
-for each of the non-terminals of $C_0$: $\Arg$, $\Stmt$, and $\Tail$.
+for each of the non-terminals of $C_0$: $\Atm$, $\Stmt$, and $\Tail$.
 
-The cases for $\Arg$ are straightforward, variables stay
+The cases for $\Atm$ are straightforward, variables stay
 the same and integer constants are changed to immediates:
 $\INT{n}$ changes to $\IMM{n}$.
 
@@ -3565,7 +3567,7 @@ comparing integers.
      &\mid& \key{\#t} \mid \key{\#f} 
       \mid (\key{and}\;\Exp\;\Exp) \mid (\key{or}\;\Exp\;\Exp)
       \mid (\key{not}\;\Exp) \\
-      &\mid& (\itm{cmp}\;\Exp\;\Exp) \mid \IF{\Exp}{\Exp}{\Exp} \\
+      &\mid& (\itm{cmp}\;\Exp\;\Exp) \mid (\key{if}~\Exp~\Exp~\Exp) \\
   R_2 &::=& \Exp
 \end{array}
 \]
@@ -3582,14 +3584,17 @@ comparing integers.
 \begin{minipage}{0.96\textwidth}
 \[
 \begin{array}{lcl}
+  \itm{bool} &::=& \key{\#t} \mid \key{\#f} \\
   \itm{cmp} &::= & \key{eq?} \mid \key{<} \mid \key{<=} \mid \key{>} \mid \key{>=} \\
 \Exp &::=& \gray{\INT{\Int} \mid \READ{} \mid \NEG{\Exp}} \\
-     &\mid& \gray{\ADD{\Exp}{\Exp}  
-      \mid  \VAR{\Var} \mid \LET{\Var}{\Exp}{\Exp}} \\
-     &\mid& \key{\#t} \mid \key{\#f} 
-      \mid (\key{and}\;\Exp\;\Exp) \mid (\key{or}\;\Exp\;\Exp)
-      \mid (\key{not}\;\Exp) \\
-      &\mid& (\itm{cmp}\;\Exp\;\Exp) \mid \IF{\Exp}{\Exp}{\Exp} \\
+     &\mid& \gray{\ADD{\Exp}{\Exp}}
+      \mid \BINOP{\code{'-}}{\Exp}{\Exp} \\
+     &\mid& \gray{\VAR{\Var} \mid \LET{\Var}{\Exp}{\Exp}} \\
+     &\mid& \BOOL{\itm{bool}} 
+      \mid \AND{\Exp}{\Exp}\\
+     &\mid& \OR{\Exp}{\Exp}
+      \mid \NOT{\Exp} \\
+      &\mid& \BINOP{\code{'}\itm{cmp}}{\Exp}{\Exp} \mid \IF{\Exp}{\Exp}{\Exp} \\
   R_2 &::=& \PROGRAM{\key{'()}}{\Exp}
 \end{array}
 \]
@@ -3775,6 +3780,7 @@ for a program, then interpreting that program should not encounter an
 error.  If it does, there is something wrong with your type checker.
 \end{exercise}
 
+
 \section{Shrink the $R_2$ Language}
 \label{sec:shrink-r2}
 
@@ -3782,7 +3788,7 @@ The $R_2$ language includes several operators that are easily
 expressible in terms of other operators. For example, subtraction is
 expressible in terms of addition and negation.
 \[
- (\key{-}\; e_1 \; e_2) \quad \Rightarrow \quad (\key{+} \; e_1 \; (\key{-} \; e_2))
+ \key{(-}\; e_1 \; e_2\key{)} \quad \Rightarrow \quad (\key{+} \; e_1 \; (\key{-} \; e_2))
 \]
 Several of the comparison operations are expressible in terms of
 less-than and logical negation.
@@ -3791,7 +3797,7 @@ less-than and logical negation.
 \LET{t_1}{e_1}{(\key{not}\;(\key{<}\;e_2\;t_1))}
 \]
 By performing these translations near the front-end of the compiler,
-the later passes of the compiler will not need to deal with these
+the later passes of the compiler do not need to deal with these
 constructs, making those passes shorter. On the other hand, sometimes
 these translations make it more difficult to generate the most
 efficient code with respect to the number of instructions. However,
@@ -3832,50 +3838,53 @@ exclusive-or:
 \end{tabular}
 \end{center}
 For example, $0011 \mathrel{\mathrm{XOR}} 0101 = 0110$.  Notice that
-in row of the table for the bit $1$, the result is the opposite of the
+in the row of the table for the bit $1$, the result is the opposite of the
 second bit.  Thus, the \code{not} operation can be implemented by
-\code{xorq} with $1$ as the first argument: $0001
-\mathrel{\mathrm{XOR}} 0000 = 0001$ and $0001 \mathrel{\mathrm{XOR}}
-0001 = 0000$.
+\code{xorq} with $1$ as the first argument:
+\begin{align*}
+  0001 \mathrel{\mathrm{XOR}} 0000 &= 0001\\
+  0001 \mathrel{\mathrm{XOR}} 0001 &= 0000
+\end{align*}
 
 \begin{figure}[tp]
 \fbox{
 \begin{minipage}{0.96\textwidth}
+\small    
 \[
 \begin{array}{lcl}
-\Arg &::=&  \gray{\INT{\Int} \mid \REG{\itm{register}}
-    \mid (\key{deref}\,\itm{register}\,\Int)} \\
-     &\mid& (\key{byte-reg}\; \itm{register}) \\
+\Arg &::=&  \gray{\IMM{\Int} \mid \REG{\code{'}\Reg} \mid \DEREF{\Reg}{\Int}} 
+     \mid \BYTEREG{\code{'}\Reg} \\
 \itm{cc} & ::= & \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge} \\
-\Instr &::=& \gray{(\key{addq} \; \Arg\; \Arg) \mid
-             (\key{subq} \; \Arg\; \Arg) \mid
-             (\key{negq} \; \Arg) \mid (\key{movq} \; \Arg\; \Arg)} \\
-      &\mid& \gray{(\key{callq} \; \mathit{label}) \mid
-             (\key{pushq}\;\Arg) \mid
-             (\key{popq}\;\Arg) \mid
-             (\key{retq})} \\
-       &\mid& (\key{xorq} \; \Arg\;\Arg)
-       \mid (\key{cmpq} \; \Arg\; \Arg) \mid (\key{set}\;\itm{cc} \; \Arg) \\
-       &\mid& (\key{movzbq}\;\Arg\;\Arg)
-       \mid  (\key{jmp} \; \itm{label})
-       \mid (\key{jmp-if}\; \itm{cc} \; \itm{label}) \\
-       &\mid& (\key{label} \; \itm{label}) \\
-x86_1 &::= & (\key{program} \;\itm{info} \;(\key{type}\;\itm{type})\; \Instr^{+})
+\Instr &::=& \gray{\BININSTR{\code{'addq}}{\Arg}{\Arg}} 
+       \mid \gray{\BININSTR{\code{'subq}}{\Arg}{\Arg}} \\
+       &\mid& \gray{\BININSTR{\code{'movq}}{\Arg}{\Arg}} 
+       \mid \gray{\UNIINSTR{\code{'negq}}{\Arg}} \\
+       &\mid& \gray{\CALLQ{\itm{label}} \mid \RETQ{}} 
+       \mid \gray{\PUSHQ{\Arg} \mid \POPQ{\Arg}} \\
+       &\mid& \BININSTR{\code{'xorq}}{\Arg}{\Arg}
+       \mid \BININSTR{\code{'cmpq}}{\Arg}{\Arg}\\
+       &\mid& \BININSTR{\code{'set}}{\code{'}\itm{cc}}{\Arg} 
+       \mid \BININSTR{\code{'movzbq}}{\Arg}{\Arg}\\
+       &\mid&  \JMP{\itm{label}}
+       \mid \JMPIF{\code{'}\itm{cc}}{\itm{label}} \\
+%       &\mid& (\key{label} \; \itm{label}) \\
+\Block &::= & \gray{\BLOCK{\itm{info}}{\Instr^{+}}} \\
+x86_1 &::= & \gray{\PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label} \,\key{.}\, \Block \key{)}^{+}}}}
 \end{array}
 \]
 \end{minipage}
 }
-\caption{The x86$_1$ language (extends x86$_0$ of Figure~\ref{fig:x86-ast-a}).}
+\caption{The abstract syntax of $x86_1$ (extends $x86_0$ of Figure~\ref{fig:x86-ast-a}).}
 \label{fig:x86-1}
 \end{figure}
 
-Next we consider the x86 instructions that are relevant for
-compiling the comparison operations. The \key{cmpq} instruction
-compares its two arguments to determine whether one argument is less
-than, equal, or greater than the other argument. The \key{cmpq}
-instruction is unusual regarding the order of its arguments and where
-the result is placed. The argument order is backwards: if you want to
-test whether $x < y$, then write \code{cmpq y, x}. The result of
+Next we consider the x86 instructions that are relevant for compiling
+the comparison operations. The \key{cmpq} instruction compares its two
+arguments to determine whether one argument is less than, equal, or
+greater than the other argument. The \key{cmpq} instruction is unusual
+regarding the order of its arguments and where the result is
+placed. The argument order is backwards: if you want to test whether
+$x < y$, then write \code{cmpq} $y$\code{,} $x$. The result of
 \key{cmpq} is placed in the special EFLAGS register. This register
 cannot be accessed directly but it can be queried by a number of
 instructions, including the \key{set} instruction. The \key{set}
@@ -3883,25 +3892,26 @@ instruction puts a \key{1} or \key{0} into its destination depending
 on whether the comparison came out according to the condition code
 \itm{cc} (\key{e} for equal, \key{l} for less, \key{le} for
 less-or-equal, \key{g} for greater, \key{ge} for greater-or-equal).
-The set instruction has an annoying quirk in that its destination
-argument must be single byte register, such as \code{al}, which is
-part of the \code{rax} register.  Thankfully, the \key{movzbq}
-instruction can then be used to move from a single byte register to a
-normal 64-bit register.
+The \key{set} instruction has an annoying quirk in that its
+destination argument must be a single byte register, such as \code{al},
+which is part of the \code{rax} register.  Thankfully, the
+\key{movzbq} instruction can then be used to move from a single byte
+register to a normal 64-bit register.
 
 For compiling the \key{if} expression, the x86 instructions for
-jumping are relevant. The \key{jmp} instruction updates the program
+jumping are relevant. The \key{Jmp} instruction updates the program
 counter to point to the instruction after the indicated label.  The
-\key{jmp-if} instruction updates the program counter to point to the
+\key{JmpIf} instruction updates the program counter to point to the
 instruction after the indicated label depending on whether the result
 in the EFLAGS register matches the condition code \itm{cc}, otherwise
-the \key{jmp-if} instruction falls through to the next
-instruction. Because the \key{jmp-if} instruction relies on the EFLAGS
-register, it is quite common for the \key{jmp-if} to be immediately
+the \key{JmpIf} instruction falls through to the next
+instruction. Because the \key{JmpIf} instruction relies on the EFLAGS
+register, it is quite common for the \key{JmpIf} to be immediately
 preceded by a \key{cmpq} instruction, to set the EFLAGS register.
-Our abstract syntax for \key{jmp-if} differs from the concrete syntax
+Our abstract syntax for \key{JmpIf} differs from the concrete syntax
 for x86 to separate the instruction name from the condition code. For
-example, \code{(jmp-if le foo)} corresponds to \code{jle foo}.
+example, \code{(JmpIf 'le foo)} corresponds to \code{jle foo}.
+
 
 \section{The $C_1$ Intermediate Language}
 \label{sec:c1}
@@ -3925,24 +3935,28 @@ and \key{goto}'s.
 \begin{figure}[tp]
 \fbox{
 \begin{minipage}{0.96\textwidth}
+\small    
 \[
 \begin{array}{lcl}
-\Arg &::=& \gray{\Int \mid \Var} \mid \key{\#t} \mid \key{\#f} \\
+\Atm &::=& \gray{\INT{\Int} \mid \VAR{\Var}} \mid \BOOL{\itm{bool}} \\
 \itm{cmp} &::= & \key{eq?} \mid \key{<}  \\
-\Exp &::= & \gray{\Arg \mid (\key{read}) \mid (\key{-}\;\Arg) \mid (\key{+} \; \Arg\;\Arg)}
-      \mid (\key{not}\;\Arg) \mid (\itm{cmp}\;\Arg\;\Arg) \\
+\Exp &::= & \gray{\Atm \mid \READ{} \mid \NEG{\Atm} }\\
+     &\mid& \gray{ \ADD{\Atm}{\Atm} } 
+     \mid \UNIOP{\key{not}}{\Atm} \\
+     &\mid& \BINOP{\itm{cmp}}{\Atm}{\Atm} \\
 \Stmt &::=& \gray{ \ASSIGN{\Var}{\Exp} } \\
-\Tail &::= & \gray{\RETURN{\Exp} \mid (\key{seq}\;\Stmt\;\Tail)} \\
-      &\mid& (\key{goto}\,\itm{label}) \mid \IF{(\itm{cmp}\, \Arg\,\Arg)}{(\key{goto}\,\itm{label})}{(\key{goto}\,\itm{label})} \\
-C_1 & ::= & (\key{program}\;\itm{info}\; ((\itm{label}\,\key{.}\,\Tail)^{+}))
+\Tail &::= & \gray{\RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail} } \\
+      &\mid& \GOTO{\itm{label}} \mid \IFSTMT{\key{(}\itm{cmp}\,\Atm\,\Atm\key{)}}{\GOTO{\itm{label}}}{\GOTO{\itm{label}}} \\
+C_1 & ::= & \gray{\PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label}\,\key{.}\,\Tail\key{)}^{+}}}}
 \end{array}
 \]
 \end{minipage}
 }
-\caption{The $C_1$ language, extending $C_0$ with Booleans and conditionals.}
+\caption{The abstract syntax of $C_1$, extending $C_0$ with Booleans and conditionals.}
 \label{fig:c1-syntax}
 \end{figure}
 
+
 \section{Explicate Control}
 \label{sec:explicate-control-r2}
 
@@ -3953,57 +3967,64 @@ addition of \key{if} in $R_2$, things get more interesting.
 As a motivating example, consider the following program that has an
 \key{if} expression nested in the predicate of another \key{if}.
 % s1_38.rkt
+\begin{center}
+\begin{minipage}{0.96\textwidth}
 \begin{lstlisting}
-    (program ()
-      (if (if (eq? (read) 1)
-              (eq? (read) 0)
-              (eq? (read) 2))
-          (+ 10 32)
-          (+ 700 77)))
+(if (if (eq? (read) 1)
+        (eq? (read) 0)
+        (eq? (read) 2))
+    (+ 10 32)
+    (+ 700 77))
 \end{lstlisting}
+\end{minipage}
+\end{center}
 %
 The naive way to compile \key{if} and \key{eq?} would be to handle
 each of them in isolation, regardless of their context.  Each
 \key{eq?} would be translated into a \key{cmpq} instruction followed
 by a couple instructions to move the result from the EFLAGS register
 into a general purpose register or stack location. Each \key{if} would
-be translated into the combination of a \key{cmpq} and \key{jmp-if}.
+be translated into the combination of a \key{cmpq} and \key{JmpIf}.
 However, if we take context into account we can do better and reduce
 the use of \key{cmpq} and EFLAGS-accessing instructions.
 
 One idea is to try and reorganize the code at the level of $R_2$,
 pushing the outer \key{if} inside the inner one. This would yield the
 following code.
+\begin{center}
+\begin{minipage}{0.96\textwidth}
 \begin{lstlisting}
-    (if (eq? (read) 1)
-        (if (eq? (read) 0)
-                (+ 10 32)
-                (+ 700 77))
-            (if (eq? (read) 2))
-                (+ 10 32)
-                (+ 700 77))
+(if (eq? (read) 1)
+    (if (eq? (read) 0)
+        (+ 10 32)
+        (+ 700 77))
+    (if (eq? (read) 2)
+        (+ 10 32)
+        (+ 700 77)))
 \end{lstlisting}
+\end{minipage}
+\end{center}
 Unfortunately, this approach duplicates the two branches, and a
 compiler must never duplicate code!
 
 We need a way to perform the above transformation, but without
 duplicating code. The solution is straightforward if we think at the
 level of x86 assembly: we can label the code for each of the branches
-and insert \key{goto}'s in all the places that need to execute the
+and insert jumps in all the places that need to execute the
 branches. Put another way, we need to move away from abstract syntax
 \emph{trees} and instead use \emph{graphs}. In particular, we shall
 use a standard program representation called a \emph{control flow
   graph} (CFG), due to Frances Elizabeth \citet{Allen:1970uq}.  Each
 vertex is a labeled sequence of code, called a \emph{basic block}, and
-each edge represents a jump to another block. The \key{program}
-construct of $C_0$ and $C_1$ represents a control flow graph as an
-association list mapping labels to basic blocks. Each block is
+each edge represents a jump to another block. The \key{Program}
+construct of $C_0$ and $C_1$ contains a control flow graph represented
+as an association list mapping labels to basic blocks. Each block is
 represented by the $\Tail$ non-terminal.
 
 Figure~\ref{fig:explicate-control-s1-38} shows the output of the
 \code{remove-complex-opera*} pass and then the
-\code{explicate-control} pass on the example program. We shall walk
-through the output program and then discuss the algorithm.
+\code{explicate-control} pass on the example program. We walk through
+the output program and then discuss the algorithm.
 %
 Following the order of evaluation in the output of
 \code{remove-complex-opera*}, we first have the \code{(read)} and
@@ -4023,24 +4044,22 @@ story for \code{block62} is similar.
 \begin{tabular}{lll}
 \begin{minipage}{0.4\textwidth}
 \begin{lstlisting}
-(program ()
-  (if (if (eq? (read) 1)
-          (eq? (read) 0)
-          (eq? (read) 2))
-      (+ 10 32)
-      (+ 700 77)))  
+(if (if (eq? (read) 1)
+        (eq? (read) 0)
+        (eq? (read) 2))
+    (+ 10 32)
+    (+ 700 77))
 \end{lstlisting}
 \hspace{40pt}$\Downarrow$
 \begin{lstlisting}
-(program ()
-  (if (if (let ([tmp52 (read)])
-            (eq? tmp52 1))
-          (let ([tmp53 (read)]) 
-            (eq? tmp53 0))
-          (let ([tmp54 (read)]) 
-            (eq? tmp54 2)))
-   (+ 10 32)
-   (+ 700 77)))
+(if (if (let ([tmp52 (read)])
+          (eq? tmp52 1))
+        (let ([tmp53 (read)]) 
+          (eq? tmp53 0))
+        (let ([tmp54 (read)]) 
+          (eq? tmp54 2)))
+ (+ 10 32)
+ (+ 700 77))
 \end{lstlisting}
 \end{minipage}
 &
@@ -4048,28 +4067,36 @@ $\Rightarrow$
 &
 \begin{minipage}{0.55\textwidth}
 \begin{lstlisting}
-(program ()
-  ((block62 .
-     (seq (assign tmp54 (read))
-          (if (eq? tmp54 2)
-              (goto block59)
-              (goto block60))))
-   (block61 .
-     (seq (assign tmp53 (read))
-          (if (eq? tmp53 0)
-               (goto block57)
-               (goto block58))))
-   (block60 . (goto block56))
-   (block59 . (goto block55))
-   (block58 . (goto block56))
-   (block57 . (goto block55))
-   (block56 . (return (+ 700 77)))
-   (block55 . (return (+ 10 32)))
-   (start . 
-     (seq (assign tmp52 (read))
-          (if (eq? tmp52 1)
-               (goto block61)
-               (goto block62))))))
+block62:
+    tmp54 = (read);
+    if (eq? tmp54 2) then
+       goto block59;
+    else
+       goto block60;
+block61:
+    tmp53 = (read);
+    if (eq? tmp53 0) then
+       goto block57;
+    else
+       goto block58;
+block60:
+    goto block56;
+block59:
+    goto block55;
+block58:
+    goto block56;
+block57:
+    goto block55;
+block56:
+    return (+ 700 77);
+block55:
+    return (+ 10 32);
+start:
+    tmp52 = (read);
+    if (eq? tmp52 1) then
+       goto block61;
+    else
+       goto block62;
 \end{lstlisting}
 \end{minipage}
 \end{tabular} 
@@ -4086,47 +4113,46 @@ is that it includes trivial blocks, such as \code{block57} through
 \code{block60}, that only jump to another block. We discuss a solution
 to this problem in Section~\ref{sec:opt-jumps}.
 
-Recall that in Section~\ref{sec:explicate-control-r1} we implement the
-\code{explicate-control} pass for $R_1$ using two mutually recursive
-functions, \code{explicate-tail} and
-\code{explicate-assign}.  The former function translated
-expressions in tail position whereas the later function translated
-expressions on the right-hand-side of a \key{let}. With the addition
-of \key{if} expression in $R_2$ we have a new kind of context to deal
-with: the predicate position of the \key{if}. So we shall need another
-function, \code{explicate-pred}, that takes an $R_2$
-expression and two pieces of $C_1$ code (two $\Tail$'s) for the
-then-branch and else-branch. The output of
-\code{explicate-pred} is a $C_1$ $\Tail$.  However, these
-three functions also need to construct the control-flow graph, which we
-recommend they do via updates to a global variable. Next we consider
-the specific additions to the tail and assign functions, and some of
-cases for the pred function.
-
-The \code{explicate-tail} function needs an additional case
-for \key{if}. The branches of the \key{if} inherit the current
-context, so they are in tail position.  Let $B_1$ be the result of
-\code{explicate-tail} on the $\itm{thn}$ branch and $B_2$ be
-the result of apply \code{explicate-tail} to the $\itm{else}$
-branch. Then the \key{if} translates to the block $B_3$ which is the
-result of applying \code{explicate-pred} to the predicate
-$\itm{cnd}$ and the blocks $B_1$ and $B_2$.
+Recall that in Section~\ref{sec:explicate-control-r1} we implement
+\code{explicate-control} for $R_1$ using two mutually recursive
+functions, \code{explicate-tail} and \code{explicate-assign}.  The
+former function translates expressions in tail position whereas the
+latter function translates expressions on the right-hand-side of a
+\key{let}. With the addition of the \key{if} expression in $R_2$ we have a
+new kind of context to deal with: the predicate position of the
+\key{if}. We need another function, \code{explicate-pred}, that takes
+an $R_2$ expression and two pieces of $C_1$ code (two $\Tail$'s) for
+the then-branch and else-branch. The output of \code{explicate-pred}
+is a $C_1$ $\Tail$.  However, these three functions also need to
+construct the control-flow graph, which we recommend they do via
+updates to a global variable (be careful!). Next we consider the
+specific additions to the tail and assign functions, and some of the cases
+for the pred function.
+
+The \code{explicate-tail} function needs an additional case for
+\key{if}. The branches of the \key{if} inherit the current context, so
+they are in tail position.  Let $B_1$ be the result of
+\code{explicate-tail} on the $\itm{thn}$ branch and $B_2$ be the
+result of applying \code{explicate-tail} to the $\itm{els}$ branch. Then
+the \key{if} as a whole translates to the block $B_3$ which is the
+result of applying \code{explicate-pred} to the predicate $\itm{cnd}$
+and the blocks $B_1$ and $B_2$.
 \[
     (\key{if}\; \itm{cnd}\; \itm{thn}\; \itm{els}) \quad\Rightarrow\quad B_3
 \]
 
 Next we consider the case for \key{if} in the
-\code{explicate-assign} function. So the context of the
+\code{explicate-assign} function. The context of the
 \key{if} is an assignment to some variable $x$ and then the control
-continues to some block $B_1$.  The code that we generate for both the
-$\itm{thn}$ and $\itm{els}$ branches shall both need to continue to
+continues to some block $B_1$.  The code that we generate for the
+$\itm{thn}$ and $\itm{els}$ branches needs to continue to
 $B_1$, so we add $B_1$ to the control flow graph with a fresh label
 $\ell_1$.  Again, the branches of the \key{if} inherit the current
 context, so they are in assignment positions.  Let $B_2$ be the result
 of applying \code{explicate-assign} to the $\itm{thn}$ branch,
-variable $x$, and the block \code{(goto $\ell_1$)}.  Let $B_3$ be the
+variable $x$, and the block \GOTO{$\ell_1$}.  Let $B_3$ be the
 result of applying \code{explicate-assign} to the $\itm{els}$
-branch, variable $x$, and the block \code{(goto $\ell_1$)}. The
+branch, variable $x$, and the block \GOTO{$\ell_1$}. The
 \key{if} translates to the block $B_4$ which is the result of applying
 \code{explicate-pred} to the predicate $\itm{cnd}$ and the
 blocks $B_2$ and $B_3$.
@@ -4139,14 +4165,18 @@ expression that can have type \code{Boolean}. We detail a few cases
 here and leave the rest for the reader. The input to this function is
 an expression and two blocks, $B_1$ and $B_2$, for the branches of the
 enclosing \key{if}. One of the base cases of this function is when the
-expression is a less-than comparison. We translate it to a
-conditional \code{goto}. We need labels for the two branches $B_1$ and
-$B_2$, so we add them to the control flow graph and obtain some labels
-$\ell_1$ and $\ell_2$. The translation of the less-than comparison is
-as follows.
+expression is a less-than comparison. We translate it to a conditional
+goto. We need labels for the two branches $B_1$ and $B_2$, so we add
+them to the control flow graph and obtain some labels $\ell_1$ and
+$\ell_2$. The translation of the less-than comparison is as follows.
 \[
-(\key{<}\;e_1\;e_2) \quad\Rightarrow\quad
-(\key{if}\;(\key{<}\;e_1\;e_2)\;(\key{goto}\;\ell_1)\;(\key{goto}\;\ell_2))
+(\key{<}~e_1~e_2) \quad\Rightarrow\quad
+\begin{array}{l}
+\key{if}~(\key{<}~e_1~e_2)~\key{then} \\
+\qquad\key{goto}~\ell_1\key{;}\\
+\key{else}\\
+\qquad\key{goto}~\ell_2\key{;}
+\end{array}
 \]
 
 The case for \key{if} in \code{explicate-pred} is particularly
@@ -4157,8 +4187,8 @@ and obtain the labels $\ell_1$ and $\ell_2$.  The branches $\itm{thn}$
 and $\itm{els}$ of the current \key{if} inherit their context from the
 current one, i.e., predicate context. So we apply
 \code{explicate-pred} to $\itm{thn}$ with the two blocks
-\code{(goto $\ell_1$)} and \code{(goto $\ell_2$)}, to obtain $B_3$.
-Similarly for the $\itm{els}$ branch, to obtain $B_4$.
+\GOTO{$\ell_1$} and \GOTO{$\ell_2$}, to obtain $B_3$.
+Proceed in a similar way with the $\itm{els}$ branch, to obtain $B_4$.
 Finally, we apply \code{explicate-pred} to
 the predicate $\itm{cnd}$ and the blocks $B_3$ and $B_4$
 to obtain the result $B_5$.
@@ -4183,9 +4213,9 @@ Recall that the \code{select-instructions} pass lowers from our
 $C$-like intermediate representation to the pseudo-x86 language, which
 is suitable for conducting register allocation. The pass is
 implemented using three auxiliary functions, one for each of the
-non-terminals $\Arg$, $\Stmt$, and $\Tail$.
+non-terminals $\Atm$, $\Stmt$, and $\Tail$.
 
-For $\Arg$, we have new cases for the Booleans.  We take the usual
+For $\Atm$, we have new cases for the Booleans.  We take the usual
 approach of encoding them as integers, with true as 1 and false as 0.
 \[
 \key{\#t} \Rightarrow \key{1}
@@ -4195,21 +4225,25 @@ approach of encoding them as integers, with true as 1 and false as 0.
 
 For $\Stmt$, we discuss a couple cases.  The \code{not} operation can
 be implemented in terms of \code{xorq} as we discussed at the
-beginning of this section. Given an assignment \code{(assign
-  $\itm{lhs}$ (not $\Arg$))}, if the left-hand side $\itm{lhs}$ is
-the same as $\Arg$, then just the \code{xorq} suffices:
+beginning of this section. Given an assignment
+$\itm{lhs}$ \key{=} \key{(not} $\Arg$\key{);},
+if the left-hand side $\itm{lhs}$ is
+the same as $\Arg$, then just the \code{xorq} suffices.
 \[
-(\key{assign}\; x\; (\key{not}\; x))
+x~\key{=}~ \key{(not}\; x\key{);}
 \quad\Rightarrow\quad
-((\key{xorq}\;(\key{int}\;1)\;x'))
+\key{xorq}~\key{\$}1\key{,}~x
 \]
 Otherwise, a \key{movq} is needed to adapt to the update-in-place
 semantics of x86. Let $\Arg'$ be the result of recursively processing
 $\Arg$. Then we have
 \[
-(\key{assign}\; \itm{lhs}\; (\key{not}\; \Arg))
+\itm{lhs}~\key{=}~ \key{(not}\; \Arg\key{);}
 \quad\Rightarrow\quad
-((\key{movq}\; \Arg'\; \itm{lhs}') \; (\key{xorq}\;(\key{int}\;1)\;\itm{lhs}'))
+\begin{array}{l}
+\key{movq}~\Arg'\key{,}~\itm{lhs}\\
+\key{xorq}~\key{\$}1\key{,}~\itm{lhs}
+\end{array}
 \]
 
 Next consider the cases for \code{eq?} and less-than comparison.
@@ -5455,9 +5489,9 @@ the register allocator.
 \begin{minipage}{0.96\textwidth}
 \[
 \begin{array}{lcl}
-\Arg &::=&  \gray{  \INT{\Int} \mid \REG{\itm{register}}
-    \mid (\key{deref}\,\itm{register}\,\Int) } \\
-   &\mid& \gray{ (\key{byte-reg}\; \itm{register})  }
+\Arg &::=&  \gray{  \INT{\Int} \mid \REG{\Reg}
+    \mid (\key{deref}\,\Reg\,\Int) } \\
+   &\mid& \gray{ (\key{byte-reg}\; \Reg)  }
    \mid (\key{global-value}\; \itm{name}) \\
 \itm{cc} & ::= & \gray{  \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge}  } \\
 \Instr &::=& \gray{(\key{addq} \; \Arg\; \Arg) \mid
@@ -6312,9 +6346,9 @@ language, whose syntax is defined in Figure~\ref{fig:x86-3}.
 \begin{minipage}{0.96\textwidth}
 \[
 \begin{array}{lcl}
-\Arg &::=&  \gray{  \INT{\Int} \mid \REG{\itm{register}}
-    \mid (\key{deref}\,\itm{register}\,\Int) } \\
-   &\mid& \gray{ (\key{byte-reg}\; \itm{register}) 
+\Arg &::=&  \gray{  \INT{\Int} \mid \REG{\Reg}
+    \mid (\key{deref}\,\Reg\,\Int) } \\
+   &\mid& \gray{ (\key{byte-reg}\; \Reg) 
     \mid   (\key{global-value}\; \itm{name})  } \\
    &\mid& (\key{fun-ref}\; \itm{label})\\
 \itm{cc} & ::= & \gray{  \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge}  } \\

+ 15 - 4
defs.tex

@@ -10,7 +10,8 @@
 \newcommand{\Block}{\itm{block}}
 \newcommand{\Tail}{\itm{tail}}
 \newcommand{\Prog}{\itm{prog}}
-\newcommand{\Arg}{\itm{atm}}
+\newcommand{\Arg}{\itm{arg}}
+\newcommand{\Atm}{\itm{atm}}
 \newcommand{\Reg}{\itm{reg}}
 \newcommand{\Int}{\itm{int}}
 \newcommand{\Var}{\itm{var}}
@@ -19,21 +20,29 @@
 \newcommand{\code}[1]{\texttt{#1}}
 
 \newcommand{\INT}[1]{\key{(Int}\;#1\key{)}}
+\newcommand{\BOOL}[1]{\key{(Bool}\;#1\key{)}}
 \newcommand{\READ}{\key{(Prim}\;\code{'read}\;\key{'())}}
 \newcommand{\NEG}[1]{\key{(Prim}\;\code{'-}\;\code{(list}\;#1\;\code{))}}
 \newcommand{\PROGRAM}[2]{\code{(Program}\;#1\;#2\code{)}}
 \newcommand{\ADD}[2]{\key{(Prim}\;\code{'+}\;\code{(list}\;#1\;#2\code{))}}
-\newcommand{\UNIOP}[2]{(\key{#1}~#2)}
-\newcommand{\BINOP}[3]{(\key{#1}~#2~#3)}
+\newcommand{\AND}[2]{\key{(Prim}\;\code{'and}\;\code{(list}\;#1\;#2\code{))}}
+\newcommand{\OR}[2]{\key{(Prim}\;\code{'or}\;\code{(list}\;#1\;#2\code{))}}
+\newcommand{\NOT}[1]{\key{(Prim}\;\code{'not}\;\code{(list}\;#1\;\code{))}}
+\newcommand{\UNIOP}[2]{\key{(Prim}\;#1\;\code{(list}\;#2\;\code{))}}
+\newcommand{\BINOP}[3]{\key{(Prim}\;#1\;\code{(list}\;#2\;#3\code{))}}
 \newcommand{\VAR}[1]{\key{(Var}\;#1\key{)}}
 \newcommand{\LET}[3]{\key{(Let}~#1~#2~#3\key{)}}
+\newcommand{\IF}[3]{\key{(If}\,#1\;#2\;#3\key{)}}
 
 \newcommand{\ASSIGN}[2]{\key{(Assign}~#1\;#2\key{)}}
 \newcommand{\RETURN}[1]{\key{(Return}~#1\key{)}}
 \newcommand{\SEQ}[2]{\key{(Seq}~#1~#2\key{)}}
+\newcommand{\GOTO}[1]{\key{(Goto}~#1\key{)}}
+\newcommand{\IFSTMT}[3]{\key{(IfStmt}\,#1\;#2\;#3\key{)}}
 
 \newcommand{\IMM}[1]{\key{(Imm}\;#1\key{)}}
 \newcommand{\REG}[1]{\key{(Reg}\;#1\key{)}}
+\newcommand{\BYTEREG}[1]{\key{(ByteReg}\;#1\key{)}}
 \newcommand{\DEREF}[2]{\key{(Deref}~#1~#2\key{)}}
 \newcommand{\CFG}[1]{\key{(CFG}\;#1\key{)}}
 \newcommand{\BLOCK}[2]{\key{(Block}\;#1\;#2\key{)}}
@@ -44,8 +53,10 @@
 \newcommand{\RETQ}{\key{(Retq)}}
 \newcommand{\PUSHQ}[1]{\key{(Pushq}~#1\key{)}}
 \newcommand{\POPQ}[1]{\key{(Popq}~#1\key{)}}
+\newcommand{\JMP}[1]{\key{(Jmp}~#1\key{)}}
+\newcommand{\JMPIF}[2]{\key{(JmpIf}~#1~#2\key{)}}
+
 
-\newcommand{\IF}[3]{(\key{if}\,#1\;#2\;#3)}
 
 \newcommand{\TTKEY}[1]{{\normalfont\tt #1}}