4 年之前 · 9c77ff6356
--- a/all.bib
+++ b/all.bib
@@ -2,13 +2,73 @@
 
															 %% http://bibdesk.sourceforge.net/
														
 
															-%% Created for Jeremy Siek at 2020-09-23 09:59:19 -0400 
														
 
															+%% Created for Jeremy Siek at 2020-09-28 12:47:15 -0400 
														
 
															 %% Saved with string encoding Unicode (UTF-8) 
														
 
															+@inproceedings{Osterlund:2016aa,
														
 
															+	Abstract = { On-the-fly Garbage Collectors (GCs) are the state-of-the-art concurrent GC algorithms today. Everything is done concurrently, but phases are separated by blocking handshakes. Hence, progress relies on the scheduler to let application threads (mutators) run into GC checkpoints to reply to the handshakes. For a non-blocking GC, these blocking handshakes need to be addressed. Therefore, we propose a new non-blocking handshake to replace previous blocking handshakes. It guarantees scheduling-independent operation level progress without blocking. It is scheduling independent but requires some other OS support. It allows bounded waiting for threads that are currently running on a processor, regardless of threads that are not running on a processor. We discuss this non-blocking handshake in two GC algorithms for stack scanning and copying objects. They pave way for a future completely non-blocking GC by solving hard open theory problems when OS support is permitted. The GC algorithms were integrated to the G1 GC of OpenJDK for Java. GC pause times were reduced to 12.5% compared to the original G1 on average in DaCapo. For a memory intense benchmark, latencies were reduced from 174 ms to 0.67 ms for the 99.99% percentile. The improved latency comes at a cost of 15% lower throughput. },
														
 
															+	Address = {New York, NY, USA},
														
 
															+	Author = {{\"O}sterlund, Erik and L{\"o}we, Welf},
														
 
															+	Booktitle = {Proceedings of the 2016 ACM SIGPLAN International Symposium on Memory Management},
														
 
															+	Date-Added = {2020-09-28 12:46:53 -0400},
														
 
															+	Date-Modified = {2020-09-28 12:46:55 -0400},
														
 
															+	Doi = {10.1145/2926697.2926701},
														
 
															+	Isbn = {9781450343176},
														
 
															+	Keywords = {compaction, block-free, garbage collection, stack scanning, non-blocking},
														
 
															+	Location = {Santa Barbara, CA, USA},
														
 
															+	Numpages = {12},
														
 
															+	Pages = {1--12},
														
 
															+	Publisher = {Association for Computing Machinery},
														
 
															+	Series = {ISMM 2016},
														
 
															+	Title = {Block-Free Concurrent GC: Stack Scanning and Copying},
														
 
															+	Url = {https://doi.org/10.1145/2926697.2926701},
														
 
															+	Year = {2016},
														
 
															+	Bdsk-Url-1 = {https://doi.org/10.1145/2926697.2926701}}
														
 
															+
														
 
															+@inproceedings{Jacek:2019aa,
														
 
															+	Abstract = {Generational garbage collectors are one of the most common types of automatic memory management. We can minimize the costs they incur by carefully choosing the points in a program's execution at which they run. However, this decision is generally based on simple, crude heuristics. Instead, we train random forest classifiers to decide when to collect based on features gathered from a running program. This reduces the total cost of collection in both time and space. We demonstrate useful generalization of learned policies to unseen traces of the same program, showing this approach may be fruitful for further investigation.},
														
 
															+	Address = {New York, NY, USA},
														
 
															+	Author = {Jacek, Nicholas and Moss, J. Eliot B.},
														
 
															+	Booktitle = {Proceedings of the 2019 ACM SIGPLAN International Symposium on Memory Management},
														
 
															+	Date-Added = {2020-09-28 12:42:43 -0400},
														
 
															+	Date-Modified = {2020-09-28 12:42:45 -0400},
														
 
															+	Doi = {10.1145/3315573.3329983},
														
 
															+	Isbn = {9781450367226},
														
 
															+	Keywords = {garbage collection, machine learning},
														
 
															+	Location = {Phoenix, AZ, USA},
														
 
															+	Numpages = {11},
														
 
															+	Pages = {53--63},
														
 
															+	Publisher = {Association for Computing Machinery},
														
 
															+	Series = {ISMM 2019},
														
 
															+	Title = {Learning When to Garbage Collect with Random Forests},
														
 
															+	Url = {https://doi.org/10.1145/3315573.3329983},
														
 
															+	Year = {2019},
														
 
															+	Bdsk-Url-1 = {https://doi.org/10.1145/3315573.3329983}}
														
 
															+
														
 
															+@inproceedings{Gamari:2020aa,
														
 
															+	Abstract = {Modern hardware and applications require runtime systems that can operate under large-heap and low-latency requirements. For many client/server or interactive applications, reducing average and maximum pause times is more important than maximizing throughput.  The GHC Haskell runtime system version 8.10.1 offers a new latency-optimized garbage collector as an alternative to the existing throughput-optimized copying garbage collector. This paper details the latency-optimized GC design, which is a generational collector integrating GHC's existing collector and bump-pointer allocator with a non-moving collector and non-moving heap suggested by Ueno and Ohori. We provide an empirical analysis on the latency/throughput tradeoffs. We augment the established nofib micro benchmark with a response-time focused benchmark that simulates real-world applications such as LRU caches, web search, and key-value stores.},
														
 
															+	Address = {New York, NY, USA},
														
 
															+	Author = {Gamari, Ben and Dietz, Laura},
														
 
															+	Booktitle = {Proceedings of the 2020 ACM SIGPLAN International Symposium on Memory Management},
														
 
															+	Date-Added = {2020-09-28 12:40:46 -0400},
														
 
															+	Date-Modified = {2020-09-28 12:40:48 -0400},
														
 
															+	Doi = {10.1145/3381898.3397214},
														
 
															+	Isbn = {9781450375665},
														
 
															+	Keywords = {garbage collection implementations},
														
 
															+	Location = {London, UK},
														
 
															+	Numpages = {13},
														
 
															+	Pages = {87--99},
														
 
															+	Publisher = {Association for Computing Machinery},
														
 
															+	Series = {ISMM 2020},
														
 
															+	Title = {Alligator Collector: A Latency-Optimized Garbage Collector for Functional Programming Languages},
														
 
															+	Url = {https://doi.org/10.1145/3381898.3397214},
														
 
															+	Year = {2020},
														
 
															+	Bdsk-Url-1 = {https://doi.org/10.1145/3381898.3397214}}
														
 
															+
														
 
															 @book{Cooper:2011aa,
														
 
															 	Author = {Keith Cooper and Linda Torczon},
														
 
															 	Date-Added = {2020-09-23 09:57:47 -0400},
														
--- a/book.tex
+++ b/book.tex
@@ -1936,7 +1936,7 @@ of how to use \key{interp-tests}.
 
															 \end{exercise}
														
 
															 \section{Remove Complex Operands}
														
 
															-\label{sec:remove-complex-opera-r1}
														
 
															+\label{sec:remove-complex-opera-R1}
														
 
															 The \code{remove-complex-opera*} pass compiles $R_1$ programs into
														
 
															 $R_1$ programs in which the arguments of operations are atomic
														
@@ -2197,17 +2197,17 @@ addq $10, x
 
															 The \key{read} operation does not have a direct counterpart in x86
														
 
															 assembly, so we have instead implemented this functionality in the C
														
 
															-language, with the function \code{read\_int} in the file
														
 
															-\code{runtime.c}. In general, we refer to all of the functionality in
														
 
															-this file as the \emph{runtime system}, or simply the \emph{runtime}
														
 
															-for short. When compiling your generated x86 assembly code, you need
														
 
															-to compile \code{runtime.c} to \code{runtime.o} (an ``object file'',
														
 
															-using \code{gcc} option \code{-c}) and link it into the
														
 
															-executable. For our purposes of code generation, all you need to do is
														
 
															-translate an assignment of \key{read} into some variable $\itm{lhs}$
														
 
															-(for left-hand side) into a call to the \code{read\_int} function
														
 
															-followed by a move from \code{rax} to the left-hand side.  The move
														
 
															-from \code{rax} is needed because the return value from
														
 
															+language~\citep{Kernighan:1988nx}, with the function \code{read\_int}
														
 
															+in the file \code{runtime.c}. In general, we refer to all of the
														
 
															+functionality in this file as the \emph{runtime system}, or simply the
														
 
															+\emph{runtime} for short. When compiling your generated x86 assembly
														
 
															+code, you need to compile \code{runtime.c} to \code{runtime.o} (an
														
 
															+``object file'', using \code{gcc} option \code{-c}) and link it into
														
 
															+the executable. For our purposes of code generation, all you need to
														
 
															+do is translate an assignment of \key{read} into some variable
														
 
															+$\itm{lhs}$ (for left-hand side) into a call to the \code{read\_int}
														
 
															+function followed by a move from \code{rax} to the left-hand side.
														
 
															+The move from \code{rax} is needed because the return value from
														
 
															 \code{read\_int} goes into \code{rax}, as is the case in general.  \\
														
 
															 \begin{tabular}{lll}
														
 
															 \begin{minipage}{0.3\textwidth}
														
@@ -3348,7 +3348,7 @@ shown in Figure~\ref{fig:reg-alloc-passes}.
 
															 \node (x86-2) at (3,-2)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-3) at (6,-2)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-4) at (9,-2) {\large $\text{x86}$};
														
 
															-\node (x86-5) at (12,-2) {\large $\text{x86}^{\dagger}$};
														
 
															+\node (x86-5) at (9,-4) {\large $\text{x86}^{\dagger}$};
														
 
															 \node (x86-2-1) at (3,-4)  {\large $\text{x86}^{*}$};
														
 
															 \node (x86-2-2) at (6,-4)  {\large $\text{x86}^{*}$};
														
@@ -3361,7 +3361,7 @@ shown in Figure~\ref{fig:reg-alloc-passes}.
 
															 \path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize\color{red} build-inter.} (x86-2-2);
														
 
															 \path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize\color{red} allocate-reg.} (x86-3);
														
 
															 \path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
														
 
															-\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize print-x86} (x86-5);
														
 
															+\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize print-x86} (x86-5);
														
 
															 \end{tikzpicture}
														
 
															 \caption{Diagram of the passes for $R_1$ with register allocation.}
														
 
															 \label{fig:reg-alloc-passes}
														
@@ -4216,7 +4216,8 @@ C_1 & ::= & \gray{\PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label}\,\key{.}\,\Tail\k
 
															 \]
														
 
															 \end{minipage}
														
 
															 }
														
 
															-\caption{The abstract syntax of $C_1$, extending $C_0$ with Booleans and conditionals.}
														
 
															+\caption{The abstract syntax of $C_1$, an extension of $C_0$
														
 
															+  (Figure~\ref{fig:c0-syntax}).}
														
 
															 \label{fig:c1-syntax}
														
 
															 \end{figure}
														
@@ -4796,29 +4797,28 @@ conclusion:
 
															 \node (R2-2) at (3,2)  {\large $R_2$};
														
 
															 \node (R2-3) at (6,2)  {\large $R_2$};
														
 
															 \node (R2-4) at (9,2)  {\large $R_2$};
														
 
															-\node (R2-5) at (12,2)  {\large $R_2$};
														
 
															-\node (C1-1) at (6,0)  {\large $C_1$};
														
 
															-%\node (C1-2) at (3,0)  {\large $C_1$};
														
 
															+\node (R2-5) at (9,0)  {\large $R_2$};
														
 
															+\node (C1-1) at (3,-2)  {\large $C_1$};
														
 
															-\node (x86-2) at (3,-2)  {\large $\text{x86}^{*}$};
														
 
															-\node (x86-3) at (6,-2)  {\large $\text{x86}^{*}$};
														
 
															-\node (x86-4) at (9,-2) {\large $\text{x86}^{*}$};
														
 
															-\node (x86-5) at (12,-2) {\large $\text{x86}^{\dagger}$};
														
 
															+\node (x86-2) at (3,-4)  {\large $\text{x86}^{*}$};
														
 
															+\node (x86-3) at (6,-4)  {\large $\text{x86}^{*}$};
														
 
															+\node (x86-4) at (9,-4) {\large $\text{x86}^{*}$};
														
 
															+\node (x86-5) at (9,-6) {\large $\text{x86}^{\dagger}$};
														
 
															-\node (x86-2-1) at (3,-4)  {\large $\text{x86}^{*}$};
														
 
															-\node (x86-2-2) at (6,-4)  {\large $\text{x86}^{*}$};
														
 
															+\node (x86-2-1) at (3,-6)  {\large $\text{x86}^{*}$};
														
 
															+\node (x86-2-2) at (6,-6)  {\large $\text{x86}^{*}$};
														
 
															 \path[->,bend left=15] (R2) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R2-2);
														
 
															 \path[->,bend left=15] (R2-2) edge [above] node {\ttfamily\footnotesize\color{red} shrink} (R2-3);
														
 
															 \path[->,bend left=15] (R2-3) edge [above] node {\ttfamily\footnotesize uniquify} (R2-4);
														
 
															-\path[->,bend left=15] (R2-4) edge [above] node {\ttfamily\footnotesize remove-complex.} (R2-5);
														
 
															-\path[->,bend left=15] (R2-5) edge [right] node {\ttfamily\footnotesize\color{red} explicate-control} (C1-1);
														
 
															+\path[->,bend left=15] (R2-4) edge [right] node {\ttfamily\footnotesize remove-complex.} (R2-5);
														
 
															+\path[->,bend right=15] (R2-5) edge [left] node {\ttfamily\footnotesize\color{red} explicate-control} (C1-1);
														
 
															 \path[->,bend right=15] (C1-1) edge [left] node {\ttfamily\footnotesize\color{red} select-instructions} (x86-2);
														
 
															 \path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize\color{red} uncover-live} (x86-2-1);
														
 
															 \path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize build-inter.} (x86-2-2);
														
 
															 \path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
														
 
															 \path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize\color{red} patch-instr.} (x86-4);
														
 
															-\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize\color{red} print-x86 } (x86-5);
														
 
															+\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize\color{red} print-x86 } (x86-5);
														
 
															 \end{tikzpicture}
														
 
															 \caption{Diagram of the passes for $R_2$, a language with conditionals.}
														
 
															  \label{fig:R2-passes}
														
@@ -5241,7 +5241,7 @@ Subsequently, in the \code{uncover-locals} pass
 
															 propagated to all variables (including the temporaries generated by
														
 
															 \code{remove-complex-opera*}).
														
 
															-\begin{figure}[tbp]
														
 
															+\begin{figure}[hb]
														
 
															 \begin{lstlisting}
														
 
															 (define (type-check-exp env)
														
 
															   (lambda (e)
														
@@ -5250,8 +5250,7 @@ propagated to all variables (including the temporaries generated by
 
															       ...
														
 
															       [(Void) (values (HasType (Void) 'Void) 'Void)]
														
 
															       [(Prim 'vector es)
														
 
															-       (define-values (e* t*) (for/lists (e* t*) ([e es])
														
 
															-                                (recur e)))
														
 
															+       (define-values (e* t*) (for/lists (e* t*) ([e es]) (recur e)))
														
 
															        (let ([t `(Vector ,@t*)])
														
 
															          (values (HasType (Prim 'vector e*) t) t))]
														
 
															       [(Prim 'vector-ref (list e (Int i)))
														
@@ -5262,7 +5261,9 @@ propagated to all variables (including the temporaries generated by
 
															             (error 'type-check-exp "invalid index ~a" i))
														
 
															           (let ([t (list-ref ts i)])
														
 
															             (values
														
 
															-             (HasType (Prim 'vector-ref (list e^ (HasType (Int i) 'Integer))) t)
														
 
															+               (HasType (Prim 'vector-ref
														
 
															+                           (list e^ (HasType (Int i) 'Integer)))
														
 
															+                  t)
														
 
															              t))]
														
 
															          [else (error "expected a vector in vector-ref, not" t)])]
														
 
															       [(Prim 'eq? (list e1 e2))
														
@@ -5291,14 +5292,13 @@ collector~\citep{Wilson:1992fk} that uses Cheney's algorithm to
 
															 perform the
														
 
															 copy~\citep{Cheney:1970aa}. Figure~\ref{fig:copying-collector} gives a
														
 
															 coarse-grained depiction of what happens in a two-space collector,
														
 
															-showing two time steps, prior to garbage collection on the top and
														
 
															-after garbage collection on the bottom. In a two-space collector, the
														
 
															-heap is divided into two parts, the FromSpace and the
														
 
															+showing two time steps, prior to garbage collection (on the top) and
														
 
															+after garbage collection (on the bottom). In a two-space collector,
														
 
															+the heap is divided into two parts named the FromSpace and the
														
 
															 ToSpace. Initially, all allocations go to the FromSpace until there is
														
 
															 not enough room for the next allocation request. At that point, the
														
 
															 garbage collector goes to work to make more room.
														
 
															-
														
 
															 The garbage collector must be careful not to reclaim tuples that will
														
 
															 be used by the program in the future. Of course, it is impossible in
														
 
															 general to predict what a program will do, but we can over approximate
														
@@ -5327,14 +5327,15 @@ root set, one in a register and two on the stack.  All of the live
 
															 objects have been copied to the ToSpace (the right-hand side of
														
 
															 Figure~\ref{fig:copying-collector}) in a way that preserves the
														
 
															 pointer relationships. For example, the pointer in the register still
														
 
															-points to a 2-tuple whose first element is a 3-tuple and second
														
 
															+points to a 2-tuple whose first element is a 3-tuple and whose second
														
 
															 element is a 2-tuple.  There are four tuples that are not reachable
														
 
															 from the root set and therefore do not get copied into the ToSpace.
														
 
															-(The situation in Figure~\ref{fig:copying-collector}, with a
														
 
															-cycle, cannot be created by a well-typed program in $R_3$. However,
														
 
															-creating cycles will be possible once we get to $R_6$.  We design
														
 
															-the garbage collector to deal with cycles to begin with, so we will
														
 
															-not need to revisit this issue.)
														
 
															+
														
 
															+The exact situation in Figure~\ref{fig:copying-collector} cannot be
														
 
															+created by a well-typed program in $R_3$ because it contains a
														
 
															+cycle. However, creating cycles will be possible once we get to $R_6$.
														
 
															+We design the garbage collector to deal with cycles to begin with so
														
 
															+we will not need to revisit this issue.
														
 
															 \begin{figure}[tbp]
														
 
															 \centering
														
@@ -5348,35 +5349,38 @@ There are many alternatives to copying collectors (and their older
 
															 siblings, the generational collectors) when it comes to garbage
														
 
															 collection, such as mark-and-sweep and reference counting.  The
														
 
															 strengths of copying collectors are that allocation is fast (just a
														
 
															-test and pointer increment), there is no fragmentation, cyclic garbage
														
 
															-is collected, and the time complexity of collection only depends on
														
 
															-the amount of live data, and not on the amount of
														
 
															-garbage~\citep{Wilson:1992fk}. The main disadvantage of two-space
														
 
															-copying collectors is that they use a lot of space, though that
														
 
															-problem is ameliorated in generational collectors.  Racket and Scheme
														
 
															-programs tend to allocate many small objects and generate a lot of
														
 
															-garbage, so copying and generational collectors are a good fit.  Of
														
 
															-course, garbage collection is an active research topic, especially
														
 
															-concurrent garbage collection~\citep{Tene:2011kx}. Researchers are
														
 
															-continuously developing new techniques and revisiting old
														
 
															-trade-offs~\citep{Blackburn:2004aa,Jones:2011aa,Shahriyar:2013aa,Cutler:2015aa,Shidal:2015aa}.
														
 
															+comparison and pointer increment), there is no fragmentation, cyclic
														
 
															+garbage is collected, and the time complexity of collection only
														
 
															+depends on the amount of live data, and not on the amount of
														
 
															+garbage~\citep{Wilson:1992fk}. The main disadvantage of a two-space
														
 
															+copying collector is that it uses a lot of space, though that problem
														
 
															+is ameliorated in generational collectors.  Racket and Scheme programs
														
 
															+tend to allocate many small objects and generate a lot of garbage, so
														
 
															+copying and generational collectors are a good fit.  Garbage
														
 
															+collection is an active research topic, especially concurrent garbage
														
 
															+collection~\citep{Tene:2011kx}. Researchers are continuously
														
 
															+developing new techniques and revisiting old
														
 
															+trade-offs~\citep{Blackburn:2004aa,Jones:2011aa,Shahriyar:2013aa,Cutler:2015aa,Shidal:2015aa,Osterlund:2016aa,Jacek:2019aa,Gamari:2020aa}. Researchers
														
 
															+meet every year at the International Symposium on Memory Management to
														
 
															+present these findings.
														
 
															+
														
 
															 \subsection{Graph Copying via Cheney's Algorithm}
														
 
															 \label{sec:cheney}
														
 
															-Let us take a closer look at how the copy works. The allocated objects
														
 
															-and pointers can be viewed as a graph and we need to copy the part of
														
 
															-the graph that is reachable from the root set. To make sure we copy
														
 
															-all of the reachable vertices in the graph, we need an exhaustive
														
 
															-graph traversal algorithm, such as depth-first search or breadth-first
														
 
															-search~\citep{Moore:1959aa,Cormen:2001uq}. Recall that such algorithms
														
 
															-take into account the possibility of cycles by marking which vertices
														
 
															-have already been visited, so as to ensure termination of the
														
 
															-algorithm. These search algorithms also use a data structure such as a
														
 
															-stack or queue as a to-do list to keep track of the vertices that need
														
 
															-to be visited. We shall use breadth-first search and a trick due to
														
 
															-\citet{Cheney:1970aa} for simultaneously representing the queue and
														
 
															-copying tuples into the ToSpace.
														
 
															+Let us take a closer look at the copying of the live objects. The
														
 
															+allocated objects and pointers can be viewed as a graph and we need to
														
 
															+copy the part of the graph that is reachable from the root set. To
														
 
															+make sure we copy all of the reachable vertices in the graph, we need
														
 
															+an exhaustive graph traversal algorithm, such as depth-first search or
														
 
															+breadth-first search~\citep{Moore:1959aa,Cormen:2001uq}. Recall that
														
 
															+such algorithms take into account the possibility of cycles by marking
														
 
															+which vertices have already been visited, so as to ensure termination
														
 
															+of the algorithm. These search algorithms also use a data structure
														
 
															+such as a stack or queue as a to-do list to keep track of the vertices
														
 
															+that need to be visited. We shall use breadth-first search and a trick
														
 
															+due to \citet{Cheney:1970aa} for simultaneously representing the queue
														
 
															+and copying tuples into the ToSpace.
														
 
															 Figure~\ref{fig:cheney} shows several snapshots of the ToSpace as the
														
 
															 copy progresses. The queue is represented by a chunk of contiguous
														
@@ -5384,25 +5388,16 @@ memory at the beginning of the ToSpace, using two pointers to track
 
															 the front and the back of the queue. The algorithm starts by copying
														
 
															 all tuples that are immediately reachable from the root set into the
														
 
															 ToSpace to form the initial queue.  When we copy a tuple, we mark the
														
 
															-old tuple to indicate that it has been visited. (We discuss the
														
 
															-marking in Section~\ref{sec:data-rep-gc}.) Note that any pointers
														
 
															-inside the copied tuples in the queue still point back to the
														
 
															+old tuple to indicate that it has been visited. We discuss how this
														
 
															+marking is accomplished in Section~\ref{sec:data-rep-gc}. Note that any
														
 
															+pointers inside the copied tuples in the queue still point back to the
														
 
															 FromSpace. Once the initial queue has been created, the algorithm
														
 
															 enters a loop in which it repeatedly processes the tuple at the front
														
 
															 of the queue and pops it off the queue.  To process a tuple, the
														
 
															 algorithm copies all the tuples that are directly reachable from it to
														
 
															 the ToSpace, placing them at the back of the queue. The algorithm then
														
 
															 updates the pointers in the popped tuple so they point to the newly
														
 
															-copied tuples. Getting back to Figure~\ref{fig:cheney}, in the first
														
 
															-step we copy the tuple whose second element is $42$ to the back of the
														
 
															-queue. The other pointer goes to a tuple that has already been copied,
														
 
															-so we do not need to copy it again, but we do need to update the
														
 
															-pointer to the new location. This can be accomplished by storing a
														
 
															-\emph{forwarding} pointer to the new location in the old tuple, back
														
 
															-when we initially copied the tuple into the ToSpace. This completes
														
 
															-one step of the algorithm. The algorithm continues in this way until
														
 
															-the front of the queue is empty, that is, until the front catches up
														
 
															-with the back.
														
 
															+copied tuples.
														
 
															 \begin{figure}[tbp]
														
 
															 \centering \includegraphics[width=0.9\textwidth]{figs/cheney}
														
@@ -5410,6 +5405,16 @@ with the back.
 
															 \label{fig:cheney}
														
 
															 \end{figure}
														
 
															+Getting back to Figure~\ref{fig:cheney}, in the first step we copy the
														
 
															+tuple whose second element is $42$ to the back of the queue. The other
														
 
															+pointer goes to a tuple that has already been copied, so we do not
														
 
															+need to copy it again, but we do need to update the pointer to the new
														
 
															+location. This can be accomplished by storing a \emph{forwarding}
														
 
															+pointer to the new location in the old tuple, back when we initially
														
 
															+copied the tuple into the ToSpace. This completes one step of the
														
 
															+algorithm. The algorithm continues in this way until the front of the
														
 
															+queue is empty, that is, until the front catches up with the back.
														
 
															+
														
 
															 \subsection{Data Representation}
														
 
															 \label{sec:data-rep-gc}
														
@@ -5434,83 +5439,78 @@ However, $R_3$ is a statically typed language, so it would be
 
															 unfortunate to require tags on every object, especially small and
														
 
															 pervasive objects like integers and Booleans.  Option 3 is the
														
 
															 best-performing choice for statically typed languages, but comes with
														
 
															-a relatively high implementation complexity. To keep this chapter to a
														
 
															-2-week time budget, we recommend a combination of options 1 and 2,
														
 
															-with separate strategies used for the stack and the heap.
														
 
															-
														
 
															-Regarding the stack, we recommend using a separate stack for
														
 
															-pointers~\citep{Siebert:2001aa,Henderson:2002aa,Baker:2009aa}, which
														
 
															-we call a \emph{root stack} (a.k.a. ``shadow stack''). That is, when a
														
 
															-local variable needs to be spilled and is of type \code{(Vector
														
 
															-  $\Type_1 \ldots \Type_n$)}, then we put it on the root stack instead
														
 
															-of the normal procedure call stack. Furthermore, we always spill
														
 
															-vector-typed variables if they are live during a call to the
														
 
															-collector, thereby ensuring that no pointers are in registers during a
														
 
															-collection. Figure~\ref{fig:shadow-stack} reproduces the example from
														
 
															-Figure~\ref{fig:copying-collector} and contrasts it with the data
														
 
															-layout using a root stack. The root stack contains the two pointers
														
 
															-from the regular stack and also the pointer in the second
														
 
															+a relatively high implementation complexity. To keep this chapter
														
 
															+within a 2-week time budget, we recommend a combination of options 1
														
 
															+and 2, using separate strategies for the stack and the heap.
														
 
															+
														
 
															+Regarding the stack, we recommend using a separate stack for pointers,
														
 
															+which we call a \emph{root stack} (a.k.a. ``shadow
														
 
															+stack'')~\citep{Siebert:2001aa,Henderson:2002aa,Baker:2009aa}. That
														
 
															+is, when a local variable needs to be spilled and is of type
														
 
															+\code{(Vector $\Type_1 \ldots \Type_n$)}, then we put it on the root
														
 
															+stack instead of the normal procedure call stack. Furthermore, we
														
 
															+always spill vector-typed variables if they are live during a call to
														
 
															+the collector, thereby ensuring that no pointers are in registers
														
 
															+during a collection. Figure~\ref{fig:shadow-stack} reproduces the
														
 
															+example from Figure~\ref{fig:copying-collector} and contrasts it with
														
 
															+the data layout using a root stack. The root stack contains the two
														
 
															+pointers from the regular stack and also the pointer in the second
														
 
															 register.
														
 
															 \begin{figure}[tbp]
														
 
															-\centering \includegraphics[width=0.7\textwidth]{figs/root-stack}
														
 
															+\centering \includegraphics[width=0.65\textwidth]{figs/root-stack}
														
 
															 \caption{Maintaining a root stack to facilitate garbage collection.}
														
 
															 \label{fig:shadow-stack}
														
 
															 \end{figure}
														
 
															 The problem of distinguishing between pointers and other kinds of data
														
 
															-also arises inside of each tuple. We solve this problem by attaching a
														
 
															-tag, an extra 64-bits, to each tuple. Figure~\ref{fig:tuple-rep} zooms
														
 
															-in on the tags for two of the tuples in the example from
														
 
															-Figure~\ref{fig:copying-collector}. Note that we have drawn the bits
														
 
															-in a big-endian way, from right-to-left, with bit location 0 (the
														
 
															-least significant bit) on the far right, which corresponds to the
														
 
															-directional of the x86 shifting instructions \key{salq} (shift
														
 
															-left) and \key{sarq} (shift right). Part of each tag is dedicated to
														
 
															-specifying which elements of the tuple are pointers, the part labeled
														
 
															-``pointer mask''. Within the pointer mask, a 1 bit indicates there is
														
 
															-a pointer and a 0 bit indicates some other kind of data. The pointer
														
 
															-mask starts at bit location 7. We have limited tuples to a maximum
														
 
															-size of 50 elements, so we just need 50 bits for the pointer mask. The
														
 
															-tag also contains two other pieces of information. The length of the
														
 
															-tuple (number of elements) is stored in bits location 1 through
														
 
															-6. Finally, the bit at location 0 indicates whether the tuple has yet
														
 
															-to be copied to the ToSpace.  If the bit has value 1, then this tuple
														
 
															-has not yet been copied.  If the bit has value 0 then the entire tag
														
 
															-is in fact a forwarding pointer. (The lower 3 bits of an pointer are
														
 
															-always zero anyways because our tuples are 8-byte aligned.)
														
 
															+also arises inside of each tuple on the heap. We solve this problem by
														
 
															+attaching a tag, an extra 64 bits, to each
														
 
															+tuple. Figure~\ref{fig:tuple-rep} zooms in on the tags for two of the
														
 
															+tuples in the example from Figure~\ref{fig:copying-collector}. Note
														
 
															+that we have drawn the bits in a big-endian way, from right-to-left,
														
 
															+with bit location 0 (the least significant bit) on the far right,
														
 
															+which corresponds to the direction of the x86 shifting instructions
														
 
															+\key{salq} (shift left) and \key{sarq} (shift right). Part of each tag
														
 
															+is dedicated to specifying which elements of the tuple are pointers,
														
 
															+the part labeled ``pointer mask''. Within the pointer mask, a 1 bit
														
 
															+indicates there is a pointer and a 0 bit indicates some other kind of
														
 
															+data. The pointer mask starts at bit location 7. We have limited
														
 
															+tuples to a maximum size of 50 elements, so we just need 50 bits for
														
 
															+the pointer mask. The tag also contains two other pieces of
														
 
															+information. The length of the tuple (number of elements) is stored in
														
 
															+bit locations 1 through 6. Finally, the bit at location 0 indicates
														
 
															+whether the tuple has yet to be copied to the ToSpace.  If the bit has
														
 
															+value 1, then this tuple has not yet been copied.  If the bit has
														
 
															+value 0 then the entire tag is a forwarding pointer. (The lower 3 bits
														
 
															+of a pointer are always zero anyways because our tuples are 8-byte
														
 
															+aligned.)
														
 
															 \begin{figure}[tbp]
														
 
															 \centering \includegraphics[width=0.8\textwidth]{figs/tuple-rep}
														
 
															-\caption{Representation for tuples in the heap.}
														
 
															+\caption{Representation of tuples in the heap.}
														
 
															 \label{fig:tuple-rep}
														
 
															 \end{figure}
														
 
															 \subsection{Implementation of the Garbage Collector}
														
 
															 \label{sec:organize-gz}
														
 
															-The implementation of the garbage collector needs to do a lot of
														
 
															-bit-level data manipulation and we need to link it with our
														
 
															-compiler-generated x86 code. Thus, we recommend implementing the
														
 
															-garbage collector in C~\citep{Kernighan:1988nx} and putting the code
														
 
															-in the \code{runtime.c} file. Figure~\ref{fig:gc-header} shows the
														
 
															-interface to the garbage collector. The \code{initialize} function
														
 
															-creates the FromSpace, ToSpace, and root stack. The \code{initialize}
														
 
															-function is meant to be called near the beginning of \code{main},
														
 
															-before the rest of the program executes.  The \code{initialize}
														
 
															-function puts the address of the beginning of the FromSpace into the
														
 
															-global variable \code{free\_ptr}. The global \code{fromspace\_end}
														
 
															-points to the address that is 1-past the last element of the
														
 
															-FromSpace. (We use half-open intervals to represent chunks of
														
 
															-memory~\citep{Dijkstra:1982aa}.)  The \code{rootstack\_begin} global
														
 
															+An implementation of the copying collector is provided in the
														
 
															+\code{runtime.c} file. Figure~\ref{fig:gc-header} defines the
														
 
															+interface to the garbage collector that is used by the compiler. The
														
 
															+\code{initialize} function creates the FromSpace, ToSpace, and root
														
 
															+stack and should be called in the prelude of the \code{main}
														
 
															+function. The \code{initialize} function puts the address of the
														
 
															+beginning of the FromSpace into the global variable
														
 
															+\code{free\_ptr}. The global variable \code{fromspace\_end} points to
														
 
															+the address that is 1-past the last element of the FromSpace. (We use
														
 
															+half-open intervals to represent chunks of
														
 
															+memory~\citep{Dijkstra:1982aa}.)  The \code{rootstack\_begin} variable
														
 
															 points to the first element of the root stack.
														
 
															 As long as there is room left in the FromSpace, your generated code
														
 
															 can allocate tuples simply by moving the \code{free\_ptr} forward.
														
 
															 %
														
 
															-\margincomment{\tiny Should we dedicate a register to the free pointer? \\
														
 
															---Jeremy}
														
 
															-%
														
 
															 The amount of room left in FromSpace is the difference between the
														
 
															 \code{fromspace\_end} and the \code{free\_ptr}.  The \code{collect}
														
 
															 function should be called when there is not enough room left in the
														
@@ -5534,69 +5534,101 @@ succeed.
 
															 \label{fig:gc-header}
														
 
															 \end{figure}
														
 
															-\begin{exercise}
														
 
															-  In the file \code{runtime.c} you will find the implementation of
														
 
															-  \code{initialize} and a partial implementation of \code{collect}.
														
 
															-  The \code{collect} function calls another function, \code{cheney},
														
 
															-  to perform the actual copy, and that function is left to the reader
														
 
															-  to implement. The following is the prototype for \code{cheney}.
														
 
															-\begin{lstlisting}
														
 
															-   static void cheney(int64_t** rootstack_ptr);
														
 
															-\end{lstlisting}
														
 
															-  The parameter \code{rootstack\_ptr} is a pointer to the top of the
														
 
															-  rootstack (which is an array of pointers).  The \code{cheney} function
														
 
															-  also communicates with \code{collect} through the global
														
 
															-  variables \code{fromspace\_begin} and \code{fromspace\_end}
														
 
															-  mentioned in Figure~\ref{fig:gc-header} as well as the pointers for
														
 
															-  the ToSpace:
														
 
															-\begin{lstlisting}
														
 
															-   static int64_t* tospace_begin;
														
 
															-   static int64_t* tospace_end;
														
 
															-\end{lstlisting}
														
 
															-  The job of the \code{cheney} function is to copy all the live
														
 
															-  objects (reachable from the root stack) into the ToSpace, update
														
 
															-  \code{free\_ptr} to point to the next unused spot in the ToSpace,
														
 
															-  update the root stack so that it points to the objects in the
														
 
															-  ToSpace, and finally to swap the global pointers for the FromSpace
														
 
															-  and ToSpace.
														
 
															-\end{exercise}
														
 
															+%% \begin{exercise}
														
 
															+%%   In the file \code{runtime.c} you will find the implementation of
														
 
															+%%   \code{initialize} and a partial implementation of \code{collect}.
														
 
															+%%   The \code{collect} function calls another function, \code{cheney},
														
 
															+%%   to perform the actual copy, and that function is left to the reader
														
 
															+%%   to implement. The following is the prototype for \code{cheney}.
														
 
															+%% \begin{lstlisting}
														
 
															+%%    static void cheney(int64_t** rootstack_ptr);
														
 
															+%% \end{lstlisting}
														
 
															+%%   The parameter \code{rootstack\_ptr} is a pointer to the top of the
														
 
															+%%   rootstack (which is an array of pointers).  The \code{cheney} function
														
 
															+%%   also communicates with \code{collect} through the global
														
 
															+%%   variables \code{fromspace\_begin} and \code{fromspace\_end}
														
 
															+%%   mentioned in Figure~\ref{fig:gc-header} as well as the pointers for
														
 
															+%%   the ToSpace:
														
 
															+%% \begin{lstlisting}
														
 
															+%%    static int64_t* tospace_begin;
														
 
															+%%    static int64_t* tospace_end;
														
 
															+%% \end{lstlisting}
														
 
															+%%   The job of the \code{cheney} function is to copy all the live
														
 
															+%%   objects (reachable from the root stack) into the ToSpace, update
														
 
															+%%   \code{free\_ptr} to point to the next unused spot in the ToSpace,
														
 
															+%%   update the root stack so that it points to the objects in the
														
 
															+%%   ToSpace, and finally to swap the global pointers for the FromSpace
														
 
															+%%   and ToSpace.
														
 
															+%% \end{exercise}
														
 
															 %% \section{Compiler Passes}
														
 
															 %% \label{sec:code-generation-gc}
														
 
															 The introduction of garbage collection has a non-trivial impact on our
														
 
															-compiler passes. We introduce one new compiler pass called
														
 
															-\code{expose-allocation} and make non-trivial changes to
														
 
															-\code{type-check}, \code{flatten}, \code{select-instructions},
														
 
															-\code{allocate-registers}, and \code{print-x86}.  The following
														
 
															-program will serve as our running example.  It creates two tuples, one
														
 
															-nested inside the other. Both tuples have length one. The example then
														
 
															-accesses the element in the inner tuple tuple via two vector
														
 
															+compiler passes. We introduce two new compiler passes named
														
 
															+\code{expose-allocation} and \code{uncover-locals}. We make
														
 
															+significant changes to \code{select-instructions},
														
 
															+\code{build-interference}, \code{allocate-registers}, and
														
 
															+\code{print-x86} and make minor changes in several more passes.  The
														
 
															+following program will serve as our running example.  It creates two
														
 
															+tuples, one nested inside the other. Both tuples have length one. The
														
 
															+program accesses the element in the inner tuple via two vector
														
 
															 references.
														
 
															 % tests/s2_17.rkt
														
 
															 \begin{lstlisting}
														
 
															 (vector-ref (vector-ref (vector (vector 42)) 0) 0))
														
 
															 \end{lstlisting}
														
 
															-Next we proceed to discuss the new \code{expose-allocation} pass.
														
 
															+\section{Shrink}
														
 
															+\label{sec:shrink-R3}
														
 
															+
														
 
															+Recall that the \code{shrink} pass translates the primitive operators
														
 
															+into a smaller set of primitives. Because this pass comes after type
														
 
															+checking, but before the passes that require the type information in
														
 
															+the \code{HasType} AST nodes, the \code{shrink} pass must be modified
														
 
															+to wrap \code{HasType} around each AST node that it generates.
														
 
															+
														
 
															 \section{Expose Allocation}
														
 
															 \label{sec:expose-allocation}
														
 
															 The pass \code{expose-allocation} lowers the \code{vector} creation
														
 
															 form into a conditional call to the collector followed by the
														
 
															-allocation. We choose to place the \code{expose-allocation} pass
														
 
															-before \code{flatten} because \code{expose-allocation} introduces new
														
 
															-variables, which can be done locally with \code{let}, but \code{let}
														
 
															-is gone after \code{flatten}.  In the following, we show the
														
 
															-transformation for the \code{vector} form into let-bindings for the
														
 
															-initializing expressions, by a conditional \code{collect}, an
														
 
															-\code{allocate}, and the initialization of the vector.
														
 
															-(The \itm{len} is the length of the vector and \itm{bytes} is how many
														
 
															-total bytes need to be allocated for the vector, which is 8 for the
														
 
															-tag plus \itm{len} times 8.)
														
 
															+allocation.  We choose to place the \code{expose-allocation} pass
														
 
															+before \code{remove-complex-opera*} because the code generated by
														
 
															+\code{expose-allocation} contains complex operands.  We also place
														
 
															+\code{expose-allocation} before \code{explicate-control} because
														
 
															+\code{expose-allocation} introduces new variables using \code{let},
														
 
															+but \code{let} is gone after \code{explicate-control}.
														
 
															+The output of \code{expose-allocation} is a language that extends
														
 
															+$R_3$ with the three new forms that we use in the translation of the
														
 
															+\code{vector} form.
														
 
															+\[
														
 
															+\begin{array}{lcl}
														
 
															+  \Exp &::=& \cdots
														
 
															+      \mid (\key{collect} \,\itm{int})
														
 
															+      \mid (\key{allocate} \,\itm{int}\,\itm{type})
														
 
															+      \mid (\key{global-value} \,\itm{name})
														
 
															+\end{array}
														
 
															+\]
														
 
															+The $(\key{collect}\,n)$ form runs the garbage collector, requesting
														
 
															+$n$ bytes. It will become a call to the \code{collect} function in
														
 
															+\code{runtime.c} in \code{select-instructions}.  The
														
 
															+$(\key{allocate}\,n\,T)$ form creates a tuple of $n$ elements.  The
														
 
															+$T$ parameter is the type of the tuple: \code{(Vector $\Type_1 \ldots
														
 
															+  \Type_n$)} where $\Type_i$ is the type of the $i$th element in the
														
 
															+tuple. The $(\key{global-value}\,\itm{name})$ form reads the value of
														
 
															+a global variable, such as \code{free\_ptr}.
														
 
															+
														
 
															+In the following, we show the transformation for the \code{vector}
														
 
															+form into 1) a sequence of let-bindings for the initializing
														
 
															+expressions, 2) a conditional call to \code{collect}, 3) a call to
														
 
															+\code{allocate}, and 4) the initialization of the vector. In the
														
 
															+following, \itm{len} refers to the length of the vector and
														
 
															+\itm{bytes} is how many total bytes need to be allocated for the
														
 
															+vector, which is 8 for the tag plus \itm{len} times 8.
														
 
															 \begin{lstlisting}
														
 
															   (has-type (vector |$e_0 \ldots e_{n-1}$|) |\itm{type}|)
														
 
															 |$\Longrightarrow$|
														
@@ -5610,34 +5642,12 @@ tag plus \itm{len} times 8.)
 
															   (let ([_ (vector-set! |$v$| |$n-1$| |$x_{n-1}$|)])
														
 
															      |$v$|) ... )))) ...)
														
 
															 \end{lstlisting}
														
 
															-(In the above, we suppressed all of the \code{has-type} forms in the
														
 
															-output for the sake of readability.)  The placement of the initializing
														
 
															-expressions $e_0,\ldots,e_{n-1}$ prior to the \code{allocate} and
														
 
															-the sequence of \code{vector-set!}'s is important, as those expressions
														
 
															-may trigger garbage collection and we do not want an allocated but
														
 
															-uninitialized tuple to be present during a garbage collection.
														
 
															-
														
 
															-The output of \code{expose-allocation} is a language that extends
														
 
															-$R_3$ with the three new forms that we use above in the translation of
														
 
															-\code{vector}.
														
 
															-\[
														
 
															-\begin{array}{lcl}
														
 
															-  \Exp &::=& \cdots
														
 
															-      \mid (\key{collect} \,\itm{int})
														
 
															-      \mid (\key{allocate} \,\itm{int}\,\itm{type})
														
 
															-      \mid (\key{global-value} \,\itm{name})
														
 
															-\end{array}
														
 
															-\]
														
 
															-
														
 
															-%% The \code{expose-allocation} inserts an \code{initialize} statement at
														
 
															-%% the beginning of the program which will instruct the garbage collector
														
 
															-%% to set up the FromSpace, ToSpace, and all the global variables.  The
														
 
															-%% two arguments of \code{initialize} specify the initial allocated space
														
 
															-%% for the root stack and for the heap.
														
 
															-%
														
 
															-%% The \code{expose-allocation} pass annotates all of the local variables
														
 
															-%% in the \code{program} form with their type.
														
 
															-
														
 
															+In the above, we suppressed all of the \code{has-type} forms in the
														
 
															+output for the sake of readability.  The placement of the initializing
														
 
															+expressions $e_0,\ldots,e_{n-1}$ prior to the \code{allocate} and the
														
 
															+sequence of \code{vector-set!} is important, as those expressions may
														
 
															+trigger garbage collection and we cannot have an allocated but
														
 
															+uninitialized tuple on the heap during a collection.
														
 
															 Figure~\ref{fig:expose-alloc-output} shows the output of the
														
 
															 \code{expose-allocation} pass on our running example.
														
@@ -5652,7 +5662,7 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
 
															            (let ([collectret7974
														
 
															                   (if (< (+ free_ptr 16) fromspace_end)
														
 
															                       (void)
														
 
															-                      (collect 16);
														
 
															+                      (collect 16)
														
 
															                       )])
														
 
															              (let ([alloc7971 (allocate 1 (Vector Integer))])
														
 
															                (let ([initret7973 (vector-set! alloc7971 0 vecinit7972)])
														
@@ -5664,7 +5674,7 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
 
															     (let ([collectret7978
														
 
															            (if (< (+ free_ptr 16) fromspace_end)
														
 
															                (void)
														
 
															-               (collect 16);
														
 
															+               (collect 16)
														
 
															                )])
														
 
															       (let ([alloc7975 (allocate 1 (Vector (Vector Integer)))])
														
 
															         (let ([initret7977 (vector-set! alloc7975 0 vecinit7976)])
														
@@ -5681,7 +5691,15 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
 
															 \end{figure}
														
 
															-%\clearpage
														
 
															+\section{Remove Complex Operands}
														
 
															+\label{sec:remove-complex-opera-R2}
														
 
															+
														
 
															+The new forms \code{collect}, \code{allocate}, and \code{global-value}
														
 
															+should all be treated as complex operands. A new case for
														
 
															+\code{HasType} is needed and the case for \code{Prim} needs to be
														
 
															+handled carefully to prevent the \code{Prim} node from being separated
														
 
															+from its enclosing \code{HasType}.
														
 
															+
														
 
															 \section{Explicate Control and the $C_2$ language}
														
 
															 \label{sec:explicate-control-r3}
														
@@ -5694,29 +5712,30 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
 
															 \begin{array}{lcl}
														
 
															 \Atm &::=& \gray{ \INT{\Int} \mid \VAR{\Var} \mid \BOOL{\itm{bool}} }\\
														
 
															 \itm{cmp} &::= & \gray{  \key{eq?} \mid \key{<} } \\
														
 
															-\Exp &::= & \gray{ \Atm \mid \READ{} \mid \NEG{\Atm} \mid \ADD{\Atm}{\Atm} }\\
														
 
															+\Exp &::= & \gray{ \Atm \mid \READ{} } \\
														
 
															+   &\mid& \gray{ \NEG{\Atm} \mid \ADD{\Atm}{\Atm} }\\
														
 
															    &\mid& \gray{ \UNIOP{\key{not}}{\Atm} \mid \BINOP{\itm{cmp}}{\Atm}{\Atm}  } \\
														
 
															-   &\mid& (\key{Allocate} \,\itm{int}\,\itm{type})
														
 
															-   \mid \BINOP{\key{'vector-ref}}{\Atm}{\Int}  \\
														
 
															+   &\mid& (\key{Allocate} \,\itm{int}\,\itm{type}) \\
														
 
															+   &\mid& \BINOP{\key{'vector-ref}}{\Atm}{\Int}  \\
														
 
															    &\mid& (\key{Prim}~\key{'vector-set!}\,(\key{list}\,\Atm\,\Int\,\Atm))\\
														
 
															-   &\mid& (\key{GlobalValue} \,\itm{name}) \mid (\key{Void}) \\
														
 
															-\Stmt &::=& \gray{ \ASSIGN{\VAR{\Var}}{\Exp} \mid \RETURN{\Exp} } 
														
 
															+   &\mid& (\key{GlobalValue} \,\itm{name}) \mid (\key{Void})\\
														
 
															+\Stmt &::=& \gray{ \ASSIGN{\VAR{\Var}}{\Exp} } 
														
 
															        \mid (\key{Collect} \,\itm{int}) \\
														
 
															-\Tail &::= & \gray{ \RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail} }\\
														
 
															-      &\mid& \gray{ \GOTO{\itm{label}} }\\
														
 
															+\Tail &::= & \gray{ \RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail} 
														
 
															+       \mid \GOTO{\itm{label}} } \\
														
 
															       &\mid& \gray{ \IFSTMT{\BINOP{\itm{cmp}}{\Atm}{\Atm}}{\GOTO{\itm{label}}}{\GOTO{\itm{label}}}  }\\
														
 
															 C_2 & ::= & \PROGRAM{\itm{info}}{\CFG{(\itm{label}\,\key{.}\,\Tail)^{+}}}
														
 
															 \end{array}
														
 
															 \]
														
 
															 \end{minipage}
														
 
															 }
														
 
															-\caption{The abstract syntax of the $C_2$ language.
														
 
															-   TODO: UPDATE}
														
 
															+\caption{The abstract syntax of $C_2$, an extension of $C_1$
														
 
															+  (Figure~\ref{fig:c1-syntax}).}
														
 
															 \label{fig:c2-syntax}
														
 
															 \end{figure}
														
 
															 The output of \code{explicate-control} is a program in the
														
 
															-intermediate language $C_2$, whose syntax is defined in
														
 
															+intermediate language $C_2$, whose abstract syntax is defined in
														
 
															 Figure~\ref{fig:c2-syntax}.  The new forms of $C_2$ include the
														
 
															 \key{allocate}, \key{vector-ref}, \key{vector-set!}, and
														
 
															 \key{global-value} expressions and the \code{collect} statement.  The
														
@@ -5733,25 +5752,26 @@ the \code{Program} structure. Also recall that we need to know the
 
															 types of all the local variables for purposes of identifying the root
														
 
															 set for the garbage collector.  Thus, we create a pass named
														
 
															 \code{uncover-locals} to collect not just the variables but the
														
 
															-variables and their types in the form of an alist. Thanks
														
 
															-to the \code{HasType} nodes, the types are readily available in the
														
 
															-AST. Figure~\ref{fig:uncover-locals-r3} lists the output of the
														
 
															-\code{uncover-locals} pass on the running example.
														
 
															+variables and their types in the form of an alist. Thanks to the
														
 
															+\code{HasType} nodes, the types are readily available at every
														
 
															+assignment to a variable. We recommend storing the resulting alist in
														
 
															+the $\itm{info}$ field of the program, associated with the
														
 
															+\code{locals} key. Figure~\ref{fig:uncover-locals-r3} lists the output
														
 
															+of the \code{uncover-locals} pass on the running example.
														
 
															 \begin{figure}[tbp]
														
 
															 % tests/s2_17.rkt
														
 
															 \begin{lstlisting}
														
 
															-program:
														
 
															 locals:
														
 
															     vecinit7976 : '(Vector Integer), tmp7980 : 'Integer,
														
 
															     alloc7975 : '(Vector (Vector Integer)), tmp7983 : 'Integer,
														
 
															     collectret7974 : 'Void, initret7977 : 'Void,
														
 
															     collectret7978 : 'Void, tmp7985 : '(Vector Integer),
														
 
															     tmp7984 : 'Integer, tmp7979 : 'Integer, tmp7982 : 'Integer,
														
 
															-    alloc7971 : '(Vector Integer), tmp7981 : 'Integer, vecinit7972 : 'Integer,
														
 
															-    initret7973 : 'Void, 
														
 
															+    alloc7971 : '(Vector Integer), tmp7981 : 'Integer,
														
 
															+    vecinit7972 : 'Integer, initret7973 : 'Void, 
														
 
															 block7991:
														
 
															-    (collect 16);
														
 
															+    (collect 16)
														
 
															     goto block7989;
														
 
															 block7990:
														
 
															     collectret7974 = (void);
														
@@ -5768,7 +5788,7 @@ block7989:
 
															     else
														
 
															        goto block7988;
														
 
															 block7988:
														
 
															-    (collect 16);
														
 
															+    (collect 16)
														
 
															     goto block7986;
														
 
															 block7987:
														
 
															     collectret7978 = (void);
														
@@ -6151,30 +6171,30 @@ _conclusion:
 
															 \node (R3-2) at (3,2)  {\large $R_3$};
														
 
															 \node (R3-3) at (6,2)  {\large $R_3$};
														
 
															 \node (R3-4) at (9,2)  {\large $R_3$};
														
 
															-\node (R3-5) at (12,2)  {\large $R_3$};
														
 
															-\node (R3-6) at (12,0)  {\large $R_3$};
														
 
															-\node (C2-4) at (3,0)  {\large $C_2$};
														
 
															-\node (C2-3) at (6,0)  {\large $C_2$};
														
 
															+\node (R3-5) at (9,0)  {\large $R_3$};
														
 
															+\node (R3-6) at (6,0)  {\large $R_3$};
														
 
															+\node (C2-4) at (3,-2)  {\large $C_2$};
														
 
															+\node (C2-3) at (0,-2)  {\large $C_2$};
														
 
															-\node (x86-2) at (3,-2)  {\large $\text{x86}^{*}_2$};
														
 
															-\node (x86-3) at (6,-2)  {\large $\text{x86}^{*}_2$};
														
 
															-\node (x86-4) at (9,-2) {\large $\text{x86}^{*}_2$};
														
 
															-\node (x86-5) at (9,-4) {\large $\text{x86}^{\dagger}_2$};
														
 
															+\node (x86-2) at (3,-4)  {\large $\text{x86}^{*}_2$};
														
 
															+\node (x86-3) at (6,-4)  {\large $\text{x86}^{*}_2$};
														
 
															+\node (x86-4) at (9,-4) {\large $\text{x86}^{*}_2$};
														
 
															+\node (x86-5) at (9,-6) {\large $\text{x86}^{\dagger}_2$};
														
 
															-\node (x86-2-1) at (3,-4)  {\large $\text{x86}^{*}_2$};
														
 
															-\node (x86-2-2) at (6,-4)  {\large $\text{x86}^{*}_2$};
														
 
															+\node (x86-2-1) at (3,-6)  {\large $\text{x86}^{*}_2$};
														
 
															+\node (x86-2-2) at (6,-6)  {\large $\text{x86}^{*}_2$};
														
 
															 \path[->,bend left=15] (R3) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R3-2);
														
 
															 \path[->,bend left=15] (R3-2) edge [above] node {\ttfamily\footnotesize shrink} (R3-3);
														
 
															 \path[->,bend left=15] (R3-3) edge [above] node {\ttfamily\footnotesize uniquify} (R3-4);
														
 
															-\path[->,bend left=15] (R3-4) edge [above] node {\ttfamily\footnotesize\color{red} expose-alloc.} (R3-5);
														
 
															-\path[->,bend left=15] (R3-5) edge [right] node {\ttfamily\footnotesize remove-complex.} (R3-6);
														
 
															-\path[->,bend right=20] (R3-6) edge [above] node {\ttfamily\footnotesize explicate-control} (C2-3);
														
 
															-\path[->,bend right=15] (C2-3) edge [above] node {\ttfamily\footnotesize\color{red} uncover-locals} (C2-4);
														
 
															-\path[->,bend right=15] (C2-4) edge [left] node {\ttfamily\footnotesize\color{red} select-instr.} (x86-2);
														
 
															-\path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize uncover-live} (x86-2-1);
														
 
															-\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize \color{red}build-inter.} (x86-2-2);
														
 
															-\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
														
 
															+\path[->,bend left=15] (R3-4) edge [right] node {\ttfamily\footnotesize\color{red} expose-alloc.} (R3-5);
														
 
															+\path[->,bend left=15] (R3-5) edge [below] node {\ttfamily\footnotesize remove-complex.} (R3-6);
														
 
															+\path[->,bend right=20] (R3-6) edge [left] node {\ttfamily\footnotesize explicate-control} (C2-3);
														
 
															+\path[->,bend right=15] (C2-3) edge [below] node {\ttfamily\footnotesize\color{red} uncover-locals} (C2-4);
														
 
															+\path[->,bend left=15] (C2-4) edge [right] node {\ttfamily\footnotesize\color{red} select-instr.} (x86-2);
														
 
															+\path[->,bend right=15] (x86-2) edge [left] node {\ttfamily\footnotesize uncover-live} (x86-2-1);
														
 
															+\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize\color{red} build-inter.} (x86-2-2);
														
 
															+\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize\color{red} allocate-reg.} (x86-3);
														
 
															 \path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
														
 
															 \path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize\color{red} print-x86} (x86-5);
														
 
															 \end{tikzpicture}