|
@@ -1936,7 +1936,7 @@ of how to use \key{interp-tests}.
|
|
\end{exercise}
|
|
\end{exercise}
|
|
|
|
|
|
\section{Remove Complex Operands}
|
|
\section{Remove Complex Operands}
|
|
-\label{sec:remove-complex-opera-r1}
|
|
|
|
|
|
+\label{sec:remove-complex-opera-R1}
|
|
|
|
|
|
The \code{remove-complex-opera*} pass compiles $R_1$ programs into
|
|
The \code{remove-complex-opera*} pass compiles $R_1$ programs into
|
|
$R_1$ programs in which the arguments of operations are atomic
|
|
$R_1$ programs in which the arguments of operations are atomic
|
|
@@ -2197,17 +2197,17 @@ addq $10, x
|
|
|
|
|
|
The \key{read} operation does not have a direct counterpart in x86
|
|
The \key{read} operation does not have a direct counterpart in x86
|
|
assembly, so we have instead implemented this functionality in the C
|
|
assembly, so we have instead implemented this functionality in the C
|
|
-language, with the function \code{read\_int} in the file
|
|
|
|
-\code{runtime.c}. In general, we refer to all of the functionality in
|
|
|
|
-this file as the \emph{runtime system}, or simply the \emph{runtime}
|
|
|
|
-for short. When compiling your generated x86 assembly code, you need
|
|
|
|
-to compile \code{runtime.c} to \code{runtime.o} (an ``object file'',
|
|
|
|
-using \code{gcc} option \code{-c}) and link it into the
|
|
|
|
-executable. For our purposes of code generation, all you need to do is
|
|
|
|
-translate an assignment of \key{read} into some variable $\itm{lhs}$
|
|
|
|
-(for left-hand side) into a call to the \code{read\_int} function
|
|
|
|
-followed by a move from \code{rax} to the left-hand side. The move
|
|
|
|
-from \code{rax} is needed because the return value from
|
|
|
|
|
|
+language~\citep{Kernighan:1988nx}, with the function \code{read\_int}
|
|
|
|
+in the file \code{runtime.c}. In general, we refer to all of the
|
|
|
|
+functionality in this file as the \emph{runtime system}, or simply the
|
|
|
|
+\emph{runtime} for short. When compiling your generated x86 assembly
|
|
|
|
+code, you need to compile \code{runtime.c} to \code{runtime.o} (an
|
|
|
|
+``object file'', using \code{gcc} option \code{-c}) and link it into
|
|
|
|
+the executable. For our purposes of code generation, all you need to
|
|
|
|
+do is translate an assignment of \key{read} into some variable
|
|
|
|
+$\itm{lhs}$ (for left-hand side) into a call to the \code{read\_int}
|
|
|
|
+function followed by a move from \code{rax} to the left-hand side.
|
|
|
|
+The move from \code{rax} is needed because the return value from
|
|
\code{read\_int} goes into \code{rax}, as is the case in general. \\
|
|
\code{read\_int} goes into \code{rax}, as is the case in general. \\
|
|
\begin{tabular}{lll}
|
|
\begin{tabular}{lll}
|
|
\begin{minipage}{0.3\textwidth}
|
|
\begin{minipage}{0.3\textwidth}
|
|
@@ -3348,7 +3348,7 @@ shown in Figure~\ref{fig:reg-alloc-passes}.
|
|
\node (x86-2) at (3,-2) {\large $\text{x86}^{*}$};
|
|
\node (x86-2) at (3,-2) {\large $\text{x86}^{*}$};
|
|
\node (x86-3) at (6,-2) {\large $\text{x86}^{*}$};
|
|
\node (x86-3) at (6,-2) {\large $\text{x86}^{*}$};
|
|
\node (x86-4) at (9,-2) {\large $\text{x86}$};
|
|
\node (x86-4) at (9,-2) {\large $\text{x86}$};
|
|
-\node (x86-5) at (12,-2) {\large $\text{x86}^{\dagger}$};
|
|
|
|
|
|
+\node (x86-5) at (9,-4) {\large $\text{x86}^{\dagger}$};
|
|
|
|
|
|
\node (x86-2-1) at (3,-4) {\large $\text{x86}^{*}$};
|
|
\node (x86-2-1) at (3,-4) {\large $\text{x86}^{*}$};
|
|
\node (x86-2-2) at (6,-4) {\large $\text{x86}^{*}$};
|
|
\node (x86-2-2) at (6,-4) {\large $\text{x86}^{*}$};
|
|
@@ -3361,7 +3361,7 @@ shown in Figure~\ref{fig:reg-alloc-passes}.
|
|
\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize\color{red} build-inter.} (x86-2-2);
|
|
\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize\color{red} build-inter.} (x86-2-2);
|
|
\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize\color{red} allocate-reg.} (x86-3);
|
|
\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize\color{red} allocate-reg.} (x86-3);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
|
|
-\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize print-x86} (x86-5);
|
|
|
|
|
|
+\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize print-x86} (x86-5);
|
|
\end{tikzpicture}
|
|
\end{tikzpicture}
|
|
\caption{Diagram of the passes for $R_1$ with register allocation.}
|
|
\caption{Diagram of the passes for $R_1$ with register allocation.}
|
|
\label{fig:reg-alloc-passes}
|
|
\label{fig:reg-alloc-passes}
|
|
@@ -4216,7 +4216,8 @@ C_1 & ::= & \gray{\PROGRAM{\itm{info}}{\CFG{\key{(}\itm{label}\,\key{.}\,\Tail\k
|
|
\]
|
|
\]
|
|
\end{minipage}
|
|
\end{minipage}
|
|
}
|
|
}
|
|
-\caption{The abstract syntax of $C_1$, extending $C_0$ with Booleans and conditionals.}
|
|
|
|
|
|
+\caption{The abstract syntax of $C_1$, an extension of $C_0$
|
|
|
|
+ (Figure~\ref{fig:c0-syntax}).}
|
|
\label{fig:c1-syntax}
|
|
\label{fig:c1-syntax}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
@@ -4796,29 +4797,28 @@ conclusion:
|
|
\node (R2-2) at (3,2) {\large $R_2$};
|
|
\node (R2-2) at (3,2) {\large $R_2$};
|
|
\node (R2-3) at (6,2) {\large $R_2$};
|
|
\node (R2-3) at (6,2) {\large $R_2$};
|
|
\node (R2-4) at (9,2) {\large $R_2$};
|
|
\node (R2-4) at (9,2) {\large $R_2$};
|
|
-\node (R2-5) at (12,2) {\large $R_2$};
|
|
|
|
-\node (C1-1) at (6,0) {\large $C_1$};
|
|
|
|
-%\node (C1-2) at (3,0) {\large $C_1$};
|
|
|
|
|
|
+\node (R2-5) at (9,0) {\large $R_2$};
|
|
|
|
+\node (C1-1) at (3,-2) {\large $C_1$};
|
|
|
|
|
|
-\node (x86-2) at (3,-2) {\large $\text{x86}^{*}$};
|
|
|
|
-\node (x86-3) at (6,-2) {\large $\text{x86}^{*}$};
|
|
|
|
-\node (x86-4) at (9,-2) {\large $\text{x86}^{*}$};
|
|
|
|
-\node (x86-5) at (12,-2) {\large $\text{x86}^{\dagger}$};
|
|
|
|
|
|
+\node (x86-2) at (3,-4) {\large $\text{x86}^{*}$};
|
|
|
|
+\node (x86-3) at (6,-4) {\large $\text{x86}^{*}$};
|
|
|
|
+\node (x86-4) at (9,-4) {\large $\text{x86}^{*}$};
|
|
|
|
+\node (x86-5) at (9,-6) {\large $\text{x86}^{\dagger}$};
|
|
|
|
|
|
-\node (x86-2-1) at (3,-4) {\large $\text{x86}^{*}$};
|
|
|
|
-\node (x86-2-2) at (6,-4) {\large $\text{x86}^{*}$};
|
|
|
|
|
|
+\node (x86-2-1) at (3,-6) {\large $\text{x86}^{*}$};
|
|
|
|
+\node (x86-2-2) at (6,-6) {\large $\text{x86}^{*}$};
|
|
|
|
|
|
\path[->,bend left=15] (R2) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R2-2);
|
|
\path[->,bend left=15] (R2) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R2-2);
|
|
\path[->,bend left=15] (R2-2) edge [above] node {\ttfamily\footnotesize\color{red} shrink} (R2-3);
|
|
\path[->,bend left=15] (R2-2) edge [above] node {\ttfamily\footnotesize\color{red} shrink} (R2-3);
|
|
\path[->,bend left=15] (R2-3) edge [above] node {\ttfamily\footnotesize uniquify} (R2-4);
|
|
\path[->,bend left=15] (R2-3) edge [above] node {\ttfamily\footnotesize uniquify} (R2-4);
|
|
-\path[->,bend left=15] (R2-4) edge [above] node {\ttfamily\footnotesize remove-complex.} (R2-5);
|
|
|
|
-\path[->,bend left=15] (R2-5) edge [right] node {\ttfamily\footnotesize\color{red} explicate-control} (C1-1);
|
|
|
|
|
|
+\path[->,bend left=15] (R2-4) edge [right] node {\ttfamily\footnotesize remove-complex.} (R2-5);
|
|
|
|
+\path[->,bend right=15] (R2-5) edge [left] node {\ttfamily\footnotesize\color{red} explicate-control} (C1-1);
|
|
\path[->,bend right=15] (C1-1) edge [left] node {\ttfamily\footnotesize\color{red} select-instructions} (x86-2);
|
|
\path[->,bend right=15] (C1-1) edge [left] node {\ttfamily\footnotesize\color{red} select-instructions} (x86-2);
|
|
\path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize\color{red} uncover-live} (x86-2-1);
|
|
\path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize\color{red} uncover-live} (x86-2-1);
|
|
\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize build-inter.} (x86-2-2);
|
|
\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize build-inter.} (x86-2-2);
|
|
\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
|
|
\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize\color{red} patch-instr.} (x86-4);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize\color{red} patch-instr.} (x86-4);
|
|
-\path[->,bend left=15] (x86-4) edge [above] node {\ttfamily\footnotesize\color{red} print-x86 } (x86-5);
|
|
|
|
|
|
+\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize\color{red} print-x86 } (x86-5);
|
|
\end{tikzpicture}
|
|
\end{tikzpicture}
|
|
\caption{Diagram of the passes for $R_2$, a language with conditionals.}
|
|
\caption{Diagram of the passes for $R_2$, a language with conditionals.}
|
|
\label{fig:R2-passes}
|
|
\label{fig:R2-passes}
|
|
@@ -5241,7 +5241,7 @@ Subsequently, in the \code{uncover-locals} pass
|
|
propagated to all variables (including the temporaries generated by
|
|
propagated to all variables (including the temporaries generated by
|
|
\code{remove-complex-opera*}).
|
|
\code{remove-complex-opera*}).
|
|
|
|
|
|
-\begin{figure}[tbp]
|
|
|
|
|
|
+\begin{figure}[hb]
|
|
\begin{lstlisting}
|
|
\begin{lstlisting}
|
|
(define (type-check-exp env)
|
|
(define (type-check-exp env)
|
|
(lambda (e)
|
|
(lambda (e)
|
|
@@ -5250,8 +5250,7 @@ propagated to all variables (including the temporaries generated by
|
|
...
|
|
...
|
|
[(Void) (values (HasType (Void) 'Void) 'Void)]
|
|
[(Void) (values (HasType (Void) 'Void) 'Void)]
|
|
[(Prim 'vector es)
|
|
[(Prim 'vector es)
|
|
- (define-values (e* t*) (for/lists (e* t*) ([e es])
|
|
|
|
- (recur e)))
|
|
|
|
|
|
+ (define-values (e* t*) (for/lists (e* t*) ([e es]) (recur e)))
|
|
(let ([t `(Vector ,@t*)])
|
|
(let ([t `(Vector ,@t*)])
|
|
(values (HasType (Prim 'vector e*) t) t))]
|
|
(values (HasType (Prim 'vector e*) t) t))]
|
|
[(Prim 'vector-ref (list e (Int i)))
|
|
[(Prim 'vector-ref (list e (Int i)))
|
|
@@ -5262,7 +5261,9 @@ propagated to all variables (including the temporaries generated by
|
|
(error 'type-check-exp "invalid index ~a" i))
|
|
(error 'type-check-exp "invalid index ~a" i))
|
|
(let ([t (list-ref ts i)])
|
|
(let ([t (list-ref ts i)])
|
|
(values
|
|
(values
|
|
- (HasType (Prim 'vector-ref (list e^ (HasType (Int i) 'Integer))) t)
|
|
|
|
|
|
+ (HasType (Prim 'vector-ref
|
|
|
|
+ (list e^ (HasType (Int i) 'Integer)))
|
|
|
|
+ t)
|
|
t))]
|
|
t))]
|
|
[else (error "expected a vector in vector-ref, not" t)])]
|
|
[else (error "expected a vector in vector-ref, not" t)])]
|
|
[(Prim 'eq? (list e1 e2))
|
|
[(Prim 'eq? (list e1 e2))
|
|
@@ -5291,14 +5292,13 @@ collector~\citep{Wilson:1992fk} that uses Cheney's algorithm to
|
|
perform the
|
|
perform the
|
|
copy~\citep{Cheney:1970aa}. Figure~\ref{fig:copying-collector} gives a
|
|
copy~\citep{Cheney:1970aa}. Figure~\ref{fig:copying-collector} gives a
|
|
coarse-grained depiction of what happens in a two-space collector,
|
|
coarse-grained depiction of what happens in a two-space collector,
|
|
-showing two time steps, prior to garbage collection on the top and
|
|
|
|
-after garbage collection on the bottom. In a two-space collector, the
|
|
|
|
-heap is divided into two parts, the FromSpace and the
|
|
|
|
|
|
+showing two time steps, prior to garbage collection (on the top) and
|
|
|
|
+after garbage collection (on the bottom). In a two-space collector,
|
|
|
|
+the heap is divided into two parts named the FromSpace and the
|
|
ToSpace. Initially, all allocations go to the FromSpace until there is
|
|
ToSpace. Initially, all allocations go to the FromSpace until there is
|
|
not enough room for the next allocation request. At that point, the
|
|
not enough room for the next allocation request. At that point, the
|
|
garbage collector goes to work to make more room.
|
|
garbage collector goes to work to make more room.
|
|
|
|
|
|
-
|
|
|
|
The garbage collector must be careful not to reclaim tuples that will
|
|
The garbage collector must be careful not to reclaim tuples that will
|
|
be used by the program in the future. Of course, it is impossible in
|
|
be used by the program in the future. Of course, it is impossible in
|
|
general to predict what a program will do, but we can over approximate
|
|
general to predict what a program will do, but we can over approximate
|
|
@@ -5327,14 +5327,15 @@ root set, one in a register and two on the stack. All of the live
|
|
objects have been copied to the ToSpace (the right-hand side of
|
|
objects have been copied to the ToSpace (the right-hand side of
|
|
Figure~\ref{fig:copying-collector}) in a way that preserves the
|
|
Figure~\ref{fig:copying-collector}) in a way that preserves the
|
|
pointer relationships. For example, the pointer in the register still
|
|
pointer relationships. For example, the pointer in the register still
|
|
-points to a 2-tuple whose first element is a 3-tuple and second
|
|
|
|
|
|
+points to a 2-tuple whose first element is a 3-tuple and whose second
|
|
element is a 2-tuple. There are four tuples that are not reachable
|
|
element is a 2-tuple. There are four tuples that are not reachable
|
|
from the root set and therefore do not get copied into the ToSpace.
|
|
from the root set and therefore do not get copied into the ToSpace.
|
|
-(The situation in Figure~\ref{fig:copying-collector}, with a
|
|
|
|
-cycle, cannot be created by a well-typed program in $R_3$. However,
|
|
|
|
-creating cycles will be possible once we get to $R_6$. We design
|
|
|
|
-the garbage collector to deal with cycles to begin with, so we will
|
|
|
|
-not need to revisit this issue.)
|
|
|
|
|
|
+
|
|
|
|
+The exact situation in Figure~\ref{fig:copying-collector} cannot be
|
|
|
|
+created by a well-typed program in $R_3$ because it contains a
|
|
|
|
+cycle. However, creating cycles will be possible once we get to $R_6$.
|
|
|
|
+We design the garbage collector to deal with cycles to begin with so
|
|
|
|
+we will not need to revisit this issue.
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
\begin{figure}[tbp]
|
|
\centering
|
|
\centering
|
|
@@ -5348,35 +5349,38 @@ There are many alternatives to copying collectors (and their older
|
|
siblings, the generational collectors) when it comes to garbage
|
|
siblings, the generational collectors) when it comes to garbage
|
|
collection, such as mark-and-sweep and reference counting. The
|
|
collection, such as mark-and-sweep and reference counting. The
|
|
strengths of copying collectors are that allocation is fast (just a
|
|
strengths of copying collectors are that allocation is fast (just a
|
|
-test and pointer increment), there is no fragmentation, cyclic garbage
|
|
|
|
-is collected, and the time complexity of collection only depends on
|
|
|
|
-the amount of live data, and not on the amount of
|
|
|
|
-garbage~\citep{Wilson:1992fk}. The main disadvantage of two-space
|
|
|
|
-copying collectors is that they use a lot of space, though that
|
|
|
|
-problem is ameliorated in generational collectors. Racket and Scheme
|
|
|
|
-programs tend to allocate many small objects and generate a lot of
|
|
|
|
-garbage, so copying and generational collectors are a good fit. Of
|
|
|
|
-course, garbage collection is an active research topic, especially
|
|
|
|
-concurrent garbage collection~\citep{Tene:2011kx}. Researchers are
|
|
|
|
-continuously developing new techniques and revisiting old
|
|
|
|
-trade-offs~\citep{Blackburn:2004aa,Jones:2011aa,Shahriyar:2013aa,Cutler:2015aa,Shidal:2015aa}.
|
|
|
|
|
|
+comparison and pointer increment), there is no fragmentation, cyclic
|
|
|
|
+garbage is collected, and the time complexity of collection only
|
|
|
|
+depends on the amount of live data, and not on the amount of
|
|
|
|
+garbage~\citep{Wilson:1992fk}. The main disadvantage of a two-space
|
|
|
|
+copying collector is that it uses a lot of space, though that problem
|
|
|
|
+is ameliorated in generational collectors. Racket and Scheme programs
|
|
|
|
+tend to allocate many small objects and generate a lot of garbage, so
|
|
|
|
+copying and generational collectors are a good fit. Garbage
|
|
|
|
+collection is an active research topic, especially concurrent garbage
|
|
|
|
+collection~\citep{Tene:2011kx}. Researchers are continuously
|
|
|
|
+developing new techniques and revisiting old
|
|
|
|
+trade-offs~\citep{Blackburn:2004aa,Jones:2011aa,Shahriyar:2013aa,Cutler:2015aa,Shidal:2015aa,Osterlund:2016aa,Jacek:2019aa,Gamari:2020aa}. Researchers
|
|
|
|
+meet every year at the International Symposium on Memory Management to
|
|
|
|
+present these findings.
|
|
|
|
+
|
|
|
|
|
|
\subsection{Graph Copying via Cheney's Algorithm}
|
|
\subsection{Graph Copying via Cheney's Algorithm}
|
|
\label{sec:cheney}
|
|
\label{sec:cheney}
|
|
|
|
|
|
-Let us take a closer look at how the copy works. The allocated objects
|
|
|
|
-and pointers can be viewed as a graph and we need to copy the part of
|
|
|
|
-the graph that is reachable from the root set. To make sure we copy
|
|
|
|
-all of the reachable vertices in the graph, we need an exhaustive
|
|
|
|
-graph traversal algorithm, such as depth-first search or breadth-first
|
|
|
|
-search~\citep{Moore:1959aa,Cormen:2001uq}. Recall that such algorithms
|
|
|
|
-take into account the possibility of cycles by marking which vertices
|
|
|
|
-have already been visited, so as to ensure termination of the
|
|
|
|
-algorithm. These search algorithms also use a data structure such as a
|
|
|
|
-stack or queue as a to-do list to keep track of the vertices that need
|
|
|
|
-to be visited. We shall use breadth-first search and a trick due to
|
|
|
|
-\citet{Cheney:1970aa} for simultaneously representing the queue and
|
|
|
|
-copying tuples into the ToSpace.
|
|
|
|
|
|
+Let us take a closer look at the copying of the live objects. The
|
|
|
|
+allocated objects and pointers can be viewed as a graph and we need to
|
|
|
|
+copy the part of the graph that is reachable from the root set. To
|
|
|
|
+make sure we copy all of the reachable vertices in the graph, we need
|
|
|
|
+an exhaustive graph traversal algorithm, such as depth-first search or
|
|
|
|
+breadth-first search~\citep{Moore:1959aa,Cormen:2001uq}. Recall that
|
|
|
|
+such algorithms take into account the possibility of cycles by marking
|
|
|
|
+which vertices have already been visited, so as to ensure termination
|
|
|
|
+of the algorithm. These search algorithms also use a data structure
|
|
|
|
+such as a stack or queue as a to-do list to keep track of the vertices
|
|
|
|
+that need to be visited. We shall use breadth-first search and a trick
|
|
|
|
+due to \citet{Cheney:1970aa} for simultaneously representing the queue
|
|
|
|
+and copying tuples into the ToSpace.
|
|
|
|
|
|
Figure~\ref{fig:cheney} shows several snapshots of the ToSpace as the
|
|
Figure~\ref{fig:cheney} shows several snapshots of the ToSpace as the
|
|
copy progresses. The queue is represented by a chunk of contiguous
|
|
copy progresses. The queue is represented by a chunk of contiguous
|
|
@@ -5384,25 +5388,16 @@ memory at the beginning of the ToSpace, using two pointers to track
|
|
the front and the back of the queue. The algorithm starts by copying
|
|
the front and the back of the queue. The algorithm starts by copying
|
|
all tuples that are immediately reachable from the root set into the
|
|
all tuples that are immediately reachable from the root set into the
|
|
ToSpace to form the initial queue. When we copy a tuple, we mark the
|
|
ToSpace to form the initial queue. When we copy a tuple, we mark the
|
|
-old tuple to indicate that it has been visited. (We discuss the
|
|
|
|
-marking in Section~\ref{sec:data-rep-gc}.) Note that any pointers
|
|
|
|
-inside the copied tuples in the queue still point back to the
|
|
|
|
|
|
+old tuple to indicate that it has been visited. We discuss how this
|
|
|
|
+marking is accomplished in Section~\ref{sec:data-rep-gc}. Note that any
|
|
|
|
+pointers inside the copied tuples in the queue still point back to the
|
|
FromSpace. Once the initial queue has been created, the algorithm
|
|
FromSpace. Once the initial queue has been created, the algorithm
|
|
enters a loop in which it repeatedly processes the tuple at the front
|
|
enters a loop in which it repeatedly processes the tuple at the front
|
|
of the queue and pops it off the queue. To process a tuple, the
|
|
of the queue and pops it off the queue. To process a tuple, the
|
|
algorithm copies all the tuples that are directly reachable from it to
|
|
algorithm copies all the tuples that are directly reachable from it to
|
|
the ToSpace, placing them at the back of the queue. The algorithm then
|
|
the ToSpace, placing them at the back of the queue. The algorithm then
|
|
updates the pointers in the popped tuple so they point to the newly
|
|
updates the pointers in the popped tuple so they point to the newly
|
|
-copied tuples. Getting back to Figure~\ref{fig:cheney}, in the first
|
|
|
|
-step we copy the tuple whose second element is $42$ to the back of the
|
|
|
|
-queue. The other pointer goes to a tuple that has already been copied,
|
|
|
|
-so we do not need to copy it again, but we do need to update the
|
|
|
|
-pointer to the new location. This can be accomplished by storing a
|
|
|
|
-\emph{forwarding} pointer to the new location in the old tuple, back
|
|
|
|
-when we initially copied the tuple into the ToSpace. This completes
|
|
|
|
-one step of the algorithm. The algorithm continues in this way until
|
|
|
|
-the front of the queue is empty, that is, until the front catches up
|
|
|
|
-with the back.
|
|
|
|
|
|
+copied tuples.
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
\begin{figure}[tbp]
|
|
\centering \includegraphics[width=0.9\textwidth]{figs/cheney}
|
|
\centering \includegraphics[width=0.9\textwidth]{figs/cheney}
|
|
@@ -5410,6 +5405,16 @@ with the back.
|
|
\label{fig:cheney}
|
|
\label{fig:cheney}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
|
|
+Getting back to Figure~\ref{fig:cheney}, in the first step we copy the
|
|
|
|
+tuple whose second element is $42$ to the back of the queue. The other
|
|
|
|
+pointer goes to a tuple that has already been copied, so we do not
|
|
|
|
+need to copy it again, but we do need to update the pointer to the new
|
|
|
|
+location. This can be accomplished by storing a \emph{forwarding}
|
|
|
|
+pointer to the new location in the old tuple, back when we initially
|
|
|
|
+copied the tuple into the ToSpace. This completes one step of the
|
|
|
|
+algorithm. The algorithm continues in this way until the front of the
|
|
|
|
+queue is empty, that is, until the front catches up with the back.
|
|
|
|
+
|
|
|
|
|
|
\subsection{Data Representation}
|
|
\subsection{Data Representation}
|
|
\label{sec:data-rep-gc}
|
|
\label{sec:data-rep-gc}
|
|
@@ -5434,83 +5439,78 @@ However, $R_3$ is a statically typed language, so it would be
|
|
unfortunate to require tags on every object, especially small and
|
|
unfortunate to require tags on every object, especially small and
|
|
pervasive objects like integers and Booleans. Option 3 is the
|
|
pervasive objects like integers and Booleans. Option 3 is the
|
|
best-performing choice for statically typed languages, but comes with
|
|
best-performing choice for statically typed languages, but comes with
|
|
-a relatively high implementation complexity. To keep this chapter to a
|
|
|
|
-2-week time budget, we recommend a combination of options 1 and 2,
|
|
|
|
-with separate strategies used for the stack and the heap.
|
|
|
|
-
|
|
|
|
-Regarding the stack, we recommend using a separate stack for
|
|
|
|
-pointers~\citep{Siebert:2001aa,Henderson:2002aa,Baker:2009aa}, which
|
|
|
|
-we call a \emph{root stack} (a.k.a. ``shadow stack''). That is, when a
|
|
|
|
-local variable needs to be spilled and is of type \code{(Vector
|
|
|
|
- $\Type_1 \ldots \Type_n$)}, then we put it on the root stack instead
|
|
|
|
-of the normal procedure call stack. Furthermore, we always spill
|
|
|
|
-vector-typed variables if they are live during a call to the
|
|
|
|
-collector, thereby ensuring that no pointers are in registers during a
|
|
|
|
-collection. Figure~\ref{fig:shadow-stack} reproduces the example from
|
|
|
|
-Figure~\ref{fig:copying-collector} and contrasts it with the data
|
|
|
|
-layout using a root stack. The root stack contains the two pointers
|
|
|
|
-from the regular stack and also the pointer in the second
|
|
|
|
|
|
+a relatively high implementation complexity. To keep this chapter
|
|
|
|
+within a 2-week time budget, we recommend a combination of options 1
|
|
|
|
+and 2, using separate strategies for the stack and the heap.
|
|
|
|
+
|
|
|
|
+Regarding the stack, we recommend using a separate stack for pointers,
|
|
|
|
+which we call a \emph{root stack} (a.k.a. ``shadow
|
|
|
|
+stack'')~\citep{Siebert:2001aa,Henderson:2002aa,Baker:2009aa}. That
|
|
|
|
+is, when a local variable needs to be spilled and is of type
|
|
|
|
+\code{(Vector $\Type_1 \ldots \Type_n$)}, then we put it on the root
|
|
|
|
+stack instead of the normal procedure call stack. Furthermore, we
|
|
|
|
+always spill vector-typed variables if they are live during a call to
|
|
|
|
+the collector, thereby ensuring that no pointers are in registers
|
|
|
|
+during a collection. Figure~\ref{fig:shadow-stack} reproduces the
|
|
|
|
+example from Figure~\ref{fig:copying-collector} and contrasts it with
|
|
|
|
+the data layout using a root stack. The root stack contains the two
|
|
|
|
+pointers from the regular stack and also the pointer in the second
|
|
register.
|
|
register.
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
\begin{figure}[tbp]
|
|
-\centering \includegraphics[width=0.7\textwidth]{figs/root-stack}
|
|
|
|
|
|
+\centering \includegraphics[width=0.65\textwidth]{figs/root-stack}
|
|
\caption{Maintaining a root stack to facilitate garbage collection.}
|
|
\caption{Maintaining a root stack to facilitate garbage collection.}
|
|
\label{fig:shadow-stack}
|
|
\label{fig:shadow-stack}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
The problem of distinguishing between pointers and other kinds of data
|
|
The problem of distinguishing between pointers and other kinds of data
|
|
-also arises inside of each tuple. We solve this problem by attaching a
|
|
|
|
-tag, an extra 64-bits, to each tuple. Figure~\ref{fig:tuple-rep} zooms
|
|
|
|
-in on the tags for two of the tuples in the example from
|
|
|
|
-Figure~\ref{fig:copying-collector}. Note that we have drawn the bits
|
|
|
|
-in a big-endian way, from right-to-left, with bit location 0 (the
|
|
|
|
-least significant bit) on the far right, which corresponds to the
|
|
|
|
-directional of the x86 shifting instructions \key{salq} (shift
|
|
|
|
-left) and \key{sarq} (shift right). Part of each tag is dedicated to
|
|
|
|
-specifying which elements of the tuple are pointers, the part labeled
|
|
|
|
-``pointer mask''. Within the pointer mask, a 1 bit indicates there is
|
|
|
|
-a pointer and a 0 bit indicates some other kind of data. The pointer
|
|
|
|
-mask starts at bit location 7. We have limited tuples to a maximum
|
|
|
|
-size of 50 elements, so we just need 50 bits for the pointer mask. The
|
|
|
|
-tag also contains two other pieces of information. The length of the
|
|
|
|
-tuple (number of elements) is stored in bits location 1 through
|
|
|
|
-6. Finally, the bit at location 0 indicates whether the tuple has yet
|
|
|
|
-to be copied to the ToSpace. If the bit has value 1, then this tuple
|
|
|
|
-has not yet been copied. If the bit has value 0 then the entire tag
|
|
|
|
-is in fact a forwarding pointer. (The lower 3 bits of an pointer are
|
|
|
|
-always zero anyways because our tuples are 8-byte aligned.)
|
|
|
|
|
|
+also arises inside of each tuple on the heap. We solve this problem by
|
|
|
|
+attaching a tag, an extra 64 bits, to each
|
|
|
|
+tuple. Figure~\ref{fig:tuple-rep} zooms in on the tags for two of the
|
|
|
|
+tuples in the example from Figure~\ref{fig:copying-collector}. Note
|
|
|
|
+that we have drawn the bits in a big-endian way, from right-to-left,
|
|
|
|
+with bit location 0 (the least significant bit) on the far right,
|
|
|
|
+which corresponds to the direction of the x86 shifting instructions
|
|
|
|
+\key{salq} (shift left) and \key{sarq} (shift right). Part of each tag
|
|
|
|
+is dedicated to specifying which elements of the tuple are pointers,
|
|
|
|
+the part labeled ``pointer mask''. Within the pointer mask, a 1 bit
|
|
|
|
+indicates there is a pointer and a 0 bit indicates some other kind of
|
|
|
|
+data. The pointer mask starts at bit location 7. We have limited
|
|
|
|
+tuples to a maximum size of 50 elements, so we just need 50 bits for
|
|
|
|
+the pointer mask. The tag also contains two other pieces of
|
|
|
|
+information. The length of the tuple (number of elements) is stored in
|
|
|
|
+bit locations 1 through 6. Finally, the bit at location 0 indicates
|
|
|
|
+whether the tuple has yet to be copied to the ToSpace. If the bit has
|
|
|
|
+value 1, then this tuple has not yet been copied. If the bit has
|
|
|
|
+value 0 then the entire tag is a forwarding pointer. (The lower 3 bits
|
|
|
|
+of a pointer are always zero anyways because our tuples are 8-byte
|
|
|
|
+aligned.)
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
\begin{figure}[tbp]
|
|
\centering \includegraphics[width=0.8\textwidth]{figs/tuple-rep}
|
|
\centering \includegraphics[width=0.8\textwidth]{figs/tuple-rep}
|
|
-\caption{Representation for tuples in the heap.}
|
|
|
|
|
|
+\caption{Representation of tuples in the heap.}
|
|
\label{fig:tuple-rep}
|
|
\label{fig:tuple-rep}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
\subsection{Implementation of the Garbage Collector}
|
|
\subsection{Implementation of the Garbage Collector}
|
|
\label{sec:organize-gz}
|
|
\label{sec:organize-gz}
|
|
|
|
|
|
-The implementation of the garbage collector needs to do a lot of
|
|
|
|
-bit-level data manipulation and we need to link it with our
|
|
|
|
-compiler-generated x86 code. Thus, we recommend implementing the
|
|
|
|
-garbage collector in C~\citep{Kernighan:1988nx} and putting the code
|
|
|
|
-in the \code{runtime.c} file. Figure~\ref{fig:gc-header} shows the
|
|
|
|
-interface to the garbage collector. The \code{initialize} function
|
|
|
|
-creates the FromSpace, ToSpace, and root stack. The \code{initialize}
|
|
|
|
-function is meant to be called near the beginning of \code{main},
|
|
|
|
-before the rest of the program executes. The \code{initialize}
|
|
|
|
-function puts the address of the beginning of the FromSpace into the
|
|
|
|
-global variable \code{free\_ptr}. The global \code{fromspace\_end}
|
|
|
|
-points to the address that is 1-past the last element of the
|
|
|
|
-FromSpace. (We use half-open intervals to represent chunks of
|
|
|
|
-memory~\citep{Dijkstra:1982aa}.) The \code{rootstack\_begin} global
|
|
|
|
|
|
+An implementation of the copying collector is provided in the
|
|
|
|
+\code{runtime.c} file. Figure~\ref{fig:gc-header} defines the
|
|
|
|
+interface to the garbage collector that is used by the compiler. The
|
|
|
|
+\code{initialize} function creates the FromSpace, ToSpace, and root
|
|
|
|
+stack and should be called in the prelude of the \code{main}
|
|
|
|
+function. The \code{initialize} function puts the address of the
|
|
|
|
+beginning of the FromSpace into the global variable
|
|
|
|
+\code{free\_ptr}. The global variable \code{fromspace\_end} points to
|
|
|
|
+the address that is 1-past the last element of the FromSpace. (We use
|
|
|
|
+half-open intervals to represent chunks of
|
|
|
|
+memory~\citep{Dijkstra:1982aa}.) The \code{rootstack\_begin} variable
|
|
points to the first element of the root stack.
|
|
points to the first element of the root stack.
|
|
|
|
|
|
As long as there is room left in the FromSpace, your generated code
|
|
As long as there is room left in the FromSpace, your generated code
|
|
can allocate tuples simply by moving the \code{free\_ptr} forward.
|
|
can allocate tuples simply by moving the \code{free\_ptr} forward.
|
|
%
|
|
%
|
|
-\margincomment{\tiny Should we dedicate a register to the free pointer? \\
|
|
|
|
---Jeremy}
|
|
|
|
-%
|
|
|
|
The amount of room left in FromSpace is the difference between the
|
|
The amount of room left in FromSpace is the difference between the
|
|
\code{fromspace\_end} and the \code{free\_ptr}. The \code{collect}
|
|
\code{fromspace\_end} and the \code{free\_ptr}. The \code{collect}
|
|
function should be called when there is not enough room left in the
|
|
function should be called when there is not enough room left in the
|
|
@@ -5534,69 +5534,101 @@ succeed.
|
|
\label{fig:gc-header}
|
|
\label{fig:gc-header}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
-\begin{exercise}
|
|
|
|
- In the file \code{runtime.c} you will find the implementation of
|
|
|
|
- \code{initialize} and a partial implementation of \code{collect}.
|
|
|
|
- The \code{collect} function calls another function, \code{cheney},
|
|
|
|
- to perform the actual copy, and that function is left to the reader
|
|
|
|
- to implement. The following is the prototype for \code{cheney}.
|
|
|
|
-\begin{lstlisting}
|
|
|
|
- static void cheney(int64_t** rootstack_ptr);
|
|
|
|
-\end{lstlisting}
|
|
|
|
- The parameter \code{rootstack\_ptr} is a pointer to the top of the
|
|
|
|
- rootstack (which is an array of pointers). The \code{cheney} function
|
|
|
|
- also communicates with \code{collect} through the global
|
|
|
|
- variables \code{fromspace\_begin} and \code{fromspace\_end}
|
|
|
|
- mentioned in Figure~\ref{fig:gc-header} as well as the pointers for
|
|
|
|
- the ToSpace:
|
|
|
|
-\begin{lstlisting}
|
|
|
|
- static int64_t* tospace_begin;
|
|
|
|
- static int64_t* tospace_end;
|
|
|
|
-\end{lstlisting}
|
|
|
|
- The job of the \code{cheney} function is to copy all the live
|
|
|
|
- objects (reachable from the root stack) into the ToSpace, update
|
|
|
|
- \code{free\_ptr} to point to the next unused spot in the ToSpace,
|
|
|
|
- update the root stack so that it points to the objects in the
|
|
|
|
- ToSpace, and finally to swap the global pointers for the FromSpace
|
|
|
|
- and ToSpace.
|
|
|
|
-\end{exercise}
|
|
|
|
|
|
+%% \begin{exercise}
|
|
|
|
+%% In the file \code{runtime.c} you will find the implementation of
|
|
|
|
+%% \code{initialize} and a partial implementation of \code{collect}.
|
|
|
|
+%% The \code{collect} function calls another function, \code{cheney},
|
|
|
|
+%% to perform the actual copy, and that function is left to the reader
|
|
|
|
+%% to implement. The following is the prototype for \code{cheney}.
|
|
|
|
+%% \begin{lstlisting}
|
|
|
|
+%% static void cheney(int64_t** rootstack_ptr);
|
|
|
|
+%% \end{lstlisting}
|
|
|
|
+%% The parameter \code{rootstack\_ptr} is a pointer to the top of the
|
|
|
|
+%% rootstack (which is an array of pointers). The \code{cheney} function
|
|
|
|
+%% also communicates with \code{collect} through the global
|
|
|
|
+%% variables \code{fromspace\_begin} and \code{fromspace\_end}
|
|
|
|
+%% mentioned in Figure~\ref{fig:gc-header} as well as the pointers for
|
|
|
|
+%% the ToSpace:
|
|
|
|
+%% \begin{lstlisting}
|
|
|
|
+%% static int64_t* tospace_begin;
|
|
|
|
+%% static int64_t* tospace_end;
|
|
|
|
+%% \end{lstlisting}
|
|
|
|
+%% The job of the \code{cheney} function is to copy all the live
|
|
|
|
+%% objects (reachable from the root stack) into the ToSpace, update
|
|
|
|
+%% \code{free\_ptr} to point to the next unused spot in the ToSpace,
|
|
|
|
+%% update the root stack so that it points to the objects in the
|
|
|
|
+%% ToSpace, and finally to swap the global pointers for the FromSpace
|
|
|
|
+%% and ToSpace.
|
|
|
|
+%% \end{exercise}
|
|
|
|
|
|
|
|
|
|
%% \section{Compiler Passes}
|
|
%% \section{Compiler Passes}
|
|
%% \label{sec:code-generation-gc}
|
|
%% \label{sec:code-generation-gc}
|
|
|
|
|
|
The introduction of garbage collection has a non-trivial impact on our
|
|
The introduction of garbage collection has a non-trivial impact on our
|
|
-compiler passes. We introduce one new compiler pass called
|
|
|
|
-\code{expose-allocation} and make non-trivial changes to
|
|
|
|
-\code{type-check}, \code{flatten}, \code{select-instructions},
|
|
|
|
-\code{allocate-registers}, and \code{print-x86}. The following
|
|
|
|
-program will serve as our running example. It creates two tuples, one
|
|
|
|
-nested inside the other. Both tuples have length one. The example then
|
|
|
|
-accesses the element in the inner tuple tuple via two vector
|
|
|
|
|
|
+compiler passes. We introduce two new compiler passes named
|
|
|
|
+\code{expose-allocation} and \code{uncover-locals}. We make
|
|
|
|
+significant changes to \code{select-instructions},
|
|
|
|
+\code{build-interference}, \code{allocate-registers}, and
|
|
|
|
+\code{print-x86} and make minor changes in several more passes. The
|
|
|
|
+following program will serve as our running example. It creates two
|
|
|
|
+tuples, one nested inside the other. Both tuples have length one. The
|
|
|
|
+program accesses the element in the inner tuple via two vector
|
|
references.
|
|
references.
|
|
% tests/s2_17.rkt
|
|
% tests/s2_17.rkt
|
|
\begin{lstlisting}
|
|
\begin{lstlisting}
|
|
(vector-ref (vector-ref (vector (vector 42)) 0) 0))
|
|
(vector-ref (vector-ref (vector (vector 42)) 0) 0))
|
|
\end{lstlisting}
|
|
\end{lstlisting}
|
|
|
|
|
|
-Next we proceed to discuss the new \code{expose-allocation} pass.
|
|
|
|
|
|
+\section{Shrink}
|
|
|
|
+\label{sec:shrink-R3}
|
|
|
|
+
|
|
|
|
+Recall that the \code{shrink} pass translates the primitive operators
|
|
|
|
+into a smaller set of primitives. Because this pass comes after type
|
|
|
|
+checking, but before the passes that require the type information in
|
|
|
|
+the \code{HasType} AST nodes, the \code{shrink} pass must be modified
|
|
|
|
+to wrap \code{HasType} around each AST node that it generates.
|
|
|
|
+
|
|
|
|
|
|
\section{Expose Allocation}
|
|
\section{Expose Allocation}
|
|
\label{sec:expose-allocation}
|
|
\label{sec:expose-allocation}
|
|
|
|
|
|
The pass \code{expose-allocation} lowers the \code{vector} creation
|
|
The pass \code{expose-allocation} lowers the \code{vector} creation
|
|
form into a conditional call to the collector followed by the
|
|
form into a conditional call to the collector followed by the
|
|
-allocation. We choose to place the \code{expose-allocation} pass
|
|
|
|
-before \code{flatten} because \code{expose-allocation} introduces new
|
|
|
|
-variables, which can be done locally with \code{let}, but \code{let}
|
|
|
|
-is gone after \code{flatten}. In the following, we show the
|
|
|
|
-transformation for the \code{vector} form into let-bindings for the
|
|
|
|
-initializing expressions, by a conditional \code{collect}, an
|
|
|
|
-\code{allocate}, and the initialization of the vector.
|
|
|
|
-(The \itm{len} is the length of the vector and \itm{bytes} is how many
|
|
|
|
-total bytes need to be allocated for the vector, which is 8 for the
|
|
|
|
-tag plus \itm{len} times 8.)
|
|
|
|
|
|
+allocation. We choose to place the \code{expose-allocation} pass
|
|
|
|
+before \code{remove-complex-opera*} because the code generated by
|
|
|
|
+\code{expose-allocation} contains complex operands. We also place
|
|
|
|
+\code{expose-allocation} before \code{explicate-control} because
|
|
|
|
+\code{expose-allocation} introduces new variables using \code{let},
|
|
|
|
+but \code{let} is gone after \code{explicate-control}.
|
|
|
|
|
|
|
|
+The output of \code{expose-allocation} is a language that extends
|
|
|
|
+$R_3$ with the three new forms that we use in the translation of the
|
|
|
|
+\code{vector} form.
|
|
|
|
+\[
|
|
|
|
+\begin{array}{lcl}
|
|
|
|
+ \Exp &::=& \cdots
|
|
|
|
+ \mid (\key{collect} \,\itm{int})
|
|
|
|
+ \mid (\key{allocate} \,\itm{int}\,\itm{type})
|
|
|
|
+ \mid (\key{global-value} \,\itm{name})
|
|
|
|
+\end{array}
|
|
|
|
+\]
|
|
|
|
+The $(\key{collect}\,n)$ form runs the garbage collector, requesting
|
|
|
|
+$n$ bytes. It will become a call to the \code{collect} function in
|
|
|
|
+\code{runtime.c} in \code{select-instructions}. The
|
|
|
|
+$(\key{allocate}\,n\,T)$ form creates a tuple of $n$ elements. The
|
|
|
|
+$T$ parameter is the type of the tuple: \code{(Vector $\Type_1 \ldots
|
|
|
|
+ \Type_n$)} where $\Type_i$ is the type of the $i$th element in the
|
|
|
|
+tuple. The $(\key{global-value}\,\itm{name})$ form reads the value of
|
|
|
|
+a global variable, such as \code{free\_ptr}.
|
|
|
|
+
|
|
|
|
+In the following, we show the transformation for the \code{vector}
|
|
|
|
+form into 1) a sequence of let-bindings for the initializing
|
|
|
|
+expressions, 2) a conditional call to \code{collect}, 3) a call to
|
|
|
|
+\code{allocate}, and 4) the initialization of the vector. In the
|
|
|
|
+following, \itm{len} refers to the length of the vector and
|
|
|
|
+\itm{bytes} is how many total bytes need to be allocated for the
|
|
|
|
+vector, which is 8 for the tag plus \itm{len} times 8.
|
|
\begin{lstlisting}
|
|
\begin{lstlisting}
|
|
(has-type (vector |$e_0 \ldots e_{n-1}$|) |\itm{type}|)
|
|
(has-type (vector |$e_0 \ldots e_{n-1}$|) |\itm{type}|)
|
|
|$\Longrightarrow$|
|
|
|$\Longrightarrow$|
|
|
@@ -5610,34 +5642,12 @@ tag plus \itm{len} times 8.)
|
|
(let ([_ (vector-set! |$v$| |$n-1$| |$x_{n-1}$|)])
|
|
(let ([_ (vector-set! |$v$| |$n-1$| |$x_{n-1}$|)])
|
|
|$v$|) ... )))) ...)
|
|
|$v$|) ... )))) ...)
|
|
\end{lstlisting}
|
|
\end{lstlisting}
|
|
-(In the above, we suppressed all of the \code{has-type} forms in the
|
|
|
|
-output for the sake of readability.) The placement of the initializing
|
|
|
|
-expressions $e_0,\ldots,e_{n-1}$ prior to the \code{allocate} and
|
|
|
|
-the sequence of \code{vector-set!}'s is important, as those expressions
|
|
|
|
-may trigger garbage collection and we do not want an allocated but
|
|
|
|
-uninitialized tuple to be present during a garbage collection.
|
|
|
|
-
|
|
|
|
-The output of \code{expose-allocation} is a language that extends
|
|
|
|
-$R_3$ with the three new forms that we use above in the translation of
|
|
|
|
-\code{vector}.
|
|
|
|
-\[
|
|
|
|
-\begin{array}{lcl}
|
|
|
|
- \Exp &::=& \cdots
|
|
|
|
- \mid (\key{collect} \,\itm{int})
|
|
|
|
- \mid (\key{allocate} \,\itm{int}\,\itm{type})
|
|
|
|
- \mid (\key{global-value} \,\itm{name})
|
|
|
|
-\end{array}
|
|
|
|
-\]
|
|
|
|
-
|
|
|
|
-%% The \code{expose-allocation} inserts an \code{initialize} statement at
|
|
|
|
-%% the beginning of the program which will instruct the garbage collector
|
|
|
|
-%% to set up the FromSpace, ToSpace, and all the global variables. The
|
|
|
|
-%% two arguments of \code{initialize} specify the initial allocated space
|
|
|
|
-%% for the root stack and for the heap.
|
|
|
|
-%
|
|
|
|
-%% The \code{expose-allocation} pass annotates all of the local variables
|
|
|
|
-%% in the \code{program} form with their type.
|
|
|
|
-
|
|
|
|
|
|
+In the above, we suppressed all of the \code{has-type} forms in the
|
|
|
|
+output for the sake of readability. The placement of the initializing
|
|
|
|
+expressions $e_0,\ldots,e_{n-1}$ prior to the \code{allocate} and the
|
|
|
|
+sequence of \code{vector-set!} is important, as those expressions may
|
|
|
|
+trigger garbage collection and we cannot have an allocated but
|
|
|
|
+uninitialized tuple on the heap during a collection.
|
|
|
|
|
|
Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
\code{expose-allocation} pass on our running example.
|
|
\code{expose-allocation} pass on our running example.
|
|
@@ -5652,7 +5662,7 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
(let ([collectret7974
|
|
(let ([collectret7974
|
|
(if (< (+ free_ptr 16) fromspace_end)
|
|
(if (< (+ free_ptr 16) fromspace_end)
|
|
(void)
|
|
(void)
|
|
- (collect 16);
|
|
|
|
|
|
+ (collect 16)
|
|
)])
|
|
)])
|
|
(let ([alloc7971 (allocate 1 (Vector Integer))])
|
|
(let ([alloc7971 (allocate 1 (Vector Integer))])
|
|
(let ([initret7973 (vector-set! alloc7971 0 vecinit7972)])
|
|
(let ([initret7973 (vector-set! alloc7971 0 vecinit7972)])
|
|
@@ -5664,7 +5674,7 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
(let ([collectret7978
|
|
(let ([collectret7978
|
|
(if (< (+ free_ptr 16) fromspace_end)
|
|
(if (< (+ free_ptr 16) fromspace_end)
|
|
(void)
|
|
(void)
|
|
- (collect 16);
|
|
|
|
|
|
+ (collect 16)
|
|
)])
|
|
)])
|
|
(let ([alloc7975 (allocate 1 (Vector (Vector Integer)))])
|
|
(let ([alloc7975 (allocate 1 (Vector (Vector Integer)))])
|
|
(let ([initret7977 (vector-set! alloc7975 0 vecinit7976)])
|
|
(let ([initret7977 (vector-set! alloc7975 0 vecinit7976)])
|
|
@@ -5681,7 +5691,15 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
|
|
|
|
-%\clearpage
|
|
|
|
|
|
+\section{Remove Complex Operands}
|
|
|
|
+\label{sec:remove-complex-opera-R2}
|
|
|
|
+
|
|
|
|
+The new forms \code{collect}, \code{allocate}, and \code{global-value}
|
|
|
|
+should all be treated as complex operands. A new case for
|
|
|
|
+\code{HasType} is needed and the case for \code{Prim} needs to be
|
|
|
|
+handled carefully to prevent the \code{Prim} node from being separated
|
|
|
|
+from its enclosing \code{HasType}.
|
|
|
|
+
|
|
|
|
|
|
\section{Explicate Control and the $C_2$ language}
|
|
\section{Explicate Control and the $C_2$ language}
|
|
\label{sec:explicate-control-r3}
|
|
\label{sec:explicate-control-r3}
|
|
@@ -5694,29 +5712,30 @@ Figure~\ref{fig:expose-alloc-output} shows the output of the
|
|
\begin{array}{lcl}
|
|
\begin{array}{lcl}
|
|
\Atm &::=& \gray{ \INT{\Int} \mid \VAR{\Var} \mid \BOOL{\itm{bool}} }\\
|
|
\Atm &::=& \gray{ \INT{\Int} \mid \VAR{\Var} \mid \BOOL{\itm{bool}} }\\
|
|
\itm{cmp} &::= & \gray{ \key{eq?} \mid \key{<} } \\
|
|
\itm{cmp} &::= & \gray{ \key{eq?} \mid \key{<} } \\
|
|
-\Exp &::= & \gray{ \Atm \mid \READ{} \mid \NEG{\Atm} \mid \ADD{\Atm}{\Atm} }\\
|
|
|
|
|
|
+\Exp &::= & \gray{ \Atm \mid \READ{} } \\
|
|
|
|
+ &\mid& \gray{ \NEG{\Atm} \mid \ADD{\Atm}{\Atm} }\\
|
|
&\mid& \gray{ \UNIOP{\key{not}}{\Atm} \mid \BINOP{\itm{cmp}}{\Atm}{\Atm} } \\
|
|
&\mid& \gray{ \UNIOP{\key{not}}{\Atm} \mid \BINOP{\itm{cmp}}{\Atm}{\Atm} } \\
|
|
- &\mid& (\key{Allocate} \,\itm{int}\,\itm{type})
|
|
|
|
- \mid \BINOP{\key{'vector-ref}}{\Atm}{\Int} \\
|
|
|
|
|
|
+ &\mid& (\key{Allocate} \,\itm{int}\,\itm{type}) \\
|
|
|
|
+ &\mid& \BINOP{\key{'vector-ref}}{\Atm}{\Int} \\
|
|
&\mid& (\key{Prim}~\key{'vector-set!}\,(\key{list}\,\Atm\,\Int\,\Atm))\\
|
|
&\mid& (\key{Prim}~\key{'vector-set!}\,(\key{list}\,\Atm\,\Int\,\Atm))\\
|
|
- &\mid& (\key{GlobalValue} \,\itm{name}) \mid (\key{Void}) \\
|
|
|
|
-\Stmt &::=& \gray{ \ASSIGN{\VAR{\Var}}{\Exp} \mid \RETURN{\Exp} }
|
|
|
|
|
|
+ &\mid& (\key{GlobalValue} \,\itm{name}) \mid (\key{Void})\\
|
|
|
|
+\Stmt &::=& \gray{ \ASSIGN{\VAR{\Var}}{\Exp} }
|
|
\mid (\key{Collect} \,\itm{int}) \\
|
|
\mid (\key{Collect} \,\itm{int}) \\
|
|
-\Tail &::= & \gray{ \RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail} }\\
|
|
|
|
- &\mid& \gray{ \GOTO{\itm{label}} }\\
|
|
|
|
|
|
+\Tail &::= & \gray{ \RETURN{\Exp} \mid \SEQ{\Stmt}{\Tail}
|
|
|
|
+ \mid \GOTO{\itm{label}} } \\
|
|
&\mid& \gray{ \IFSTMT{\BINOP{\itm{cmp}}{\Atm}{\Atm}}{\GOTO{\itm{label}}}{\GOTO{\itm{label}}} }\\
|
|
&\mid& \gray{ \IFSTMT{\BINOP{\itm{cmp}}{\Atm}{\Atm}}{\GOTO{\itm{label}}}{\GOTO{\itm{label}}} }\\
|
|
C_2 & ::= & \PROGRAM{\itm{info}}{\CFG{(\itm{label}\,\key{.}\,\Tail)^{+}}}
|
|
C_2 & ::= & \PROGRAM{\itm{info}}{\CFG{(\itm{label}\,\key{.}\,\Tail)^{+}}}
|
|
\end{array}
|
|
\end{array}
|
|
\]
|
|
\]
|
|
\end{minipage}
|
|
\end{minipage}
|
|
}
|
|
}
|
|
-\caption{The abstract syntax of the $C_2$ language.
|
|
|
|
- TODO: UPDATE}
|
|
|
|
|
|
+\caption{The abstract syntax of $C_2$, an extension of $C_1$
|
|
|
|
+ (Figure~\ref{fig:c1-syntax}).}
|
|
\label{fig:c2-syntax}
|
|
\label{fig:c2-syntax}
|
|
\end{figure}
|
|
\end{figure}
|
|
|
|
|
|
The output of \code{explicate-control} is a program in the
|
|
The output of \code{explicate-control} is a program in the
|
|
-intermediate language $C_2$, whose syntax is defined in
|
|
|
|
|
|
+intermediate language $C_2$, whose abstract syntax is defined in
|
|
Figure~\ref{fig:c2-syntax}. The new forms of $C_2$ include the
|
|
Figure~\ref{fig:c2-syntax}. The new forms of $C_2$ include the
|
|
\key{allocate}, \key{vector-ref}, and \key{vector-set!}, and
|
|
\key{allocate}, \key{vector-ref}, and \key{vector-set!}, and
|
|
\key{global-value} expressions and the \code{collect} statement. The
|
|
\key{global-value} expressions and the \code{collect} statement. The
|
|
@@ -5733,25 +5752,26 @@ the \code{Program} structure. Also recall that we need to know the
|
|
types of all the local variables for purposes of identifying the root
|
|
types of all the local variables for purposes of identifying the root
|
|
set for the garbage collector. Thus, we create a pass named
|
|
set for the garbage collector. Thus, we create a pass named
|
|
\code{uncover-locals} to collect not just the variables but the
|
|
\code{uncover-locals} to collect not just the variables but the
|
|
-variables and their types in the form of an alist. Thanks
|
|
|
|
-to the \code{HasType} nodes, the types are readily available in the
|
|
|
|
-AST. Figure~\ref{fig:uncover-locals-r3} lists the output of the
|
|
|
|
-\code{uncover-locals} pass on the running example.
|
|
|
|
|
|
+variables and their types in the form of an alist. Thanks to the
|
|
|
|
+\code{HasType} nodes, the types are readily available at every
|
|
|
|
+assignment to a variable. We recommend storing the resulting alist in
|
|
|
|
+the $\itm{info}$ field of the program, associated with the
|
|
|
|
+\code{locals} key. Figure~\ref{fig:uncover-locals-r3} lists the output
|
|
|
|
+of the \code{uncover-locals} pass on the running example.
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
\begin{figure}[tbp]
|
|
% tests/s2_17.rkt
|
|
% tests/s2_17.rkt
|
|
\begin{lstlisting}
|
|
\begin{lstlisting}
|
|
-program:
|
|
|
|
locals:
|
|
locals:
|
|
vecinit7976 : '(Vector Integer), tmp7980 : 'Integer,
|
|
vecinit7976 : '(Vector Integer), tmp7980 : 'Integer,
|
|
alloc7975 : '(Vector (Vector Integer)), tmp7983 : 'Integer,
|
|
alloc7975 : '(Vector (Vector Integer)), tmp7983 : 'Integer,
|
|
collectret7974 : 'Void, initret7977 : 'Void,
|
|
collectret7974 : 'Void, initret7977 : 'Void,
|
|
collectret7978 : 'Void, tmp7985 : '(Vector Integer),
|
|
collectret7978 : 'Void, tmp7985 : '(Vector Integer),
|
|
tmp7984 : 'Integer, tmp7979 : 'Integer, tmp7982 : 'Integer,
|
|
tmp7984 : 'Integer, tmp7979 : 'Integer, tmp7982 : 'Integer,
|
|
- alloc7971 : '(Vector Integer), tmp7981 : 'Integer, vecinit7972 : 'Integer,
|
|
|
|
- initret7973 : 'Void,
|
|
|
|
|
|
+ alloc7971 : '(Vector Integer), tmp7981 : 'Integer,
|
|
|
|
+ vecinit7972 : 'Integer, initret7973 : 'Void,
|
|
block7991:
|
|
block7991:
|
|
- (collect 16);
|
|
|
|
|
|
+ (collect 16)
|
|
goto block7989;
|
|
goto block7989;
|
|
block7990:
|
|
block7990:
|
|
collectret7974 = (void);
|
|
collectret7974 = (void);
|
|
@@ -5768,7 +5788,7 @@ block7989:
|
|
else
|
|
else
|
|
goto block7988;
|
|
goto block7988;
|
|
block7988:
|
|
block7988:
|
|
- (collect 16);
|
|
|
|
|
|
+ (collect 16)
|
|
goto block7986;
|
|
goto block7986;
|
|
block7987:
|
|
block7987:
|
|
collectret7978 = (void);
|
|
collectret7978 = (void);
|
|
@@ -6151,30 +6171,30 @@ _conclusion:
|
|
\node (R3-2) at (3,2) {\large $R_3$};
|
|
\node (R3-2) at (3,2) {\large $R_3$};
|
|
\node (R3-3) at (6,2) {\large $R_3$};
|
|
\node (R3-3) at (6,2) {\large $R_3$};
|
|
\node (R3-4) at (9,2) {\large $R_3$};
|
|
\node (R3-4) at (9,2) {\large $R_3$};
|
|
-\node (R3-5) at (12,2) {\large $R_3$};
|
|
|
|
-\node (R3-6) at (12,0) {\large $R_3$};
|
|
|
|
-\node (C2-4) at (3,0) {\large $C_2$};
|
|
|
|
-\node (C2-3) at (6,0) {\large $C_2$};
|
|
|
|
|
|
+\node (R3-5) at (9,0) {\large $R_3$};
|
|
|
|
+\node (R3-6) at (6,0) {\large $R_3$};
|
|
|
|
+\node (C2-4) at (3,-2) {\large $C_2$};
|
|
|
|
+\node (C2-3) at (0,-2) {\large $C_2$};
|
|
|
|
|
|
-\node (x86-2) at (3,-2) {\large $\text{x86}^{*}_2$};
|
|
|
|
-\node (x86-3) at (6,-2) {\large $\text{x86}^{*}_2$};
|
|
|
|
-\node (x86-4) at (9,-2) {\large $\text{x86}^{*}_2$};
|
|
|
|
-\node (x86-5) at (9,-4) {\large $\text{x86}^{\dagger}_2$};
|
|
|
|
|
|
+\node (x86-2) at (3,-4) {\large $\text{x86}^{*}_2$};
|
|
|
|
+\node (x86-3) at (6,-4) {\large $\text{x86}^{*}_2$};
|
|
|
|
+\node (x86-4) at (9,-4) {\large $\text{x86}^{*}_2$};
|
|
|
|
+\node (x86-5) at (9,-6) {\large $\text{x86}^{\dagger}_2$};
|
|
|
|
|
|
-\node (x86-2-1) at (3,-4) {\large $\text{x86}^{*}_2$};
|
|
|
|
-\node (x86-2-2) at (6,-4) {\large $\text{x86}^{*}_2$};
|
|
|
|
|
|
+\node (x86-2-1) at (3,-6) {\large $\text{x86}^{*}_2$};
|
|
|
|
+\node (x86-2-2) at (6,-6) {\large $\text{x86}^{*}_2$};
|
|
|
|
|
|
\path[->,bend left=15] (R3) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R3-2);
|
|
\path[->,bend left=15] (R3) edge [above] node {\ttfamily\footnotesize\color{red} typecheck} (R3-2);
|
|
\path[->,bend left=15] (R3-2) edge [above] node {\ttfamily\footnotesize shrink} (R3-3);
|
|
\path[->,bend left=15] (R3-2) edge [above] node {\ttfamily\footnotesize shrink} (R3-3);
|
|
\path[->,bend left=15] (R3-3) edge [above] node {\ttfamily\footnotesize uniquify} (R3-4);
|
|
\path[->,bend left=15] (R3-3) edge [above] node {\ttfamily\footnotesize uniquify} (R3-4);
|
|
-\path[->,bend left=15] (R3-4) edge [above] node {\ttfamily\footnotesize\color{red} expose-alloc.} (R3-5);
|
|
|
|
-\path[->,bend left=15] (R3-5) edge [right] node {\ttfamily\footnotesize remove-complex.} (R3-6);
|
|
|
|
-\path[->,bend right=20] (R3-6) edge [above] node {\ttfamily\footnotesize explicate-control} (C2-3);
|
|
|
|
-\path[->,bend right=15] (C2-3) edge [above] node {\ttfamily\footnotesize\color{red} uncover-locals} (C2-4);
|
|
|
|
-\path[->,bend right=15] (C2-4) edge [left] node {\ttfamily\footnotesize\color{red} select-instr.} (x86-2);
|
|
|
|
-\path[->,bend left=15] (x86-2) edge [right] node {\ttfamily\footnotesize uncover-live} (x86-2-1);
|
|
|
|
-\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize \color{red}build-inter.} (x86-2-2);
|
|
|
|
-\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize allocate-reg.} (x86-3);
|
|
|
|
|
|
+\path[->,bend left=15] (R3-4) edge [right] node {\ttfamily\footnotesize\color{red} expose-alloc.} (R3-5);
|
|
|
|
+\path[->,bend left=15] (R3-5) edge [below] node {\ttfamily\footnotesize remove-complex.} (R3-6);
|
|
|
|
+\path[->,bend right=20] (R3-6) edge [left] node {\ttfamily\footnotesize explicate-control} (C2-3);
|
|
|
|
+\path[->,bend right=15] (C2-3) edge [below] node {\ttfamily\footnotesize\color{red} uncover-locals} (C2-4);
|
|
|
|
+\path[->,bend left=15] (C2-4) edge [right] node {\ttfamily\footnotesize\color{red} select-instr.} (x86-2);
|
|
|
|
+\path[->,bend right=15] (x86-2) edge [left] node {\ttfamily\footnotesize uncover-live} (x86-2-1);
|
|
|
|
+\path[->,bend right=15] (x86-2-1) edge [below] node {\ttfamily\footnotesize\color{red} build-inter.} (x86-2-2);
|
|
|
|
+\path[->,bend right=15] (x86-2-2) edge [right] node {\ttfamily\footnotesize\color{red} allocate-reg.} (x86-3);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
|
|
\path[->,bend left=15] (x86-3) edge [above] node {\ttfamily\footnotesize patch-instr.} (x86-4);
|
|
\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize\color{red} print-x86} (x86-5);
|
|
\path[->,bend left=15] (x86-4) edge [right] node {\ttfamily\footnotesize\color{red} print-x86} (x86-5);
|
|
\end{tikzpicture}
|
|
\end{tikzpicture}
|