3 年之前 · bf3f3f5502
--- a/book.bib
+++ b/book.bib
@@ -1,3 +1,93 @@
 
															+@article{Logothetis:1981,
														
 
															+author = {Logothetis, George and Mishra, Prateek},
														
 
															+title = {Compiling short-circuit boolean expressions in one pass},
														
 
															+journal = {Software: Practice and Experience},
														
 
															+volume = {11},
														
 
															+number = {11},
														
 
															+pages = {1197-1214},
														
 
															+keywords = {Short-circuit evaluation, One-pass compilation, Boolean expressions, Code generation},
														
 
															+doi = {https://doi.org/10.1002/spe.4380111104},
														
 
															+url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/spe.4380111104},
														
 
															+eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/spe.4380111104},
														
 
															+abstract = {Abstract We present a very simple scheme for compiling boolean expressions in the short-circuit manner in one pass. The generated code is of very high quality and avoids most inefficiencies commonly associated with one-pass code generation. In particular, redundant conditional and unconditional branches are kept to a minimum. The scheme is general enough to compile the boolean expressions of a typical high-level language such as Pascal. It is presented in a format suited for syntax-directed translation and can be used with both top-down and bottom-up parsing.},
														
 
															+year = {1981}
														
 
															+}
														
 
															+
														
 
															+@article{Clarke:1989,
														
 
															+author = {Clarke, Keith},
														
 
															+title = {One-Pass Code Generation Using Continuations},
														
 
															+year = {1989},
														
 
															+issue_date = {Dec. 1989},
														
 
															+publisher = {John Wiley & Sons, Inc.},
														
 
															+address = {USA},
														
 
															+volume = {19},
														
 
															+number = {12},
														
 
															+issn = {0038-0644},
														
 
															+journal = {Softw. Pract. Exper.},
														
 
															+month = nov,
														
 
															+pages = {1175–1192},
														
 
															+numpages = {18}
														
 
															+}
														
 
															+
														
 
															+@article{Moggi:1991in,
														
 
															+	address = {Duluth, MN, USA},
														
 
															+	annote = {Journal version of the 1989 Computational Lambda-Calculus and Monads},
														
 
															+	author = {Eugenio Moggi},
														
 
															+	date-added = {2005-11-25 10:58:45 -0600},
														
 
															+	date-modified = {2010-12-17 10:23:11 -0700},
														
 
															+	issn = {0890-5401},
														
 
															+	journal = {Inf. Comput.},
														
 
															+	number = {1},
														
 
															+	pages = {55--92},
														
 
															+	publisher = {Academic Press, Inc.},
														
 
															+	title = {Notions of computation and monads},
														
 
															+	volume = {93},
														
 
															+	year = {1991},
														
 
															+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWGljOTEucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACE=},
														
 
															+	Bdsk-Url-1 = {http://dx.doi.org/10.1016/0890-5401(91)90052-4}}
														
 
															+
														
 
															+@article{Flatt:2019tb,
														
 
															+	abstract = {We rebuilt Racket on Chez Scheme, and it works well---as long as we're allowed
														
 
															+a few patches to Chez Scheme. DrRacket runs, the Racket distribution can build itself,
														
 
															+and nearly all of the core Racket test suite passes. Maintainability and performance
														
 
															+of the resulting implementation are good, although some work remains to improve end-to-end
														
 
															+performance. The least predictable part of our effort was how big the differences
														
 
															+between Racket and Chez Scheme would turn out to be and how we would manage those
														
 
															+differences. We expect Racket on Chez Scheme to become the main Racket implementation,
														
 
															+and we encourage other language implementers to consider Chez Scheme as a target virtual
														
 
															+machine.},
														
 
															+	address = {New York, NY, USA},
														
 
															+	articleno = {78},
														
 
															+	author = {Flatt, Matthew and Derici, Caner and Dybvig, R. Kent and Keep, Andrew W. and Massaccesi, Gustavo E. and Spall, Sarah and Tobin-Hochstadt, Sam and Zeppieri, Jon},
														
 
															+	date-added = {2021-10-21 14:03:11 -0400},
														
 
															+	date-modified = {2021-10-21 14:03:16 -0400},
														
 
															+	doi = {10.1145/3341642},
														
 
															+	issue_date = {August 2019},
														
 
															+	journal = {Proc. ACM Program. Lang.},
														
 
															+	keywords = {Racket, Scheme},
														
 
															+	month = jul,
														
 
															+	number = {ICFP},
														
 
															+	numpages = {15},
														
 
															+	publisher = {Association for Computing Machinery},
														
 
															+	title = {Rebuilding Racket on Chez Scheme (Experience Report)},
														
 
															+	url = {https://doi.org/10.1145/3341642},
														
 
															+	volume = {3},
														
 
															+	year = {2019},
														
 
															+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWzMzNDE2NDIucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACQ=},
														
 
															+	Bdsk-Url-1 = {https://doi.org/10.1145/3341642}}
														
 
															+
														
 
															+@incollection{Danvy:2003fk,
														
 
															+	author = {Danvy, Olivier},
														
 
															+	booktitle = {Compiler Construction},
														
 
															+	date-added = {2013-01-02 15:56:48 -0700},
														
 
															+	date-modified = {2013-01-02 15:58:19 -0700},
														
 
															+	pages = {77-89},
														
 
															+	series = {LNCS},
														
 
															+	title = {A New One-Pass Transformation into Monadic Normal Form},
														
 
															+	volume = {2622},
														
 
															+	year = {2003},
														
 
															+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoXxA0RGFudnkyMDAzX0NoYXB0ZXJfQU5ld09uZS1QYXNzVHJhbnNmb3JtYXRpb25JbnRvLnBkZggLGAAAAAAAAAEBAAAAAAAAAAMAAAAAAAAAAAAAAAAAAABP},
														
 
															+	Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-36579-6_6}}
														
 
															 @article{PeytonJones:1998,
														
 
															 	author = {Simon L. {Peyton Jones} and Andr{\'e}L.M. Santos},
														
--- a/book.tex
+++ b/book.tex
@@ -3034,14 +3034,31 @@ print(tmp_1)
 
															 \label{fig:Lvar-anf-syntax}
														
 
															 \end{figure}
														
 
															-Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output of
														
 
															-this pass, the language \LangVarANF{}. The only difference is that
														
 
															+Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output
														
 
															+of this pass, the language \LangVarANF{}. The only difference is that
														
 
															 operator arguments are restricted to be atomic expressions that are
														
 
															 defined by the \Atm{} non-terminal. In particular, integer constants
														
 
															-and variables are atomic. In the literature, restricting arguments to
														
 
															-be atomic expressions is one of the ideas in \emph{administrative
														
 
															-normal form}, or ANF for short~\citep{Danvy:1991fk,Flanagan:1993cg}.
														
 
															+and variables are atomic. This restriction brings us closer to what is
														
 
															+known as a \emph{three-address code}~\citep{Aho:1986qf} language.
														
 
															+
														
 
															+The atomic expressions are pure (they do not cause side-effects or
														
 
															+depend on them) whereas complex expressions may have side effects,
														
 
															+such as \READ{}.  A language with this separation between pure versus
														
 
															+side-effecting expressions is said to be in monadic normal
														
 
															+form~\citep{Moggi:1991in,Danvy:2003fk} which explains the \textit{mon}
														
 
															+in \LangVarANF{}. An important invariant of the
														
 
															+\code{remove\_complex\_operands} pass is that the relative ordering
														
 
															+among complex expressions is not changed, but the relative ordering
														
 
															+between atomic expressions and complex expressions can change and
														
 
															+often does. The reason that these changes are behaviour preserving is
														
 
															+that the atomic expressions are pure.
														
 
															+
														
 
															+Another well-known form is the \emph{administrative normal form}
														
 
															+(ANF)~\citep{Danvy:1991fk,Flanagan:1993cg}.
														
 
															 \index{subject}{administrative normal form} \index{subject}{ANF}
														
 
															+%
														
 
															+The \LangVarANF{} language is not quite in ANF because we allow the
														
 
															+right-hand side of a \code{let} to be a complex expression.
														
 
															 {\if\edition\racketEd
														
 
															 We recommend implementing this pass with two mutually recursive
														
@@ -9267,11 +9284,13 @@ blocks on several test programs.
 
															 \label{sec:cond-further-reading}
														
 
															 The algorithm for the \code{explicate\_control} pass comes from the
														
 
															-course notes of \citet{Dybvig:2010aa}. The use of lazy evaluation in
														
 
															-Section~\ref{sec:opt-jumps} to optimize basic blocks is new.  There
														
 
															-are algorithms similar to \code{explicate\_control} in the literature,
														
 
															-such as the case-of-case transformation of \citet{PeytonJones:1998}.
														
 
															-
														
 
															+course notes of \citet{Dybvig:2010aa} and it has several similarities
														
 
															+to an algorithm of \citet{Danvy:2003fk}. The use of lazy evaluation in
														
 
															+Section~\ref{sec:opt-jumps} to prevent the generation of unused basic
														
 
															+blocks appears to be new. The treatment of conditionals in the
														
 
															+\code{explicate\_control} pass is similar to the case-of-case
														
 
															+transformation of \citet{PeytonJones:1998} and to short-cut boolean
														
 
															+evaluation~\citep{Logothetis:1981,Aho:1986qf,Clarke:1989,Danvy:2003fk}.
														
 
															 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
														
 
															 \chapter{Loops and Dataflow Analysis}
														
@@ -9730,12 +9749,13 @@ we are stuck.
 
															 The way out of this impasse is to realize that we can compute an
														
 
															 under-approximation of the live-before set by starting with empty
														
 
															 live-after sets.  By \emph{under-approximation}, we mean that the set
														
 
															-only contains variables that are really live, but it may be missing
														
 
															-some.  Next, the under-approximations for each block can be improved
														
 
															-by 1) updating the live-after set for each block using the approximate
														
 
															-live-before sets from the other blocks and 2) perform liveness
														
 
															-analysis again on each block.  In fact, by iterating this process, the
														
 
															-under-approximations eventually become the correct solutions!
														
 
															+only contains variables that are live for some execution of the
														
 
															+program, but the set may be missing some variables.  Next, the
														
 
															+under-approximations for each block can be improved by 1) updating the
														
 
															+live-after set for each block using the approximate live-before sets
														
 
															+from the other blocks and 2) perform liveness analysis again on each
														
 
															+block.  In fact, by iterating this process, the under-approximations
														
 
															+eventually become the correct solutions!
														
 
															 %
														
 
															 This approach of iteratively analyzing a control-flow graph is
														
 
															 applicable to many static analysis problems and goes by the name
														
@@ -9749,10 +9769,7 @@ following mapping from label names to sets of locations (variables and
 
															 registers).
														
 
															 \begin{center}
														
 
															 \begin{lstlisting}
														
 
															-mainstart: {}
														
 
															-block5: {}
														
 
															-block7: {}
														
 
															-block8: {}
														
 
															+mainstart: {}, block5: {}, block7: {}, block8: {}
														
 
															 \end{lstlisting}
														
 
															 \end{center}
														
 
															 Using the above live-before approximations, we determine the
														
@@ -9761,10 +9778,7 @@ block.  This produces our next approximation $m_1$ of the live-before
 
															 sets.
														
 
															 \begin{center}
														
 
															   \begin{lstlisting}
														
 
															-mainstart: {}
														
 
															-block5: {i}
														
 
															-block7: {i, sum}
														
 
															-block8: {rsp, sum}
														
 
															+mainstart: {}, block5: {i}, block7: {i, sum}, block8: {rsp, sum}
														
 
															 \end{lstlisting}
														
 
															 \end{center}
														
@@ -9781,10 +9795,7 @@ So the liveness analysis for \code{block7} remains \code{\{i,
 
															 the live-before sets.
														
 
															 \begin{center}
														
 
															   \begin{lstlisting}
														
 
															-mainstart: {}
														
 
															-block5: {i, rsp, sum}
														
 
															-block7: {i, sum}
														
 
															-block8: {rsp, sum}
														
 
															+mainstart: {}, block5: {i, rsp, sum}, block7: {i, sum}, block8: {rsp, sum}
														
 
															 \end{lstlisting}
														
 
															 \end{center}
														
 
															 In the preceding iteration, only \code{block5} changed, so we can
														
@@ -9794,14 +9805,11 @@ for \code{mainstart} and \code{block7} are updated to include
 
															 \code{rsp}, yielding the following approximation $m_3$.
														
 
															 \begin{center}
														
 
															   \begin{lstlisting}
														
 
															-mainstart: {rsp}
														
 
															-block5: {i, rsp, sum}
														
 
															-block7: {i, rsp, sum}
														
 
															-block8: {rsp, sum}
														
 
															+mainstart: {rsp}, block5: {i,rsp,sum}, block7: {i,rsp,sum}, block8: {rsp,sum}
														
 
															 \end{lstlisting}
														
 
															 \end{center}
														
 
															 Because \code{block7} changed, we analyze \code{block5} once more, but
														
 
															-its live-before set remains \code{\{ i, rsp, sum \}}.  At this point
														
 
															+its live-before set remains \code{\{i,rsp,sum\}}.  At this point
														
 
															 our approximations have converged, so $m_3$ is the solution.
														
 
															 This iteration process is guaranteed to converge to a solution by the
														
@@ -9836,9 +9844,9 @@ two mappings $m_i$ and $m_j$, $m_i \sqsubseteq_M m_j$ when $m_i(\ell)
 
															 bottom element of $M$ is the mapping $\bot_M$ that sends every label
														
 
															 to the empty set, i.e., $\bot_M(\ell) = \emptyset$.
														
 
															-We can think of one iteration of liveness analysis as being a function
														
 
															-$f$ on the lattice $M$. It takes a mapping as input and computes a new
														
 
															-mapping.
														
 
															+We can think of one iteration of liveness analysis applied to the
														
 
															+whole program as being a function $f$ on the lattice $M$. It takes a
														
 
															+mapping as input and computes a new mapping.
														
 
															 \[
														
 
															    f(m_i) = m_{i+1}
														
 
															 \]
														
@@ -9863,7 +9871,7 @@ follows.\index{subject}{Kleene Fixed-Point Theorem}
 
															   \sqsubseteq f^n(\bot) \sqsubseteq \cdots
														
 
															 \]
														
 
															 When a lattice contains only finitely-long ascending chains, then
														
 
															-every Kleene chain tops out at some fixed point after a number of
														
 
															+every Kleene chain tops out at some fixed point after some number of
														
 
															 iterations of $f$.
														
 
															 \[
														
 
															 \bot \sqsubseteq f(\bot) \sqsubseteq f(f(\bot)) \sqsubseteq \cdots
														
@@ -9894,6 +9902,13 @@ state. If the output differs from the previous state for this block,
 
															 the mapping for this block is updated and its successor nodes are
														
 
															 pushed onto the work list.
														
 
															+Note that the \code{analyze\_dataflow} function is formulated as a
														
 
															+\emph{forward} dataflow analysis, that is, the inputs to the transfer
														
 
															+function come from the predecessor nodes in the control-flow
														
 
															+graph. However, liveness analysis is a \emph{backward} dataflow
														
 
															+analysis, so in that case one must supply the \code{analyze\_dataflow}
														
 
															+function with the transpose of the control-flow graph.
														
 
															+
														
 
															 \begin{figure}[tb]
														
 
															 {\if\edition\racketEd    
														
 
															 \begin{lstlisting}
														
@@ -10016,19 +10031,24 @@ modification to \code{remove\_complex\_operands} to handle
 
															 \code{uncover-get!}, that we discuss in
														
 
															 Section~\ref{sec:uncover-get-bang}.
														
 
															-As an aside, this problematic interaction between \code{set!}  and
														
 
															-\code{remove\_complex\_operands} is particular to Racket and not its
														
 
															-predecessor, the Scheme language. The key difference is that Scheme
														
 
															-does not specify an order of evaluation for the arguments of an
														
 
															-operator or function call. Thus, a compiler for Scheme is free to
														
 
															-choose any ordering: both \code{42} and \code{80} would be correct
														
 
															-results for the example program.
														
 
															+As an aside, this problematic interaction between \code{set!} and the
														
 
															+pass \code{remove\_complex\_operands} is particular to Racket and not
														
 
															+its predecessor, the Scheme language. The key difference is that
														
 
															+Scheme does not specify an order of evaluation for the arguments of an
														
 
															+operator or function call~\citep{SPERBER:2009aa}. Thus, a compiler for
														
 
															+Scheme is free to choose any ordering: both \code{42} and \code{80}
														
 
															+would be correct results for the example program. Interestingly,
														
 
															+Racket is implemented on top of the Chez Scheme
														
 
															+compiler~\citep{Dybvig:2006aa} and an approach similar to the one
														
 
															+presented in this section (using extra \code{let} bindings to control
														
 
															+the order of evaluation) is used in the translation from Racket to
														
 
															+Scheme~\citep{Flatt:2019tb}.
														
 
															 \fi} % racket
														
 
															 Having discussed the complications that arise from adding support for
														
 
															-assignment and loops, we turn to discussing the significant changes to
														
 
															-existing passes.
														
 
															+assignment and loops, we turn to discussing the individual compilation
														
 
															+passes.
														
 
															 {\if\edition\racketEd
														
@@ -10036,8 +10056,9 @@ existing passes.
 
															 \label{sec:uncover-get-bang}
														
 
															 The goal of this pass it to mark uses of mutable variables so that
														
 
															-\code{remove\_complex\_operands} can treat them as complex
														
 
															-expressions. So the first step is to collect all the mutable
														
 
															+\code{remove\_complex\_operands} can treat them as complex expressions
														
 
															+and thereby preserve their ordering relative to the side-effects in
														
 
															+other operands. So the first step is to collect all the mutable
														
 
															 variables. We recommend creating an auxilliary function for this,
														
 
															 named \code{collect-set!}, that recursively traverses expressions,
														
 
															 returning a set of all variables that occur on the left-hand side of a
														
--- a/defs.tex
+++ b/defs.tex
@@ -11,12 +11,14 @@
 
															 \newcommand{\LangVar}{$\Lang_{\mathsf{Var}}$} % R1
														
 
															 \newcommand{\LangVarM}{\Lang_{\mathsf{Var}}}
														
 
															-\newcommand{\LangVarANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}}
														
 
															-\newcommand{\LangVarANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}
														
 
															+\newcommand{\RCO}{\mathit{mon}} % output of remove-complex-opera*
														
 
															+
														
 
															+\newcommand{\LangVarANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Var}}}}
														
 
															+\newcommand{\LangVarANFM}{\Lang^{\RCO}_{\mathsf{Var}}}
														
 
															 \newcommand{\LangIf}{$\Lang_{\mathsf{If}}$} %R2
														
 
															 \newcommand{\LangIfM}{\ensuremath{\Lang_{\mathsf{If}}}} %R2
														
 
															-\newcommand{\LangIfANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{if}}}} %R2
														
 
															+\newcommand{\LangIfANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{if}}}} %R2
														
 
															 \newcommand{\LangCVar}{$\CLang_{\mathsf{Var}}$} % C0
														
 
															 \newcommand{\LangCVarM}{\CLang_{\mathsf{Var}}} % C0
														
@@ -27,14 +29,14 @@
 
															 \newcommand{\LangStruct}{\ensuremath{\Lang^{\mathsf{Struct}}_{\mathsf{Tup}}}} %\Lang^s3
														
 
															 \newcommand{\LangCVec}{$\CLang_{\mathsf{Tup}}$} %C2
														
 
															 \newcommand{\LangCVecM}{\CLang_{\mathsf{Tup}}} %C2
														
 
															-\newcommand{\LangVecANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}}} %R3
														
 
															-\newcommand{\LangVecANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}} %R3
														
 
															+\newcommand{\LangVecANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Tup}}}} %R3
														
 
															+\newcommand{\LangVecANFM}{\Lang^{\RCO}_{\mathsf{Tup}}} %R3
														
 
															 \newcommand{\LangAlloc}{\ensuremath{\Lang_{\mathsf{Alloc}}}} %R3'
														
 
															 \newcommand{\LangFun}{$\Lang_{\mathsf{Fun}}$} %R4
														
 
															 \newcommand{\LangFunM}{\Lang_{\mathsf{Fun}}} %R4
														
 
															 \newcommand{\LangCFun}{$\CLang_{\mathsf{Fun}}$} %C3
														
 
															 \newcommand{\LangCFunM}{\CLang_{\mathsf{Fun}}} %C3
														
 
															-\newcommand{\LangFunANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Fun}}}} %R4
														
 
															+\newcommand{\LangFunANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Fun}}}} %R4
														
 
															 \newcommand{\LangFunRef}{$\Lang_{\mathsf{FunRef}}$} %F1
														
 
															 \newcommand{\LangFunRefM}{\Lang_{\mathsf{FunRef}}} %F1
														
 
															 \newcommand{\LangFunRefAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{FunRef}}}} %R'4
														
@@ -57,7 +59,7 @@
 
															 \newcommand{\LangLoopAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{While}}}} %R'8
														
 
															 \newcommand{\LangCLoop}{$\CLang_{\circlearrowleft}$} %C7
														
 
															 \newcommand{\LangCLoopM}{\CLang_{\circlearrowleft}} %C7
														
 
															-\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{While}}}} %R8
														
 
															+\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{While}}}} %R8
														
 
															 \newcommand{\LangArray}{\ensuremath{\Lang^{\mathsf{Vecof}}_{\mathsf{While}}}} %\Lang^s3
														
 
															 \newcommand{\LangGrad}{$\Lang_{\mathsf{?}}$} %R9
														
 
															 \newcommand{\LangGradM}{\Lang_{\mathsf{?}}} %R9