3 年之前 · bf3f3f5502
--- a/book.bib
+++ b/book.bib
@@ -1,3 +1,93 @@
 
				+@article{Logothetis:1981,
			
 
				+author = {Logothetis, George and Mishra, Prateek},
			
 
				+title = {Compiling short-circuit boolean expressions in one pass},
			
 
				+journal = {Software: Practice and Experience},
			
 
				+volume = {11},
			
 
				+number = {11},
			
 
				+pages = {1197--1214},
			
 
				+keywords = {Short-circuit evaluation, One-pass compilation, Boolean expressions, Code generation},
			
 
				+doi = {10.1002/spe.4380111104},
			
 
				+url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/spe.4380111104},
			
 
				+eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/spe.4380111104},
			
 
				+abstract = {We present a very simple scheme for compiling boolean expressions in the short-circuit manner in one pass. The generated code is of very high quality and avoids most inefficiencies commonly associated with one-pass code generation. In particular, redundant conditional and unconditional branches are kept to a minimum. The scheme is general enough to compile the boolean expressions of a typical high-level language such as Pascal. It is presented in a format suited for syntax-directed translation and can be used with both top-down and bottom-up parsing.},
			
 
				+year = {1981}
			
 
				+}
			
 
				+
			
 
				+@article{Clarke:1989,
			
 
				+author = {Clarke, Keith},
			
 
				+title = {One-Pass Code Generation Using Continuations},
			
 
				+year = {1989},
			
 
				+issue_date = {Dec. 1989},
			
 
				+publisher = {John Wiley \& Sons, Inc.},
			
 
				+address = {USA},
			
 
				+volume = {19},
			
 
				+number = {12},
			
 
				+issn = {0038-0644},
			
 
				+journal = {Softw. Pract. Exper.},
			
 
				+month = dec,
			
 
				+pages = {1175--1192},
			
 
				+numpages = {18}
			
 
				+}
			
 
				+
			
 
				+@article{Moggi:1991in,
			
 
				+	address = {Duluth, MN, USA},
			
 
				+	annote = {Journal version of the 1989 Computational Lambda-Calculus and Monads},
			
 
				+	author = {Eugenio Moggi},
			
 
				+	date-added = {2005-11-25 10:58:45 -0600},
			
 
				+	date-modified = {2010-12-17 10:23:11 -0700},
			
 
				+	issn = {0890-5401},
			
 
				+	journal = {Inf. Comput.},
			
 
				+	number = {1},
			
 
				+	pages = {55--92},
			
 
				+	publisher = {Academic Press, Inc.},
			
 
				+	title = {Notions of computation and monads},
			
 
				+	volume = {93},
			
 
				+	year = {1991},
			
 
				+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWGljOTEucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACE=},
			
 
				+	Bdsk-Url-1 = {http://dx.doi.org/10.1016/0890-5401(91)90052-4}}
			
 
				+
			
 
				+@article{Flatt:2019tb,
			
 
				+	abstract = {We rebuilt Racket on Chez Scheme, and it works well---as long as we're allowed
			
 
				+a few patches to Chez Scheme. DrRacket runs, the Racket distribution can build itself,
			
 
				+and nearly all of the core Racket test suite passes. Maintainability and performance
			
 
				+of the resulting implementation are good, although some work remains to improve end-to-end
			
 
				+performance. The least predictable part of our effort was how big the differences
			
 
				+between Racket and Chez Scheme would turn out to be and how we would manage those
			
 
				+differences. We expect Racket on Chez Scheme to become the main Racket implementation,
			
 
				+and we encourage other language implementers to consider Chez Scheme as a target virtual
			
 
				+machine.},
			
 
				+	address = {New York, NY, USA},
			
 
				+	articleno = {78},
			
 
				+	author = {Flatt, Matthew and Derici, Caner and Dybvig, R. Kent and Keep, Andrew W. and Massaccesi, Gustavo E. and Spall, Sarah and Tobin-Hochstadt, Sam and Zeppieri, Jon},
			
 
				+	date-added = {2021-10-21 14:03:11 -0400},
			
 
				+	date-modified = {2021-10-21 14:03:16 -0400},
			
 
				+	doi = {10.1145/3341642},
			
 
				+	issue_date = {August 2019},
			
 
				+	journal = {Proc. ACM Program. Lang.},
			
 
				+	keywords = {Racket, Scheme},
			
 
				+	month = jul,
			
 
				+	number = {ICFP},
			
 
				+	numpages = {15},
			
 
				+	publisher = {Association for Computing Machinery},
			
 
				+	title = {Rebuilding Racket on Chez Scheme (Experience Report)},
			
 
				+	url = {https://doi.org/10.1145/3341642},
			
 
				+	volume = {3},
			
 
				+	year = {2019},
			
 
				+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWzMzNDE2NDIucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACQ=},
			
 
				+	Bdsk-Url-1 = {https://doi.org/10.1145/3341642}}
			
 
				+
			
 
				+@incollection{Danvy:2003fk,
			
 
				+	author = {Danvy, Olivier},
			
 
				+	booktitle = {Compiler Construction},
			
 
				+	date-added = {2013-01-02 15:56:48 -0700},
			
 
				+	date-modified = {2013-01-02 15:58:19 -0700},
			
 
				+	pages = {77--89},
			
 
				+	series = {LNCS},
			
 
				+	title = {A New One-Pass Transformation into Monadic Normal Form},
			
 
				+	volume = {2622},
			
 
				+	year = {2003},
			
 
				+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoXxA0RGFudnkyMDAzX0NoYXB0ZXJfQU5ld09uZS1QYXNzVHJhbnNmb3JtYXRpb25JbnRvLnBkZggLGAAAAAAAAAEBAAAAAAAAAAMAAAAAAAAAAAAAAAAAAABP},
			
 
				+	Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-36579-6_6}}
			
 
				 
			
 
				 @article{PeytonJones:1998,
			
 
				 	author = {Simon L. {Peyton Jones} and Andr{\'e}L.M. Santos},
			
--- a/book.tex
+++ b/book.tex
@@ -3034,14 +3034,31 @@ print(tmp_1)
 
				 \label{fig:Lvar-anf-syntax}
			
 
				 \end{figure}
			
 
				 
			
 
				-Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output of
			
 
				-this pass, the language \LangVarANF{}. The only difference is that
			
 
				+Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output
			
 
				+of this pass, the language \LangVarANF{}. The only difference is that
			
 
				 operator arguments are restricted to be atomic expressions that are
			
 
				 defined by the \Atm{} non-terminal. In particular, integer constants
			
 
				-and variables are atomic. In the literature, restricting arguments to
			
 
				-be atomic expressions is one of the ideas in \emph{administrative
			
 
				-normal form}, or ANF for short~\citep{Danvy:1991fk,Flanagan:1993cg}.
			
 
				+and variables are atomic. This restriction brings us closer to what is
			
 
				+known as a \emph{three-address code}~\citep{Aho:1986qf} language.
			
 
				+
			
 
				+The atomic expressions are pure (they do not cause side effects or
			
 
				+depend on them) whereas complex expressions may have side effects,
			
 
				+such as \READ{}.  A language with this separation between pure and
			
 
				+side-effecting expressions is said to be in monadic normal
			
 
				+form~\citep{Moggi:1991in,Danvy:2003fk}, which explains the \textit{mon}
			
 
				+in \LangVarANF{}. An important invariant of the
			
 
				+\code{remove\_complex\_operands} pass is that the relative ordering
			
 
				+among complex expressions is not changed, but the relative ordering
			
 
				+between atomic expressions and complex expressions can change and
			
 
				+often does. The reason that these changes are behavior-preserving is
			
 
				+that the atomic expressions are pure.
			
 
				+
			
 
				+Another well-known form is the \emph{administrative normal form}
			
 
				+(ANF)~\citep{Danvy:1991fk,Flanagan:1993cg}.
			
 
				 \index{subject}{administrative normal form} \index{subject}{ANF}
			
 
				+%
			
 
				+The \LangVarANF{} language is not quite in ANF because we allow the
			
 
				+right-hand side of a \code{let} to be a complex expression.
			
 
				 
			
 
				 {\if\edition\racketEd
			
 
				 We recommend implementing this pass with two mutually recursive
			
@@ -9267,11 +9284,13 @@ blocks on several test programs.
 
				 \label{sec:cond-further-reading}
			
 
				 
			
 
				 The algorithm for the \code{explicate\_control} pass comes from the
			
 
				-course notes of \citet{Dybvig:2010aa}. The use of lazy evaluation in
			
 
				-Section~\ref{sec:opt-jumps} to optimize basic blocks is new.  There
			
 
				-are algorithms similar to \code{explicate\_control} in the literature,
			
 
				-such as the case-of-case transformation of \citet{PeytonJones:1998}.
			
 
				-
			
 
				+course notes of \citet{Dybvig:2010aa} and it has several similarities
			
 
				+to an algorithm of \citet{Danvy:2003fk}. The use of lazy evaluation in
			
 
				+Section~\ref{sec:opt-jumps} to prevent the generation of unused basic
			
 
				+blocks appears to be new. The treatment of conditionals in the
			
 
				+\code{explicate\_control} pass is similar to the case-of-case
			
 
				+transformation of \citet{PeytonJones:1998} and to short-circuit boolean
			
 
				+evaluation~\citep{Logothetis:1981,Aho:1986qf,Clarke:1989,Danvy:2003fk}.
			
 
				 
			
 
				 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
			
 
				 \chapter{Loops and Dataflow Analysis}
			
@@ -9730,12 +9749,13 @@ we are stuck.
 
				 The way out of this impasse is to realize that we can compute an
			
 
				 under-approximation of the live-before set by starting with empty
			
 
				 live-after sets.  By \emph{under-approximation}, we mean that the set
			
 
				-only contains variables that are really live, but it may be missing
			
 
				-some.  Next, the under-approximations for each block can be improved
			
 
				-by 1) updating the live-after set for each block using the approximate
			
 
				-live-before sets from the other blocks and 2) perform liveness
			
 
				-analysis again on each block.  In fact, by iterating this process, the
			
 
				-under-approximations eventually become the correct solutions!
			
 
				+only contains variables that are live for some execution of the
			
 
				+program, but the set may be missing some variables.  Next, the
			
 
				+under-approximations for each block can be improved by 1) updating the
			
 
				+live-after set for each block using the approximate live-before sets
			
 
				+from the other blocks and 2) performing liveness analysis again on each
			
 
				+block.  In fact, by iterating this process, the under-approximations
			
 
				+eventually become the correct solutions!
			
 
				 %
			
 
				 This approach of iteratively analyzing a control-flow graph is
			
 
				 applicable to many static analysis problems and goes by the name
			
@@ -9749,10 +9769,7 @@ following mapping from label names to sets of locations (variables and
 
				 registers).
			
 
				 \begin{center}
			
 
				 \begin{lstlisting}
			
 
				-mainstart: {}
			
 
				-block5: {}
			
 
				-block7: {}
			
 
				-block8: {}
			
 
				+mainstart: {}, block5: {}, block7: {}, block8: {}
			
 
				 \end{lstlisting}
			
 
				 \end{center}
			
 
				 Using the above live-before approximations, we determine the
			
@@ -9761,10 +9778,7 @@ block.  This produces our next approximation $m_1$ of the live-before
 
				 sets.
			
 
				 \begin{center}
			
 
				   \begin{lstlisting}
			
 
				-mainstart: {}
			
 
				-block5: {i}
			
 
				-block7: {i, sum}
			
 
				-block8: {rsp, sum}
			
 
				+mainstart: {}, block5: {i}, block7: {i, sum}, block8: {rsp, sum}
			
 
				 \end{lstlisting}
			
 
				 \end{center}
			
 
				 
			
@@ -9781,10 +9795,7 @@ So the liveness analysis for \code{block7} remains \code{\{i,
 
				 the live-before sets.
			
 
				 \begin{center}
			
 
				   \begin{lstlisting}
			
 
				-mainstart: {}
			
 
				-block5: {i, rsp, sum}
			
 
				-block7: {i, sum}
			
 
				-block8: {rsp, sum}
			
 
				+mainstart: {}, block5: {i, rsp, sum}, block7: {i, sum}, block8: {rsp, sum}
			
 
				 \end{lstlisting}
			
 
				 \end{center}
			
 
				 In the preceding iteration, only \code{block5} changed, so we can
			
@@ -9794,14 +9805,11 @@ for \code{mainstart} and \code{block7} are updated to include
 
				 \code{rsp}, yielding the following approximation $m_3$.
			
 
				 \begin{center}
			
 
				   \begin{lstlisting}
			
 
				-mainstart: {rsp}
			
 
				-block5: {i, rsp, sum}
			
 
				-block7: {i, rsp, sum}
			
 
				-block8: {rsp, sum}
			
 
				+mainstart: {rsp}, block5: {i,rsp,sum}, block7: {i,rsp,sum}, block8: {rsp,sum}
			
 
				 \end{lstlisting}
			
 
				 \end{center}
			
 
				 Because \code{block7} changed, we analyze \code{block5} once more, but
			
 
				-its live-before set remains \code{\{ i, rsp, sum \}}.  At this point
			
 
				+its live-before set remains \code{\{i,rsp,sum\}}.  At this point
			
 
				 our approximations have converged, so $m_3$ is the solution.
			
 
				 
			
 
				 This iteration process is guaranteed to converge to a solution by the
			
@@ -9836,9 +9844,9 @@ two mappings $m_i$ and $m_j$, $m_i \sqsubseteq_M m_j$ when $m_i(\ell)
 
				 bottom element of $M$ is the mapping $\bot_M$ that sends every label
			
 
				 to the empty set, i.e., $\bot_M(\ell) = \emptyset$.
			
 
				 
			
 
				-We can think of one iteration of liveness analysis as being a function
			
 
				-$f$ on the lattice $M$. It takes a mapping as input and computes a new
			
 
				-mapping.
			
 
				+We can think of one iteration of liveness analysis applied to the
			
 
				+whole program as being a function $f$ on the lattice $M$. It takes a
			
 
				+mapping as input and computes a new mapping.
			
 
				 \[
			
 
				    f(m_i) = m_{i+1}
			
 
				 \]
			
@@ -9863,7 +9871,7 @@ follows.\index{subject}{Kleene Fixed-Point Theorem}
 
				   \sqsubseteq f^n(\bot) \sqsubseteq \cdots
			
 
				 \]
			
 
				 When a lattice contains only finitely-long ascending chains, then
			
 
				-every Kleene chain tops out at some fixed point after a number of
			
 
				+every Kleene chain tops out at some fixed point after some number of
			
 
				 iterations of $f$.
			
 
				 \[
			
 
				 \bot \sqsubseteq f(\bot) \sqsubseteq f(f(\bot)) \sqsubseteq \cdots
			
@@ -9894,6 +9902,13 @@ state. If the output differs from the previous state for this block,
 
				 the mapping for this block is updated and its successor nodes are
			
 
				 pushed onto the work list.
			
 
				 
			
 
				+Note that the \code{analyze\_dataflow} function is formulated as a
			
 
				+\emph{forward} dataflow analysis, that is, the inputs to the transfer
			
 
				+function come from the predecessor nodes in the control-flow
			
 
				+graph. However, liveness analysis is a \emph{backward} dataflow
			
 
				+analysis, so in that case one must supply the \code{analyze\_dataflow}
			
 
				+function with the transpose of the control-flow graph.
			
 
				+
			
 
				 \begin{figure}[tb]
			
 
				 {\if\edition\racketEd    
			
 
				 \begin{lstlisting}
			
@@ -10016,19 +10031,24 @@ modification to \code{remove\_complex\_operands} to handle
 
				 \code{uncover-get!}, that we discuss in
			
 
				 Section~\ref{sec:uncover-get-bang}.
			
 
				 
			
 
				-As an aside, this problematic interaction between \code{set!}  and
			
 
				-\code{remove\_complex\_operands} is particular to Racket and not its
			
 
				-predecessor, the Scheme language. The key difference is that Scheme
			
 
				-does not specify an order of evaluation for the arguments of an
			
 
				-operator or function call. Thus, a compiler for Scheme is free to
			
 
				-choose any ordering: both \code{42} and \code{80} would be correct
			
 
				-results for the example program.
			
 
				+As an aside, this problematic interaction between \code{set!} and the
			
 
				+pass \code{remove\_complex\_operands} is particular to Racket and not
			
 
				+its predecessor, the Scheme language. The key difference is that
			
 
				+Scheme does not specify an order of evaluation for the arguments of an
			
 
				+operator or function call~\citep{SPERBER:2009aa}. Thus, a compiler for
			
 
				+Scheme is free to choose any ordering: both \code{42} and \code{80}
			
 
				+would be correct results for the example program. Interestingly,
			
 
				+Racket is implemented on top of the Chez Scheme
			
 
				+compiler~\citep{Dybvig:2006aa} and an approach similar to the one
			
 
				+presented in this section (using extra \code{let} bindings to control
			
 
				+the order of evaluation) is used in the translation from Racket to
			
 
				+Scheme~\citep{Flatt:2019tb}.
			
 
				 
			
 
				 \fi} % racket
			
 
				 
			
 
				 Having discussed the complications that arise from adding support for
			
 
				-assignment and loops, we turn to discussing the significant changes to
			
 
				-existing passes.
			
 
				+assignment and loops, we turn to discussing the individual compilation
			
 
				+passes.
			
 
				 
			
 
				 
			
 
				 {\if\edition\racketEd
			
@@ -10036,8 +10056,9 @@ existing passes.
 
				 \label{sec:uncover-get-bang}
			
 
				 
			
 
				 The goal of this pass it to mark uses of mutable variables so that
			
 
				-\code{remove\_complex\_operands} can treat them as complex
			
 
				-expressions. So the first step is to collect all the mutable
			
 
				+\code{remove\_complex\_operands} can treat them as complex expressions
			
 
				+and thereby preserve their ordering relative to the side-effects in
			
 
				+other operands. So the first step is to collect all the mutable
			
 
				 variables. We recommend creating an auxilliary function for this,
			
 
				 named \code{collect-set!}, that recursively traverses expressions,
			
 
				 returning a set of all variables that occur on the left-hand side of a
			
--- a/defs.tex
+++ b/defs.tex
@@ -11,12 +11,14 @@
 
				 \newcommand{\LangVar}{$\Lang_{\mathsf{Var}}$} % R1
			
 
				 \newcommand{\LangVarM}{\Lang_{\mathsf{Var}}}
			
 
				 
			
 
				-\newcommand{\LangVarANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}}
			
 
				-\newcommand{\LangVarANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}
			
 
				+\newcommand{\RCO}{\mathit{mon}} % output of remove-complex-opera*
			
 
				+
			
 
				+\newcommand{\LangVarANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Var}}}}
			
 
				+\newcommand{\LangVarANFM}{\Lang^{\RCO}_{\mathsf{Var}}}
			
 
				 
			
 
				 \newcommand{\LangIf}{$\Lang_{\mathsf{If}}$} %R2
			
 
				 \newcommand{\LangIfM}{\ensuremath{\Lang_{\mathsf{If}}}} %R2
			
 
				-\newcommand{\LangIfANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{if}}}} %R2
			
 
				+\newcommand{\LangIfANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{if}}}} %R2
			
 
				 
			
 
				 \newcommand{\LangCVar}{$\CLang_{\mathsf{Var}}$} % C0
			
 
				 \newcommand{\LangCVarM}{\CLang_{\mathsf{Var}}} % C0
			
@@ -27,14 +29,14 @@
 
				 \newcommand{\LangStruct}{\ensuremath{\Lang^{\mathsf{Struct}}_{\mathsf{Tup}}}} %\Lang^s3
			
 
				 \newcommand{\LangCVec}{$\CLang_{\mathsf{Tup}}$} %C2
			
 
				 \newcommand{\LangCVecM}{\CLang_{\mathsf{Tup}}} %C2
			
 
				-\newcommand{\LangVecANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}}} %R3
			
 
				-\newcommand{\LangVecANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}} %R3
			
 
				+\newcommand{\LangVecANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Tup}}}} %R3
			
 
				+\newcommand{\LangVecANFM}{\Lang^{\RCO}_{\mathsf{Tup}}} %R3
			
 
				 \newcommand{\LangAlloc}{\ensuremath{\Lang_{\mathsf{Alloc}}}} %R3'
			
 
				 \newcommand{\LangFun}{$\Lang_{\mathsf{Fun}}$} %R4
			
 
				 \newcommand{\LangFunM}{\Lang_{\mathsf{Fun}}} %R4
			
 
				 \newcommand{\LangCFun}{$\CLang_{\mathsf{Fun}}$} %C3
			
 
				 \newcommand{\LangCFunM}{\CLang_{\mathsf{Fun}}} %C3
			
 
				-\newcommand{\LangFunANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Fun}}}} %R4
			
 
				+\newcommand{\LangFunANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Fun}}}} %R4
			
 
				 \newcommand{\LangFunRef}{$\Lang_{\mathsf{FunRef}}$} %F1
			
 
				 \newcommand{\LangFunRefM}{\Lang_{\mathsf{FunRef}}} %F1
			
 
				 \newcommand{\LangFunRefAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{FunRef}}}} %R'4
			
@@ -57,7 +59,7 @@
 
				 \newcommand{\LangLoopAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{While}}}} %R'8
			
 
				 \newcommand{\LangCLoop}{$\CLang_{\circlearrowleft}$} %C7
			
 
				 \newcommand{\LangCLoopM}{\CLang_{\circlearrowleft}} %C7
			
 
				-\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{While}}}} %R8
			
 
				+\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{While}}}} %R8
			
 
				 \newcommand{\LangArray}{\ensuremath{\Lang^{\mathsf{Vecof}}_{\mathsf{While}}}} %\Lang^s3
			
 
				 \newcommand{\LangGrad}{$\Lang_{\mathsf{?}}$} %R9
			
 
				 \newcommand{\LangGradM}{\Lang_{\mathsf{?}}} %R9