Jeremy Siek 3 年之前
父節點
當前提交
bf3f3f5502
共有 3 個文件被更改,包括 168 次插入55 次删除
  1. 90 0
      book.bib
  2. 69 48
      book.tex
  3. 9 7
      defs.tex

+ 90 - 0
book.bib

@@ -1,3 +1,93 @@
+@article{Logothetis:1981,
+author = {Logothetis, George and Mishra, Prateek},
+title = {Compiling short-circuit boolean expressions in one pass},
+journal = {Software: Practice and Experience},
+volume = {11},
+number = {11},
+pages = {1197-1214},
+keywords = {Short-circuit evaluation, One-pass compilation, Boolean expressions, Code generation},
+doi = {https://doi.org/10.1002/spe.4380111104},
+url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/spe.4380111104},
+eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/spe.4380111104},
+abstract = {Abstract We present a very simple scheme for compiling boolean expressions in the short-circuit manner in one pass. The generated code is of very high quality and avoids most inefficiencies commonly associated with one-pass code generation. In particular, redundant conditional and unconditional branches are kept to a minimum. The scheme is general enough to compile the boolean expressions of a typical high-level language such as Pascal. It is presented in a format suited for syntax-directed translation and can be used with both top-down and bottom-up parsing.},
+year = {1981}
+}
+
+@article{Clarke:1989,
+author = {Clarke, Keith},
+title = {One-Pass Code Generation Using Continuations},
+year = {1989},
+issue_date = {Dec. 1989},
+publisher = {John Wiley & Sons, Inc.},
+address = {USA},
+volume = {19},
+number = {12},
+issn = {0038-0644},
+journal = {Softw. Pract. Exper.},
+month = nov,
+pages = {1175–1192},
+numpages = {18}
+}
+
+@article{Moggi:1991in,
+	address = {Duluth, MN, USA},
+	annote = {Journal version of the 1989 Computational Lambda-Calculus and Monads},
+	author = {Eugenio Moggi},
+	date-added = {2005-11-25 10:58:45 -0600},
+	date-modified = {2010-12-17 10:23:11 -0700},
+	issn = {0890-5401},
+	journal = {Inf. Comput.},
+	number = {1},
+	pages = {55--92},
+	publisher = {Academic Press, Inc.},
+	title = {Notions of computation and monads},
+	volume = {93},
+	year = {1991},
+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWGljOTEucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACE=},
+	Bdsk-Url-1 = {http://dx.doi.org/10.1016/0890-5401(91)90052-4}}
+
+@article{Flatt:2019tb,
+	abstract = {We rebuilt Racket on Chez Scheme, and it works well---as long as we're allowed
+a few patches to Chez Scheme. DrRacket runs, the Racket distribution can build itself,
+and nearly all of the core Racket test suite passes. Maintainability and performance
+of the resulting implementation are good, although some work remains to improve end-to-end
+performance. The least predictable part of our effort was how big the differences
+between Racket and Chez Scheme would turn out to be and how we would manage those
+differences. We expect Racket on Chez Scheme to become the main Racket implementation,
+and we encourage other language implementers to consider Chez Scheme as a target virtual
+machine.},
+	address = {New York, NY, USA},
+	articleno = {78},
+	author = {Flatt, Matthew and Derici, Caner and Dybvig, R. Kent and Keep, Andrew W. and Massaccesi, Gustavo E. and Spall, Sarah and Tobin-Hochstadt, Sam and Zeppieri, Jon},
+	date-added = {2021-10-21 14:03:11 -0400},
+	date-modified = {2021-10-21 14:03:16 -0400},
+	doi = {10.1145/3341642},
+	issue_date = {August 2019},
+	journal = {Proc. ACM Program. Lang.},
+	keywords = {Racket, Scheme},
+	month = jul,
+	number = {ICFP},
+	numpages = {15},
+	publisher = {Association for Computing Machinery},
+	title = {Rebuilding Racket on Chez Scheme (Experience Report)},
+	url = {https://doi.org/10.1145/3341642},
+	volume = {3},
+	year = {2019},
+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoWzMzNDE2NDIucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACQ=},
+	Bdsk-Url-1 = {https://doi.org/10.1145/3341642}}
+
+@incollection{Danvy:2003fk,
+	author = {Danvy, Olivier},
+	booktitle = {Compiler Construction},
+	date-added = {2013-01-02 15:56:48 -0700},
+	date-modified = {2013-01-02 15:58:19 -0700},
+	pages = {77-89},
+	series = {LNCS},
+	title = {A New One-Pass Transformation into Monadic Normal Form},
+	volume = {2622},
+	year = {2003},
+	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoXxA0RGFudnkyMDAzX0NoYXB0ZXJfQU5ld09uZS1QYXNzVHJhbnNmb3JtYXRpb25JbnRvLnBkZggLGAAAAAAAAAEBAAAAAAAAAAMAAAAAAAAAAAAAAAAAAABP},
+	Bdsk-Url-1 = {http://dx.doi.org/10.1007/3-540-36579-6_6}}
 
 @article{PeytonJones:1998,
 	author = {Simon L. {Peyton Jones} and Andr{\'e}L.M. Santos},

+ 69 - 48
book.tex

@@ -3034,14 +3034,31 @@ print(tmp_1)
 \label{fig:Lvar-anf-syntax}
 \end{figure}
 
-Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output of
-this pass, the language \LangVarANF{}. The only difference is that
+Figure~\ref{fig:Lvar-anf-syntax} presents the grammar for the output
+of this pass, the language \LangVarANF{}. The only difference is that
 operator arguments are restricted to be atomic expressions that are
 defined by the \Atm{} non-terminal. In particular, integer constants
-and variables are atomic. In the literature, restricting arguments to
-be atomic expressions is one of the ideas in \emph{administrative
-normal form}, or ANF for short~\citep{Danvy:1991fk,Flanagan:1993cg}.
+and variables are atomic. This restriction brings us closer to what is
+known as a \emph{three-address code}~\citep{Aho:1986qf} language.
+
+The atomic expressions are pure (they do not cause side-effects or
+depend on them) whereas complex expressions may have side effects,
+such as \READ{}.  A language with this separation between pure versus
+side-effecting expressions is said to be in monadic normal
+form~\citep{Moggi:1991in,Danvy:2003fk} which explains the \textit{mon}
+in \LangVarANF{}. An important invariant of the
+\code{remove\_complex\_operands} pass is that the relative ordering
+among complex expressions is not changed, but the relative ordering
+between atomic expressions and complex expressions can change and
+often does. The reason that these changes are behaviour preserving is
+that the atomic expressions are pure.
+
+Another well-known form is the \emph{administrative normal form}
+(ANF)~\citep{Danvy:1991fk,Flanagan:1993cg}.
 \index{subject}{administrative normal form} \index{subject}{ANF}
+%
+The \LangVarANF{} language is not quite in ANF because we allow the
+right-hand side of a \code{let} to be a complex expression.
 
 {\if\edition\racketEd
 We recommend implementing this pass with two mutually recursive
@@ -9267,11 +9284,13 @@ blocks on several test programs.
 \label{sec:cond-further-reading}
 
 The algorithm for the \code{explicate\_control} pass comes from the
-course notes of \citet{Dybvig:2010aa}. The use of lazy evaluation in
-Section~\ref{sec:opt-jumps} to optimize basic blocks is new.  There
-are algorithms similar to \code{explicate\_control} in the literature,
-such as the case-of-case transformation of \citet{PeytonJones:1998}.
-
+course notes of \citet{Dybvig:2010aa} and it has several similarities
+to an algorithm of \citet{Danvy:2003fk}. The use of lazy evaluation in
+Section~\ref{sec:opt-jumps} to prevent the generation of unused basic
+blocks appears to be new. The treatment of conditionals in the
+\code{explicate\_control} pass is similar to the case-of-case
+transformation of \citet{PeytonJones:1998} and to short-cut boolean
+evaluation~\citep{Logothetis:1981,Aho:1986qf,Clarke:1989,Danvy:2003fk}.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \chapter{Loops and Dataflow Analysis}
@@ -9730,12 +9749,13 @@ we are stuck.
 The way out of this impasse is to realize that we can compute an
 under-approximation of the live-before set by starting with empty
 live-after sets.  By \emph{under-approximation}, we mean that the set
-only contains variables that are really live, but it may be missing
-some.  Next, the under-approximations for each block can be improved
-by 1) updating the live-after set for each block using the approximate
-live-before sets from the other blocks and 2) perform liveness
-analysis again on each block.  In fact, by iterating this process, the
-under-approximations eventually become the correct solutions!
+only contains variables that are live for some execution of the
+program, but the set may be missing some variables.  Next, the
+under-approximations for each block can be improved by 1) updating the
+live-after set for each block using the approximate live-before sets
+from the other blocks and 2) perform liveness analysis again on each
+block.  In fact, by iterating this process, the under-approximations
+eventually become the correct solutions!
 %
 This approach of iteratively analyzing a control-flow graph is
 applicable to many static analysis problems and goes by the name
@@ -9749,10 +9769,7 @@ following mapping from label names to sets of locations (variables and
 registers).
 \begin{center}
 \begin{lstlisting}
-mainstart: {}
-block5: {}
-block7: {}
-block8: {}
+mainstart: {}, block5: {}, block7: {}, block8: {}
 \end{lstlisting}
 \end{center}
 Using the above live-before approximations, we determine the
@@ -9761,10 +9778,7 @@ block.  This produces our next approximation $m_1$ of the live-before
 sets.
 \begin{center}
   \begin{lstlisting}
-mainstart: {}
-block5: {i}
-block7: {i, sum}
-block8: {rsp, sum}
+mainstart: {}, block5: {i}, block7: {i, sum}, block8: {rsp, sum}
 \end{lstlisting}
 \end{center}
 
@@ -9781,10 +9795,7 @@ So the liveness analysis for \code{block7} remains \code{\{i,
 the live-before sets.
 \begin{center}
   \begin{lstlisting}
-mainstart: {}
-block5: {i, rsp, sum}
-block7: {i, sum}
-block8: {rsp, sum}
+mainstart: {}, block5: {i, rsp, sum}, block7: {i, sum}, block8: {rsp, sum}
 \end{lstlisting}
 \end{center}
 In the preceding iteration, only \code{block5} changed, so we can
@@ -9794,14 +9805,11 @@ for \code{mainstart} and \code{block7} are updated to include
 \code{rsp}, yielding the following approximation $m_3$.
 \begin{center}
   \begin{lstlisting}
-mainstart: {rsp}
-block5: {i, rsp, sum}
-block7: {i, rsp, sum}
-block8: {rsp, sum}
+mainstart: {rsp}, block5: {i,rsp,sum}, block7: {i,rsp,sum}, block8: {rsp,sum}
 \end{lstlisting}
 \end{center}
 Because \code{block7} changed, we analyze \code{block5} once more, but
-its live-before set remains \code{\{ i, rsp, sum \}}.  At this point
+its live-before set remains \code{\{i,rsp,sum\}}.  At this point
 our approximations have converged, so $m_3$ is the solution.
 
 This iteration process is guaranteed to converge to a solution by the
@@ -9836,9 +9844,9 @@ two mappings $m_i$ and $m_j$, $m_i \sqsubseteq_M m_j$ when $m_i(\ell)
 bottom element of $M$ is the mapping $\bot_M$ that sends every label
 to the empty set, i.e., $\bot_M(\ell) = \emptyset$.
 
-We can think of one iteration of liveness analysis as being a function
-$f$ on the lattice $M$. It takes a mapping as input and computes a new
-mapping.
+We can think of one iteration of liveness analysis applied to the
+whole program as being a function $f$ on the lattice $M$. It takes a
+mapping as input and computes a new mapping.
 \[
    f(m_i) = m_{i+1}
 \]
@@ -9863,7 +9871,7 @@ follows.\index{subject}{Kleene Fixed-Point Theorem}
   \sqsubseteq f^n(\bot) \sqsubseteq \cdots
 \]
 When a lattice contains only finitely-long ascending chains, then
-every Kleene chain tops out at some fixed point after a number of
+every Kleene chain tops out at some fixed point after some number of
 iterations of $f$.
 \[
 \bot \sqsubseteq f(\bot) \sqsubseteq f(f(\bot)) \sqsubseteq \cdots
@@ -9894,6 +9902,13 @@ state. If the output differs from the previous state for this block,
 the mapping for this block is updated and its successor nodes are
 pushed onto the work list.
 
+Note that the \code{analyze\_dataflow} function is formulated as a
+\emph{forward} dataflow analysis, that is, the inputs to the transfer
+function come from the predecessor nodes in the control-flow
+graph. However, liveness analysis is a \emph{backward} dataflow
+analysis, so in that case one must supply the \code{analyze\_dataflow}
+function with the transpose of the control-flow graph.
+
 \begin{figure}[tb]
 {\if\edition\racketEd    
 \begin{lstlisting}
@@ -10016,19 +10031,24 @@ modification to \code{remove\_complex\_operands} to handle
 \code{uncover-get!}, that we discuss in
 Section~\ref{sec:uncover-get-bang}.
 
-As an aside, this problematic interaction between \code{set!}  and
-\code{remove\_complex\_operands} is particular to Racket and not its
-predecessor, the Scheme language. The key difference is that Scheme
-does not specify an order of evaluation for the arguments of an
-operator or function call. Thus, a compiler for Scheme is free to
-choose any ordering: both \code{42} and \code{80} would be correct
-results for the example program.
+As an aside, this problematic interaction between \code{set!} and the
+pass \code{remove\_complex\_operands} is particular to Racket and not
+its predecessor, the Scheme language. The key difference is that
+Scheme does not specify an order of evaluation for the arguments of an
+operator or function call~\citep{SPERBER:2009aa}. Thus, a compiler for
+Scheme is free to choose any ordering: both \code{42} and \code{80}
+would be correct results for the example program. Interestingly,
+Racket is implemented on top of the Chez Scheme
+compiler~\citep{Dybvig:2006aa} and an approach similar to the one
+presented in this section (using extra \code{let} bindings to control
+the order of evaluation) is used in the translation from Racket to
+Scheme~\citep{Flatt:2019tb}.
 
 \fi} % racket
 
 Having discussed the complications that arise from adding support for
-assignment and loops, we turn to discussing the significant changes to
-existing passes.
+assignment and loops, we turn to discussing the individual compilation
+passes.
 
 
 {\if\edition\racketEd
@@ -10036,8 +10056,9 @@ existing passes.
 \label{sec:uncover-get-bang}
 
 The goal of this pass it to mark uses of mutable variables so that
-\code{remove\_complex\_operands} can treat them as complex
-expressions. So the first step is to collect all the mutable
+\code{remove\_complex\_operands} can treat them as complex expressions
+and thereby preserve their ordering relative to the side-effects in
+other operands. So the first step is to collect all the mutable
 variables. We recommend creating an auxilliary function for this,
 named \code{collect-set!}, that recursively traverses expressions,
 returning a set of all variables that occur on the left-hand side of a

+ 9 - 7
defs.tex

@@ -11,12 +11,14 @@
 \newcommand{\LangVar}{$\Lang_{\mathsf{Var}}$} % R1
 \newcommand{\LangVarM}{\Lang_{\mathsf{Var}}}
 
-\newcommand{\LangVarANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}}
-\newcommand{\LangVarANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Var}}}
+\newcommand{\RCO}{\mathit{mon}} % output of remove-complex-opera*
+
+\newcommand{\LangVarANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Var}}}}
+\newcommand{\LangVarANFM}{\Lang^{\RCO}_{\mathsf{Var}}}
 
 \newcommand{\LangIf}{$\Lang_{\mathsf{If}}$} %R2
 \newcommand{\LangIfM}{\ensuremath{\Lang_{\mathsf{If}}}} %R2
-\newcommand{\LangIfANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{if}}}} %R2
+\newcommand{\LangIfANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{if}}}} %R2
 
 \newcommand{\LangCVar}{$\CLang_{\mathsf{Var}}$} % C0
 \newcommand{\LangCVarM}{\CLang_{\mathsf{Var}}} % C0
@@ -27,14 +29,14 @@
 \newcommand{\LangStruct}{\ensuremath{\Lang^{\mathsf{Struct}}_{\mathsf{Tup}}}} %\Lang^s3
 \newcommand{\LangCVec}{$\CLang_{\mathsf{Tup}}$} %C2
 \newcommand{\LangCVecM}{\CLang_{\mathsf{Tup}}} %C2
-\newcommand{\LangVecANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}}} %R3
-\newcommand{\LangVecANFM}{\Lang^{\mathsf{ANF}}_{\mathsf{Tup}}} %R3
+\newcommand{\LangVecANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Tup}}}} %R3
+\newcommand{\LangVecANFM}{\Lang^{\RCO}_{\mathsf{Tup}}} %R3
 \newcommand{\LangAlloc}{\ensuremath{\Lang_{\mathsf{Alloc}}}} %R3'
 \newcommand{\LangFun}{$\Lang_{\mathsf{Fun}}$} %R4
 \newcommand{\LangFunM}{\Lang_{\mathsf{Fun}}} %R4
 \newcommand{\LangCFun}{$\CLang_{\mathsf{Fun}}$} %C3
 \newcommand{\LangCFunM}{\CLang_{\mathsf{Fun}}} %C3
-\newcommand{\LangFunANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{Fun}}}} %R4
+\newcommand{\LangFunANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{Fun}}}} %R4
 \newcommand{\LangFunRef}{$\Lang_{\mathsf{FunRef}}$} %F1
 \newcommand{\LangFunRefM}{\Lang_{\mathsf{FunRef}}} %F1
 \newcommand{\LangFunRefAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{FunRef}}}} %R'4
@@ -57,7 +59,7 @@
 \newcommand{\LangLoopAlloc}{\ensuremath{\Lang^{\mathsf{Alloc}}_{\mathsf{While}}}} %R'8
 \newcommand{\LangCLoop}{$\CLang_{\circlearrowleft}$} %C7
 \newcommand{\LangCLoopM}{\CLang_{\circlearrowleft}} %C7
-\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\mathsf{ANF}}_{\mathsf{While}}}} %R8
+\newcommand{\LangLoopANF}{\ensuremath{\Lang^{\RCO}_{\mathsf{While}}}} %R8
 \newcommand{\LangArray}{\ensuremath{\Lang^{\mathsf{Vecof}}_{\mathsf{While}}}} %\Lang^s3
 \newcommand{\LangGrad}{$\Lang_{\mathsf{?}}$} %R9
 \newcommand{\LangGradM}{\Lang_{\mathsf{?}}} %R9