Browse Source

more explanation in parsing

Jeremy Siek 2 years ago
parent
commit
dbd950a748
3 changed files with 89 additions and 95 deletions
  1. 1 1
      Makefile
  2. 4 21
      book.bib
  3. 84 73
      book.tex

+ 1 - 1
Makefile

@@ -4,7 +4,7 @@
 LATEXMK= latexmk -pdf
 LATEXMK= latexmk -pdf
 
 
 all:
 all:
-	$(LATEXMK) book
+	$(LATEXMK) -f book
 
 
 cont: continuous
 cont: continuous
 continuous:
 continuous:

+ 4 - 21
book.bib

@@ -1,36 +1,22 @@
 @book{Tomita:1985qr,
 @book{Tomita:1985qr,
-	address = {Norwell, MA, USA},
 	author = {Masaru Tomita},
 	author = {Masaru Tomita},
-	date-added = {2008-12-02 14:16:33 -0700},
-	date-modified = {2008-12-02 14:16:39 -0700},
-	isbn = {0898382025},
 	publisher = {Kluwer Academic Publishers},
 	publisher = {Kluwer Academic Publishers},
 	title = {Efficient Parsing for Natural Language: A Fast Algorithm for Practical Systems},
 	title = {Efficient Parsing for Natural Language: A Fast Algorithm for Practical Systems},
 	year = {1985}}
 	year = {1985}}
 
 
 @article{Earley:1970ly,
 @article{Earley:1970ly,
-	acmid = {362035},
-	address = {New York, NY, USA},
 	author = {Earley, Jay},
 	author = {Earley, Jay},
-	date-added = {2011-05-28 11:31:46 -0600},
-	date-modified = {2011-05-28 11:31:48 -0600},
-	doi = {http://doi.acm.org/10.1145/362007.362035},
-	issn = {0001-0782},
 	issue = {2},
 	issue = {2},
 	journal = {Commun. ACM},
 	journal = {Commun. ACM},
-	keywords = {compilers, computational complexity, context-free grammar, parsing, syntax analysis},
 	month = {February},
 	month = {February},
 	numpages = {9},
 	numpages = {9},
 	pages = {94--102},
 	pages = {94--102},
 	publisher = {ACM},
 	publisher = {ACM},
 	title = {An efficient context-free parsing algorithm},
 	title = {An efficient context-free parsing algorithm},
-	url = {http://doi.acm.org/10.1145/362007.362035},
 	volume = {13},
 	volume = {13},
-	year = {1970},
-	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoXnA5NC1lYXJsZXkucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACc=},
-	Bdsk-Url-1 = {http://doi.acm.org/10.1145/362007.362035}}
+	year = {1970}}
 
 
-@Book{Hopcroft06:_automata,
+@book{Hopcroft06:_automata,
   author = 	 {John Hopcroft and Rajeev Motwani and Jeffrey Ullman},
   author = 	 {John Hopcroft and Rajeev Motwani and Jeffrey Ullman},
   title = 	 {Introduction to Automata Theory, Languages, and Computation},
   title = 	 {Introduction to Automata Theory, Languages, and Computation},
   publisher = 	 {Pearson},
   publisher = 	 {Pearson},
@@ -38,15 +24,12 @@
 
 
 @techreport{Lesk:1975uq,
 @techreport{Lesk:1975uq,
 	author = {M. E. Lesk and E. Schmidt},
 	author = {M. E. Lesk and E. Schmidt},
-	date-added = {2007-08-27 13:37:27 -0600},
-	date-modified = {2009-08-25 22:28:17 -0600},
 	institution = {Bell Laboratories},
 	institution = {Bell Laboratories},
 	month = {July},
 	month = {July},
 	title = {Lex - A Lexical Analyzer Generator},
 	title = {Lex - A Lexical Analyzer Generator},
-	year = {1975},
-	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoV2xleC5wZGYICxgAAAAAAAABAQAAAAAAAAADAAAAAAAAAAAAAAAAAAAAIA==}}
+	year = {1975}}
 
 
-@Misc{shinan20:_lark_docs,
+@misc{shinan20:_lark_docs,
   author = 	 {Erez Shinan},
   author = 	 {Erez Shinan},
   title = 	 {Lark Documentation},
   title = 	 {Lark Documentation},
   url = {https://lark-parser.readthedocs.io/en/latest/index.html},
   url = {https://lark-parser.readthedocs.io/en/latest/index.html},

+ 84 - 73
book.tex

@@ -196,6 +196,7 @@ ISBN:
 
 
 %\listoftables
 %\listoftables
 
 
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \chapter*{Preface}
 \chapter*{Preface}
 \addcontentsline{toc}{fmbm}{Preface}
 \addcontentsline{toc}{fmbm}{Preface}
@@ -247,7 +248,7 @@ concepts and algorithms used in compilers.
   the fundamental tools of compiler construction: \emph{abstract
   the fundamental tools of compiler construction: \emph{abstract
     syntax trees} and \emph{recursive functions}. 
     syntax trees} and \emph{recursive functions}. 
 {\if\edition\pythonEd
 {\if\edition\pythonEd
-\item In Chapter~\ref{ch:parsing-Lvar} we learn how to use the Lark
+\item In Chapter~\ref{ch:parsing} we learn how to use the Lark
   parser generator to create a parser for the language of integer
   parser generator to create a parser for the language of integer
   arithmetic and local variables. We learn about the parsing
   arithmetic and local variables. We learn about the parsing
   algorithms inside Lark, including Earley and LALR(1).
   algorithms inside Lark, including Earley and LALR(1).
@@ -307,14 +308,13 @@ programming, data structures and algorithms, and discrete
 mathematics.
 mathematics.
 %
 %
 At the beginning of the course, students form groups of two to four
 At the beginning of the course, students form groups of two to four
-people.  The groups complete one chapter every two weeks, starting
-with chapter~\ref{ch:Lvar} and finishing with
-chapter~\ref{ch:Llambda}. Many chapters include a challenge problem
-that we assign to the graduate students. The last two weeks of the
+people.  The groups complete approximately one chapter every two
+weeks, starting with chapter~\ref{ch:Lvar}. The last two weeks of the
 course involve a final project in which students design and implement
 course involve a final project in which students design and implement
 a compiler extension of their choosing.  The last few chapters can be
 a compiler extension of their choosing.  The last few chapters can be
-used in support of these projects.  For compiler courses at
-universities on the quarter system (about ten weeks in length), we
+used in support of these projects.  Many chapters include a challenge
+problem that we assign to the graduate students. For compiler courses
+at universities on the quarter system (about ten weeks in length), we
 recommend completing the course through chapter~\ref{ch:Lvec} or
 recommend completing the course through chapter~\ref{ch:Lvec} or
 chapter~\ref{ch:Lfun} and providing some scaffolding code to the
 chapter~\ref{ch:Lfun} and providing some scaffolding code to the
 students for each compiler pass.
 students for each compiler pass.
@@ -337,7 +337,6 @@ State University, Portland State University, Rose–Hulman Institute of
 Technology, University of Freiburg, University of Massachusetts
 Technology, University of Freiburg, University of Massachusetts
 Lowell, and the University of Vermont.
 Lowell, and the University of Vermont.
 
 
-
 \begin{figure}[tp]
 \begin{figure}[tp]
 \begin{tcolorbox}[colback=white]
 \begin{tcolorbox}[colback=white]
   {\if\edition\racketEd
   {\if\edition\racketEd
@@ -370,32 +369,35 @@ Lowell, and the University of Vermont.
 \fi}
 \fi}
 {\if\edition\pythonEd
 {\if\edition\pythonEd
 \begin{tikzpicture}[baseline=(current  bounding  box.center)]
 \begin{tikzpicture}[baseline=(current  bounding  box.center)]
-  \node (C1) at (0,1.5) {\small Ch.~\ref{ch:trees-recur} Preliminaries};
-  \node (C2) at (4,1.5) {\small Ch.~\ref{ch:Lvar} Variables};
-  \node (C3) at (8,1.5) {\small Ch.~\ref{ch:register-allocation-Lvar} Registers};
-  \node (C4) at (0,0) {\small Ch.~\ref{ch:Lif} Conditionals};
-  \node (C5) at (4,0) {\small Ch.~\ref{ch:Lvec} Tuples};
-  \node (C6) at (8,0) {\small Ch.~\ref{ch:Lfun} Functions};
-  \node (C9) at (0,-1.5) {\small Ch.~\ref{ch:Lwhile} Loops};
-  \node (C8) at (4,-1.5) {\small Ch.~\ref{ch:Ldyn} Dynamic};
+  \node (Prelim) at (0,1.5) {\small Ch.~\ref{ch:trees-recur} Preliminaries};
+  \node (Var) at (4,1.5) {\small Ch.~\ref{ch:Lvar} Variables};
+  \node (Parse) at (8,1.5) {\small Ch.~\ref{ch:parsing} Parsing};
+  \node (Reg) at (0,0) {\small Ch.~\ref{ch:register-allocation-Lvar} Registers};
+  \node (Cond) at (4,0) {\small Ch.~\ref{ch:Lif} Conditionals};
+  \node (Loop) at (8,0) {\small Ch.~\ref{ch:Lwhile} Loops};
+  \node (Fun) at (0,-1.5) {\small Ch.~\ref{ch:Lfun} Functions};
+  \node (Tuple) at (4,-1.5) {\small Ch.~\ref{ch:Lvec} Tuples};
+  \node (Dyn) at (8,-1.5) {\small Ch.~\ref{ch:Ldyn} Dynamic};
 %  \node (CO) at (0,-3) {\small Ch.~\ref{ch:Lobject} Objects};
 %  \node (CO) at (0,-3) {\small Ch.~\ref{ch:Lobject} Objects};
-  \node (C7) at (8,-1.5) {\small Ch.~\ref{ch:Llambda} Lambda};
-  \node (C10) at (4,-3) {\small Ch.~\ref{ch:Lgrad} Gradual Typing};
-  \node (C11) at (8,-3) {\small Ch.~\ref{ch:Lpoly} Generics};
-
-  \path[->] (C1) edge [above] node {} (C2);
-  \path[->] (C2) edge [above] node {} (C3);
-  \path[->] (C3) edge [above] node {} (C4);
-  \path[->] (C4) edge [above] node {} (C5);
-  \path[->,style=dotted] (C5) edge [above] node {} (C6);
-  \path[->] (C5) edge [above] node {} (C7);
-  \path[->] (C6) edge [above] node {} (C7);
-  \path[->] (C4) edge [above] node {} (C8);
-  \path[->] (C4) edge [above] node {} (C9);
-  \path[->] (C7) edge [above] node {} (C10);
-  \path[->] (C8) edge [above] node {} (C10);
-%  \path[->] (C8) edge [above] node {} (CO);
-  \path[->] (C10) edge [above] node {} (C11);
+  \node (Lam) at (0,-3) {\small Ch.~\ref{ch:Llambda} Lambda};
+  \node (Gradual) at (4,-3) {\small Ch.~\ref{ch:Lgrad} Gradual Typing};
+  \node (Generic) at (8,-3) {\small Ch.~\ref{ch:Lpoly} Generics};
+
+  \path[->] (Prelim) edge [above] node {} (Var);
+  \path[->] (Var) edge [above] node {} (Reg);
+  \path[->] (Var) edge [above] node {} (Parse);
+  \path[->] (Reg) edge [above] node {} (Cond);
+  \path[->] (Cond) edge [above] node {} (Tuple);
+  \path[->,style=dotted] (Tuple) edge [above] node {} (Fun);
+  \path[->] (Cond) edge [above] node {} (Fun);
+  \path[->] (Tuple) edge [above] node {} (Lam);
+  \path[->] (Fun) edge [above] node {} (Lam);
+  \path[->] (Cond) edge [above] node {} (Dyn);
+  \path[->] (Cond) edge [above] node {} (Loop);
+  \path[->] (Lam) edge [above] node {} (Gradual);
+  \path[->] (Dyn) edge [above] node {} (Gradual);
+%  \path[->] (Dyn) edge [above] node {} (CO);
+  \path[->] (Gradual) edge [above] node {} (Generic);
 \end{tikzpicture}
 \end{tikzpicture}
 \fi}
 \fi}
 \end{tcolorbox}
 \end{tcolorbox}
@@ -506,9 +508,11 @@ perform.\index{subject}{concrete syntax}\index{subject}{abstract
   syntax}\index{subject}{abstract syntax
   syntax}\index{subject}{abstract syntax
   tree}\index{subject}{AST}\index{subject}{program}\index{subject}{parse}
   tree}\index{subject}{AST}\index{subject}{program}\index{subject}{parse}
 The process of translating from concrete syntax to abstract syntax is
 The process of translating from concrete syntax to abstract syntax is
-called \emph{parsing}~\citep{Aho:2006wb}\python{ and is studied in
-  chapter~\ref{ch:parsing-Lvar}}.
-\racket{This book does not cover the theory and implementation of parsing.}%
+called \emph{parsing}\python{ and is studied in
+  chapter~\ref{ch:parsing}}.
+\racket{This book does not cover the theory and implementation of parsing.
+  We refer the readers interested in parsing to the thorough treatment
+  of parsing by \citet{Aho:2006wb}.}%
 %
 %
 \racket{A parser is provided in the support code for translating from
 \racket{A parser is provided in the support code for translating from
   concrete to abstract syntax.}%
   concrete to abstract syntax.}%
@@ -4090,23 +4094,23 @@ all, fast code is useless if it produces incorrect results!
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 {\if\edition\pythonEd
 {\if\edition\pythonEd
 \chapter{Parsing}
 \chapter{Parsing}
-\label{ch:parsing-Lvar}
+\label{ch:parsing}
 \setcounter{footnote}{0}
 \setcounter{footnote}{0}
 \index{subject}{parsing}
 \index{subject}{parsing}
 
 
 In this chapter we learn how to use the Lark parser
 In this chapter we learn how to use the Lark parser
-generator~\citep{shinan20:_lark_docs} to translate the concrete syntax
+framework~\citep{shinan20:_lark_docs} to translate the concrete syntax
 of \LangInt{} (a sequence of characters) into an abstract syntax tree.
 of \LangInt{} (a sequence of characters) into an abstract syntax tree.
 You will then be asked to use Lark to create a parser for \LangVar{}.
 You will then be asked to use Lark to create a parser for \LangVar{}.
-We then learn about the parsing algorithms used inside Lark, studying
-the \citet{Earley:1970ly} and LALR algorithms.
+We also describe the parsing algorithms used inside Lark, studying the
+\citet{Earley:1970ly} and LALR(1) algorithms.
 
 
-A parser generator takes in a specification of the concrete syntax and
-produces a parser. Even though a parser generator does most of the
-work for us, using one properly requires some knowledge.  In
-particular, we must learn about the specification languages used by
-parser generators and we must learn how to deal with ambiguity in our
-language specifications.
+A parser framework such as Lark takes in a specification of the
+concrete syntax and the input program and produces a parse tree. Even
+though a parser framework does most of the work for us, using one
+properly requires some knowledge.  In particular, we must learn about
+its specification languages and we must learn how to deal with
+ambiguity in our language specifications.
 
 
 The process of parsing is traditionally subdivided into two phases:
 The process of parsing is traditionally subdivided into two phases:
 \emph{lexical analysis} (also called scanning) and \emph{syntax
 \emph{lexical analysis} (also called scanning) and \emph{syntax
@@ -4119,16 +4123,16 @@ language. The reason for the subdivision into two phases is to enable
 the use of a faster but less powerful algorithm for lexical analysis
 the use of a faster but less powerful algorithm for lexical analysis
 and the use of a slower but more powerful algorithm for parsing.
 and the use of a slower but more powerful algorithm for parsing.
 %
 %
-Likewise, parser generators typical come in pairs, with separate
-generators for the lexical analyzer (or lexer for short) and for the
-parser.  A paricularly influential pair of generators were
-\texttt{lex} and \texttt{yacc}. The \texttt{lex} generator was written
-by \citet{Lesk:1975uq} at Bell Labs. The \texttt{yacc} generator was
-written by \citet{Johnson:1979qy} at AT\&T and stands for Yet Another
-Compiler Compiler.
-
-The Lark parse generator that we use in this chapter includes both a
-lexical analyzer and a parser. The next section discusses lexical
+%% Likewise, parser generators typical come in pairs, with separate
+%% generators for the lexical analyzer (or lexer for short) and for the
+%% parser.  A paricularly influential pair of generators were
+%% \texttt{lex} and \texttt{yacc}. The \texttt{lex} generator was written
+%% by \citet{Lesk:1975uq} at Bell Labs. The \texttt{yacc} generator was
+%% written by \citet{Johnson:1979qy} at AT\&T and stands for Yet Another
+%% Compiler Compiler.
+%
+The Lark parser framework that we use in this chapter includes both
+lexical analyzers and parsers. The next section discusses lexical
 analysis and the remainder of the chapter discusses parsing.
 analysis and the remainder of the chapter discusses parsing.
 
 
 
 
@@ -4522,10 +4526,13 @@ section~\ref{sec:lalr} we learn about the LALR algorithm, which is
 more efficient but can only handle a subset of the context-free
 more efficient but can only handle a subset of the context-free
 grammars.
 grammars.
 
 
-The Earley algorithm uses a data structure called a
-\emph{chart}\index{subject}{chart} to keep track of its progress.  The
-chart is an array with one slot for each position in the input string,
-where position $0$ is before the first character and position $n$ is
+The Earley algorithm can be viewed as an interpreter; it treats the
+grammar as the program being interpreted and it treats the concrete
+syntax of the program-to-be-parsed as its input.  The Earley algorithm
+uses a data structure called a \emph{chart}\index{subject}{chart} to
+keep track of its progress and to memoize its results. The chart is an
+array with one slot for each position in the input string, where
+position $0$ is before the first character and position $n$ is
 immediately after the last character. So the array has length $n+1$
 immediately after the last character. So the array has length $n+1$
 for an input string of length $n$. Each slot in the chart contains a
 for an input string of length $n$. Each slot in the chart contains a
 set of \emph{dotted rules}. A dotted rule is simply a grammar rule
 set of \emph{dotted rules}. A dotted rule is simply a grammar rule
@@ -4553,8 +4560,8 @@ grammar in figure~\ref{fig:Lint-lark-grammar}, we place
 \begin{lstlisting}
 \begin{lstlisting}
   lang_int: . stmt_list         (0)
   lang_int: . stmt_list         (0)
 \end{lstlisting}
 \end{lstlisting}
-in slot $0$ of the chart. The algorithm then proceeds to its
-\emph{prediction} phase in which it adds more dotted rules to the
+in slot $0$ of the chart. The algorithm then proceeds with
+\emph{prediction} actions in which it adds more dotted rules to the
 chart based on which nonterminals come after a period. In the above,
 chart based on which nonterminals come after a period. In the above,
 the nonterminal \code{stmt\_list} appears after a period, so we add all
 the nonterminal \code{stmt\_list} appears after a period, so we add all
 the rules for \code{stmt\_list} to slot $0$, with a period at the
 the rules for \code{stmt\_list} to slot $0$, with a period at the
@@ -4767,13 +4774,15 @@ use with even the largest of input files.
 \section{The LALR(1) Algorithm}
 \section{The LALR(1) Algorithm}
 \label{sec:lalr}
 \label{sec:lalr}
 
 
-The LALR(1) algorithm consists of a finite automata and a stack to
-record its progress in parsing the input string.  Each element of the
-stack is a pair: a state number and a grammar symbol (a terminal or
-nonterminal). The symbol characterizes the input that has been parsed
-so-far and the state number is used to remember how to proceed once
-the next symbol-worth of input has been parsed.  Each state in the
-finite automata represents where the parser stands in the parsing
+The LALR(1) algorithm can be viewed as a two-phase approach in which
+it first compiles the grammar into a state machine and then runs the
+state machine to parse the input string.  The state machine also uses
+a stack to record its progress in parsing the input string.  Each
+element of the stack is a pair: a state number and a grammar symbol (a
+terminal or nonterminal). The symbol characterizes the input that has
+been parsed so far and the state number is used to remember how to
+proceed once the next symbol-worth of input has been parsed.  Each
+state in the machine represents where the parser stands in the parsing
 process with respect to certain grammar rules. In particular, each
 process with respect to certain grammar rules. In particular, each
 state is associated with a set of dotted rules.
 state is associated with a set of dotted rules.
 
 
@@ -4797,7 +4806,7 @@ rule 1 with a period after the \code{PRINT} token and before the
 \emph{item}. There are several rules that could apply next, both rule
 \emph{item}. There are several rules that could apply next, both rule
 2 and 3, so state 1 also shows those rules with a period at the
 2 and 3, so state 1 also shows those rules with a period at the
 beginning of their right-hand sides. The edges between states indicate
 beginning of their right-hand sides. The edges between states indicate
-which transitions the automata should make depending on the next input
+which transitions the machine should make depending on the next input
 token. So, for example, if the next input token is \code{INT} then the
 token. So, for example, if the next input token is \code{INT} then the
 parser will push \code{INT} and the target state 4 on the stack and
 parser will push \code{INT} and the target state 4 on the stack and
 transition to state 4.  Suppose we are now at the end of the input. In
 transition to state 4.  Suppose we are now at the end of the input. In
@@ -10155,7 +10164,7 @@ arguments may not be used at all. For example, consider the case for
 the constant \TRUE{} in \code{explicate\_pred}, in which we discard the
 the constant \TRUE{} in \code{explicate\_pred}, in which we discard the
 \code{els} continuation.
 \code{els} continuation.
 %
 %
- {\if\edition\racketEd
+{\if\edition\racketEd
 The following example program falls into this
 The following example program falls into this
 case, and it creates two unused blocks.       
 case, and it creates two unused blocks.       
 \begin{center}
 \begin{center}
@@ -10277,11 +10286,12 @@ return a \code{Goto} to the new label.
       [else
       [else
         (let ([label (gensym 'block)])
         (let ([label (gensym 'block)])
           (set! basic-blocks (cons (cons label t) basic-blocks))
           (set! basic-blocks (cons (cons label t) basic-blocks))
-          (Goto label))]))
+          (Goto label))])))
 \end{lstlisting}
 \end{lstlisting}
 \end{minipage}
 \end{minipage}
 \end{center}
 \end{center}
 \fi}
 \fi}
+
 {\if\edition\pythonEd
 {\if\edition\pythonEd
 %
 %
 Here is the new version of the \code{create\_block} auxiliary function
 Here is the new version of the \code{create\_block} auxiliary function
@@ -20663,6 +20673,7 @@ class TypeCheckLgrad(TypeCheckLlambda):
 
 
 \fi}
 \fi}
 
 
+
 \clearpage
 \clearpage
 
 
 \section{Interpreting \LangCast{}}
 \section{Interpreting \LangCast{}}
@@ -20780,7 +20791,7 @@ For the first \code{vector-set!}, the proxy casts a tagged \code{1}
 from \CANYTY{} to \INTTY{}.
 from \CANYTY{} to \INTTY{}.
 }
 }
 \python{
 \python{
-  For the subscript \code{v[i]} in \code{f([v[i])} of \code{map\_inplace},
+  For the subscript \code{v[i]} in \code{f(v[i])} of \code{map\_inplace},
   the proxy casts the integer from \INTTY{} to \CANYTY{}.
   the proxy casts the integer from \INTTY{} to \CANYTY{}.
   For the subscript on the left of the assignment,
   For the subscript on the left of the assignment,
   the proxy casts the tagged value from \CANYTY{} to \INTTY{}.
   the proxy casts the tagged value from \CANYTY{} to \INTTY{}.