Ver Fonte

more explanation in parsing

Jeremy Siek há 2 anos atrás
pai
commit
dbd950a748
3 ficheiros alterados com 89 adições e 95 exclusões
  1. 1 1
      Makefile
  2. 4 21
      book.bib
  3. 84 73
      book.tex

+ 1 - 1
Makefile

@@ -4,7 +4,7 @@
 LATEXMK= latexmk -pdf
 
 all:
-	$(LATEXMK) book
+	$(LATEXMK) -f book
 
 cont: continuous
 continuous:

+ 4 - 21
book.bib

@@ -1,36 +1,22 @@
 @book{Tomita:1985qr,
-	address = {Norwell, MA, USA},
 	author = {Masaru Tomita},
-	date-added = {2008-12-02 14:16:33 -0700},
-	date-modified = {2008-12-02 14:16:39 -0700},
-	isbn = {0898382025},
 	publisher = {Kluwer Academic Publishers},
 	title = {Efficient Parsing for Natural Language: A Fast Algorithm for Practical Systems},
 	year = {1985}}
 
 @article{Earley:1970ly,
-	acmid = {362035},
-	address = {New York, NY, USA},
 	author = {Earley, Jay},
-	date-added = {2011-05-28 11:31:46 -0600},
-	date-modified = {2011-05-28 11:31:48 -0600},
-	doi = {http://doi.acm.org/10.1145/362007.362035},
-	issn = {0001-0782},
 	issue = {2},
 	journal = {Commun. ACM},
-	keywords = {compilers, computational complexity, context-free grammar, parsing, syntax analysis},
 	month = {February},
 	numpages = {9},
 	pages = {94--102},
 	publisher = {ACM},
 	title = {An efficient context-free parsing algorithm},
-	url = {http://doi.acm.org/10.1145/362007.362035},
 	volume = {13},
-	year = {1970},
-	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoXnA5NC1lYXJsZXkucGRmCAsYAAAAAAAAAQEAAAAAAAAAAwAAAAAAAAAAAAAAAAAAACc=},
-	Bdsk-Url-1 = {http://doi.acm.org/10.1145/362007.362035}}
+	year = {1970}}
 
-@Book{Hopcroft06:_automata,
+@book{Hopcroft06:_automata,
   author = 	 {John Hopcroft and Rajeev Motwani and Jeffrey Ullman},
   title = 	 {Introduction to Automata Theory, Languages, and Computation},
   publisher = 	 {Pearson},
@@ -38,15 +24,12 @@
 
 @techreport{Lesk:1975uq,
 	author = {M. E. Lesk and E. Schmidt},
-	date-added = {2007-08-27 13:37:27 -0600},
-	date-modified = {2009-08-25 22:28:17 -0600},
 	institution = {Bell Laboratories},
 	month = {July},
 	title = {Lex - A Lexical Analyzer Generator},
-	year = {1975},
-	Bdsk-File-1 = {YnBsaXN0MDDRAQJccmVsYXRpdmVQYXRoV2xleC5wZGYICxgAAAAAAAABAQAAAAAAAAADAAAAAAAAAAAAAAAAAAAAIA==}}
+	year = {1975}}
 
-@Misc{shinan20:_lark_docs,
+@misc{shinan20:_lark_docs,
   author = 	 {Erez Shinan},
   title = 	 {Lark Documentation},
   url = {https://lark-parser.readthedocs.io/en/latest/index.html},

+ 84 - 73
book.tex

@@ -196,6 +196,7 @@ ISBN:
 
 %\listoftables
 
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \chapter*{Preface}
 \addcontentsline{toc}{fmbm}{Preface}
@@ -247,7 +248,7 @@ concepts and algorithms used in compilers.
   the fundamental tools of compiler construction: \emph{abstract
     syntax trees} and \emph{recursive functions}. 
 {\if\edition\pythonEd
-\item In Chapter~\ref{ch:parsing-Lvar} we learn how to use the Lark
+\item In Chapter~\ref{ch:parsing} we learn how to use the Lark
   parser generator to create a parser for the language of integer
   arithmetic and local variables. We learn about the parsing
   algorithms inside Lark, including Earley and LALR(1).
@@ -307,14 +308,13 @@ programming, data structures and algorithms, and discrete
 mathematics.
 %
 At the beginning of the course, students form groups of two to four
-people.  The groups complete one chapter every two weeks, starting
-with chapter~\ref{ch:Lvar} and finishing with
-chapter~\ref{ch:Llambda}. Many chapters include a challenge problem
-that we assign to the graduate students. The last two weeks of the
+people.  The groups complete approximately one chapter every two
+weeks, starting with chapter~\ref{ch:Lvar}. The last two weeks of the
 course involve a final project in which students design and implement
 a compiler extension of their choosing.  The last few chapters can be
-used in support of these projects.  For compiler courses at
-universities on the quarter system (about ten weeks in length), we
+used in support of these projects.  Many chapters include a challenge
+problem that we assign to the graduate students. For compiler courses
+at universities on the quarter system (about ten weeks in length), we
 recommend completing the course through chapter~\ref{ch:Lvec} or
 chapter~\ref{ch:Lfun} and providing some scaffolding code to the
 students for each compiler pass.
@@ -337,7 +337,6 @@ State University, Portland State University, Rose–Hulman Institute of
 Technology, University of Freiburg, University of Massachusetts
 Lowell, and the University of Vermont.
 
-
 \begin{figure}[tp]
 \begin{tcolorbox}[colback=white]
   {\if\edition\racketEd
@@ -370,32 +369,35 @@ Lowell, and the University of Vermont.
 \fi}
 {\if\edition\pythonEd
 \begin{tikzpicture}[baseline=(current  bounding  box.center)]
-  \node (C1) at (0,1.5) {\small Ch.~\ref{ch:trees-recur} Preliminaries};
-  \node (C2) at (4,1.5) {\small Ch.~\ref{ch:Lvar} Variables};
-  \node (C3) at (8,1.5) {\small Ch.~\ref{ch:register-allocation-Lvar} Registers};
-  \node (C4) at (0,0) {\small Ch.~\ref{ch:Lif} Conditionals};
-  \node (C5) at (4,0) {\small Ch.~\ref{ch:Lvec} Tuples};
-  \node (C6) at (8,0) {\small Ch.~\ref{ch:Lfun} Functions};
-  \node (C9) at (0,-1.5) {\small Ch.~\ref{ch:Lwhile} Loops};
-  \node (C8) at (4,-1.5) {\small Ch.~\ref{ch:Ldyn} Dynamic};
+  \node (Prelim) at (0,1.5) {\small Ch.~\ref{ch:trees-recur} Preliminaries};
+  \node (Var) at (4,1.5) {\small Ch.~\ref{ch:Lvar} Variables};
+  \node (Parse) at (8,1.5) {\small Ch.~\ref{ch:parsing} Parsing};
+  \node (Reg) at (0,0) {\small Ch.~\ref{ch:register-allocation-Lvar} Registers};
+  \node (Cond) at (4,0) {\small Ch.~\ref{ch:Lif} Conditionals};
+  \node (Loop) at (8,0) {\small Ch.~\ref{ch:Lwhile} Loops};
+  \node (Fun) at (0,-1.5) {\small Ch.~\ref{ch:Lfun} Functions};
+  \node (Tuple) at (4,-1.5) {\small Ch.~\ref{ch:Lvec} Tuples};
+  \node (Dyn) at (8,-1.5) {\small Ch.~\ref{ch:Ldyn} Dynamic};
 %  \node (CO) at (0,-3) {\small Ch.~\ref{ch:Lobject} Objects};
-  \node (C7) at (8,-1.5) {\small Ch.~\ref{ch:Llambda} Lambda};
-  \node (C10) at (4,-3) {\small Ch.~\ref{ch:Lgrad} Gradual Typing};
-  \node (C11) at (8,-3) {\small Ch.~\ref{ch:Lpoly} Generics};
-
-  \path[->] (C1) edge [above] node {} (C2);
-  \path[->] (C2) edge [above] node {} (C3);
-  \path[->] (C3) edge [above] node {} (C4);
-  \path[->] (C4) edge [above] node {} (C5);
-  \path[->,style=dotted] (C5) edge [above] node {} (C6);
-  \path[->] (C5) edge [above] node {} (C7);
-  \path[->] (C6) edge [above] node {} (C7);
-  \path[->] (C4) edge [above] node {} (C8);
-  \path[->] (C4) edge [above] node {} (C9);
-  \path[->] (C7) edge [above] node {} (C10);
-  \path[->] (C8) edge [above] node {} (C10);
-%  \path[->] (C8) edge [above] node {} (CO);
-  \path[->] (C10) edge [above] node {} (C11);
+  \node (Lam) at (0,-3) {\small Ch.~\ref{ch:Llambda} Lambda};
+  \node (Gradual) at (4,-3) {\small Ch.~\ref{ch:Lgrad} Gradual Typing};
+  \node (Generic) at (8,-3) {\small Ch.~\ref{ch:Lpoly} Generics};
+
+  \path[->] (Prelim) edge [above] node {} (Var);
+  \path[->] (Var) edge [above] node {} (Reg);
+  \path[->] (Var) edge [above] node {} (Parse);
+  \path[->] (Reg) edge [above] node {} (Cond);
+  \path[->] (Cond) edge [above] node {} (Tuple);
+  \path[->,style=dotted] (Tuple) edge [above] node {} (Fun);
+  \path[->] (Cond) edge [above] node {} (Fun);
+  \path[->] (Tuple) edge [above] node {} (Lam);
+  \path[->] (Fun) edge [above] node {} (Lam);
+  \path[->] (Cond) edge [above] node {} (Dyn);
+  \path[->] (Cond) edge [above] node {} (Loop);
+  \path[->] (Lam) edge [above] node {} (Gradual);
+  \path[->] (Dyn) edge [above] node {} (Gradual);
+%  \path[->] (Dyn) edge [above] node {} (CO);
+  \path[->] (Gradual) edge [above] node {} (Generic);
 \end{tikzpicture}
 \fi}
 \end{tcolorbox}
@@ -506,9 +508,11 @@ perform.\index{subject}{concrete syntax}\index{subject}{abstract
   syntax}\index{subject}{abstract syntax
   tree}\index{subject}{AST}\index{subject}{program}\index{subject}{parse}
 The process of translating from concrete syntax to abstract syntax is
-called \emph{parsing}~\citep{Aho:2006wb}\python{ and is studied in
-  chapter~\ref{ch:parsing-Lvar}}.
-\racket{This book does not cover the theory and implementation of parsing.}%
+called \emph{parsing}\python{ and is studied in
+  chapter~\ref{ch:parsing}}.
+\racket{This book does not cover the theory and implementation of parsing.
+  We refer the readers interested in parsing to the thorough treatment
+  of parsing by \citet{Aho:2006wb}.}%
 %
 \racket{A parser is provided in the support code for translating from
   concrete to abstract syntax.}%
@@ -4090,23 +4094,23 @@ all, fast code is useless if it produces incorrect results!
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 {\if\edition\pythonEd
 \chapter{Parsing}
-\label{ch:parsing-Lvar}
+\label{ch:parsing}
 \setcounter{footnote}{0}
 \index{subject}{parsing}
 
 In this chapter we learn how to use the Lark parser
-generator~\citep{shinan20:_lark_docs} to translate the concrete syntax
+framework~\citep{shinan20:_lark_docs} to translate the concrete syntax
 of \LangInt{} (a sequence of characters) into an abstract syntax tree.
 You will then be asked to use Lark to create a parser for \LangVar{}.
-We then learn about the parsing algorithms used inside Lark, studying
-the \citet{Earley:1970ly} and LALR algorithms.
+We also describe the parsing algorithms used inside Lark, studying the
+\citet{Earley:1970ly} and LALR(1) algorithms.
 
-A parser generator takes in a specification of the concrete syntax and
-produces a parser. Even though a parser generator does most of the
-work for us, using one properly requires some knowledge.  In
-particular, we must learn about the specification languages used by
-parser generators and we must learn how to deal with ambiguity in our
-language specifications.
+A parser framework such as Lark takes in a specification of the
+concrete syntax and the input program and produces a parse tree. Even
+though a parser framework does most of the work for us, using one
+properly requires some knowledge.  In particular, we must learn about
+its specification languages and we must learn how to deal with
+ambiguity in our language specifications.
 
 The process of parsing is traditionally subdivided into two phases:
 \emph{lexical analysis} (also called scanning) and \emph{syntax
@@ -4119,16 +4123,16 @@ language. The reason for the subdivision into two phases is to enable
 the use of a faster but less powerful algorithm for lexical analysis
 and the use of a slower but more powerful algorithm for parsing.
 %
-Likewise, parser generators typical come in pairs, with separate
-generators for the lexical analyzer (or lexer for short) and for the
-parser.  A paricularly influential pair of generators were
-\texttt{lex} and \texttt{yacc}. The \texttt{lex} generator was written
-by \citet{Lesk:1975uq} at Bell Labs. The \texttt{yacc} generator was
-written by \citet{Johnson:1979qy} at AT\&T and stands for Yet Another
-Compiler Compiler.
-
-The Lark parse generator that we use in this chapter includes both a
-lexical analyzer and a parser. The next section discusses lexical
+%% Likewise, parser generators typical come in pairs, with separate
+%% generators for the lexical analyzer (or lexer for short) and for the
+%% parser.  A particularly influential pair of generators were
+%% \texttt{lex} and \texttt{yacc}. The \texttt{lex} generator was written
+%% by \citet{Lesk:1975uq} at Bell Labs. The \texttt{yacc} generator was
+%% written by \citet{Johnson:1979qy} at AT\&T and stands for Yet Another
+%% Compiler Compiler.
+%
+The Lark parser framework that we use in this chapter includes both
+lexical analyzers and parsers. The next section discusses lexical
 analysis and the remainder of the chapter discusses parsing.
 
 
@@ -4522,10 +4526,13 @@ section~\ref{sec:lalr} we learn about the LALR algorithm, which is
 more efficient but can only handle a subset of the context-free
 grammars.
 
-The Earley algorithm uses a data structure called a
-\emph{chart}\index{subject}{chart} to keep track of its progress.  The
-chart is an array with one slot for each position in the input string,
-where position $0$ is before the first character and position $n$ is
+The Earley algorithm can be viewed as an interpreter; it treats the
+grammar as the program being interpreted and it treats the concrete
+syntax of the program-to-be-parsed as its input.  The Earley algorithm
+uses a data structure called a \emph{chart}\index{subject}{chart} to
+keep track of its progress and to memoize its results. The chart is an
+array with one slot for each position in the input string, where
+position $0$ is before the first character and position $n$ is
 immediately after the last character. So the array has length $n+1$
 for an input string of length $n$. Each slot in the chart contains a
 set of \emph{dotted rules}. A dotted rule is simply a grammar rule
@@ -4553,8 +4560,8 @@ grammar in figure~\ref{fig:Lint-lark-grammar}, we place
 \begin{lstlisting}
   lang_int: . stmt_list         (0)
 \end{lstlisting}
-in slot $0$ of the chart. The algorithm then proceeds to its
-\emph{prediction} phase in which it adds more dotted rules to the
+in slot $0$ of the chart. The algorithm then proceeds with
+\emph{prediction} actions in which it adds more dotted rules to the
 chart based on which nonterminals come after a period. In the above,
 the nonterminal \code{stmt\_list} appears after a period, so we add all
 the rules for \code{stmt\_list} to slot $0$, with a period at the
@@ -4767,13 +4774,15 @@ use with even the largest of input files.
 \section{The LALR(1) Algorithm}
 \label{sec:lalr}
 
-The LALR(1) algorithm consists of a finite automata and a stack to
-record its progress in parsing the input string.  Each element of the
-stack is a pair: a state number and a grammar symbol (a terminal or
-nonterminal). The symbol characterizes the input that has been parsed
-so-far and the state number is used to remember how to proceed once
-the next symbol-worth of input has been parsed.  Each state in the
-finite automata represents where the parser stands in the parsing
+The LALR(1) algorithm can be viewed as a two-phase approach in which
+it first compiles the grammar into a state machine and then runs the
+state machine to parse the input string.  The state machine also uses
+a stack to record its progress in parsing the input string.  Each
+element of the stack is a pair: a state number and a grammar symbol (a
+terminal or nonterminal). The symbol characterizes the input that has
+been parsed so-far and the state number is used to remember how to
+proceed once the next symbol-worth of input has been parsed.  Each
+state in the machine represents where the parser stands in the parsing
 process with respect to certain grammar rules. In particular, each
 state is associated with a set of dotted rules.
 
@@ -4797,7 +4806,7 @@ rule 1 with a period after the \code{PRINT} token and before the
 \emph{item}. There are several rules that could apply next, both rule
 2 and 3, so state 1 also shows those rules with a period at the
 beginning of their right-hand sides. The edges between states indicate
-which transitions the automata should make depending on the next input
+which transitions the machine should make depending on the next input
 token. So, for example, if the next input token is \code{INT} then the
 parser will push \code{INT} and the target state 4 on the stack and
 transition to state 4.  Suppose we are now at the end of the input. In
@@ -10155,7 +10164,7 @@ arguments may not be used at all. For example, consider the case for
 the constant \TRUE{} in \code{explicate\_pred}, in which we discard the
 \code{els} continuation.
 %
- {\if\edition\racketEd
+{\if\edition\racketEd
 The following example program falls into this
 case, and it creates two unused blocks.       
 \begin{center}
@@ -10277,11 +10286,12 @@ return a \code{Goto} to the new label.
       [else
         (let ([label (gensym 'block)])
           (set! basic-blocks (cons (cons label t) basic-blocks))
-          (Goto label))]))
+          (Goto label))])))
 \end{lstlisting}
 \end{minipage}
 \end{center}
 \fi}
+
 {\if\edition\pythonEd
 %
 Here is the new version of the \code{create\_block} auxiliary function
@@ -20663,6 +20673,7 @@ class TypeCheckLgrad(TypeCheckLlambda):
 
 \fi}
 
+
 \clearpage
 
 \section{Interpreting \LangCast{}}
@@ -20780,7 +20791,7 @@ For the first \code{vector-set!}, the proxy casts a tagged \code{1}
 from \CANYTY{} to \INTTY{}.
 }
 \python{
-  For the subscript \code{v[i]} in \code{f([v[i])} of \code{map\_inplace},
+  For the subscript \code{v[i]} in \code{f(v[i])} of \code{map\_inplace},
   the proxy casts the integer from \INTTY{} to \CANYTY{}.
   For the subscript on the left of the assignment,
   the proxy casts the tagged value from \CANYTY{} to \INTTY{}.