|
@@ -15,15 +15,24 @@
|
|
|
\usepackage{color}
|
|
|
%\usepackage{ifthen}
|
|
|
|
|
|
-\def\edition{1}
|
|
|
-\def\racketEd{0}
|
|
|
-\def\pythonEd{1}
|
|
|
|
|
|
\definecolor{lightgray}{gray}{1}
|
|
|
\newcommand{\black}[1]{{\color{black} #1}}
|
|
|
%\newcommand{\gray}[1]{{\color{lightgray} #1}}
|
|
|
\newcommand{\gray}[1]{{\color{gray} #1}}
|
|
|
|
|
|
+\def\racketEd{0}
|
|
|
+\def\pythonEd{1}
|
|
|
+\def\edition{0}
|
|
|
+
|
|
|
+% material that is specific to the Racket edition of the book
|
|
|
+\newcommand{\racket}[1]{{\if\edition\racketEd\color{olive}{#1}\fi}}
|
|
|
+% would like a command for: \if\edition\racketEd\color{olive}
|
|
|
+% and : \fi\color{black}
|
|
|
+
|
|
|
+% material that is specific to the Python edition of the book
|
|
|
+\newcommand{\python}[1]{{\if\edition\pythonEd\color{purple}{#1}\fi}}
|
|
|
+
|
|
|
%% For multiple indices:
|
|
|
\usepackage{multind}
|
|
|
\makeindex{subject}
|
|
@@ -42,11 +51,6 @@ moredelim=[is][\color{red}]{~}{~},
|
|
|
showstringspaces=false
|
|
|
}
|
|
|
|
|
|
-% material that is specific to the Racket edition of the book
|
|
|
-\newcommand{\racket}[1]{{\color{olive}{#1}}}
|
|
|
-
|
|
|
-% material that is specific to the Python edition of the book
|
|
|
-\newcommand{\python}[1]{{\color{purple}{#1}}}
|
|
|
|
|
|
%%% Any shortcut own defined macros place here
|
|
|
%% sample of author macro:
|
|
@@ -458,10 +462,10 @@ input_int() + -8
|
|
|
\begin{minipage}{0.4\textwidth}
|
|
|
\begin{equation}
|
|
|
\begin{tikzpicture}
|
|
|
- \node[draw, circle] (plus) at (0 , 0) {\key{+}};
|
|
|
- \node[draw, circle] (read) at (-1, -1.5) {{\if\edition\racketEd\footnotesize\key{read}\fi\if\edition\pythonEd\key{input}}};
|
|
|
- \node[draw, circle] (minus) at (1 , -1.5) {$\key{-}$};
|
|
|
- \node[draw, circle] (8) at (1 , -3) {\key{8}};
|
|
|
+ \node[draw] (plus) at (0 , 0) {\key{+}};
|
|
|
+ \node[draw] (read) at (-1, -1.5) {{\if\edition\racketEd\footnotesize\key{read}\fi\if\edition\pythonEd\key{input\_int()}\fi}};
|
|
|
+ \node[draw] (minus) at (1 , -1.5) {$\key{-}$};
|
|
|
+ \node[draw] (8) at (1 , -3) {\key{8}};
|
|
|
|
|
|
\draw[->] (plus) to (read);
|
|
|
\draw[->] (plus) to (minus);
|
|
@@ -472,7 +476,7 @@ input_int() + -8
|
|
|
\end{minipage}
|
|
|
\end{center}
|
|
|
We use the standard terminology for trees to describe ASTs: each
|
|
|
-circle above is called a \emph{node}. The arrows connect a node to its
|
|
|
+rectangle above is called a \emph{node}. The arrows connect a node to its
|
|
|
\emph{children} (which are also nodes). The top-most node is the
|
|
|
\emph{root}. Every node except for the root has a \emph{parent} (the
|
|
|
node it is the child of). If a node has no children, it is a
|
|
@@ -533,28 +537,30 @@ node it is the child of). If a node has no children, it is a
|
|
|
%% In general, the Racket expression that follows the comma (splice)
|
|
|
%% can be any expression that produces an S-expression.
|
|
|
|
|
|
-We define a Racket \code{struct} for each kind of node. For this
|
|
|
+{\if\edition\racketEd\color{olive}
|
|
|
+We define a Racket \code{struct} for each kind of node. For this
|
|
|
chapter we require just two kinds of nodes: one for integer constants
|
|
|
-and one for primitive operations. The following is the \code{struct}
|
|
|
+and one for primitive operations. The following is the \code{struct}
|
|
|
definition for integer constants.
|
|
|
\begin{lstlisting}
|
|
|
(struct Int (value))
|
|
|
\end{lstlisting}
|
|
|
An integer node includes just one thing: the integer value.
|
|
|
-To create an AST node for the integer $8$, we write \code{(Int 8)}.
|
|
|
+To create an AST node for the integer $8$, we write \INT{8}.
|
|
|
\begin{lstlisting}
|
|
|
(define eight (Int 8))
|
|
|
\end{lstlisting}
|
|
|
-We say that the value created by \code{(Int 8)} is an
|
|
|
-\emph{instance} of the \code{Int} structure.
|
|
|
+We say that the value created by \INT{8} is an
|
|
|
+\emph{instance} of the
|
|
|
+\code{Int} structure.
|
|
|
|
|
|
The following is the \code{struct} definition for primitive operations.
|
|
|
\begin{lstlisting}
|
|
|
(struct Prim (op args))
|
|
|
\end{lstlisting}
|
|
|
-A primitive operation node includes an operator symbol \code{op}
|
|
|
-and a list of child \code{args}. For example, to create
|
|
|
-an AST that negates the number $8$, we write \code{(Prim '- (list eight))}.
|
|
|
+A primitive operation node includes an operator symbol \code{op} and a
|
|
|
+list of child \code{args}. For example, to create an AST that negates
|
|
|
+the number $8$, we write \code{(Prim '- (list eight))}.
|
|
|
\begin{lstlisting}
|
|
|
(define neg-eight (Prim '- (list eight)))
|
|
|
\end{lstlisting}
|
|
@@ -581,11 +587,78 @@ The reason we choose to use just one structure is that in many parts
|
|
|
of the compiler the code for the different primitive operators is the
|
|
|
same, so we might as well just write that code once, which is enabled
|
|
|
by using a single structure.
|
|
|
+\fi}
|
|
|
+
|
|
|
+{\if\edition\pythonEd\color{purple}
|
|
|
+We use a Python \code{class} for each kind of node.
|
|
|
+The following is the class definition for constants.
|
|
|
+\begin{lstlisting}
|
|
|
+class Constant:
|
|
|
+ def __init__(self, value):
|
|
|
+ self.value = value
|
|
|
+\end{lstlisting}
|
|
|
+An integer constant node includes just one thing: the integer value.
|
|
|
+To create an AST node for the integer $8$, we write \INT{8}.
|
|
|
+\begin{lstlisting}
|
|
|
+eight = Constant(8)
|
|
|
+\end{lstlisting}
|
|
|
+We say that the value created by \INT{8} is an
|
|
|
+\emph{instance} of the \code{Constant} class.
|
|
|
+
|
|
|
+The following is the class definition for unary operators.
|
|
|
+\begin{lstlisting}
|
|
|
+class UnaryOp:
|
|
|
+ def __init__(self, op, operand):
|
|
|
+ self.op = op
|
|
|
+ self.operand = operand
|
|
|
+\end{lstlisting}
|
|
|
+The specific operation is specified by the \code{op} parameter. For
|
|
|
+example, the class \code{USub} is for unary subtraction. (More unary
|
|
|
+operators are introduced in later chapters.) To create an AST that
|
|
|
+negates the number $8$, we write \NEG{\code{eight}}.
|
|
|
+\begin{lstlisting}
|
|
|
+neg_eight = UnaryOp(USub(), eight)
|
|
|
+\end{lstlisting}
|
|
|
+
|
|
|
+The call to the \code{input\_int} function is represented by the
|
|
|
+\code{Call} and \code{Name} classes.
|
|
|
+\begin{lstlisting}
|
|
|
+class Call:
|
|
|
+ def __init__(self, func, args):
|
|
|
+ self.func = func
|
|
|
+ self.args = args
|
|
|
+
|
|
|
+class Name:
|
|
|
+ def __init__(self, id):
|
|
|
+ self.id = id
|
|
|
+\end{lstlisting}
|
|
|
+To create an AST node that calls \code{input\_int}, we write
|
|
|
+\begin{lstlisting}
|
|
|
+read = Call(Name('input_int'), [])
|
|
|
+\end{lstlisting}
|
|
|
+
|
|
|
+Finally, to represent the addition in \eqref{eq:arith-prog}, we use
|
|
|
+the \code{BinOp} class for binary operators.
|
|
|
+\begin{lstlisting}
|
|
|
+class BinOp:
|
|
|
+ def __init__(self, left, op, right):
|
|
|
+ self.op = op
|
|
|
+ self.left = left
|
|
|
+ self.right = right
|
|
|
+\end{lstlisting}
|
|
|
+Similar to \code{UnaryOp}, the specific operation is specified by the
|
|
|
+\code{op} parameter, which for now is just an instance of the
|
|
|
+\code{Add} class. So to create the AST node that adds negative eight
|
|
|
+to some user input, we write the following.
|
|
|
+\begin{lstlisting}
|
|
|
+ast1_1 = BinOp(read, Add(), neg_eight)
|
|
|
+\end{lstlisting}
|
|
|
+\fi}
|
|
|
|
|
|
When compiling a program such as \eqref{eq:arith-prog}, we need to
|
|
|
know that the operation associated with the root node is addition and
|
|
|
-we need to be able to access its two children. Racket provides pattern
|
|
|
-matching to support these kinds of queries, as we see in
|
|
|
+we need to be able to access its two children. \racket{Racket}\python{Python}
|
|
|
+provides pattern matching to support these kinds of queries, as we see in
|
|
|
Section~\ref{sec:pattern-matching}.
|
|
|
|
|
|
In this book, we often write down the concrete syntax of a program
|
|
@@ -649,20 +722,17 @@ node is also an $\Exp$.
|
|
|
\begin{equation}
|
|
|
\Exp ::= \NEG{\Exp} \label{eq:arith-neg}
|
|
|
\end{equation}
|
|
|
-Symbols in typewriter font such as \key{-} and \key{read} are
|
|
|
-\emph{terminal} symbols and must literally appear in the program for
|
|
|
-the rule to be applicable.
|
|
|
+Symbols in typewriter font are \emph{terminal} symbols and must
|
|
|
+literally appear in the program for the rule to be applicable.
|
|
|
\index{subject}{terminal}
|
|
|
|
|
|
We can apply these rules to categorize the ASTs that are in the
|
|
|
\LangInt{} language. For example, by rule \eqref{eq:arith-int}
|
|
|
-\texttt{(Int 8)} is an $\Exp$, then by rule \eqref{eq:arith-neg} the
|
|
|
+\INT{8} is an $\Exp$, then by rule \eqref{eq:arith-neg} the
|
|
|
following AST is an $\Exp$.
|
|
|
\begin{center}
|
|
|
-\begin{minipage}{0.4\textwidth}
|
|
|
-\begin{lstlisting}
|
|
|
-(Prim '- (list (Int 8)))
|
|
|
-\end{lstlisting}
|
|
|
+\begin{minipage}{0.5\textwidth}
|
|
|
+\NEG{\INT{\code{8}}}
|
|
|
\end{minipage}
|
|
|
\begin{minipage}{0.25\textwidth}
|
|
|
\begin{equation}
|
|
@@ -682,24 +752,38 @@ The next grammar rule is for addition expressions:
|
|
|
\Exp ::= \ADD{\Exp}{\Exp} \label{eq:arith-add}
|
|
|
\end{equation}
|
|
|
We can now justify that the AST \eqref{eq:arith-prog} is an $\Exp$ in
|
|
|
-\LangInt{}. We know that \lstinline{(Prim 'read '())} is an $\Exp$ by rule
|
|
|
-\eqref{eq:arith-read} and we have already categorized \code{(Prim '-
|
|
|
- (list (Int 8)))} as an $\Exp$, so we apply rule \eqref{eq:arith-add}
|
|
|
+\LangInt{}. We know that \READ{} is an $\Exp$ by rule
|
|
|
+\eqref{eq:arith-read} and we have already categorized
|
|
|
+\NEG{\INT{\code{8}}} as an $\Exp$, so we apply rule \eqref{eq:arith-add}
|
|
|
to show that
|
|
|
-\begin{lstlisting}
|
|
|
-(Prim '+ (list (Prim 'read '()) (Prim '- (list (Int 8)))))
|
|
|
-\end{lstlisting}
|
|
|
+\[
|
|
|
+\ADD{\READ{}}{\NEG{\INT{\code{8}}}}
|
|
|
+\]
|
|
|
is an $\Exp$ in the \LangInt{} language.
|
|
|
|
|
|
If you have an AST for which the above rules do not apply, then the
|
|
|
-AST is not in \LangInt{}. For example, the program \code{(- (read) (+ 8))}
|
|
|
-is not in \LangInt{} because there are no rules for \code{+} with only one
|
|
|
-argument, nor for \key{-} with two arguments. Whenever we define a
|
|
|
-language with a grammar, the language only includes those programs
|
|
|
-that are justified by the rules.
|
|
|
-
|
|
|
-The last grammar rule for \LangInt{} states that there is a \code{Program}
|
|
|
-node to mark the top of the whole program:
|
|
|
+AST is not in \LangInt{}. For example, the program
|
|
|
+\racket{\code{(- (read) 8)}}
|
|
|
+\python{\code{input\_int() - 8}}
|
|
|
+is not in \LangInt{} because there are no rules for \key{-} with two arguments.
|
|
|
+Whenever we define a language with a grammar, the language only includes those
|
|
|
+programs that are justified by the rules.
|
|
|
+
|
|
|
+{\if\edition\pythonEd\color{purple}
|
|
|
+The language \LangInt{} includes a second non-terminal $\Stmt$ for statements.
|
|
|
+There is a statement for printing the value of an expression
|
|
|
+\[
|
|
|
+\Stmt{} ::= \PRINT{\Exp}
|
|
|
+\]
|
|
|
+and a statement that evaluates an expression but ignores the result.
|
|
|
+\[
|
|
|
+\Stmt{} ::= \EXPR{\Exp}
|
|
|
+\]
|
|
|
+\fi}
|
|
|
+
|
|
|
+{\if\edition\racketEd\color{olive}
|
|
|
+The last grammar rule for \LangInt{} states that there is a
|
|
|
+\code{Program} node to mark the top of the whole program:
|
|
|
\[
|
|
|
\LangInt{} ::= \PROGRAM{\code{'()}}{\Exp}
|
|
|
\]
|
|
@@ -710,6 +794,25 @@ The \code{Program} structure is defined as follows
|
|
|
where \code{body} is an expression. In later chapters, the \code{info}
|
|
|
part will be used to store auxiliary information but for now it is
|
|
|
just the empty list.
|
|
|
+\fi}
|
|
|
+
|
|
|
+{\if\edition\pythonEd\color{purple}
|
|
|
+The last grammar rule for \LangInt{} states that there is a
|
|
|
+\code{Module} node to mark the top of the whole program:
|
|
|
+\[
|
|
|
+ \LangInt{} ::= \PROGRAM{}{\Stmt^{*}}
|
|
|
+\]
|
|
|
+The asterisk symbol $*$ indicates a list of the preceding grammar item, in
|
|
|
+this case, a list of statements.
|
|
|
+%
|
|
|
+The \code{Module} class is defined as follows
|
|
|
+\begin{lstlisting}
|
|
|
+class Module:
|
|
|
+ def __init__(self, body):
|
|
|
+ self.body = body
|
|
|
+\end{lstlisting}
|
|
|
+where \code{body} is a list of statements.
|
|
|
+\fi}
|
|
|
|
|
|
It is common to have many grammar rules with the same left-hand side
|
|
|
but different right-hand sides, such as the rules for $\Exp$ in the
|
|
@@ -720,16 +823,21 @@ We collect all of the grammar rules for the abstract syntax of \LangInt{}
|
|
|
in Figure~\ref{fig:r0-syntax}. The concrete syntax for \LangInt{} is
|
|
|
defined in Figure~\ref{fig:r0-concrete-syntax}.
|
|
|
|
|
|
-The \code{read-program} function provided in \code{utilities.rkt} of
|
|
|
-the support code reads a program in from a file (the sequence of
|
|
|
-characters in the concrete syntax of Racket) and parses it into an
|
|
|
-abstract syntax tree. See the description of \code{read-program} in
|
|
|
-Appendix~\ref{appendix:utilities} for more details.
|
|
|
+\racket{The \code{read-program} function provided in
|
|
|
+ \code{utilities.rkt} of the support code reads a program in from a
|
|
|
+ file (the sequence of characters in the concrete syntax of Racket)
|
|
|
+ and parses it into an abstract syntax tree. See the description of
|
|
|
+ \code{read-program} in Appendix~\ref{appendix:utilities} for more
|
|
|
+ details.}
|
|
|
|
|
|
+\python{The \code{parse} function in Python's \code{ast} module
|
|
|
+ converts the concrete syntax (represented as a string) into an
|
|
|
+ abstract syntax tree.}
|
|
|
|
|
|
\begin{figure}[tp]
|
|
|
\fbox{
|
|
|
\begin{minipage}{0.96\textwidth}
|
|
|
+{\if\edition\racketEd\color{olive}
|
|
|
\[
|
|
|
\begin{array}{rcl}
|
|
|
\begin{array}{rcl}
|
|
@@ -738,6 +846,20 @@ Appendix~\ref{appendix:utilities} for more details.
|
|
|
\end{array}
|
|
|
\end{array}
|
|
|
\]
|
|
|
+\fi}
|
|
|
+
|
|
|
+{\if\edition\pythonEd\color{purple}
|
|
|
+\[
|
|
|
+\begin{array}{rcl}
|
|
|
+\begin{array}{rcl}
|
|
|
+ \Exp &::=& \Int \mid \key{input\_int}\LP\RP \mid \key{-}\;\Exp \mid \Exp \; \key{+} \; \Exp\\
|
|
|
+ \Stmt &::=& \key{print}\LP \Exp \RP \mid \Exp\\
|
|
|
+ \LangInt{} &::=& \Stmt^{*}
|
|
|
+\end{array}
|
|
|
+\end{array}
|
|
|
+\]
|
|
|
+\fi}
|
|
|
+
|
|
|
\end{minipage}
|
|
|
}
|
|
|
\caption{The concrete syntax of \LangInt{}.}
|
|
@@ -747,6 +869,7 @@ Appendix~\ref{appendix:utilities} for more details.
|
|
|
\begin{figure}[tp]
|
|
|
\fbox{
|
|
|
\begin{minipage}{0.96\textwidth}
|
|
|
+{\if\edition\racketEd\color{olive}
|
|
|
\[
|
|
|
\begin{array}{rcl}
|
|
|
\Exp &::=& \INT{\Int} \mid \READ{} \mid \NEG{\Exp} \\
|
|
@@ -754,6 +877,18 @@ Appendix~\ref{appendix:utilities} for more details.
|
|
|
\LangInt{} &::=& \PROGRAM{\code{'()}}{\Exp}
|
|
|
\end{array}
|
|
|
\]
|
|
|
+\fi}
|
|
|
+
|
|
|
+{\if\edition\pythonEd\color{purple}
|
|
|
+\[
|
|
|
+\begin{array}{rcl}
|
|
|
+ \Exp{} &::=& \INT{\Int} \mid \READ{} \\
|
|
|
+ &\mid& \NEG{\Exp} \mid \ADD{\Exp}{\Exp} \\
|
|
|
+\Stmt{} &::=& \PRINT{\Exp} \mid \EXPR{\Exp} \\
|
|
|
+\LangInt{} &::=& \PROGRAM{}{\Stmt^{*}}
|
|
|
+\end{array}
|
|
|
+\]
|
|
|
+\fi}
|
|
|
\end{minipage}
|
|
|
}
|
|
|
\caption{The abstract syntax of \LangInt{}.}
|