|
@@ -1118,10 +1118,6 @@ node is number of bytes of stack space needed for variables in the
|
|
|
program. (Some of the intermediate languages will store other
|
|
|
information in that location for the purposes of communicating
|
|
|
auxiliary data from one step of the compiler to the next.)
|
|
|
-%% \marginpar{Consider mentioning PseudoX86, since I think that's what
|
|
|
-%% you actually are referring to.}
|
|
|
-%% Not here. PseudoX86 is the language with variables and
|
|
|
-%% instructions that don't obey the x86 rules. -Jeremy
|
|
|
|
|
|
\begin{figure}[tp]
|
|
|
\fbox{
|
|
@@ -1129,7 +1125,7 @@ auxiliary data from one step of the compiler to the next. )
|
|
|
\[
|
|
|
\begin{array}{lcl}
|
|
|
\Arg &::=& \INT{\Int} \mid \REG{\itm{register}}
|
|
|
- \mid \STACKLOC{\Int} \\
|
|
|
+ \mid (\key{deref}\,\itm{register}\,\Int) \\
|
|
|
\Instr &::=& (\key{addq} \; \Arg\; \Arg) \mid
|
|
|
(\key{subq} \; \Arg\; \Arg) \mid
|
|
|
(\key{negq} \; \Arg) \mid (\key{movq} \; \Arg\; \Arg) \\
|
|
@@ -3227,7 +3223,7 @@ So $0011 \mathrel{\mathrm{XOR}} 0101 = 0110$.
|
|
|
\[
|
|
|
\begin{array}{lcl}
|
|
|
\Arg &::=& \gray{\INT{\Int} \mid \REG{\itm{register}}
|
|
|
- \mid \STACKLOC{\Int}} \mid (\key{byte-reg}\; \itm{register}) \\
|
|
|
+ \mid (\key{deref}\,\itm{register}\,\Int)} \mid (\key{byte-reg}\; \itm{register}) \\
|
|
|
\itm{cc} & ::= & \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge} \\
|
|
|
\Instr &::=& \gray{(\key{addq} \; \Arg\; \Arg) \mid
|
|
|
(\key{subq} \; \Arg\; \Arg) \mid
|
|
@@ -4305,17 +4301,17 @@ back afterwards.
|
|
|
\begin{lstlisting}
|
|
|
(call-live-roots (|$x_0 \ldots x_{n-1}$|) (collect |$\itm{bytes}$|))
|
|
|
|$\Longrightarrow$|
|
|
|
- (movq (var |$x_0$|) (offset (reg |$\itm{rootstack}$|) |$0$|))
|
|
|
+ (movq (var |$x_0$|) (deref |$\itm{rootstack}$| |$0$|))
|
|
|
|$\ldots$|
|
|
|
- (movq (var |$x_{n-1}$|) (offset (reg |$\itm{rootstack}$|) |$8(n-1)$|))
|
|
|
+ (movq (var |$x_{n-1}$|) (deref |$\itm{rootstack}$| |$8(n-1)$|))
|
|
|
(addq (int |$8n$|) (reg |$\itm{rootstack}$|))
|
|
|
(movq (reg |$\itm{rootstack}$|) (reg rdi))
|
|
|
(movq (int |$\itm{bytes}$|) (reg rsi))
|
|
|
(callq collect)
|
|
|
(subq (int |$8n$|) (reg |$\itm{rootstack}$|))
|
|
|
- (movq (offset (reg |$\itm{rootstack}$|) |$0$|) (var |$x_0$|))
|
|
|
+ (movq (deref |$\itm{rootstack}$| |$0$|) (var |$x_0$|))
|
|
|
|$\ldots$|
|
|
|
- (movq (offset (reg |$\itm{rootstack}$|) |$8(n-1)$|) (var |$x_{n-1}$|))
|
|
|
+ (movq (deref |$\itm{rootstack}$| |$8(n-1)$|) (var |$x_{n-1}$|))
|
|
|
\end{lstlisting}
|
|
|
|
|
|
\noindent We simply translate \code{initialize} into a call to the
|
|
@@ -4367,39 +4363,37 @@ register operands.
|
|
|
(movq (global-value free_ptr) |$\itm{lhs}'$|)
|
|
|
(addq (int |$8(\itm{len}+1)$|) (global-value free_ptr))
|
|
|
(movq |$\itm{lhs}'$| (reg r11))
|
|
|
- (movq (int |$\itm{tag}$|) (offset (reg r11) 0))
|
|
|
+ (movq (int |$\itm{tag}$|) (deref r11 0))
|
|
|
\end{lstlisting}
|
|
|
|
|
|
The \code{vector-ref} and \code{vector-set!} forms translate into
|
|
|
-\code{movq} instructions with the appropriate \code{offset}. (The
|
|
|
+\code{movq} instructions with the appropriate \key{deref}. (The
|
|
|
plus one is to get past the tag at the beginning of the tuple
|
|
|
representation.)
|
|
|
\begin{lstlisting}
|
|
|
(assign |$\itm{lhs}$| (vector-ref |$\itm{vec}$| |$n$|))
|
|
|
|$\Longrightarrow$|
|
|
|
(movq |$\itm{vec}'$| (reg r11))
|
|
|
-(movq (offset (reg r11) |$8(n+1)$|) |$\itm{lhs}$|)
|
|
|
+(movq (deref r11 |$8(n+1)$|) |$\itm{lhs}$|)
|
|
|
|
|
|
(assign |$\itm{lhs}$| (vector-set! |$\itm{vec}$| |$n$| |$\itm{arg}$|))
|
|
|
|$\Longrightarrow$|
|
|
|
(movq |$\itm{vec}'$| (reg r11))
|
|
|
-(movq |$\itm{arg}'$| (offset (reg r11) |$8(n+1)$|))
|
|
|
+(movq |$\itm{arg}'$| (deref r11 |$8(n+1)$|))
|
|
|
(movq (int 0) |$\itm{lhs}$|)
|
|
|
\end{lstlisting}
|
|
|
The $\itm{vec}'$ and $\itm{arg}'$ are obtained by recursively
|
|
|
processing $\itm{vec}$ and $\itm{arg}$.
|
|
|
|
|
|
|
|
|
-
|
|
|
\begin{figure}[tp]
|
|
|
\fbox{
|
|
|
\begin{minipage}{0.96\textwidth}
|
|
|
\[
|
|
|
\begin{array}{lcl}
|
|
|
\Arg &::=& \gray{ \INT{\Int} \mid \REG{\itm{register}}
|
|
|
- \mid \STACKLOC{\Int} \mid (\key{byte-reg}\; \itm{register}) } \\
|
|
|
- &\mid& (\key{global-value}\; \itm{name})
|
|
|
- \mid (\key{offset}\,\Arg\,\Int) \\
|
|
|
+ \mid (\key{deref}\,\itm{register}\,\Int) \mid (\key{byte-reg}\; \itm{register}) } \\
|
|
|
+ &\mid& (\key{global-value}\; \itm{name}) \\
|
|
|
\itm{cc} & ::= & \gray{ \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge} } \\
|
|
|
\Instr &::=& \gray{(\key{addq} \; \Arg\; \Arg) \mid
|
|
|
(\key{subq} \; \Arg\; \Arg) \mid
|
|
@@ -4414,7 +4408,7 @@ processing $\itm{vec}$ and $\itm{arg}$.
|
|
|
\mid (\key{jmp} \; \itm{label})
|
|
|
\mid (\key{j}\itm{cc} \; \itm{label})
|
|
|
\mid (\key{label} \; \itm{label}) } \\
|
|
|
-x86_1 &::= & \gray{ (\key{program} \;\itm{info} \;(\key{type}\;\itm{type})\; \Instr^{+}) }
|
|
|
+x86_2 &::= & \gray{ (\key{program} \;\itm{info} \;(\key{type}\;\itm{type})\; \Instr^{+}) }
|
|
|
\end{array}
|
|
|
\]
|
|
|
\end{minipage}
|
|
@@ -4425,8 +4419,7 @@ x86_1 &::= & \gray{ (\key{program} \;\itm{info} \;(\key{type}\;\itm{type})\; \I
|
|
|
|
|
|
The syntax of the $x86_2$ language is defined in
|
|
|
Figure~\ref{fig:x86-2}. It differs from $x86_1$ just in the addition
|
|
|
-of the form for global variables and a form for dereferencing an
|
|
|
-address at a given offset.
|
|
|
+of the form for global variables.
|
|
|
|
|
|
Figure~\ref{fig:select-instr-output-gc} shows the output of the
|
|
|
\code{select-instructions} pass on the running example.
|
|
@@ -4457,9 +4450,9 @@ Figure~\ref{fig:select-instr-output-gc} shows the output of the
|
|
|
(movq (global-value free_ptr) (var tmp28644))
|
|
|
(addq (int 16) (global-value free_ptr))
|
|
|
(movq (var tmp28644) (reg r11))
|
|
|
- (movq (int 3) (offset (reg r11) 0))
|
|
|
+ (movq (int 3) (deref r11 0))
|
|
|
(movq (var tmp28644) (reg r11))
|
|
|
- (movq (int 42) (offset (reg r11) 8))
|
|
|
+ (movq (int 42) (deref r11 8))
|
|
|
|
|
|
(movq (global-value free_ptr) (var end-data28654))
|
|
|
(addq (int 16) (var end-data28654))
|
|
@@ -4467,27 +4460,27 @@ Figure~\ref{fig:select-instr-output-gc} shows the output of the
|
|
|
(setl (byte-reg al))
|
|
|
(movzbq (byte-reg al) (var lt28655))
|
|
|
(if (eq? (int 0) (var lt28655))
|
|
|
- ((movq (var tmp28644) (offset (reg r15) 0))
|
|
|
+ ((movq (var tmp28644) (deref r15 0))
|
|
|
(addq (int 8) (reg r15))
|
|
|
(movq (reg r15) (reg rdi))
|
|
|
(movq (int 16) (reg rsi))
|
|
|
(callq collect)
|
|
|
(subq (int 8) (reg r15))
|
|
|
- (movq (offset (reg r15) 0) (var tmp28644)))
|
|
|
+ (movq (deref r15 0) (var tmp28644)))
|
|
|
())
|
|
|
|
|
|
(movq (global-value free_ptr) (var tmp28645))
|
|
|
(addq (int 16) (global-value free_ptr))
|
|
|
(movq (var tmp28645) (reg r11))
|
|
|
- (movq (int 131) (offset (reg r11) 0))
|
|
|
+ (movq (int 131) (deref r11 0))
|
|
|
(movq (var tmp28645) (reg r11))
|
|
|
- (movq (var tmp28644) (offset (reg r11) 8))
|
|
|
+ (movq (var tmp28644) (deref r11 8))
|
|
|
|
|
|
(movq (var tmp28645) (reg r11))
|
|
|
- (movq (offset (reg r11) 8) (var tmp28646))
|
|
|
+ (movq (deref r11 8) (var tmp28646))
|
|
|
|
|
|
(movq (var tmp28646) (reg r11))
|
|
|
- (movq (offset (reg r11) 8) (var tmp28647))
|
|
|
+ (movq (deref r11 8) (var tmp28647))
|
|
|
|
|
|
(movq (var tmp28647) (reg rax)))
|
|
|
\end{lstlisting}
|
|
@@ -4502,7 +4495,7 @@ Figure~\ref{fig:select-instr-output-gc} shows the output of the
|
|
|
|
|
|
|
|
|
\marginpar{\scriptsize We need to show the translation to x86 and what
|
|
|
- to do about global-value and offset. (to do: this week) \\ --Jeremy}
|
|
|
+ to do about global-value. \\ --Jeremy}
|
|
|
|
|
|
\begin{figure}[tbp]
|
|
|
\begin{minipage}[t]{0.5\textwidth}
|
|
@@ -4636,13 +4629,13 @@ inside each other; they can only be defined at the top level.
|
|
|
\begin{minipage}{0.96\textwidth}
|
|
|
\[
|
|
|
\begin{array}{lcl}
|
|
|
- \Type &::=& \gray{\key{Integer} \mid \key{Boolean}
|
|
|
- \mid (\key{Vector}\;\Type^{+}) \mid \key{Void}} \mid (\Type^{*} \; \key{->}\; \Type) \\
|
|
|
- \Exp &::=& \gray{\Int \mid (\key{read}) \mid (\key{-}\;\Exp) \mid (\key{+} \; \Exp\;\Exp)} \\
|
|
|
- &\mid& \gray{\Var \mid \LET{\Var}{\Exp}{\Exp}
|
|
|
- \mid \key{\#t} \mid \key{\#f} \mid
|
|
|
+ \Type &::=& \gray{ \key{Integer} \mid \key{Boolean}
|
|
|
+ \mid (\key{Vector}\;\Type^{+}) \mid \key{Void} } \mid (\Type^{*} \; \key{->}\; \Type) \\
|
|
|
+ \Exp &::=& \gray{ \Int \mid (\key{read}) \mid (\key{-}\;\Exp) \mid (\key{+} \; \Exp\;\Exp)} \\
|
|
|
+ &\mid& \gray{ \Var \mid \LET{\Var}{\Exp}{\Exp} }\\
|
|
|
+ &\mid& \gray{ \key{\#t} \mid \key{\#f} \mid
|
|
|
(\key{and}\;\Exp\;\Exp) \mid (\key{not}\;\Exp)} \\
|
|
|
- &\mid& \gray{(\key{eq?}\;\Exp\;\Exp) \mid \IF{\Exp}{\Exp}{\Exp}} \\
|
|
|
+ &\mid& \gray{(\itm{relop}\;\Exp\;\Exp) \mid \IF{\Exp}{\Exp}{\Exp}} \\
|
|
|
&\mid& \gray{(\key{vector}\;\Exp^{+}) \mid
|
|
|
(\key{vector-ref}\;\Exp\;\Int)} \\
|
|
|
&\mid& \gray{(\key{vector-set!}\;\Exp\;\Int\;\Exp)\mid (\key{void})} \\
|
|
@@ -4799,6 +4792,36 @@ changes to our compiler, that is, do we need any new passes and/or do
|
|
|
we need to change any existing passes? Also, do we need to add new
|
|
|
kinds of AST nodes to any of the intermediate languages?
|
|
|
|
|
|
+\begin{figure}[tp]
|
|
|
+\centering
|
|
|
+\fbox{
|
|
|
+\begin{minipage}{0.96\textwidth}
|
|
|
+\[
|
|
|
+\begin{array}{lcl}
|
|
|
+ \Type &::=& \gray{ \key{Integer} \mid \key{Boolean}
|
|
|
+ \mid (\key{Vector}\;\Type^{+}) \mid \key{Void} } \mid (\Type^{*} \; \key{->}\; \Type) \\
|
|
|
+ \Exp &::=& \gray{ \Int \mid (\key{read}) \mid (\key{-}\;\Exp) \mid (\key{+} \; \Exp\;\Exp)} \\
|
|
|
+ &\mid& (\key{function-ref}\, \itm{label})
|
|
|
+ \mid \gray{ \Var \mid \LET{\Var}{\Exp}{\Exp} }\\
|
|
|
+ &\mid& \gray{ \key{\#t} \mid \key{\#f} \mid
|
|
|
+ (\key{and}\;\Exp\;\Exp) \mid (\key{not}\;\Exp)} \\
|
|
|
+ &\mid& \gray{(\itm{relop}\;\Exp\;\Exp) \mid \IF{\Exp}{\Exp}{\Exp}} \\
|
|
|
+ &\mid& \gray{(\key{vector}\;\Exp^{+}) \mid
|
|
|
+ (\key{vector-ref}\;\Exp\;\Int)} \\
|
|
|
+ &\mid& \gray{(\key{vector-set!}\;\Exp\;\Int\;\Exp)\mid (\key{void})} \\
|
|
|
+ &\mid& (\key{app}\, \Exp \; \Exp^{*}) \\
|
|
|
+ \Def &::=& (\key{define}\; (\itm{label} \; [\Var \key{:} \Type]^{*}) \key{:} \Type \; \Exp) \\
|
|
|
+ F_1 &::=& (\key{program} \; \Def^{*} \; \Exp)
|
|
|
+\end{array}
|
|
|
+\]
|
|
|
+\end{minipage}
|
|
|
+}
|
|
|
+\caption{The $F_1$ language, an extension of $R_3$
|
|
|
+ (Figure~\ref{fig:r3-syntax}).}
|
|
|
+\label{fig:f1-syntax}
|
|
|
+\end{figure}
|
|
|
+
|
|
|
+
|
|
|
To begin with, the syntax of $R_4$ is inconvenient for purposes of
|
|
|
compilation because it conflates the use of function names and local
|
|
|
variables and it conflates the application of primitive operations and
|
|
@@ -4812,11 +4835,48 @@ function references from just a symbol $f$ to \code{(function-ref
|
|
|
$f$)} and that changes function application from \code{($e_0$ $e_1$
|
|
|
$\ldots$ $e_n$)} to the explicitly tagged AST \code{(app $e_0$ $e_1$
|
|
|
$\ldots$ $e_n$)}. A good name for this pass is
|
|
|
-\code{reveal-functions}. Placing this pass after \code{uniquify} is a
|
|
|
-good idea, because it will make sure that there are no local variables
|
|
|
-and functions that share the same name. On the other hand,
|
|
|
+\code{reveal-functions} and the output language, $F_1$, is defined in
|
|
|
+Figure~\ref{fig:f1-syntax}. Placing this pass after \code{uniquify} is
|
|
|
+a good idea, because it will make sure that there are no local
|
|
|
+variables and functions that share the same name. On the other hand,
|
|
|
\code{reveal-functions} needs to come before the \code{flatten} pass
|
|
|
because \code{flatten} will help us compile \code{function-ref}.
|
|
|
+Figure~\ref{fig:c3-syntax} defines the syntax for $C_3$, the output of
|
|
|
+\key{flatten}.
|
|
|
+
|
|
|
+
|
|
|
+\begin{figure}[tp]
|
|
|
+\fbox{
|
|
|
+\begin{minipage}{0.96\textwidth}
|
|
|
+\[
|
|
|
+\begin{array}{lcl}
|
|
|
+\Arg &::=& \gray{ \Int \mid \Var \mid \key{\#t} \mid \key{\#f} }
|
|
|
+ \mid (\key{function-ref}\,\itm{label})\\
|
|
|
+\itm{relop} &::= & \gray{ \key{eq?} \mid \key{<} \mid \key{<=} \mid \key{>} \mid \key{>=} } \\
|
|
|
+\Exp &::= & \gray{ \Arg \mid (\key{read}) \mid (\key{-}\;\Arg) \mid (\key{+} \; \Arg\;\Arg)
|
|
|
+ \mid (\key{not}\;\Arg) \mid (\itm{relop}\;\Arg\;\Arg) } \\
|
|
|
+ &\mid& \gray{ (\key{vector}\, \Arg^{+})
|
|
|
+ \mid (\key{vector-ref}\, \Arg\, \Int) } \\
|
|
|
+ &\mid& \gray{ (\key{vector-set!}\,\Arg\,\Int\,\Arg) } \\
|
|
|
+ &\mid& (\key{app} \,\Arg\,\Arg^{*}) \\
|
|
|
+\Stmt &::=& \gray{ \ASSIGN{\Var}{\Exp} \mid \RETURN{\Arg} } \\
|
|
|
+ &\mid& \gray{ \IF{(\itm{relop}\, \Arg\,\Arg)}{\Stmt^{*}}{\Stmt^{*}} } \\
|
|
|
+ &\mid& \gray{ (\key{initialize}\,\itm{int}\,\itm{int}) }\\
|
|
|
+ &\mid& \gray{ \IF{(\key{collection-needed?}\,\itm{int})}{\Stmt^{*}}{\Stmt^{*}} } \\
|
|
|
+ &\mid& \gray{ (\key{collect} \,\itm{int}) }
|
|
|
+ \mid \gray{ (\key{allocate} \,\itm{int}) }\\
|
|
|
+ &\mid& \gray{ (\key{call-live-roots}\,(\Var^{*}) \,\Stmt^{*}) } \\
|
|
|
+ \Def &::=& (\key{define}\; (\itm{label} \; [\Var \key{:} \Type]^{*}) \key{:} \Type \; \Stmt^{+}) \\
|
|
|
+C_3 & ::= & (\key{program}\;(\Var^{*})\;(\key{type}\;\itm{type})\;(\key{defines}\,\Def^{*})\;\Stmt^{+})
|
|
|
+\end{array}
|
|
|
+\]
|
|
|
+\end{minipage}
|
|
|
+}
|
|
|
+\caption{The $C_3$ intermediate language, an extension of $C_2$
|
|
|
+ (Figure~\ref{fig:c2-syntax}).}
|
|
|
+\label{fig:c3-syntax}
|
|
|
+\end{figure}
|
|
|
+
|
|
|
|
|
|
Because each \code{function-ref} needs to eventually become an
|
|
|
\code{leaq} instruction, it first needs to become an assignment
|
|
@@ -4839,7 +4899,48 @@ $\Rightarrow$
|
|
|
(leaq (function-ref |$f$|) |$\itm{lhs}$|)
|
|
|
\end{lstlisting}
|
|
|
\end{minipage}
|
|
|
-\end{tabular}
|
|
|
+\end{tabular} \\
|
|
|
+%
|
|
|
+The output of \code{select-instructions} is a program in the x86$_3$
|
|
|
+language, whose syntax is defined in Figure~\ref{fig:x86-3}.
|
|
|
+
|
|
|
+
|
|
|
+\begin{figure}[tp]
|
|
|
+\fbox{
|
|
|
+\begin{minipage}{0.96\textwidth}
|
|
|
+\[
|
|
|
+\begin{array}{lcl}
|
|
|
+\Arg &::=& \gray{ \INT{\Int} \mid \REG{\itm{register}}
|
|
|
+ \mid (\key{deref}\,\itm{register}\,\Int) \mid (\key{byte-reg}\; \itm{register}) } \\
|
|
|
+ &\mid& \gray{ (\key{global-value}\; \itm{name}) } \\
|
|
|
+\itm{cc} & ::= & \gray{ \key{e} \mid \key{l} \mid \key{le} \mid \key{g} \mid \key{ge} } \\
|
|
|
+\Instr &::=& \gray{ (\key{addq} \; \Arg\; \Arg) \mid
|
|
|
+ (\key{subq} \; \Arg\; \Arg) \mid
|
|
|
+ (\key{negq} \; \Arg) \mid (\key{movq} \; \Arg\; \Arg) } \\
|
|
|
+ &\mid& \gray{ (\key{callq} \; \itm{label}) \mid
|
|
|
+ (\key{pushq}\;\Arg) \mid
|
|
|
+ (\key{popq}\;\Arg) \mid
|
|
|
+ (\key{retq}) } \\
|
|
|
+ &\mid& \gray{ (\key{xorq} \; \Arg\;\Arg)
|
|
|
+ \mid (\key{cmpq} \; \Arg\; \Arg) \mid (\key{set}\itm{cc} \; \Arg) } \\
|
|
|
+ &\mid& \gray{ (\key{movzbq}\;\Arg\;\Arg)
|
|
|
+ \mid (\key{jmp} \; \itm{label})
|
|
|
+ \mid (\key{j}\itm{cc} \; \itm{label})
|
|
|
+ \mid (\key{label} \; \itm{label}) } \\
|
|
|
+ &\mid& (\key{indirect-callq}\;\Arg ) \mid (\key{leaq}\;\Arg\;\Arg)\\
|
|
|
+\Def &::= & (\key{define} \; (\itm{label}) \;\itm{int} \;\itm{info}\; \Instr^{+})\\
|
|
|
+x86_3 &::= & (\key{program} \;\itm{info} \;(\key{type}\;\itm{type})\;
|
|
|
+ (\key{defines}\,\Def^{*}) \; \Instr^{+})
|
|
|
+\end{array}
|
|
|
+\]
|
|
|
+\end{minipage}
|
|
|
+}
|
|
|
+\caption{The x86$_3$ language (extends x86$_2$ of Figure~\ref{fig:x86-2}).}
|
|
|
+\label{fig:x86-3}
|
|
|
+\end{figure}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
Next we consider compiling function definitions. The \code{flatten}
|
|
|
pass should handle function definitions a lot like a \code{program}
|