?? codegen.cpp
字號:
#include "SCC.H"
#include "CodeGen.H"
#include "PTNode.H"
CodeGen::CodeGen()
{
// When this class is instantiated, open the 'out.bin' file. Obviously, a
// real compiler would want the user to specify the output file name as a
// command-line argument.
out = fopen( "out.bin", "wb" );
if ( out == NULL ) {
perror( "out.bin" );
exit( 1 );
}
}
CodeGen::~CodeGen()
{
// Make sure to close the bytecode stream.
fclose( out );
}
void
CodeGen::StartGen( PTNodePtr root )
{
// Before we can start generating the code in the script, we have to
// allocate space for any variables that are used in the script. Since the
// interpreter does not support scopes, all the variables exist at the base
// of the stack. Iterate through the entire symbol table and allocate space
// for them on the stack.
//
// While we are doing this, we need to also need to set the variables
// address. This will be important later when the variable is actually
// used.
SymbolTable::iterator i = symtbl.begin();
SymbolTable::iterator end = symtbl.end();
unsigned int offset = 0;
for ( ; i != end; ++i ) {
IdentifierNodePtr var = (*i).second;
// The interpreter expects the variable offsets to be in word entries;
// therefore, we only need to increment the offset by one for each
// variable.
var->SetOffset( offset );
++offset;
// Emit a dummy push instruction. This allocates space for the variable
// on the stack.
WriteOpcode( Push_Opcode );
WriteArg( 0 );
}
// Now that space for all the variables have been allocated and their
// offsets are set, generate the script's code.
Gen( root );
// To be clean, emit pop instructions to remove the variables that we
// previously added to the stack. This will ensure that the stack starts
// and stops empty.
for ( i = symtbl.begin(); i != end; ++i )
WriteOpcode( Pop_Opcode );
}
size_t
CodeGen::Gen( PTNodePtr node )
{
// This function switches on the node's type and passes flow of control off
// to the proper internal code generator function. This helper function
// will recursively call this function for any children that it may have.
// It is useful for some of the opcode generators to know how much code has
// been generated. Store a marker here when before we generate anything so
// we can figure out how many bytes was generated for this node.
Marker start = GetCurrentMarker();
switch ( node->GetType() ) {
case Add_PTNodeType:
GenAdd( AddNodePtr( node ) );
break;
case Subtract_PTNodeType:
GenSubtract( SubtractNodePtr( node ) );
break;
case Multiply_PTNodeType:
GenMultiply( MultiplyNodePtr( node ) );
break;
case Divide_PTNodeType:
GenDivide( DivideNodePtr( node ) );
break;
case Constant_PTNodeType:
GenConstant( ConstantNodePtr( node ) );
break;
case Identifier_PTNodeType:
GenIdentifier( IdentifierNodePtr( node ) );
break;
case Assignment_PTNodeType:
GenAssignment( AssignmentNodePtr( node ) );
break;
case If_PTNodeType:
GenIf( IfNodePtr( node ) );
break;
case Statement_PTNodeType:
GenStatement( StatementNodePtr( node ) );
break;
case Block_PTNodeType:
GenBlock( BlockNodePtr( node ) );
break;
default:
// Some unknown node type! This shouldn't happen...
assert( false );
break;
}
return GetCurrentMarker() - start;
}
void
CodeGen::GenAdd( AddNodePtr add )
{
// All of the basic math nodes have the same format: Generate the code for
// the left & right hand expressions, and then emit the proper opcode
// instruction. This makes sure that the stack will contain the proper two
// values to preform the addition, subtraction, etc.
Gen( add->GetLHS() );
Gen( add->GetRHS() );
WriteOpcode( Add_Opcode );
}
void
CodeGen::GenSubtract( SubtractNodePtr subtract )
{
Gen( subtract->GetLHS() );
Gen( subtract->GetRHS() );
WriteOpcode( Subtract_Opcode );
}
void
CodeGen::GenMultiply( MultiplyNodePtr multiply )
{
Gen( multiply->GetLHS() );
Gen( multiply->GetRHS() );
WriteOpcode( Multiply_Opcode );
}
void
CodeGen::GenDivide( DivideNodePtr divide )
{
Gen( divide->GetLHS() );
Gen( divide->GetRHS() );
WriteOpcode( Divide_Opcode );
}
void
CodeGen::GenConstant( ConstantNodePtr constant )
{
// A constant simply pushes its own value onto the stack. The Push opcode
// expects that the next word in the bytecode stream is the value to push
// onto the stack.
WriteOpcode( Push_Opcode );
WriteArg( constant->GetValue() );
}
void
CodeGen::GenIdentifier( IdentifierNodePtr identifier )
{
// An identifier node simply refers that the variable is being "used". This
// means its current value should be pushed onto the stack.
WriteOpcode( Load_Opcode );
WriteArg( identifier->GetOffset() );
}
void
CodeGen::GenAssignment( AssignmentNodePtr assignment )
{
Gen( assignment->GetRHS() );
// An assigment is an expression not a statement. It needs to leave the
// result of the assignment on the stack. This allows the script writer to
// chain assignments such as: a = b = 5;
WriteOpcode( Dupe_Opcode );
// The left hand side of the assignment node will always be the variable to
// assign to. This node's code isn't emited directly. It is only used to
// fetch the proper variable offset for the store opcode.
IdentifierNodePtr var = IdentifierNodePtr( assignment->GetLHS() );
WriteOpcode( Store_Opcode );
WriteArg( var->GetOffset() );
}
void
CodeGen::GenIf( IfNodePtr ifNode )
{
// The assembly for an if-statement with an else block will look like:
//
// Perform the conditional instructions.
// If top-stack element is zero, Jump to label A.
// Perform true statements.
// Jump to label B.
// A: Preform false statements.
// B:
//
// The last label, B, will actually point to the next instruction after
// the if. An if-statement with out an else block is slightly simplier, it
// looks like:
//
// Perform the conditional instructions.
// If top-stack element is zero, Jump to label A.
// Perform true statements.
// A:
//
// Note that the first assembly block will work for both forms of an if
// statement; however, the second form is slightly more efficient for
// if-statements with out a false block (it doesn't need the "Jump to B"
// instruction).
Marker ifArg;
size_t trueSize;
// Generate the if-statement's conditional. This will leave an argument on
// the stack that can be used by the IfZero opcode.
Gen( ifNode->GetConditional() );
// Emit the IfZero opcode. If the value on the stack is false, then this
// opcode will jump to the false statements of the if. Since the size of
// the true block isn't known, save a marker so the value can be filled in
// later.
WriteOpcode( IfZero_Opcode );
ifArg = WriteArg( 0 );
// Generate all of the true statements, and save their size.
trueSize = Gen( ifNode->GetTrueStatements() );
// If this if-statement contains any false statements, then emit their code.
if ( ifNode->GetFalseStatements() != NULL ) {
Marker jumpArg;
size_t falseSize;
// Before the false statements, a relative jump is required to skip these
// false statements. This jump is considered part of the true block, and
// the true statement's size needs to be updated accordingly.
WriteOpcode( Jump_Opcode );
jumpArg = WriteArg( 0 );
trueSize += sizeof( Opcode );
trueSize += sizeof( int );
// Generate the if-statements, and update the Jump opcode's argument since
// we now know how many bytes it needs to skip.
falseSize = Gen( ifNode->GetFalseStatements() );
UpdateArg( jumpArg, falseSize );
}
// Now that the finial size of the true statement block is known, update
// IfZero's argument.
UpdateArg( ifArg, trueSize );
}
void
CodeGen::GenFor( ForNodePtr forStmt )
{
// The assembly for a for-statement looks like:
//
// Perform pre-loop expression.
// A: Perform loop conditional expression.
// Jump to label B if top-stack element is zero.
// Perform loop body statements.
// Perform loop incremental expression.
// Jump to label A.
// B:
//
// The last label, B, will actually point to the next isntruction after the
// for.
Marker loopTop;
Marker loopSize;
Marker ifArg;
Marker jumpArg;
// Generate the code for the preloop expression. Nothing special needs to
// happen here.
Gen( forStmt->GetPreLoop() );
// Save a marker to the top of the loop. This will be needed to calculate
// the size of the loop body.
loopTop = GetCurrentMarker();
// Generate the conditional part of the for-loop. Note that we have to
// generate the IfZero opcodes to actually test the conditional and jump out
// of the loop, if needed.
Gen( forStmt->GetConditional() );
WriteOpcode( IfZero_Opcode );
ifArg = WriteArg( 0 );
// Generate the code for the inner part of the function loop (this includes
// the incremental expression).
Gen( forStmt->GetLoopBody() );
Gen( forStmt->GetPostLoop() );
// Generate the code to jump back up to the conditional.
WriteOpcode( Jump_Opcode );
jumpArg = WriteArg( 0 );
// Now that the entire body of the loop has been generated, calculate the
// size of the loop body so we can update the jump values. Note that the
// jump at the end of the loop needs to go backwards (hence the negative).
loopSize = GetCurrentMarker() - loopTop;
UpdateArg( ifArg, loopSize );
UpdateArg( jumpArg, -loopSize );
}
void
CodeGen::GenStatement( StatementNodePtr stmt )
{
// A statement yields its entire work to its expression; however, this
// expression will leave a value on the stack. For example, the expression
// "4+5" will leave the answer "9" as the top-most stack element. Since
// this statemnt has been completed, it is save to pop this element off the
// stack. It is no longer needed.
//
// HOWEVER, a statement with no expression does not generate any code (since
// there is nothing to pop off). This makes it easy to support empty
// statements (most notiably for a for-loop).
if ( stmt == NULL )
return;
Gen( stmt->GetExpression() );
WriteOpcode( Pop_Opcode );
}
void
CodeGen::GenBlock( BlockNodePtr block )
{
// Block nodes do not have any code to generate. They simply iterate over
// all their children making sure that their values are generated.
NodeList::const_iterator i = block->GetChildrenBegin();
NodeList::const_iterator end = block->GetChildrenEnd();
for ( ; i != end; ++i ) {
const PTNodePtr node = *i;
Gen( node );
}
}
void
CodeGen::WriteOpcode( Opcode op )
{
// Write out opcodes as if they were an integer. If the size of the
// generate bytecode stream is an issue, then you'll want to only
// write out the smallest amount possible (probably a byte).
WriteArg( (int)op );
}
CodeGen::Marker
CodeGen::WriteArg( int arg )
{
// In case the code generator needs to update this opcode argument, return
// this argument's marker. We have to fetch the marker now before we update
// the output file since the marker is supposed to point to the beginning of
// the argument.
Marker pos = GetCurrentMarker();
// Some opcodes (such as Push) expect an argument from the bytecode stream.
// This function writes out such and argument to the output file.
fwrite( &arg, sizeof( arg ), 1, out );
return pos;
}
void
CodeGen::UpdateArg( Marker pos, int arg )
{
// The marker is really just a file position, so rewind back to the file
// position, write the new argument, and zip back to the end of the file.
fseek( out, pos, SEEK_SET );
WriteArg( arg );
fseek( out, 0, SEEK_END );
}
CodeGen::Marker
CodeGen::GetCurrentMarker()
{
// Just use the current offset in the output file as the marker.
return ftell( out );
}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -