?? c2asm.cpp
字號:
/*********************************************************************************************
* *
* Program Name:C2ASM,Ver 1.0,9th July,2003. *
* *
* Category:Cross Compiler(From "C" to "Assembly Language"). *
* *
* Input:c_code.txt(Place the "C" code you want to convert into "ASSEMBLY" Language) *
* *
* Output:asm_code.txt("ASSEMBLY" code for INPUT "C" program) *
* *
* Requirements:Microsoft Windows(95/98/Me/2000/XP)/DOS. *
* Microsoft Visual C++(Ver 6.0)/Microsoft Visual C++ .NET *
* Microsoft MASM(Ver 6.11) *
* *
* NOTE:All Names Mentioned in "Requirements" Section are Registered trade marks of *
* Microsoft Corporation. *
* *
* Programmed By:Muhammad Owais Khan Afridi. *
* BS(Computer Science),Department Of Computer Science, *
* Karachi University,Pakistan. *
* *
* http://www.csku.edu.pk *
* *
* Notes for Readers: *
* 1)Be sure to read the documentation supplied with this code.It will clear up much confusion. *
* 2)The reader should know programming in C and Assembly,and the use of <vector> and <stack> *
* from the STL(C++),to understand this code. *
* 3)This program doesn't use any Object Code Optimization or Register Allocation Algorithms. *
* 4)It converts STANDARD "C".See the Sample Programs supplied to get some idea. *
* 5)It doesn't convert all "C" statements (e.g. Switch-Case).It uses a subset of the "C" *
* Language.See the GRAMMAR and TOKEN SET of the Language in the Documentation supplied. *
* *
* Disclaimer: *
* The Author of this PROGRAM shall not be liable in any event for incidental or *
* consequential damages in connection with,or arising out of,the furnishing, *
* performance,or use of this program. *
*********************************************************************************************/
#pragma warning (disable:4786)
#include<conio.h>
#include<string>
#include<iostream>
#include<fstream>
#include<vector>
#include<stack>
using namespace std;
//Size (in chars,terminating '\0' included) of the buffer in which "lexemer" collects
//one lexeme.
#define LEXEME_SIZE 30
#define FILE_READ "c_code.txt" //The INPUT "C" file,which is to be converted into
//Assembly.
#define FILE_WRITE "asm_code.txt" //The OUTPUT "ASSEMBLY" file for the input "C" file
//Globals Used By Lexical Box,Syntax Box,Code Generator.
vector<long> number_long; //Holds the Long Constants occurring in the Program
vector<long> number_int; //Holds the Int Constants occurring in the Program
//////////////////////////////
//
//Globals Used By Lexical Box.
//
/////////////////////////////
//This Vector holds the IDENTIFIER(s) seen so far,for the Lexical Box ONLY!
//A Token of class "id" stores the position of its name in this Vector.
vector<string> lex_identifier;
//Number of entries in the keyword array below.
#define TOTAL_KEYWORDS 10
string keywrd[TOTAL_KEYWORDS]={"void","main","int","long","if","else","for","while","do","return"};
//This Vector is filled from the above Array at the start of main().
vector<string> keywords;
//A Token as produced by the Lexical Box:a Class name plus a Value.
struct to
{
    string clas; //Class part of the Token(for example "id","relop","dt")
    int index;   //Value part of the Token
};
//Scratch Token:it is filled in and then PUSHed onto the "tokens" Vector.
to tok;
//Holds ALL Tokens Generated by the Lexical Box.
vector<to> tokens;
///////////////////////////////////
//
//Lexical Box Function Declarations
//
///////////////////////////////////
//Lexemer breaks the input stream into LEXEMES using the D.F.A approach discussed in
//Ullman's Compiler Construction Book.Each call reads ONE lexeme,starting at the current
//position of "input",and passes it to "tokenizer".
void lexemer(ifstream& input,int &line);
//If the D.F.A we are following FAILS to recognize the input,some other D.F.A has to be
//tried."lexemer_fail" provides this functionality:it moves "input" back to
//"lexeme_begining",stores the starting state of the next D.F.A in "starting_state"
//(0->3->14->16->18->100) and returns that state.
int lexemer_fail(ifstream& input,char& faulty_character,int& line,int& starting_state,streampos& lexeme_begining);
//If a Lexeme cannot be identified as a TOKEN of Our Language,an error message has to be
//shown."lexemer_error" prints it(with the offending character and line number)and then
//terminates the program.
void lexemer_error(char& faulty_character,int &line);
//Makes the Token for a LEXEME and pushes it onto "tokens"."starting_state" is the starting
//state of the D.F.A that accepted the lexeme and selects the kind of Token to build.
void tokenizer(char* lexeme_buffer,int& starting_state);
//NOTE(review):the two functions below are defined outside this part of the file;going by
//the comments in main(),syntax_box does Parsing,Type Checking and Intermediate Code
//Generation,and code_generator emits the final code.
void syntax_box(); //Syntax Box Declaration
void code_generator(); //Code Generator Declaration
//Entry point.Runs the whole conversion in order:
// 1)loads the keyword table used to tell Keywords from Identifiers,
// 2)runs the Lexical Box over FILE_READ,which fills the "tokens" Vector,
// 3)appends an "error" Token as End Marker of the TOKEN STREAM,
// 4)calls syntax_box() and then code_generator().
//Returns 0 after the pipeline has run,1 when FILE_READ could not be opened.
int main()
{
    //Loading KEYWORDS LIST.It's used in differentiating Keywords from Identifiers.
    for(unsigned int i=0;i<TOTAL_KEYWORDS;i++)
        keywords.push_back(keywrd[i]);

    ifstream input(FILE_READ);
    //Without this check a missing input file silently produced an empty TOKEN STREAM and
    //the program still went on to report a successful conversion.
    if(!input)
    {
        cout<<"\nUnable to open the input file \""<<FILE_READ<<"\"."<<endl;
        getch();
        return(1);
    }

    char file_read='\0';
    int linecount=1;
    //Start Reading INPUT File.
    while(input.get(file_read))
    {
        if(file_read=='\n') linecount++;
        //White space only separates lexemes.'\r' is skipped as well,so that a source file
        //with CR/LF line ends does not produce an error Token for every line.
        if(file_read!='\n'&&file_read!='\r'&&file_read!='\t'&&file_read!=' ')
        {
            //Give the character back:"lexemer" wants to read the lexeme from its first
            //character.
            input.seekg(-1,ios::cur);
            lexemer(input,linecount); //Start Making Lexemes
        }//end of if
    }//end reading file
    input.close();

    //If you want to see which TOKENs are generated,remove the comment marks from the
    //following code.It'll show ALL TOKENs on the CONSOLE.
    /*
    for(unsigned int i=0;i<tokens.size();i++)
    {
    cout<<"\n";
    cout<<tokens[i].clas;
    cout<<"\t"<<tokens[i].index;
    }
    */

    //Pushing An Error Token At the END of TOKEN STREAM.
    //So that we've an "End Marker"
    tok.clas="error";
    tok.index=-10;
    tokens.push_back(tok);

    //Start Parsing,Type Checking,Intermediate Code Generation
    syntax_box();

    //If you want to see what Intermediate Code is generated,remove the comment marks from
    //the following code.It'll show ALL ATOMS on the CONSOLE.
    /* cout<<"\n\nFollowing Intermediate Code is Generated\n";
    cout<<"INDEX----DATATYPE----TABLE";
    for(unsigned int i=0;i<atoms.size();i++)
    cout<<endl<<atoms[i].op<<" type="<<atoms[i].type
    <<" arg1= "<<atoms[i].arg1.index <<" "<< atoms[i].arg1.datatype <<" "<< atoms[i].arg1.whichtable
    <<" arg2= "<<atoms[i].arg2.index <<" "<< atoms[i].arg2.datatype <<" "<< atoms[i].arg2.whichtable
    <<" result= "<<atoms[i].result.index <<" "<< atoms[i].result.datatype <<" "<< atoms[i].result.whichtable;
    */

    //Start CODE GENERATION.
    code_generator();

    cout<<"\n\n\"C2ASM\" has sucessfully converted \"C\" code(c_code.txt)"
        <<endl<<"to \"ASSEMBLY\" code(asm_code.txt)";
    cout<<"\n";
    getch(); //Keep the console open until a key is pressed
    return(0);
}
///**********************///
///**********************///
/// ///
/// LEXICAL BOX STARTED. ///
/// ///
///**********************///
///**********************///
void lexemer(ifstream& input,int& line)
{
char character_read='\0';
char lexeme_buffer[LEXEME_SIZE];
memset(lexeme_buffer,'\0',LEXEME_SIZE);
int counter=0;
int current_state=0,starting_state=0;
bool read_flag=true;
streampos lexeme_begining=input.tellg();
while(read_flag){
switch(current_state)
{
//D.F.A for Identifier And Keyword
case 0:input.get(character_read);
if(isalpha(character_read)||character_read=='_') {current_state=1;lexeme_buffer[counter]=character_read;counter++;starting_state=0;}
else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
break;
case 1:input.get(character_read);
if(isalpha(character_read)||character_read=='_'||isdigit(character_read)) {current_state=1;lexeme_buffer[counter]=character_read;counter++;}
else current_state=2;
break;
case 2:
input.seekg(-1,ios::cur);
read_flag=false;
break;
//D.F.A for Relational And Assignment Operators
case 3:input.get(character_read);
if(character_read=='=') {current_state=4;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
else if(character_read=='<') {current_state=7;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
else if(character_read=='>') {current_state=11;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
break;
case 4:input.get(character_read);
if(character_read=='=') {current_state=5;lexeme_buffer[counter]=character_read;counter++;}
else current_state=6;
break;
case 5:
read_flag=false;
break;
case 6:
input.seekg(-1,ios::cur);
read_flag=false;
break;
case 7:input.get(character_read);
if(character_read=='=') {current_state=8;lexeme_buffer[counter]=character_read;counter++;}
else if(character_read=='>') {current_state=9;lexeme_buffer[counter]=character_read;counter++;}
else current_state=10;
break;
case 8:
read_flag=false;
break;
case 9:
read_flag=false;
break;
case 10:
input.seekg(-1,ios::cur);
read_flag=false;
break;
case 11:input.get(character_read);
if(character_read=='=') {current_state=12;lexeme_buffer[counter]=character_read;counter++;}
else current_state=13;
break;
case 12:
read_flag=false;
break;
case 13:
input.seekg(-1,ios::cur);
read_flag=false;
break;
//D.F.A for Arithmetic Operators
case 14:
//MAULA ALI!
input.get(character_read);
if(character_read=='+'|| character_read=='-'||character_read=='*'||character_read=='/'||character_read=='%')
{current_state=15;lexeme_buffer[counter]=character_read;counter++;starting_state=14;}
else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
break;
case 15:
read_flag=false;
break;
//D.F.A for Punctuations
case 16:
input.get(character_read);
if(character_read=='('||character_read==')'||character_read=='{'||character_read=='}'||character_read==','||character_read==';'||character_read=='['||character_read==']')
{current_state=17;lexeme_buffer[counter]=character_read;counter++;starting_state=16;}
else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
break;
case 17:
read_flag=false;
break;
//D.F.A for LONG Numbers
case 18:
input.get(character_read);
if(isdigit(character_read)) {current_state=19;lexeme_buffer[counter]=character_read;counter++;starting_state=18;}
else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
break;
case 19:
input.get(character_read);
if(isdigit(character_read)) {current_state=19;lexeme_buffer[counter]=character_read;counter++;}
else if(character_read=='L'||character_read=='l') {current_state=20;lexeme_buffer[counter]=character_read;counter++;}
else current_state=21;
break;
case 20:
read_flag=false;
break;
case 21:
input.seekg(-1,ios::cur);
read_flag=false;
break;
//Error TOKEN
case 100:
input.get(character_read);
{current_state=100;lexeme_buffer[counter]=character_read;counter++;starting_state=100;}
read_flag=false;
break;
}//End of switch
}//End Reading One LEXEME
lexeme_buffer[counter]='\0';
//A Lexeme Is Made Sucessfully,Call Tokenizer to Make It's Coresponding TOKEN
tokenizer(lexeme_buffer,starting_state);
}
//Called by "lexemer" when the first character of a lexeme does not fit the D.F.A that is
//being tried.It selects the D.F.A to try next,in the fixed order
//  0 -> 3 -> 14 -> 16 -> 18 -> 100(Error TOKEN),
//moves "input" back to "lexeme_begining" so the lexeme is read again from its first
//character,stores the new starting state in "starting_state" and returns it.
//For any other value of "starting_state" nothing is changed and that value is returned.
//
//"faulty_character" and "line" are not used here.They are only needed by the alternative
//described below.
//
//We're generating an ERROR token for those things which are NOT part of OUR LANGUAGE.If
//you want to stop with an error message instead,make the step that follows state 18 call
//    input.seekg(lexeme_begining);lexemer_error(faulty_character,line);
//in place of selecting state 100.
int lexemer_fail(ifstream& input,char& faulty_character,int& line,int& starting_state,streampos& lexeme_begining)
{
    int next_dfa;
    if(starting_state==0)       next_dfa=3;   //Identifier/Keyword failed
    else if(starting_state==3)  next_dfa=14;  //Relational/Assignment Operator failed
    else if(starting_state==14) next_dfa=16;  //Arithmetic Operator failed
    else if(starting_state==16) next_dfa=18;  //Punctuation failed
    else if(starting_state==18) next_dfa=100; //Number failed
    else return starting_state;               //No further D.F.A to try

    input.seekg(lexeme_begining);
    starting_state=next_dfa;
    return starting_state;
}
//Reports a lexeme that is not part of Our Language and terminates the program.
//faulty_character : first character of the offending lexeme(printed in the message).
//line             : line number on which it was found(printed in the message).
//Does not return:after the message it waits for a key press and calls exit(-1).
void lexemer_error(char& faulty_character,int &line)
{
    //The second and third part of the message used to run together("...Our LanguageThe
    //Starting SYMBOL...");they are now separated by ".\n".
    cout<<"\nSome Ir-Recoverable Error Occured in LEXICAL ANALYZER!---Possibly"
        <<"\nAn Invalid Lexeme Caused it,Which is Not Part Of Our Language."
        <<"\nThe Starting SYMBOL Of Lexeme is "<<faulty_character<<" Line Number Is "<<line<<endl;
    getch(); //Keep the console open until a key is pressed
    exit(-1);
}
void tokenizer(char* lexeme_buffer,int& starting_state)
{
tok.clas=" ";
tok.index=-10;
string lexeme;
lexeme.assign(lexeme_buffer);
unsigned int j=0,k=0;
long num;
switch(starting_state)
{
//Lexeme is a KEYWORD/IDENTIFIER
case 0:
//If LEXEME is a KEYWORD
for(k=0;k<keywords.size();k++)
{
if(lexeme==keywords[k])
{
if(lexeme=="int"||lexeme=="long")
{
tok.clas="dt";
tok.index=(lexeme=="int")?0:1;
}
else{
tok.clas=lexeme;
tok.index=-10;
}
tokens.push_back(tok);
return;
}
}
//If LEXEME is an IDENTIFIER
for(j=0;j<lex_identifier.size();j++)
//If identifier is already THERE!
if(lex_identifier[j]==lexeme)
{
tok.clas="id";
tok.index=j;
tokens.push_back(tok);
return;
}
//If Identifier is NOT THERE!
if(lex_identifier.size()==0||lex_identifier.size()==j)
{
//Enter into IDENTIFIER SYMBOL TABLE!
lex_identifier.push_back(lexeme);
//Enter into TOKEN's TABLE!
tok.clas="id";
tok.index=j;
tokens.push_back(tok);
return;
}
break;
//Lexeme is a RELATIONAL/ASSIGNMENT OPERATOR
case 3:
if(lexeme=="=") //An Assignment Operator
tok.clas="assignop";
else //A Relational Operator
tok.clas="relop";
if(lexeme=="==")
tok.index=8;
else if(lexeme=="<>")
tok.index=9;
else if(lexeme==">=")
tok.index=10;
else if(lexeme=="<=")
tok.index=11;
else if(lexeme==">")
tok.index=12;
else if(lexeme=="<")
tok.index=13;
else if(lexeme=="=")
tok.index=14;
//Enter into TOKEN's TABLE!
tokens.push_back(tok);
break;
//Lexeme is an ARITHMENTIC OPERATOR
case 14:
tok.clas=(lexeme=="+" ||lexeme=="-") ? "add_sub" : "mul_div_mod";
if(lexeme=="+")
tok.index=15;
else if(lexeme=="-")
tok.index=16;
else if(lexeme=="*")
tok.index=17;
else if(lexeme=="/")
tok.index=18;
else if(lexeme=="%")
tok.index=19;
//Enter into TOKEN's TABLE!
tokens.push_back(tok);
break;
//Lexeme is a PUNCTUATION CHARACTIER
case 16:
if(lexeme=="{")
{
//Enter into TOKEN's TABLE!
tok.clas="braces_open";
tok.index=2;
tokens.push_back(tok);
}
else if(lexeme=="}")
{
//Enter into TOKEN's TABLE!
tok.clas="braces_close";
tok.index=3;
tokens.push_back(tok);
}
else if(lexeme=="(")
{
//Enter into TOKEN's TABLE!
tok.clas="parenthesis_open";
tok.index=4;
tokens.push_back(tok);
}
else if(lexeme==")")
{
//Enter into TOKEN's TABLE!
tok.clas="parenthesis_close";
tok.index=5;
tokens.push_back(tok);
}
else if(lexeme==",")
{
//Enter into TOKEN's TABLE!
tok.clas="comma";
tok.index=6;
tokens.push_back(tok);
}
else if(lexeme==";")
{
//Enter into TOKEN's TABLE!
tok.clas="semicolon";
tok.index=7;
tokens.push_back(tok);
}
else if(lexeme=="[")
{
//Enter into TOKEN's TABLE!
tok.clas="square_open";
tok.index=20;
tokens.push_back(tok);
}
else if(lexeme=="]")
{
//Enter into TOKEN's TABLE!
tok.clas="square_close";
tok.index=21;
tokens.push_back(tok);
}
break;
//Lexeme is a NUMBER,It may be an INTEGER or a LONG
case 18:
//Checking Whehter the GIVEN NUM is a LONG?
num=lexeme.find('L');
if (num==-1) num=lexeme.find('l');
//IF GIVEN NUM is a LONG!
if(num!=-1)
{
//Convert String To LONG INTEGER!...
lexeme.erase(num,1); //Remove 'L' or 'l' from the END
num=0;
const char *temp=lexeme.c_str();
num=atol(temp);
for(unsigned int m=0;m<number_long.size();m++)
//Number is already there!
if(number_long[m]==num)
{
tok.clas="long_const";
tok.index=m;
tokens.push_back(tok);
return;
}
//Number is NOT there!
if(number_long.size()==0||number_long.size()==m)
{
//Enter into NUMBER's SYMBOL TABLE!
number_long.push_back(num);
//Enter into TOKEN's TABLE!
tok.clas="long_const";
tok.index=m;
tokens.push_back(tok);
return;
}
}
//IF GIVEN NUM is an INTEGER!
else
{
int num=0;
num=atoi(lexeme_buffer);
for(unsigned int m=0;m<number_int.size();m++)
//Number is already there!
if(number_int[m]==num)
{
tok.clas="int_const";
tok.index=m;
tokens.push_back(tok);
return;
}
//Number is NOT there!
if(number_int.size()==0||number_int.size()==m)
{
//Enter into NUMBER's SYMBOL TABLE!
number_int.push_back(num);
//Enter into TOKEN's TABLE!
tok.clas="int_const";
tok.index=m;
tokens.push_back(tok);
return;
}
}
break;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -