编译原理实验--词法分析器
实验一:词法分析器的设计与实现
实验目的:
通过设计、编制、调试一个具体的词法分析程序,加深对词法分析原理的理解,并掌握在对程序设计语言源程序进行扫描的过程中将其分解为各类单词的词法分析方法。
编制一个读单词过程,从输入的源程序中,识别出各个具有独立意义的单词,即关键字、标识符、常数、算符、界符五大类。并依次输出各个单词的内部编码及单词符号自身值。(遇到错误时可显示“Error”,然后跳过错误部分继续显示)
实验内容:
给定一个C语言的子集 ,如下:
关键字(小写)
main if else int return void while
算符或界符:
= + - * / < <= > >= == != ; : , { } [ ] ( )
标识符是字母开头,后面字母和数字组合
数值指无符号常数
空格一般用来分隔标识符、数值、专用符号和关键字
实验要求:
- 请给出自己单词符号的种别码表
单词符号 | 种别码 | 单词符号 | 种别码
main | 1 | <= | 15
if | 2 | > | 16
int | 3 | >= | 17
return | 4 | = | 18
void | 5 | == | 19
while | 6 | != | 20
letter(letter|digit)* | 7 | ; | 21
digit digit* | 8 | { | 22
+ | 9 | } | 23
- | 10 | ( | 24
* | 11 | ) | 25
/ | 12 | [ | 26
: | 13 | ] | 27
< | 14 | # | 0
- 给出标识符ID和数值NUM的正规式定义
ID:letter(letter|digit)*
NUM:digit digit*
- 给出该语言的子集对应的状态转换图
- (我不放了总得自己干点什么吧嘻嘻)
- 词法分析器的功能
输入:所给文法的源程序
输出:二元组(token 或sum ,syn)构成的序列。其中
syn 为单词种别码
token 为存放单词自身字符串
sum 为整型常数
- 给出总体及各主要程序段的算法流程
把源程序放入一个数组中,用token数组取词;
进行匹配查找和错误处理,最后输出。
给出完整可执行程序的程序代码(有关键变量及关键语句的注释)
/* @author: yin */
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>

/*
 * Lexical analyzer for a small C subset.
 *
 * Token codes (syn):
 *   main 1   if 2   int 3   return 4   void 5   while 6
 *   identifier 7    unsigned integer 8
 *   +  9   - 10   * 11   / 12   : 13
 *   < 14   <= 15  > 16   >= 17  = 18   == 19   != 20
 *   ; 21   { 22   } 23   ( 24   ) 25   [ 26    ] 27
 *   # 0 (end of input)          anything else -1 (error)
 *
 * NOTE(review): the assignment text also lists the keyword `else` and the
 * delimiter `,`, but the code table assigns them no code. They are therefore
 * reported as an identifier (7) and an error (-1) respectively -- confirm
 * whether codes should be added.
 */

// Capacity of the source buffer `s`, including the terminating '#'.
const int kSourceCap = 4096;
// Capacity of the lexeme buffer `token`, including the terminating '\0'.
const int kTokenCap = 64;

int sum;   // value of the most recent integer constant (valid when syn == 8)
int syn;   // code of the most recent token
int p;     // index of the next unread character in `s`
int m;     // number of characters stored in `token`
int n;     // scratch index used for the keyword lookup
char ch;   // character that started the most recent token
char token[kTokenCap];  // spelling of the most recent token
char s[kSourceCap];     // whole source text, terminated by '#'

// Keyword table; the code of tab[i] is i + 1.
const char* tab[6] = {"main", "if", "int", "return", "void", "while"};

// Returns the next unread character without consuming it, or '\0' when `p`
// is outside the buffer.
static char peek_char() {
    return (p >= 0 && p < kSourceCap) ? s[p] : '\0';
}

// Appends `c` to `token`. Characters that do not fit are dropped so the
// buffer always stays NUL-terminated (over-long identifiers are truncated).
static void append_to_token(char c) {
    if (m < kTokenCap - 1) token[m++] = c;
}

// Scans an operator that is either the single character in `ch` or that
// character followed by '=' (e.g. "<" / "<=").
static void scan_with_optional_eq(int single_code, int with_eq_code) {
    append_to_token(ch);
    syn = single_code;
    if (peek_char() == '=') {
        append_to_token(s[p++]);
        syn = with_eq_code;
    }
}

// Returns the code of a one-character operator/delimiter, or -1 if `c` is
// not one of them.
static int single_char_code(char c) {
    switch (c) {
        case '+': return 9;
        case '-': return 10;
        case '*': return 11;
        case '/': return 12;
        case ':': return 13;
        case ';': return 21;
        case '{': return 22;
        case '}': return 23;
        case '(': return 24;
        case ')': return 25;
        case '[': return 26;
        case ']': return 27;
        case '#': return 0;
        default:  return -1;
    }
}

/*
 * Reads the next token from `s` starting at index `p`.
 *
 * On return `syn` holds the token code, `token` its spelling (empty for
 * integers and errors), `sum` the value if the token is an integer constant,
 * and `p` points at the first character after the token.
 * A NUL character is treated like the '#' end marker (syn = 0) and is not
 * consumed, so repeated calls at the end of the buffer are safe.
 *
 * Always returns 0; results are delivered through the globals.
 */
int scanner() {
    std::memset(token, 0, sizeof(token));
    m = 0;
    n = 0;

    // Skip blanks, tabs and line breaks between tokens.
    while (std::isspace(static_cast<unsigned char>(peek_char()))) ++p;

    if (peek_char() == '\0') {
        syn = 0;
        return 0;
    }
    ch = s[p++];

    // <cctype> functions require a value representable as unsigned char.
    const unsigned char first = static_cast<unsigned char>(ch);

    if (std::isalpha(first)) {
        // Identifier or keyword: letter (letter | digit)*
        append_to_token(ch);
        while (std::isalnum(static_cast<unsigned char>(peek_char()))) {
            append_to_token(s[p++]);
        }
        syn = 7;
        for (n = 0; n < 6; n++) {
            if (std::strcmp(token, tab[n]) == 0) syn = n + 1;
        }
    } else if (std::isdigit(first)) {
        // Unsigned integer: digit digit*; saturates at INT_MAX instead of
        // overflowing.
        sum = ch - '0';
        while (std::isdigit(static_cast<unsigned char>(peek_char()))) {
            const int digit = s[p++] - '0';
            if (sum > (INT_MAX - digit) / 10) {
                sum = INT_MAX;
            } else {
                sum = sum * 10 + digit;
            }
        }
        syn = 8;
    } else {
        switch (ch) {
            case '=': scan_with_optional_eq(18, 19); break;
            case '<': scan_with_optional_eq(14, 15); break;
            case '>': scan_with_optional_eq(16, 17); break;
            case '!':
                // '!' is only valid as the first half of "!=".
                if (peek_char() == '=') {
                    append_to_token(ch);
                    append_to_token(s[p++]);
                    syn = 20;
                } else {
                    syn = -1;
                }
                break;
            default:
                syn = single_char_code(ch);
                if (syn != -1) append_to_token(ch);
                break;
        }
    }
    return 0;
}

/*
 * Reads source text from standard input up to and including the first '#',
 * then prints one "|code,value|" line per token until the end marker.
 * If the input ends (EOF) or the buffer fills up before a '#' is seen, a '#'
 * is appended so scanning always terminates.
 */
int main() {
    std::cout << "Please input code and end with character '#':" << std::endl;

    p = 0;
    bool terminated = false;
    int c;  // int, not char, so EOF can be told apart from real characters
    while (p < kSourceCap - 1 && (c = std::getchar()) != EOF) {
        s[p++] = static_cast<char>(c);
        if (c == '#') {
            terminated = true;
            break;
        }
    }
    if (!terminated) s[p++] = '#';

    p = 0;
    do {
        scanner();
        switch (syn) {
            case 8:
                std::cout << '|' << std::setw(2) << syn << ',' << std::setw(5)
                          << sum << "|" << '\n';
                break;
            case -1:
                std::cout << '|' << syn << ',' << "error" << "|" << '\n';
                break;
            default:
                std::cout << '|' << std::setw(2) << syn << ',' << std::setw(5)
                          << token << "|" << '\n';
        }
    } while (syn != 0);

    return 0;
}