词法分析器
1、实验目的
设计、编制并且调试一个词法分析器。
2、实验要求
2.1、待分析的简单的词法
(1)关键字:
begin、if、then、while、do、end
(2)运算符和界符:
: := + - * / < <= <> > >= = ; ( ) #
2.2、各种单词符号对应的种别码
3、实现
3.1、编写实体类Word
package lexicalAnalysis;
/**
 * One token produced by the scanner: the scanned text paired with its
 * numeric category code.
 */
public class Word {

    /** Category code of the token. */
    private int typeNum;

    /** Text of the scanned token. */
    private String word;

    /**
     * Returns the category code of this token.
     *
     * @return the category code
     */
    public int getTypeNum() {
        return this.typeNum;
    }

    /**
     * Returns the text of this token.
     *
     * @return the token text, or {@code null} if it has not been set
     */
    public String getWord() {
        return this.word;
    }

    /**
     * Stores the category code of this token.
     *
     * @param typeNum the category code
     */
    public void setTypeNum(int typeNum) {
        this.typeNum = typeNum;
    }

    /**
     * Stores the text of this token.
     *
     * @param word the token text
     */
    public void setWord(String word) {
        this.word = word;
    }
}
3.2、分析方法的实现
import lexicalAnalysis.Word;
/**
 * Hand-written scanner that splits a character buffer into {@link Word} tokens.
 *
 * <p>Category codes produced by {@link #scan()}:
 * keywords {@code begin if then while do end} = 1..6, identifier = 10,
 * unsigned integer = 11, {@code +} 13, {@code -} 14, {@code *} 15, {@code /} 16,
 * {@code :} 17, {@code :=} 18, {@code <} 20, {@code <>} 21, {@code <=} 22,
 * {@code >} 23, {@code >=} 24, {@code =} 25, {@code ;} 26, {@code (} 27,
 * {@code )} 28, line comment 30, block comment 31, end marker {@code #} 0,
 * error -1.
 */
public class CodeAnalysis {
    /** Sentinel entry that terminates the keyword table. */
    private static final String KEY_WORD_END = "waiting for your expanding";

    /**
     * Extensible keyword table. A keyword's category code is its index + 1;
     * empty entries are unused slots and never match a token.
     */
    private static final String[] RW_TAB =
            {"begin", "if", "then", "while", "do", "end", "", KEY_WORD_END};

    /** Value placed in {@code ch} when reading past the end of the input. */
    private static final char END_OF_INPUT = '\0';

    /** Characters to scan. */
    private final char[] input;

    /** Text of the token currently being assembled. */
    private final StringBuilder token = new StringBuilder();

    /** Index of the next character to read from {@code input}. */
    private int pInput = 0;

    /** Most recently read character. */
    private char ch;

    /**
     * Creates a scanner over the given characters.
     *
     * @param input characters to scan; {@code null} is treated as empty input
     */
    public CodeAnalysis(char[] input) {
        this.input = input == null ? new char[0] : input;
    }

    /**
     * Reads the next input character into {@code ch}.
     *
     * <p>The read position always advances, even past the end of the input,
     * so that a following {@link #retract()} stays balanced. Past the end,
     * {@code ch} is set to NUL instead of keeping a stale character.
     *
     * @return the character read, or NUL when the input is exhausted
     */
    public char getCh() {
        ch = pInput < input.length ? input[pInput] : END_OF_INPUT;
        pInput++;
        return ch;
    }

    /**
     * Skips blanks: while the current character is a space, tab, carriage
     * return or line feed, reads the next character.
     */
    public void getBc() {
        // Terminates at end of input because getCh() then yields NUL.
        while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
            getCh();
        }
    }

    /**
     * Appends the current character to the token being assembled.
     */
    public void conCat() {
        token.append(ch);
    }

    /**
     * Tells whether the current character is an ASCII letter.
     *
     * @return {@code true} for {@code a-z} or {@code A-Z}
     */
    public boolean letter() {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    /**
     * Tells whether the current character is an ASCII digit.
     *
     * @return {@code true} for {@code 0-9}
     */
    public boolean digit() {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Moves the read position back by one character (never below zero).
     */
    public void retract() {
        if (pInput > 0) {
            pInput--;
        }
    }

    /**
     * Converts the decimal digits of the current token to binary notation.
     *
     * @return the binary representation ({@code "0"} for zero), or an empty
     *         string when the token is empty or contains a non-digit
     */
    public String dtb() {
        if (token.length() == 0) {
            return "";
        }
        for (int i = 0; i < token.length(); i++) {
            char c = token.charAt(i);
            if (c < '0' || c > '9') {
                return "";
            }
        }
        // BigInteger avoids the silent int overflow on long digit sequences.
        return new java.math.BigInteger(token.toString()).toString(2);
    }

    /**
     * Looks the current token up in the keyword table.
     *
     * @return the keyword's category code (table index + 1), or 10 when the
     *         token is not a keyword
     */
    public int reverse() {
        String text = token.toString().trim();
        if (!text.isEmpty()) {
            for (int i = 0; i < RW_TAB.length && !RW_TAB[i].equals(KEY_WORD_END); i++) {
                if (RW_TAB[i].equals(text)) {
                    return i + 1;
                }
            }
        }
        return 10;
    }

    /**
     * Scans the next token from the input.
     *
     * <p>When the input is exhausted, an end-marker token (code 0, text
     * {@code "#"}) is returned on this and every later call, so callers that
     * loop until code 0 always terminate.
     *
     * @return the next token; code -1 marks an unrecognised character or an
     *         unterminated block comment
     */
    public Word scan() {
        token.setLength(0);
        getCh();
        getBc();
        if (atEnd()) {
            // Keep the position stable so repeated calls keep reporting the end.
            retract();
            return newWord(0, "#");
        }
        if (letter()) {
            // Identifier or keyword: a letter followed by letters and digits.
            do {
                conCat();
                getCh();
            } while (letter() || digit());
            retract();
            return newWord(reverse(), token.toString());
        }
        if (digit()) {
            // Unsigned integer: a run of digits.
            do {
                conCat();
                getCh();
            } while (digit());
            retract();
            return newWord(11, token.toString());
        }
        switch (ch) {
            case '=':
                return newWord(25, "=");
            case '+':
                return newWord(13, "+");
            case '-':
                return newWord(14, "-");
            case '*':
                return newWord(15, "*");
            case '/':
                return scanSlash();
            case ':':
                getCh();
                if (ch == '=') {
                    return newWord(18, ":=");
                }
                retract();
                return newWord(17, ":");
            case '<':
                getCh();
                if (ch == '=') {
                    return newWord(22, "<=");
                }
                if (ch == '>') {
                    return newWord(21, "<>");
                }
                retract();
                return newWord(20, "<");
            case '>':
                getCh();
                if (ch == '=') {
                    return newWord(24, ">=");
                }
                retract();
                return newWord(23, ">");
            case ';':
                return newWord(26, ";");
            case '(':
                return newWord(27, "(");
            case ')':
                return newWord(28, ")");
            case '#':
                return newWord(0, "#");
            default:
                conCat();
                return newWord(-1, "ERROR INFO: WORD = \"" + token.toString().trim() + "\" ");
        }
    }

    /** Tells whether the most recent {@link #getCh()} read past the end of the input. */
    private boolean atEnd() {
        return pInput > input.length;
    }

    /** Builds a token with the given category code and text. */
    private Word newWord(int typeNum, String text) {
        Word result = new Word();
        result.setTypeNum(typeNum);
        result.setWord(text);
        return result;
    }

    /** Scans what follows a '/': a line comment, a block comment, or the division operator. */
    private Word scanSlash() {
        getCh();
        if (ch == '/') {
            // Line comment: consume up to and including the line feed, or to end of input.
            do {
                getCh();
            } while (ch != '\n' && !atEnd());
            return newWord(30, "\\n");
        }
        if (ch == '*') {
            return scanBlockComment();
        }
        retract();
        return newWord(16, "/");
    }

    /**
     * Scans the body of a block comment whose opening has already been
     * consumed. The token text holds one literal {@code \n} marker per line
     * feed inside the comment.
     */
    private Word scanBlockComment() {
        StringBuilder lineBreaks = new StringBuilder();
        // The '*' of the opening must not double as the '*' of the closing,
        // so tracking starts only with the characters after it.
        boolean previousWasStar = false;
        while (true) {
            getCh();
            if (atEnd()) {
                return newWord(-1, "ERROR INFO: unterminated comment ");
            }
            if (previousWasStar && ch == '/') {
                return newWord(31, lineBreaks.toString());
            }
            if (ch == '\n') {
                lineBreaks.append("\\n");
            }
            previousWasStar = ch == '*';
        }
    }
}
3.3、main方法实现词法分析
先创建输入文件input.txt并写入待分析的源程序;程序读取该文件,扫描其中各单词符号的种别码,并将结果写入output.txt。
import lexicalAnalysis.Word;
import java.io.*;
import java.util.ArrayList;
import java.util.Scanner;
//词法分析
/**
 * Command-line driver for the lexical analysis: reads a source file, scans it
 * into tokens with {@link CodeAnalysis}, and writes one line per token to an
 * output file.
 */
public class Analyzer {
    /** Input file holding the source program. */
    private final File inputFile;

    /** Output file receiving the scan result. */
    private final File outputFile;

    /** Content most recently read by {@link #getContent()}. */
    private String fileContent;

    /** Tokens collected so far. */
    private ArrayList<Word> list = new ArrayList<>();

    /**
     * Creates an analyzer for the given file paths.
     *
     * @param input  path of the source file to read
     * @param output path of the file the result is written to
     */
    public Analyzer(String input, String output) {
        inputFile = new File(input);
        outputFile = new File(output);
    }

    /**
     * Reads the whole input file, terminating every line with {@code '\n'}.
     *
     * @return the file content, or an empty string if the file cannot be opened
     */
    public String getContent() {
        StringBuilder content = new StringBuilder();
        try (Scanner reader = new Scanner(inputFile)) {
            while (reader.hasNextLine()) {
                content.append(reader.nextLine()).append('\n');
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot read input file " + inputFile + ": " + e.getMessage());
        }
        fileContent = content.toString();
        return fileContent;
    }

    /**
     * Scans the given source text, appends every token (including the final
     * end marker) to the token list, and saves the list to the output file.
     *
     * @param fileContent source text to analyse; {@code null} is treated as empty
     */
    public void analyzer(String fileContent) {
        // The loop below stops only on the end-marker token (code 0). Appending
        // an explicit '#' guarantees one is present even when the source omits
        // it or the input file was missing; if the source already has one, the
        // scan stops there first and the appended marker is never reached.
        String source = (fileContent == null ? "" : fileContent) + '#';
        CodeAnalysis codeAnalysis = new CodeAnalysis(source.toCharArray());
        Word word;
        do {
            word = codeAnalysis.scan();
            list.add(word);
        } while (word.getTypeNum() != 0);
        saveResult();
    }

    /**
     * Writes every collected token to the output file, one
     * {@code (code ,text)} pair per line.
     */
    private void saveResult() {
        // FileWriter creates the file when it does not exist, so no separate
        // createNewFile() call is needed.
        try (Writer writer = new FileWriter(outputFile)) {
            for (Word word : list) {
                writer.write("(" + word.getTypeNum() + " ," + word.getWord() + ")\n");
            }
        } catch (IOException e) {
            System.err.println("Cannot write output file " + outputFile + ": " + e.getMessage());
        }
    }

    /**
     * Analyses {@code input.txt} and writes the result to {@code output.txt}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Analyzer analyzer = new Analyzer("input.txt", "output.txt");
        analyzer.analyzer(analyzer.getContent());
    }

    /**
     * Returns the token list.
     *
     * @return the list of tokens collected so far
     */
    public ArrayList<Word> getList() {
        return list;
    }

    /**
     * Replaces the token list.
     *
     * @param list the list that subsequent scans append to
     */
    public void setList(ArrayList<Word> list) {
        this.list = list;
    }
}
3.4、结果
总结
点个赞再走嘛。