0、单词以字母开头(保留字、标识符),通过查保留字表可以确定是哪种
1、单词以数字开头时,判断它是正整数、小数还是科学计数法表示的数
2、遇到运算符、界符直接存到相关表
3、遇到 > 时,需要判断它后面的符号是否与它构成一个整体(如 >=):如果是,就将整体存到界符/运算符表中;如果不是,就只将 > 存到表中(<、&、| 的处理类似)。
4、上图中其它符号会作为一个新单词的开始继续扫描
5、遇到空格会直接跳过寻找下一个单词。
import java.io.*;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * A small hand-written lexical analyser for a subset of C.
 *
 * <p>Usage: {@link #filter()} (or {@link #filterSource(String)}) strips the leading
 * {@code #include}/{@code #define} lines and the comments from the program text, and
 * {@link #scanner(String)} then splits the filtered text into reserved words, identifiers,
 * numbers and operators/delimiters, recording each distinct lexeme in the matching map.
 *
 * <p>Known limitation: comment markers that appear inside string literals are not
 * recognised as literal text and are treated as comments.
 */
public class Word1 {
    /** The 32 reserved words of C. A reserved word's token code is its index + 1. */
    public static String[] retainWords = new String[]{
            "auto", "break", "case", "char", "const", "continue",
            "default", "do", "double", "else", "enum", "extern",
            "float", "for", "goto", "if", "int", "long",
            "register", "return", "short", "signed", "sizeof", "static",
            "struct", "switch", "typedef", "union", "unsigned", "void",
            "volatile", "while"
    };

    /** Operators and delimiters. An entry's token code is its index + 33. */
    public static String[] operatorWords = new String[]{
            "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
            "!=", ";", "(", ")", "^", ",", "\"", "'", "#", "&",
            "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
            "}", "\\", ".", ":", "!"};

    /** Header names collected from the leading {@code #include} lines, one per line. */
    public String headFile = "";
    /** Macro bodies collected from the leading {@code #define} lines, one per line. */
    public String define = "";
    /** Reserved words found by the scanner, mapped to their token code. */
    public Map<String, Integer> remainWordMap = new LinkedHashMap<String, Integer>();
    /** Identifiers found by the scanner; every identifier is stored with code 99. */
    public Map<String, Integer> identifierMap = new LinkedHashMap<String, Integer>();
    /** Numeric literals found by the scanner; every number is stored with code "100". */
    public Map<String, String> numMap = new LinkedHashMap<String, String>();
    /** Operators and delimiters found by the scanner, mapped to their token code. */
    public Map<String, Integer> operatorMap = new LinkedHashMap<String, Integer>();
    /** Buffer holding the characters of the token currently being assembled. */
    public String strToken = "";

    /** Returns whether {@code ch} is a letter. */
    public boolean isLetter(char ch) {
        return Character.isLetter(ch);
    }

    /** Returns whether {@code ch} is a digit. */
    public boolean isDigit(char ch) {
        return Character.isDigit(ch);
    }

    /** Returns whether {@code ch} is a blank (space) character. */
    public boolean isBC(char ch) {
        return ch == ' ';
    }

    /** Appends {@code ch} to the current token buffer. */
    public void concat(char ch) {
        strToken += ch;
    }

    /** Empties the current token buffer. */
    public void clearStrToken() {
        strToken = "";
    }

    /**
     * Looks the current token up in the reserved-word table.
     *
     * @return the reserved word's code (index + 1), or -1 when the token is not reserved
     */
    public int reserve() {
        for (int i = 0; i < retainWords.length; i++) {
            if (strToken.equals(retainWords[i])) {
                return i + 1;
            }
        }
        return -1;
    }

    public String getHeadFile() {
        return headFile;
    }

    public String getDefine() {
        return define;
    }

    /**
     * Looks the current token up in the operator/delimiter table.
     *
     * @return the operator's code (index + 33), or -1 when the token is not in the table
     */
    public int isOperator() {
        for (int i = 0; i < operatorWords.length; i++) {
            if (strToken.equals(operatorWords[i])) {
                return 33 + i;
            }
        }
        return -1;
    }

    /**
     * Reads {@code d:\test.txt} and filters it with {@link #filterSource(String)}.
     *
     * @return the filtered program text
     * @throws IOException if the file cannot be opened or read
     */
    public String filter() throws IOException {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when read() fails.
        try (BufferedReader br = new BufferedReader(new FileReader("d:\\test.txt"))) {
            int ch;
            while ((ch = br.read()) != -1) {
                text.append((char) ch);
            }
        }
        return filterSource(text.toString());
    }

    /**
     * Filters program text: records and removes the leading run of {@code #include} lines
     * followed by the leading run of {@code #define} lines, removes {@code //} and
     * block comments, turns line breaks and tabs into spaces, and trims the result.
     *
     * <p>{@link #headFile} and {@link #define} are reset on every call, so calling this
     * method repeatedly does not duplicate their contents.
     *
     * @param source the raw program text; {@code null} is treated as empty text
     * @return the filtered program text
     */
    public String filterSource(String source) {
        String text = (source == null) ? "" : source;
        headFile = "";
        define = "";

        String[] lines = text.split("\n");
        int k = 0;
        int consumed = 0; // characters covered by the directive lines, including their '\n'
        while (k < lines.length && lines[k].contains("include")) {
            headFile += lines[k].replaceAll("#[ \t]*include[ \t]*", "") + "\n";
            consumed += lines[k].length() + 1;
            k++;
        }
        while (k < lines.length && lines[k].contains("define")) {
            define += lines[k].replaceAll("#[ \t]*define[ \t]*", "") + "\n";
            consumed += lines[k].length() + 1;
            k++;
        }
        // The last directive line may lack a trailing '\n', so clamp the offset.
        String body = text.substring(Math.min(consumed, text.length()));

        String withoutComments = stripComments(body);
        // Replace (rather than delete) line breaks and tabs so that tokens on adjacent
        // lines, e.g. "else\n\treturn", are not glued together.
        return withoutComments.replaceAll("\r|\n|\t", " ").trim();
    }

    /**
     * Removes line comments (up to, but not including, the line break) and
     * terminated block comments (replaced by one space). An unterminated block
     * comment is left in place.
     */
    private static String stripComments(String text) {
        int n = text.length();
        StringBuilder kept = new StringBuilder(n);
        int i = 0;
        while (i < n) {
            char c = text.charAt(i);
            boolean hasNext = i + 1 < n;
            if (c == '/' && hasNext && text.charAt(i + 1) == '/') {
                int eol = text.indexOf('\n', i + 2);
                i = (eol < 0) ? n : eol; // a comment on the last line runs to end of text
            } else if (c == '/' && hasNext && text.charAt(i + 1) == '*') {
                int close = text.indexOf("*/", i + 2);
                if (close < 0) {
                    kept.append(c);
                    i++;
                } else {
                    kept.append(' ');
                    i = close + 2;
                }
            } else {
                kept.append(c);
                i++;
            }
        }
        return kept.toString();
    }

    /**
     * Scans filtered program text and records every token in the matching map.
     * After each token the scan position is the character right after that token.
     * Whitespace and characters that start no known token are skipped.
     *
     * @param all the program text to scan; {@code null} is treated as empty text
     * @throws IOException never thrown by this implementation; kept for compatibility
     */
    public void scanner(String all) throws IOException {
        if (all == null) {
            return;
        }
        int i = 0;
        while (i < all.length()) {
            char ch = all.charAt(i);
            if (isBC(ch) || Character.isWhitespace(ch)) {
                i++;
            } else if (isIdentifierStart(ch)) {
                i = scanWord(all, i);
            } else if (isDigit(ch)) {
                i = scanNumber(all, i);
            } else {
                i = scanOperator(all, i);
            }
        }
    }

    /** Returns whether {@code ch} may start an identifier (a letter or underscore). */
    private boolean isIdentifierStart(char ch) {
        return isLetter(ch) || ch == '_';
    }

    /** Scans a reserved word or identifier starting at {@code start}; returns the next position. */
    private int scanWord(String all, int start) {
        int i = start;
        while (i < all.length()
                && (isIdentifierStart(all.charAt(i)) || isDigit(all.charAt(i)))) {
            concat(all.charAt(i));
            i++;
        }
        int code = reserve();
        if (code != -1) {
            remainWordMap.put(strToken, code);
        } else {
            identifierMap.put(strToken, 99);
        }
        clearStrToken();
        return i;
    }

    /**
     * Scans an integer, decimal or scientific-notation number starting at {@code start};
     * returns the next position. An exponent marker is only consumed when digits follow it.
     */
    private int scanNumber(String all, int start) {
        int n = all.length();
        int i = consumeDigits(all, start);
        if (i < n && all.charAt(i) == '.') { // fractional part
            concat('.');
            i = consumeDigits(all, i + 1);
        }
        if (i < n && (all.charAt(i) == 'e' || all.charAt(i) == 'E')) { // exponent, e.g. 8e+5, 8.12e9
            int j = i + 1;
            if (j < n && (all.charAt(j) == '+' || all.charAt(j) == '-')) {
                j++;
            }
            if (j < n && isDigit(all.charAt(j))) {
                for (int p = i; p < j; p++) {
                    concat(all.charAt(p));
                }
                i = consumeDigits(all, j);
            }
        }
        numMap.put(strToken, "100");
        clearStrToken();
        return i;
    }

    /** Appends the run of digits starting at {@code from} to the token; returns the next position. */
    private int consumeDigits(String all, int from) {
        int i = from;
        while (i < all.length() && isDigit(all.charAt(i))) {
            concat(all.charAt(i));
            i++;
        }
        return i;
    }

    /**
     * Scans an operator or delimiter starting at {@code start}, preferring a two-character
     * entry of {@link #operatorWords} (such as {@code >=} or {@code ||}) over a
     * one-character one. A character that is not in the table is skipped.
     * Returns the next position.
     */
    private int scanOperator(String all, int start) {
        if (start + 1 < all.length()) {
            concat(all.charAt(start));
            concat(all.charAt(start + 1));
            int pairCode = isOperator();
            if (pairCode != -1) {
                operatorMap.put(strToken, pairCode);
                clearStrToken();
                return start + 2;
            }
            clearStrToken();
        }
        concat(all.charAt(start));
        int code = isOperator();
        if (code != -1) {
            operatorMap.put(strToken, code);
        }
        clearStrToken();
        return start + 1;
    }

    /** Writes every entry of {@code table} as {@code (key,value)} to {@code path} and to stdout. */
    private static void writeTable(String path, Map<String, ?> table) throws IOException {
        try (BufferedWriter out = new BufferedWriter(new FileWriter(path))) {
            for (Map.Entry<String, ?> entry : table.entrySet()) {
                String line = "(" + entry.getKey() + "," + entry.getValue() + ")";
                out.write(line);
                out.newLine();
                System.out.println(line);
            }
        }
    }

    /**
     * Filters {@code d:\test.txt}, scans it, and writes each token table both to stdout
     * and to its own file on drive {@code d:}.
     */
    public static void main(String[] args) throws IOException {
        Word1 w = new Word1();
        // Filter once and reuse the result instead of re-reading the file for every use.
        String filtered = w.filter();
        System.out.println(filtered);
        System.out.println("包含头文件如下:");
        System.out.println(w.getHeadFile());
        System.out.println("包含宏定义如下:");
        System.out.println(w.getDefine());
        System.out.println("过滤后字符串长度:" + filtered.length());
        w.scanner(filtered);

        System.out.println("保留字如下:");
        writeTable("d:\\remainWord.txt", w.remainWordMap);
        System.out.println("界符和运算符如下:");
        writeTable("d:\\operatorWord.txt", w.operatorMap);
        System.out.println("数字如下:");
        writeTable("d:\\numWord.txt", w.numMap);
        System.out.println("标识符如下:");
        writeTable("d:\\identifierWord.txt", w.identifierMap);
    }
}
原文:https://www.cnblogs.com/cstdio1/p/12637860.html