Python实现SLR(1)语法分析器
实验课前一天晚上肝了个SLR语法分析器,当时还发朋友圈语法分析器和我晚上总得走一个,从第二天状态来看,应该是我们俩一起走了(笑
- 编写的时间比较仓促,所以代码有些地方实现得不是很好,存在一些问题,以后有时间的话再来修补一下。比如对两个项目集进行比较时效率比较低;求first集和follow集时,在连续多个非终结符都能推导出ε的情况下可能会有bug。不过我在文法定义中特意绕开了ε,勉强能用。
- 为了方便代码阅读,加入了许多注释后的打印语句,将这些打印语句取消注释后运行,能够告诉你当前这段代码所做的事。
- 在编写过程中,尽量对模块进行封装,把逻辑和文法定义分开,做到文法可以方便地修改,但时间过于仓促可能还是有点小问题。当文法修改时,需要同步修改以下内容:
getCol函数(该函数将终结符和非终结符映射到action和goto表中相应的列),initProduction函数(该函数定义了文法产生式(拓广文法),在本文中有28个产生式),source(输入单词序列),varset(非终结符集合),terminalset(终结符集合)。
SLR(1)分析流程
- 输入文法
- 求first集
- 求follow集
- 构造LR(0)项目集DFA
- 构造Action和Goto
- 按照Action和Goto进行分析
完整代码附在文章末尾
1.主要数据结构定义和基础函数:
基础函数
-
isVariable函数判断是不是非终结符
-
isTerminal函数判断是不是终结符
-
transf(production_set, var)函数 production_set为一个LR(0)项目,尝试通过var(终结符或非终结符)进行转移
-
isSameStatus(status1, status2)函数:判断status1和status2是不是两个相同的LR(0)项目
-
isInPointset(production_set, pointset):#用来检验production_set是不是已经存在的point ,如果存在就把point返回(生成DFA时用到)
数据结构
- 产生式采用类来存储,left和right均为list,number为产生式编号
- GraphPoint存储DFA转移,transfer为有向边集合,集合中的一个元素由var(终结符或非终结符),和另一个GraphPoint组成
class Production:
    """One grammar production ``left -> right`` together with its number.

    Instances are plain records: the constructor stores its three arguments
    unchanged and performs no validation or copying.
    """

    def __init__(self, left, right, number):
        # left: list of left-hand-side symbols (initProduction always passes
        # a one-element list such as ["A"]).
        self.left = left
        # right: list of right-hand-side symbols, in order.
        self.right = right
        # number: the production's number as assigned by the grammar author.
        self.number = number

    def __repr__(self):
        # Debugging aid only; equality and hashing are deliberately left as
        # the default identity-based behaviour.
        return "Production(%r, %r, %r)" % (self.left, self.right, self.number)
class GraphPoint:
    """A node of the DFA together with its outgoing edges.

    Each outgoing edge is stored in ``transfer`` as a two-element list
    ``[var, graphPoint]``: the grammar symbol labelling the edge and the
    node the edge leads to.
    """

    def __init__(self, begin_production, id):
        # status: whatever the caller passes as ``begin_production``
        # (presumably the LR(0) items of this state -- confirm with callers).
        self.status = begin_production
        # transfer: outgoing edges, filled in through add_transfer().
        self.transfer = []
        # id: caller-chosen identifier of this node. The parameter shadows
        # the ``id`` builtin, but renaming it would break keyword callers.
        self.id = id

    def add_transfer(self, var, graphPoint):
        """Append an outgoing edge labelled ``var`` that leads to ``graphPoint``."""
        self.transfer.append([var, graphPoint])

    def __repr__(self):
        # Only the edge count is shown: printing the targets themselves
        # would recurse forever on a cyclic graph.
        return "GraphPoint(id=%r, transfers=%d)" % (self.id, len(self.transfer))
2.文法定义
1.分析目标代码:int lexicalanalysis(){ float a; int b; a=1.1; b=2; while(b<100){ b=b+1; a=a+3;}; if(a>5) {b=b-1;} else {b=b+1;}}
2.语法分析器输入为目标代码的词法分析器输出的单词序列
# Token sequence fed to the parser: the lexer output for the sample program
#   int lexicalanalysis(){ float a; int b; a=1.1; b=2;
#       while(b<100){ b=b+1; a=a+3;}; if(a>5) {b=b-1;} else {b=b+1;}}
# Each entry is [token code, lexeme, description]. The description strings
# keep their leading space exactly as the lexer printed them.
source = [[5, "int", " 关键字"], [1, "lexicalanalysis", " 标识符"], [13, "(", " 左括号"], [14, ")", " 右括号"], [20, "{", " 左大括号"],
          [4, "float", " 关键字"], [1, "a", " 标识符"], [15, ";", " 分号"], [5, "int", " 关键字"], [1, "b", " 标识符"],
          [15, ";", " 分号"], [1, "a", " 标识符"], [12, "=", " 赋值号"], [3, "1.1", " 浮点数"], [15, ";", " 分号"], [1, "b", " 标识符"],
          [12, "=", " 赋值号"], [2, "2", " 整数"], [15, ";", " 分号"], [8, "while", " 关键字"], [13, "(", " 左括号"],
          [1, "b", " 标识符"], [17, "<", " 小于号"], [2, "100", " 整数"], [14, ")", " 右括号"], [20, "{", " 左大括号"],
          # The "+" description below used to read " 加 号" (stray inner
          # space); it now matches every other "+" token.
          [1, "b", " 标识符"], [12, "=", " 赋值号"], [1, "b", " 标识符"], [9, "+", " 加号"], [2, "1", " 整数"], [15, ";", " 分号"],
          [1, "a", " 标识符"], [12, "=", " 赋值号"], [1, "a", " 标识符"], [9, "+", " 加号"], [2, "3", " 整数"], [15, ";", " 分号"],
          [21, "}", " 右大括号"], [15, ";", " 分号"], [6, "if", " 关键字"], [13, "(", " 左括号"], [1, "a", " 标识符"],
          [16, ">", " 大于号"], [2, "5", " 整数"], [14, ")", " 右括号"], [20, "{", " 左大括号"], [1, "b", " 标识符"],
          [12, "=", " 赋值号"], [1, "b", " 标识符"], [10, "-", " 减号"], [2, "1", " 整数"], [15, ";", " 分号"], [21, "}", " 右大括号"],
          [7, "else", " 关键字"], [20, "{", " 左大括号"], [1, "b", " 标识符"], [12, "=", " 赋值号"], [1, "b", " 标识符"],
          [9, "+", " 加号"], [2, "1", " 整数"], [15, ";", " 分号"], [21, "}", " 右大括号"], [21, "}", " 右大括号"]]
3.文法定义:拓广文法的产生式编号为0~27(按编号计共28个;注意下面的代码中没有17号产生式,实际定义了27个),0号产生式是为保证分析器只有一个接受状态而拓广的产生式。
def initProduction():
    """Build and return the production list of the augmented grammar.

    Returns:
        A new list of ``Production`` objects, in the order of the table
        below. Every call creates fresh ``left``/``right`` lists, so callers
        may mutate the result freely.
    """
    # One row per production: (left symbol, right-hand side, number).
    # Production 0 is the augmenting production A1 -> A.
    # NOTE(review): the numbering jumps from 16 to 18, so number 17 is never
    # defined and a production's number differs from its list index from
    # that point on. The numbers are kept as they were -- confirm whether
    # the table builder indexes by ``number`` or by list position.
    grammar = (
        ("A1", ("A",), 0),
        ("A", ("E", "I", "(", ")", "{", "D", "}"), 1),
        ("E", ("int",), 2),
        ("E", ("float",), 3),
        ("D", ("D", ";", "B"), 4),
        ("B", ("F",), 5),
        ("B", ("G",), 6),
        ("B", ("M",), 7),
        ("F", ("E", "I"), 8),
        ("G", ("I", "=", "P"), 9),
        ("P", ("K",), 10),
        ("P", ("K", "+", "P"), 11),
        ("P", ("K", "-", "P"), 12),
        ("I", ("id",), 13),
        ("K", ("I",), 14),
        ("K", ("number",), 15),
        ("K", ("floating",), 16),
        ("M", ("while", "(", "T", ")", "{", "D", ";", "}"), 18),
        ("N", ("if", "(", "T", ")", "{", "D", ";", "}",
               "else", "{", "D", ";", "}"), 19),
        ("T", ("K", "L", "K"), 20),
        ("L", (">",), 21),
        ("L", ("<",), 22),
        ("L", (">=",), 23),
        ("L", ("<=",), 24),
        ("L", ("==",), 25),
        ("D", ("B",), 26),
        ("B", ("N",), 27),
    )
    # Production stores left and right as lists, so wrap/convert each row.
    return [Production([left], list(right), number)
            for left, right, number in grammar]
3.求First集

根据此算法即可求解first集,第8,9步可以采用递归的方式进行求解。
def getFirst(production_list, varset, terminalset):
    """Compute the FIRST set of every grammar symbol.

    Args:
        production_list: the grammar's productions. It is not forwarded to
            getFirstForVar, whose visible part iterates a module-level
            ``production_list``; both should refer to the same grammar.
        varset: iterable of non-terminal symbols.
        terminalset: iterable of terminal symbols.

    Returns:
        A dict mapping each symbol to a set. A terminal maps to the set
        containing only itself; a non-terminal starts empty and is filled in
        by getFirstForVar.
    """
    first_dic = {}
    # done[symbol] is 1 once FIRST(symbol) needs no more work and 0
    # otherwise. getFirstForVar compares these flags against 0 and 1, so
    # they stay ints.
    done = {}
    for var in varset:
        first_dic[var] = set()
        done[var] = 0
    # The FIRST set of a terminal is the terminal itself.
    for var in terminalset:
        first_dic[var] = {var}
        done[var] = 1
    # print("done after initialisation", done)
    # print("first_dic after initialisation", first_dic)
    for var in varset:
        if done[var] == 0:
            # print("computing", var)
            getFirstForVar(var, first_dic, varset, terminalset, done)
            # print("finished", var)
            # print("done is now", done)
            # print("first_dic is now", first_dic)
    return first_dic
def getFirstForVar(var, first_dic, varset, terminalset, done):
# 已经推导过直接结束
if done[var] == 1:
# print("我已经推导过了吼")
return
# 对非终结符求first集合,先看右边第一个元素为终结符
for production in production_list:
if var in production.left:
if isTerminal(production.right[0], terminalset):
first_dic[var].add(production.right[0])
# 用null表示空字符
if production.right[0] == "null":
# print("出现右侧为空")
first_dic[var].add("null")
# 右边第一个元素为非终结符
for production in production_list:
if var in production.left:
if isVariable(production.right[0], varset):
if var == production.right[0]:
continue
if done[production.right[0]] == 0:
getFirstForVar(production.right[0], first_dic, varset, terminalset, done)
if "null" in first_dic[production.right[0]]:
first_dic[production.right[0]].remove("null")
first_dic[var] = first_dic[var] | first_dic[production.right[0]]
# print("将 ",production.right[0],"的集合 ",first_dic[production.right[0]],"并入",var,"的集合中",first_dic[var],"中","得到",)
if isVariable(production.right[0], varset) and len(production.right) > 1:
index = 1
count = 1
while isVariable(production.right[index], varset):
index = index + 1
count = c

3087

被折叠的 条评论
为什么被折叠?



