与电脑的极致对决——决胜五子棋python篇
“纵横十九道内的,是无穷宇宙”🏠
相信很多小伙伴们都曾下过围棋,亦或者是五子棋,这黑白棋子间的千变万化也是有着无穷的乐趣,周末的笔者闲来无事,研究了些许关于五子棋下法的AI检索,给大伙送上。
令笔者万万没想到的是,竟然开始输给人机
这不能忍,直接减小运算参数,怎么说也得赢一次 🐱
最终,经过笔者的不懈努力(修改参数),笔者终于取得了胜利🐕,小伙伴们也赶快体验一下吧。
🚀附上源码:
import numpy as npimport random as rdimport copyfrom collections import defaultdictVECTOR = 7 #棋盘的维度,五子棋一般是15*15,为了减少运算先使用7*7SIMU_EPSDS = 5000 #自我对弈模拟的次数,越大代表自我对弈的次数越多,速度越慢class Node(): def __init__(self, state, parent=None, parent_action=None): self.state = state self.parent = parent self.parent_action = parent_action self.children = [] self._untried_actions = None self._untried_actions = self.untried_actions() self._number_of_visits = 0 self._results = defaultdict(int) self._results[1] = 0 self._results[-1] = 0 #q值代表当前节点胜利次数减去失败次数的差值 def q(self): wins = self._results[1] loses = self._results[-1] return wins - loses #n值代表当前节点的访问次数 def n(self): return self._number_of_visits #扩展子节点,即访问该节点的每个子节点 def expand(self): action = self._untried_actions.pop() next_state = self.state.play(action,0) child_node = Node(next_state, parent=self, parent_action=action) self.children.append(child_node) return child_node def untried_actions(self): untried_actions = copy.deepcopy(self.state.get_legal_actions()) return untried_actions #生成树的逻辑是先判断该节点是否是最终状态,如果不是再判断是否完全扩展,如果不是则继续扩展该节点的子节点,否则, #从子结点中随机选择一个作为下一个要扩展节点 def _tree_policy(self): current_node=self while not current_node.is_terminal_node(): if not current_node.is_fully_expanded(): return current_node.expand() else: current_node=current_node.rd_child() return current_node def is_terminal_node(self): return self.state.is_game_over() def is_fully_expanded(self): return len(self._untried_actions) == 0 #从所有的子节点(下一状态)中选择一个最优节点 def best_child(self, c_param=0.1): current_state = self.state #UCT算法 choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2*np.log(self.n()) / c.n())) for c in self.children] order = current_state.state[0] if order==1: #如果当前玩家是先手,则选取权重最大的子节点作为最优action return self.children[np.argmax(choices_weights)] else: #如果当前玩家是后手,则选取权重最小的子节点(先手胜率最低的状态)作为最优action return self.children[np.argmin(choices_weights)] def rd_child(self): current_state = self.state return rd.choice(self.children) #self.children[np.argmax(choices_weights)] #自我对弈模拟,从子结点中随机选取action def rollout(self): current_rollout_state = self.state while not current_rollout_state.is_game_over(): possible_moves = current_rollout_state.get_legal_actions() action = self.rollout_policy(possible_moves) current_rollout_state = current_rollout_state.play(action,0) return current_rollout_state.game_result() def rollout_policy(self, possible_moves): return possible_moves[np.random.randint(len(possible_moves))] #rd.choice(possible_moves) #向上回溯,将该节点的胜负信息传到父节点 def backpropagate(self, result): self._number_of_visits += 1. self._results[result] += 1. if self.parent: self.parent.backpropagate(result) #每个节点计算best action都要自我对弈(次数是SIMU_EPSDS)并记录结果,从中选出最优的子节点 def best_action(self): for i in range(SIMU_EPSDS): v=self._tree_policy() reward=v.rollout() v.backpropagate(reward) return self.best_child()class State(): def __init__(self,ini_state): #初始化时导入棋盘的初始状态 self.state=ini_state def play(self,action,is_expl):#player:1 - attack, -1 - defensive order = self.state[0] board = copy.deepcopy(self.state[1]) x=action[0] y=action[1] board[x][y]=order new_state=[-order,board] return State(new_state) def is_game_over(self): return self.game_result()!=999 #判断当前游戏结果,返回结果是1 - 先手赢,-1 - 后手赢,0 - 未分胜负,没有空余点,游戏和棋结束,999 - 未分胜负,继续游戏 def game_result(self): #遍历判断纵向是否五连,前两行和倒数两行不用判断 for i in range(2,VECTOR-2): for j in range(VECTOR): if self.state[1][i-2][j]==self.state[1][i-1][j]==self.state[1][i][j]==self.state[1][i+1][j]==self.state[1][i+2][j]!=0: return self.state[1][i][j] #遍历判断横向是否五连,前两列和倒数两列不用判断 for i in range(VECTOR): for j in range(2,VECTOR-2): if self.state[1][i][j-2]==self.state[1][i][j-1]==self.state[1][i][j]==self.state[1][i][j+1]==self.state[1][i][j+1]!=0: return self.state[1][i][j] #遍历斜向是否五连,前两行、前两列、倒数两行、倒数两列不用判断 for i in range(2,VECTOR-2): for j in range(2,VECTOR-2): if (self.state[1][i][j-2]==self.state[1][i][j-1]==self.state[1][i][j]==self.state[1][i][j+1]==self.state[1][i][j+1]!=0) \ | \ (self.state[1][i][j-2]==self.state[1][i][j-1]==self.state[1][i][j]==self.state[1][i][j+1]==self.state[1][i][j+1]!=0): return self.state[1][i][j] #若未分胜负,遍历是否还有空余落子点,若没有,结果是和棋结束游戏,返回0 blank_count=0 for i in range(VECTOR): for j in range(VECTOR): if (self.state[1][i][j]==0): blank_count+=1 if blank_count==0: return 0 else: #未分胜负胜负,游戏继续 return 999 #获得当前棋盘中所有可落子的点 def get_legal_actions(self): legal_actions=[] for i in range(VECTOR): for j in range(VECTOR): if self.state[1][i][j] == 0: legal_actions.append([i,j]) return legal_actionsif __name__ == '__main__': #初始化状态 state=np.zeros([VECTOR,VECTOR]) order=1 state=State([order,state]) first='player' #'player' - 玩家先手 'ai' - 玩家后手 while True: if first=='player': for i in state.state[1]: print(i) player_choice=[int(i) for i in input('input the loacation (x,y):').split(',')] state=state.play(player_choice,0) print('player:',player_choice) state=State([state.state[0],state.state[1]]) for i in state.state[1]: print(i) tree=Node(state) ai_choice=tree.best_action().parent_action state=state.play(ai_choice,0) state=State([state.state[0],state.state[1]]) print('ai:',ai_choice) else: state=State([state.state[0],state.state[1]]) tree=Node(state) ai_choice=tree.best_action().state.state[1][-1] state=state.play(ai_choice,0) print('ai:',ai_choice) for i in state.state[1]: print(i) player_choice=[int(i) for i in input('input the loacation (x,y):').split(',')] state=state.play(player_choice,0) print('player:',player_choice) for i in state.state[1]: print(i)
想偷懒的小伙伴也不要慌,这里附上源码链接:
智能对战五子棋AI(python)-Python文档类资源-CSDN下载