强化学习:使用 Q-Learning 求解 FrozenLake 的示例(Python)
# Tabular Q-learning on Gym's FrozenLake environment.
#
# Trains a Q-table for a fixed number of episodes, then prints the average
# reward per episode followed by the learned Q-table.
import random

import gym
import matplotlib.pyplot as plt
import numpy as np

# Create the FrozenLake environment; the environment id must be a string.
env = gym.make("FrozenLake-v1")

# Q-table with shape [S, A]: one row per state, one column per action.
Q_all = np.zeros([env.observation_space.n, env.action_space.n])

# Hyper-parameters: alpha is the learning rate, gamma the discount factor.
alpha = 0.8
gamma = 0.95
num_episodes = 2000
max_steps = 99  # upper bound on the number of steps taken in one episode

rList = []  # total reward collected in each episode
for i in range(num_episodes):
    # Reset the environment and take the initial observation. Newer Gym
    # releases return (observation, info); older ones return the bare
    # observation, so accept both shapes.
    reset_result = env.reset()
    s = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    rAll = 0
    d = False
    for _ in range(max_steps):
        # Greedy action selection perturbed by Gaussian noise; the noise is
        # scaled by 1/(i+1), so it shrinks as the episode index grows.
        noise = np.random.randn(1, env.action_space.n) * (1.0 / (i + 1))
        a = np.argmax(Q_all[s, :] + noise)

        # Take the action and read back the new state and reward. Newer Gym
        # releases return (obs, reward, terminated, truncated, info); older
        # ones return (obs, reward, done, info).
        step_result = env.step(a)
        if len(step_result) == 5:
            s1, r, terminated, truncated, _ = step_result
            d = terminated or truncated
        else:
            s1, r, d, _ = step_result

        # Q-learning update: move Q(s, a) towards r + gamma * max_a' Q(s1, a').
        Q_all[s, a] = Q_all[s, a] + alpha * (
            r + gamma * np.max(Q_all[s1, :]) - Q_all[s, a]
        )

        # Accumulate the episode reward and advance to the new state.
        rAll += r
        s = s1

        # Episode finished.
        if d:
            break
    rList.append(rAll)

print("Score over time:" + str(sum(rList) / num_episodes))
print(Q_all)
参考资料:
1.
2.
上一篇:
JS实现多线程数据分片下载