强化学习Q-Learning解决FrozenLake例子(Python)

import gym
import numpy as np
import random
import matplotlib.pyplot as plt

# Tabular Q-learning on Gym's FrozenLake environment.
#
# The script builds a Q-table with one row per state and one column per
# action, trains it for a fixed number of episodes with a noisy-greedy policy,
# then prints the average episode reward and the learned table.

# Create the FrozenLake environment (the environment id must be a string).
env = gym.make("FrozenLake-v1")
# Initialise the Q-table with shape [S, A], i.e. states x actions.
Q_all = np.zeros([env.observation_space.n, env.action_space.n])
# Hyper-parameters:
# alpha is the learning rate, gamma is the discount factor.
alpha = 0.8
gamma = 0.95
num_episodes = 2000
# Total reward collected in each episode.
rList = []
for i in range(num_episodes):
    # Reset the environment and read the initial state.
    # gym >= 0.26 returns (observation, info); older releases return only the
    # observation, so accept both shapes.
    reset_result = env.reset()
    s = reset_result[0] if isinstance(reset_result, tuple) else reset_result
    rAll = 0
    d = False
    # At most 99 steps per episode.
    for j in range(1, 100):
        # Greedy action selection with Gaussian noise; the noise scale decays
        # as 1 / (episode + 1), so exploration shrinks as training progresses.
        noise = np.random.randn(env.action_space.n) * (1. / (i + 1))
        a = int(np.argmax(Q_all[s, :] + noise))
        # Take the action and observe the next state and reward.
        # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
        # older releases return (obs, reward, done, info).
        step_result = env.step(a)
        if len(step_result) == 5:
            s1, r, terminated, truncated, _ = step_result
            d = terminated or truncated
        else:
            s1, r, d, _ = step_result
        # Q-learning update:
        # Q(s, a) += alpha * (r + gamma * max_a' Q(s1, a') - Q(s, a))
        Q_all[s, a] = Q_all[s, a] + alpha * (r + gamma * np.max(Q_all[s1, :]) - Q_all[s, a])
        # Accumulate the episode reward.
        rAll += r
        # Move to the next state.
        s = s1
        # Episode finished.
        if d:
            break
    rList.append(rAll)

print("Score over time:"+ str(sum(rList)/num_episodes))
print(Q_all)

参考资料:

1.

2.

经验分享 程序员 微信小程序 职场和发展