import tensorflow as tf
from gym.spaces import Discrete

from rlzoo.common.utils import set_seed
from rlzoo.common.value_networks import *
"""
full list of algorithm parameters (alg_params)
-----------------------------------------------
-----------------------------------------------
full list of learning parameters (learn_params)
-----------------------------------------------
double_q (bool): if True double DQN will be used
dueling (bool): if True dueling value estimation will be used
exploration_rate (float): fraction of entire training period over
which the exploration rate is annealed
exploration_final_eps (float): final value of random action probability
batch_size (int): size of a batched sampled from replay buffer for training
train_freq (int): update the model every `train_freq` steps
learning_starts (int): how many steps of the model to collect transitions
for before learning starts
target_network_update_freq (int): update the target network every
`target_network_update_freq` steps
buffer_size (int): size of the replay buffer
prioritized_replay (bool): if True prioritized replay buffer will be used.
prioritized_alpha (float): alpha parameter for prioritized replay
prioritized_beta0 (float): beta parameter for prioritized replay
mode (str): train or test
-----------------------------------------------
"""


def atari(env, default_seed=False, **kwargs):
    """Return default (alg_params, learn_params) for DQN on Atari environments."""
if default_seed:
seed = 2
set_seed(seed, env) # reproducible
assert isinstance(env.action_space, Discrete)
alg_params = dict(
dueling=True,
double_q=True,
buffer_size=1000,
prioritized_replay=True,
prioritized_alpha=0.6,
prioritized_beta0=0.4,
)
alg_params.update(kwargs)
if alg_params.get('net_list') is None:
alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64],
state_only=True, dueling=alg_params['dueling'])]
if alg_params.get('optimizers_list') is None:
        # one optimizer for the single Q-network above (note the single-element tuple)
        alg_params['optimizers_list'] = (tf.optimizers.Adam(1e-4, epsilon=1e-5, clipnorm=10),)
learn_params = dict(
train_episodes=int(1e5),
test_episodes=10,
max_steps=200,
save_interval=1e4,
batch_size=32,
exploration_rate=0.1,
exploration_final_eps=0.01,
train_freq=4,
learning_starts=10000,
target_network_update_freq=1000,
gamma=0.99,
)
return alg_params, learn_params
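

# Illustration only (not part of RLzoo's API): one common reading of
# `exploration_rate` / `exploration_final_eps`, matching the docstring above,
# is a baselines-style linear schedule that anneals epsilon from 1.0 to the
# final value over the first `exploration_rate` fraction of training.
def _linear_epsilon_schedule(step, total_steps, exploration_rate, exploration_final_eps):
    """Return epsilon at `step`, annealed linearly over the exploration window."""
    anneal_steps = max(1, int(exploration_rate * total_steps))  # annealing window
    progress = min(1.0, step / anneal_steps)  # 0.0 at start, 1.0 once annealed
    return 1.0 + progress * (exploration_final_eps - 1.0)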


def classic_control(env, default_seed=False, **kwargs):
    """Return default (alg_params, learn_params) for DQN on classic control environments."""
if default_seed:
seed = 2
set_seed(seed, env) # reproducible
assert isinstance(env.action_space, Discrete)
alg_params = dict(
dueling=True,
double_q=True,
buffer_size=1000,
prioritized_replay=False,
prioritized_alpha=0.6,
prioritized_beta0=0.4,
)
alg_params.update(kwargs)
if alg_params.get('net_list') is None:
alg_params['net_list'] = [QNetwork(env.observation_space, env.action_space, [64], activation=tf.nn.tanh,
state_only=True, dueling=alg_params['dueling'])]
if alg_params.get('optimizers_list') is None:
        # one optimizer for the single Q-network above (note the single-element tuple)
        alg_params['optimizers_list'] = (tf.optimizers.Adam(5e-3, epsilon=1e-5),)
learn_params = dict(
train_episodes=int(1e3),
test_episodes=10,
max_steps=200,
save_interval=1e3,
batch_size=32,
exploration_rate=0.2,
exploration_final_eps=0.01,
train_freq=4,
learning_starts=200,
target_network_update_freq=50,
gamma=0.99,
)
return alg_params, learn_params
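

# Illustration only: with `dueling=True`, the Q-head combines a scalar state
# value V(s) with per-action advantages A(s, a) as Q = V + A - mean_a(A),
# exactly the aggregation used in the `forward` methods of the commented-out
# legacy networks below. A minimal sketch of that step:
def _dueling_q(value, advantage):
    """value: (batch, 1); advantage: (batch, n_actions) -> Q: (batch, n_actions)."""
    return value + advantage - tf.reduce_mean(advantage, axis=1, keepdims=True)
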
# class CNNQNet(tl.models.Model):
# def __init__(self, in_dim, act_dim, dueling):
# super().__init__()
# self._state_shape = in_dim
# self._action_shape = act_dim,
# self.dueling = dueling
# with tf.name_scope('DQN'):
# with tf.name_scope('CNN'):
# self.cnn = basic_nets.CNNModel(in_dim)
# mlp_in_shape = self.cnn.outputs[0].shape[0]
# with tf.name_scope('QValue'):
# hidden_dim = 256
# self.preq = tl.layers.Dense(
# hidden_dim, tf.nn.relu,
# tf.initializers.Orthogonal(1.0),
# in_channels=mlp_in_shape
# )
# self.qout = tl.layers.Dense(
# act_dim, None,
# tf.initializers.Orthogonal(1.0),
# in_channels=hidden_dim
# )
# if dueling:
# with tf.name_scope('Value'):
# hidden_dim = 256
# self.prev = tl.layers.Dense(
# hidden_dim, tf.nn.relu,
# tf.initializers.Orthogonal(1.0),
# in_channels=mlp_in_shape
# )
# self.vout = tl.layers.Dense(
# 1, None,
# tf.initializers.Orthogonal(1.0),
# in_channels=hidden_dim
# )
#
# def forward(self, obv):
# obv = tf.cast(obv, tf.float32) / 255.0
# mlp_in = tl.layers.flatten_reshape(self.cnn(obv))
# q_out = self.qout(self.preq(mlp_in))
# if self.dueling:
# v_out = self.vout(self.prev(mlp_in))
# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True)
# return q_out
#
# @property
# def state_shape(self):
# return copy.deepcopy(self._state_shape)
#
# @property
# def action_shape(self):
# return copy.deepcopy(self._action_shape)
#
#
# class MLPQNet(tl.models.Model):
# def __init__(self, in_dim, act_dim, dueling):
# super().__init__()
# self._state_shape = in_dim,
# self._action_shape = act_dim,
# self.dueling = dueling
# hidden_dim = 64
# with tf.name_scope('DQN'):
# with tf.name_scope('MLP'):
# self.mlp = tl.layers.Dense(
# hidden_dim, tf.nn.tanh,
# tf.initializers.Orthogonal(1.0),
# in_channels=in_dim
# )
# with tf.name_scope('QValue'):
# self.qmlp = tl.layers.Dense(
# act_dim, None,
# tf.initializers.Orthogonal(1.0),
# in_channels=hidden_dim
# )
# if dueling:
# with tf.name_scope('Value'):
# self.vmlp = tl.layers.Dense(
# 1, None,
# tf.initializers.Orthogonal(1.0),
# in_channels=hidden_dim
# )
#
# def forward(self, obv):
# obv = tf.cast(obv, tf.float32)
# latent = self.mlp(obv)
# q_out = self.qmlp(latent)
# if self.dueling:
# v_out = self.vmlp(latent)
# q_out = v_out + q_out - tf.reduce_mean(q_out, 1, True)
# return q_out
#
# @property
# def state_shape(self):
# return copy.deepcopy(self._state_shape)
#
# @property
# def action_shape(self):
# return copy.deepcopy(self._action_shape)
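

# Illustration only: `prioritized_alpha` and `prioritized_beta0` follow the
# standard prioritized experience replay formulation, where a transition with
# priority p_i is sampled with probability P(i) = p_i**alpha / sum_j p_j**alpha
# and corrected by an importance weight w_i = (N * P(i))**(-beta), normalized
# by the maximum weight. A minimal pure-Python sketch (priorities assumed > 0):
def _per_probs_and_weights(priorities, alpha, beta):
    """priorities: positive p_i values -> (sampling probs, normalized IS weights)."""
    scaled = [p ** alpha for p in priorities]
    total = sum(scaled)
    probs = [s / total for s in scaled]
    n = len(probs)
    weights = [(n * p) ** (-beta) for p in probs]
    w_max = max(weights)
    return probs, [w / w_max for w in weights]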