# Source code for qualia2.rl.agents.dqn

# -*- coding: utf-8 -*- 
from ..rl_core import ValueAgent
from ..rl_util import Trainer

class DQN(ValueAgent):
    '''DQN 2013 implementation\n
    This implementation uses a single network for learning.
    The DQN class incorporates the model (Module) and the optim (Optimizer).
    The model learns with experience replay, which is implemented in the update() method
    (update() is not defined in this class).

    Args:
        eps: forwarded unchanged to ValueAgent.__init__ (presumably the exploration rate; confirm against ValueAgent)
        actions: forwarded unchanged to ValueAgent.__init__ (presumably the available actions; confirm against ValueAgent)
    '''
    def __init__(self, eps, actions):
        # No DQN-specific state is created here; construction is delegated entirely to ValueAgent.
        super().__init__(eps, actions)

class DQNTrainer(Trainer):
    ''' DQNTrainer \n
    Args:
        memory (deque): replay memory object
        batch (int): batch size for training
        capacity (int): capacity of the memory
        gamma (float): gamma value (presumably the reward discount factor; confirm against Trainer). Defaults to 0.99.
    '''
    def __init__(self, memory, batch, capacity, gamma=0.99):
        # Every argument is forwarded positionally, in this exact order, to Trainer.__init__.
        super().__init__(memory, batch, capacity, gamma)

    def train(self, env, agent, episodes=200, render=False, filename=None):
        ''' Run the training procedure and return the agent. \n
        Calls before_train(), train_routine() and after_train() in that order;
        none of these hooks is defined in this class.

        Args:
            env: environment object, passed to before_train() and train_routine()
            agent: agent object, passed to before_train() and train_routine()
            episodes (int): forwarded to train_routine() as a keyword argument. Defaults to 200.
            render (bool): forwarded to train_routine() as a keyword argument. Defaults to False.
            filename: forwarded to train_routine() as a keyword argument. Defaults to None.

        Returns:
            The same agent object that was passed in.
        '''
        self.before_train(env, agent)
        self.train_routine(env, agent, episodes=episodes, render=render, filename=filename)
        self.after_train()
        return agent