takemikami
7/18/2017 - 3:40 AM

hyperas with keras-rl sample program

# sample program - hyperas with keras-rl
# see: hyperas https://github.com/maxpumperla/hyperas
# see: keras-rl https://github.com/matthiasplappert/keras-rl

from __future__ import print_function

import numpy as np
import gym

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

# dummy data function
# (hyperas requires a data() function; here the gym environment supplies all data inside model())
def data():
    return

# hyperas model function: build the network, train a DQN agent, and score it
def model():
    # fixed parameter (not searched by hyperas): number of training steps per trial
    step = 500

    # open ai gym env
    ENV_NAME = 'CartPole-v0'
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
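    # {{choice([...])}} is hyperas template syntax; each trial samples one of the listed layer widths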
    model.add(Dense({{choice([8, 16, 32])}}))
    model.add(Activation('relu'))
    model.add(Dense({{choice([8, 16, 32])}}))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    model.summary()

    # define dqn agent
    memory = SequentialMemory(limit=50000, window_length=1)
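    # the exploration rate eps is also searched, drawn from uniform(0, 1) by hyperas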
    policy = EpsGreedyQPolicy(eps={{uniform(0, 1)}})
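    # target_model_update < 1 selects keras-rl's soft target-network updates with that factor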
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # fitting
    rtn = dqn.fit(env, nb_steps=step, visualize=True, verbose=2)

    # test & calculate total_reward
    rtn2 = dqn.test(env, nb_episodes=5, visualize=True)
    total_reward = np.sum(rtn2.history['episode_reward'])

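    # hyperopt minimizes 'loss', so the negated total test reward is returned;
    # 'status' and 'model' are the other keys hyperas uses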
    return {'loss': -total_reward, 'status': STATUS_OK, 'model': model}


if __name__ == '__main__':
    best_run, best_model = optim.minimize(model=model,
                                          data=data,
                                          algo=tpe.suggest,
                                          max_evals=5,
                                          trials=Trials())
    print("Best performing model chosen hyper-parameters:")
    print(best_run)