diff --git a/examples/marl/openai/train.py b/examples/marl/openai/train.py
index 66e8c9f..4fe7332 100644
--- a/examples/marl/openai/train.py
+++ b/examples/marl/openai/train.py
@@ -28,7 +28,7 @@
 
 from gridworld.log import logger
 from gridworld.multiagent_env import MultiAgentEnv
-from gridworld.multiagent_env_openai_style import MultiagentEnvOpenAIStyle
+from gridworld.multiagent_list_interface_env import MultiAgentListInterfaceEnv
 from gridworld.scenarios.buildings import make_env_config
 
 logger.setLevel(logging.ERROR)
@@ -180,7 +180,7 @@ def make_env(arglist):
         num_buildings=3
     )
 
-    env = MultiagentEnvOpenAIStyle(
+    env = MultiAgentListInterfaceEnv(
         CoordinatedMultiBuildingControlEnv,
         env_config
     )
diff --git a/gridworld/__init__.py b/gridworld/__init__.py
index 8cb8374..c2350d3 100644
--- a/gridworld/__init__.py
+++ b/gridworld/__init__.py
@@ -1,4 +1,4 @@
 __version__ = "0.0.1"
 from .base import ComponentEnv, MultiComponentEnv
 from .multiagent_env import MultiAgentEnv
-from .multiagent_env_openai_style import MultiagentEnvOpenAIStyle
+from .multiagent_list_interface_env import MultiAgentListInterfaceEnv
diff --git a/gridworld/multiagent_env_citylearn_style.py b/gridworld/multiagent_env_citylearn_style.py
deleted file mode 100644
index e2a6109..0000000
--- a/gridworld/multiagent_env_citylearn_style.py
+++ /dev/null
@@ -1,181 +0,0 @@
-
-from collections import OrderedDict
-
-import gym
-import numpy as np
-
-from gridworld.multiagent_env import MultiAgentEnv
-
-# TODO: Can this share a common base class with the OpenAI version?
-
-class MultiagentEnvCityLearnStyle(gym.Env):
-    """ A wrapper class to convert RLLib multi-agent gym env to the CityLearn style.
-
-    """
-
-    def __init__(self, env_config):
-
-        self.ma_env = MultiAgentEnv(**env_config)
-
-        # nested_sequence is used as reference to keep the sequence correct when
-        # putting data into CityLearn arrays.
-        self.nested_sequence = self.get_nested_sequence(env_config['agents'])
-
-        # Definitions below for observation_space(s) and action_space(s) follow
-        # the CityLearn example.
-        self.observation_spaces = []
-        self.action_spaces = []
-        total_agents_obs_len = 0
-        total_agents_act_len = 0
-
-        for k, v in self.nested_sequence.items():
-
-            agent_obs_len = sum(
-                [self.ma_env.observation_space[k][component].shape[0] for component in v])
-            agent_act_len = sum(
-                [self.ma_env.action_space[k][component].shape[0] for component in v])
-
-            agent_obs_space = gym.spaces.Box(
-                shape=(agent_obs_len,),
-                low=-1.0,
-                high=1.0,
-                dtype=np.float64
-            )
-
-            agent_action_space = gym.spaces.Box(
-                shape=(agent_act_len,),
-                low=-1.0,
-                high=1.0,
-                dtype=np.float64
-            )
-
-            self.observation_spaces.append(agent_obs_space)
-            self.action_spaces.append(agent_action_space)
-
-            total_agents_act_len += agent_act_len
-            total_agents_obs_len += agent_obs_len
-
-        self.observation_space = gym.spaces.Box(
-            shape=(total_agents_obs_len,),
-            low=-1.0,
-            high=1.0,
-            dtype=np.float64
-        )
-
-        self.action_space = gym.spaces.Box(
-            shape=(total_agents_act_len,),
-            low=-1.0,
-            high=1.0,
-            dtype=np.float64
-        )
-
-
-    @staticmethod
-    def get_nested_sequence(config):
-        results = OrderedDict()
-        for item in config:
-            results[item['name']] = [x['name'] for x in item['config']['components']]
-        return results
-
-
-    def reset(self):
-
-        obs = self.ma_env.reset()
-        obs_citylearn_style = self.convert_to_citylearn_obs(obs)
-
-        return obs_citylearn_style
-
-
-    def step(self, action):
-
-        action = self.convert_from_citylearn_act(action)
-
-        next_obs, reward, done, info = self.ma_env.step(action)
-
-        next_obs_citylearn_style = self.convert_to_citylearn_obs(next_obs)
-        reward_citylearn_style = [reward[k] for k in self.nested_sequence.keys()]
-        done = done['__all__']
-
-        return next_obs_citylearn_style, reward_citylearn_style, done, info
-
-
-    def convert_to_citylearn_obs(self, obs):
-        """Convert the RLLib dictionary based observation to CityLearn array
-        based observation.
-        """
-
-        obs_cl = []
-        for k, v in self.nested_sequence.items():
-            obs_cl.append(np.concatenate([obs[k][x] for x in v]))
-
-        return np.array(obs_cl)
-
-
-    def convert_from_citylearn_act(self, action):
-        """Convert the CityLearn array based action into RLLib dictionary based action.
-        """
-
-        converted_action = {}
-        idx = 0
-
-        for k, v in self.nested_sequence.items():
-            agent_action = OrderedDict()
-            act_start_idx = 0
-            for component in v:
-                act_len = self.ma_env.action_space[k][component].shape[0]
-                agent_action[component] = action[idx][act_start_idx: act_start_idx + act_len]
-                act_start_idx += act_len
-
-            converted_action[k] = agent_action
-            idx += 1
-
-        return converted_action
-
-
-if __name__ == '__main__':
-
-    from gridworld.scenarios.ieee_13_bus_buildings import make_env_config
-
-    env_config = make_env_config(
-        building_config={
-            "reward_structure": {"target": "min_voltage", "alpha": 0.5}
-        },
-        pv_config={
-            "profile_csv": "off-peak.csv",
-            "scaling_factor": 40.
-        },
-        storage_config={
-            "max_power": 20.,
-            "storage_range": (3., 250.)
-        },
-        system_load_rescale_factor=0.6,
-        num_buildings=3
-    )
-
-    env = MultiagentEnvCityLearnStyle(env_config)
-
-    print("******** Test 1 **********")
-    obs_rllib = env.ma_env.reset()
-    obs_citylearn = env.convert_to_citylearn_obs(obs_rllib)
-
-    print(obs_rllib)
-    print(obs_citylearn)
-
-    print()
-    print("******** Test 2 **********")
-    acts_citylearn = [x.sample() for x in env.action_spaces]
-    acts_rllib = env.convert_from_citylearn_act(acts_citylearn)
-
-    print(acts_rllib)
-    print(acts_citylearn)
-
-    print()
-    print("******** Test 3 **********")
-
-    print(env.reset())
-    done = False
-
-    while not done:
-        acts = [x.sample() for x in env.action_spaces]
-        new_state, reward, done, info = env.step(acts)
-
diff --git a/gridworld/multiagent_env_openai_style.py b/gridworld/multiagent_env_openai_style.py
deleted file mode 100644
index 1304238..0000000
--- a/gridworld/multiagent_env_openai_style.py
+++ /dev/null
@@ -1,172 +0,0 @@
-
-from collections import OrderedDict
-
-import numpy as np
-
-import gym
-
-
-# TODO: Can this share a common base class with the CityLearn version?
-
-
-class MultiagentEnvOpenAIStyle(gym.Env):
-    """ A wrapper class to convert RLLib multi-agent gym env to the OpenAI
-    MADDPG style.
-    """
-
-    def __init__(self, multi_agent_env_cls, env_config):
-
-        self.ma_env = multi_agent_env_cls(**env_config)
-        self.n = len(self.ma_env.agents)
-
-        # nested_sequence is used as reference to keep the sequence correct
-        # when putting data into CityLearn arrays.
-        self.nested_sequence = self.get_nested_sequence(env_config['agents'])
-
-        # Definitions below for observation_space(s) and action_space(s) follow
-        # the OpenAI example.
-        self.observation_space = []
-        self.action_space = []
-        total_agents_obs_len = 0
-        total_agents_act_len = 0
-
-        for k, v in self.nested_sequence.items():
-
-            agent_obs_len = sum(
-                [self.ma_env.observation_space[k][component].shape[0]
-                 for component in v])
-            agent_act_len = sum(
-                [self.ma_env.action_space[k][component].shape[0]
-                 for component in v])
-
-            agent_obs_space = gym.spaces.Box(shape=(agent_obs_len,),
-                                             low=-1.0,
-                                             high=1.0,
-                                             dtype=np.float64)
-            agent_action_space = gym.spaces.Box(shape=(agent_act_len,),
-                                                low=-1.0,
-                                                high=1.0,
-                                                dtype=np.float64)
-            self.observation_space.append(agent_obs_space)
-            self.action_space.append(agent_action_space)
-
-            total_agents_act_len += agent_act_len
-            total_agents_obs_len += agent_obs_len
-
-    @staticmethod
-    def get_nested_sequence(config):
-        results = OrderedDict()
-        for item in config:
-            results[item['name']] = [x['name']
-                                     for x in item['config']['components']]
-        return results
-
-    def reset(self):
-
-        obs = self.ma_env.reset()
-        obs_openai_style = self.convert_to_openai_obs(obs)
-
-        return obs_openai_style
-
-    def step(self, action):
-
-        action = self.convert_from_openai_act(action)
-
-        next_obs, reward, done, info = self.ma_env.step(action)
-
-        next_obs_openai_style = self.convert_to_openai_obs(next_obs)
-        reward_openai_style = [reward[k] for k in self.nested_sequence.keys()]
-        done = [done[k] for k in self.nested_sequence.keys()]
-
-        return next_obs_openai_style, reward_openai_style, done, info
-
-    def convert_to_openai_obs(self, obs):
-        """ Convert the RLLib dictionary based observation to OpenAI array
-        based observation.
-        """
-
-        obs_oa = []
-        for k, v in self.nested_sequence.items():
-            obs_oa.append(np.concatenate([obs[k][x] for x in v]))
-
-        return obs_oa
-
-    def convert_from_openai_act(self, action):
-        """ Convert the OpenAI array based action into RLLib dictionary based
-        action.
- """ - - converted_action = {} - idx = 0 - - for k, v in self.nested_sequence.items(): - agent_action = OrderedDict() - act_start_idx = 0 - for component in v: - act_len = self.ma_env.action_space[k][component].shape[0] - agent_action[component] = action[idx][act_start_idx: - act_start_idx + act_len] - act_start_idx += act_len - - converted_action[k] = agent_action - idx += 1 - - return converted_action - - -if __name__ == '__main__': - - import pprint - - from gridworld.multiagent_env import MultiAgentEnv - from gridworld.scenarios.ieee_13_bus_buildings import make_env_config - - pp = pprint.PrettyPrinter(indent=2) - - env_config = make_env_config( - building_config={ - "reward_structure": {"target": "min_voltage", "alpha": 0.5} - }, - pv_config={ - "profile_csv": "off-peak.csv", - "scaling_factor": 40. - }, - storage_config={ - "max_power": 20., - "storage_range": (3., 250.) - }, - system_load_rescale_factor=0.6, - num_buildings=3 - ) - - env = MultiagentEnvOpenAIStyle(MultiAgentEnv, env_config) - - print("******** Test 1 **********") - obs_rllib = env.ma_env.reset() - obs_openai = env.convert_to_openai_obs(obs_rllib) - - pp.pprint(obs_rllib) - pp.pprint(obs_openai) - - print() - print("******** Test 2 **********") - acts_openai = [x.sample() for x in env.action_space] - acts_rllib = env.convert_from_openai_act(acts_openai) - - pp.pprint(acts_rllib) - pp.pprint(acts_openai) - - print() - print("******** Test 3 **********") - - pp.pprint(env.reset()) - done_all = False - cnt = 0 - - while not done_all: - acts = [x.sample() for x in env.action_space] - new_state, reward, done, info = env.step(acts) - done_all = all(done) - cnt += 1 - - print(cnt) \ No newline at end of file diff --git a/gridworld/multiagent_list_interface_env.py b/gridworld/multiagent_list_interface_env.py new file mode 100644 index 0000000..00a1d67 --- /dev/null +++ b/gridworld/multiagent_list_interface_env.py @@ -0,0 +1,111 @@ + +from collections import OrderedDict + +import gym +import numpy as np + + +class MultiAgentListInterfaceEnv(gym.Env): + """ A wrapper class to convert the env's dict interface to list interface. + + By default, the MultiAgentEnv provides interface to RL algorithms using + a dictionary, e.g., observation_space = {'agent_1': Box(...), 'agent_2': + Box(...), ...}, and RL training frameworks like RLLib can handle this. + Other frameworks, however, require the interface to be a list, e.g., + action_space = [Box(...), Box(...), ...]. This wrapper class is to convert + the default dict interface to list interface. + + """ + + def __init__(self, multi_agent_env_cls, env_config): + + self.ma_env = multi_agent_env_cls(**env_config) + self.n = len(self.ma_env.agents) + + # nested_sequence is used as reference to keep the sequence correct + # when putting data into the list interface format. 
+        self.nested_sequence = self.get_nested_sequence(env_config['agents'])
+
+        self.observation_space = []
+        self.action_space = []
+
+        for k, v in self.nested_sequence.items():
+
+            agent_obs_len = sum(
+                [self.ma_env.observation_space[k][component].shape[0]
+                 for component in v])
+            agent_act_len = sum(
+                [self.ma_env.action_space[k][component].shape[0]
+                 for component in v])
+
+            agent_obs_space = gym.spaces.Box(shape=(agent_obs_len,),
+                                             low=-1.0,
+                                             high=1.0,
+                                             dtype=np.float64)
+            agent_action_space = gym.spaces.Box(shape=(agent_act_len,),
+                                                low=-1.0,
+                                                high=1.0,
+                                                dtype=np.float64)
+            self.observation_space.append(agent_obs_space)
+            self.action_space.append(agent_action_space)
+
+    @staticmethod
+    def get_nested_sequence(agent_config):
+        nested_agents_components_sequence = OrderedDict()
+        for item in agent_config:
+            nested_agents_components_sequence[item['name']] = [x['name']
+                for x in item['config']['components']]
+        return nested_agents_components_sequence
+
+    def reset(self):
+
+        obs = self.ma_env.reset()
+        obs_list_interface = self.convert_to_list_obs(obs)
+
+        return obs_list_interface
+
+    def step(self, action):
+
+        action = self.convert_from_list_act(action)
+
+        next_obs, reward, done, info = self.ma_env.step(action)
+
+        next_obs_list_interface = self.convert_to_list_obs(next_obs)
+        reward_list_interface = [reward[k]
+                                 for k in self.nested_sequence.keys()]
+        done = [done[k] for k in self.nested_sequence.keys()]
+
+        return next_obs_list_interface, reward_list_interface, done, info
+
+    def convert_to_list_obs(self, obs):
+        """ Convert the dictionary-based observation to a list-based observation.
+        """
+
+        obs_list = []
+        for k, v in self.nested_sequence.items():
+            obs_list.append(np.concatenate([obs[k][x] for x in v]))
+
+        return obs_list
+
+    def convert_from_list_act(self, action):
+        """ Convert the list-based action into a dictionary-based action.
+        """
+
+        converted_action = {}
+        idx = 0
+
+        for k, v in self.nested_sequence.items():
+
+            agent_action = {}
+            act_start_idx = 0
+
+            for component in v:
+                act_len = self.ma_env.action_space[k][component].shape[0]
+                agent_action[component] = action[idx][act_start_idx:
+                                                      act_start_idx + act_len]
+                act_start_idx += act_len
+
+            converted_action[k] = agent_action
+            idx += 1
+
+        return converted_action
diff --git a/tests/conftest.py b/tests/conftest.py
index 401f40a..10afe81 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -12,7 +12,7 @@
 from gridworld.agents.vehicles import EVChargingEnv
 from gridworld import MultiAgentEnv, MultiComponentEnv
 from gridworld.distribution_system import OpenDSSSolver
-# from gridworld.multiagent_env_openai_style import MultiagentEnvOpenAIStyle
+from gridworld.multiagent_list_interface_env import MultiAgentListInterfaceEnv
 
 
 ## Functions for running simply policy baselines ##
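
Reviewer note (not part of the patch): a minimal usage sketch of the new wrapper, assuming the same ieee_13_bus_buildings scenario helper and random-action loop that the deleted modules exercised in their __main__ test blocks.

    # Wrap a dict-interface MultiAgentEnv so that observation_space and
    # action_space become per-agent lists, as MADDPG-style trainers expect.
    from gridworld.multiagent_env import MultiAgentEnv
    from gridworld.multiagent_list_interface_env import MultiAgentListInterfaceEnv
    from gridworld.scenarios.ieee_13_bus_buildings import make_env_config

    env_config = make_env_config(
        building_config={"reward_structure": {"target": "min_voltage", "alpha": 0.5}},
        pv_config={"profile_csv": "off-peak.csv", "scaling_factor": 40.},
        storage_config={"max_power": 20., "storage_range": (3., 250.)},
        system_load_rescale_factor=0.6,
        num_buildings=3,
    )

    env = MultiAgentListInterfaceEnv(MultiAgentEnv, env_config)

    obs = env.reset()                  # list of arrays, one entry per agent
    done = [False] * env.n
    while not all(done):
        acts = [space.sample() for space in env.action_space]  # per-agent actions
        obs, rewards, done, info = env.step(acts)              # per-agent lists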