Source code for coopihc.policy.ExamplePolicy

import numpy
from coopihc.policy.BasePolicy import BasePolicy
import copy


class ExamplePolicy(BasePolicy):
    """ExamplePolicy

    A simple policy which assumes that the agent using it has a 'goal'
    state and that the task has an 'x' state. x is compared to the goal
    and an appropriate action is taken to make sure x reaches the goal.
    """

    def __init__(self, *args, action_state=None, **kwargs):
        # Forward the action state to BasePolicy instead of discarding it.
        super().__init__(*args, action_state=action_state, **kwargs)

    @BasePolicy.default_value
    def sample(self, agent_observation=None, agent_state=None):
        """sample

        Compares 'x' to the goal and issues +1, -1, or 0 accordingly.

        :return: action, reward
        :rtype: tuple(`StateElement <coopihc.base.StateElement.StateElement>`, float)
        """
        if (
            agent_observation["task_state"]["x"]
            < agent_observation["user_state"]["goal"]
        ):
            _action_value = 1
        elif (
            agent_observation["task_state"]["x"]
            > agent_observation["user_state"]["goal"]
        ):
            _action_value = -1
        else:
            _action_value = 0

        return _action_value, 0
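
# Hedged usage sketch (not part of the library source): the nested dicts
# below are hypothetical stand-ins for the real observation State, only
# meant to illustrate the comparison performed in ExamplePolicy.sample().
#
#     policy = ExamplePolicy()
#     obs = {"task_state": {"x": 0}, "user_state": {"goal": 4}}
#     action, reward = policy.sample(agent_observation=obs)
#     # action == 1, since x < goal; it would be -1 for x > goal,
#     # and 0 once x == goal.
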

class PseudoRandomPolicy(BasePolicy):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def sample(self, agent_observation=None, agent_state=None):
        if agent_observation is None:
            agent_observation = self.observation

        x = agent_observation.task_state.x

        # Deterministic but arbitrary-looking action: a cubic polynomial
        # in x with coefficients p0, p1, p2 read from the policy's state,
        # reduced modulo 10.
        _action_value = (
            8
            + self.state.p0 * x
            + self.state.p1 * x * x
            + self.state.p2 * x * x * x
        ) % 10
        return _action_value, 0
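
# Hedged worked example (the coefficient values are assumptions, not
# taken from the library): with p0 = 1, p1 = 5, p2 = 7 and x = 2,
# sample() computes (8 + 1*2 + 5*4 + 7*8) % 10 = 86 % 10 = 6, so the
# action value is 6. The same x always yields the same action, hence
# "pseudo-random".
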

class CoordinatedPolicy(BasePolicy):
    @property
    def simulation_bundle(self):
        return self.host.simulation_bundle

    def sample(self, agent_observation=None, agent_state=None):
        if agent_observation is None:
            agent_observation = self.observation

        # Align the internal simulation with the observed task state,
        # advance it to turn 2 (the simulated user has just acted), and
        # coordinate by reusing that simulated user's action.
        reset_dic = {"task_state": agent_observation.task_state}
        self.simulation_bundle.reset(dic=reset_dic)
        self.simulation_bundle.step(turn=2)

        _action_value = self.simulation_bundle.user.action
        return _action_value, 0
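
# Hedged wiring sketch (the attribute name comes from the property
# above; the Bundle arguments are assumptions): the host agent is
# expected to expose a `simulation_bundle`, e.g. set up at construction
# time with copies of the task and user:
#
#     assistant.simulation_bundle = Bundle(task=simulated_task, user=simulated_user)
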

class CoordinatedPolicyWithParams(CoordinatedPolicy):
    def sample(self, agent_observation=None, agent_state=None):
        if agent_observation is None:
            agent_observation = self.observation

        # In addition to the task state, seed the simulated user's state
        # with the inferred parameter p0. Deep copies ensure the inner
        # simulation cannot mutate the live observation.
        reset_dic = {
            "task_state": copy.deepcopy(agent_observation.task_state),
            "user_state": {
                "p0": copy.deepcopy(agent_observation.assistant_state.user_p0)
            },
        }
        self.simulation_bundle.reset(dic=reset_dic)
        self.simulation_bundle.step(turn=2)

        _action_value = copy.copy(self.simulation_bundle.user.action)
        return _action_value, 0
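
# Hedged note (an assumption about the surrounding agent): `user_p0` is
# expected to live in the assistant's state, e.g. filled in by an
# inference engine that estimates the user's p0 parameter from past
# observations before this policy samples.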