# Source code for coopihc.policy.ExamplePolicy

import numpy
from coopihc.policy.BasePolicy import BasePolicy
import copy


class ExamplePolicy(BasePolicy):
    """ExamplePolicy

    A simple policy which assumes that the agent using it has a 'goal' state and that the task has an 'x' state. 'x' is compared to the goal and the action that moves 'x' towards the goal is selected.

    :param action_state: forwarded as-is to ``BasePolicy.__init__``, defaults to None
    """

    def __init__(self, *args, action_state=None, **kwargs):
        # Forward the caller's action_state rather than silently replacing it
        # with None.
        super().__init__(*args, action_state=action_state, **kwargs)

    @BasePolicy.default_value
    def sample(self, agent_observation=None, agent_state=None):
        """sample

        Compares 'x' to goal and issues +-1 accordingly (0 when 'x' is neither below nor above the goal).

        :param agent_observation: observation read as ``["task_state"]["x"]`` and ``["user_state"]["goal"]``; a None default is presumably resolved by the ``BasePolicy.default_value`` decorator -- TODO confirm
        :param agent_state: not used by this method
        :return: action value (1, -1 or 0), reward (always 0)
        :rtype: tuple(int, int)
        """
        # NOTE(review): this method returns plain ints; any conversion to a
        # StateElement would have to happen in the decorator or the caller.
        x = agent_observation["task_state"]["x"]
        goal = agent_observation["user_state"]["goal"]

        if x < goal:
            _action_value = 1
        elif x > goal:
            _action_value = -1
        else:
            _action_value = 0

        return _action_value, 0


class PseudoRandomPolicy(BasePolicy):
    """PseudoRandomPolicy

    A deterministic policy whose action is a cubic polynomial of the task state 'x', reduced modulo 10. The polynomial coefficients 'p0', 'p1' and 'p2' are read from ``self.state``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def sample(self, agent_observation=None, agent_state=None):
        """sample

        Computes ``(8 + p0*x + p1*x^2 + p2*x^3) % 10``.

        :param agent_observation: observation exposing ``task_state.x``; when None, ``self.observation`` is used
        :param agent_state: not used by this method
        :return: action value, reward (always 0)
        :rtype: tuple
        """
        observation = self.observation if agent_observation is None else agent_observation
        x = observation.task_state.x
        params = self.state

        # Same term order as the closed form in the docstring.
        polynomial = 8 + params.p0 * x + params.p1 * x * x + params.p2 * x * x * x

        return polynomial % 10, 0


class CoordinatedPolicy(BasePolicy):
    """CoordinatedPolicy

    A policy that picks its action by running the host's simulation bundle from the currently observed task state and returning the action taken by that bundle's user.
    """

    @property
    def simulation_bundle(self):
        """The simulation bundle held by this policy's host."""
        return self.host.simulation_bundle

    def sample(self, agent_observation=None, agent_state=None):
        """sample

        Resets the simulation bundle with the observed task state, steps it with ``turn=2`` and returns the simulated user's action.

        :param agent_observation: observation exposing ``task_state``; when None, ``self.observation`` is used
        :param agent_state: not used by this method
        :return: action value, reward (always 0)
        :rtype: tuple
        """
        if agent_observation is None:
            agent_observation = self.observation

        # Deep-copy so the simulation bundle never shares state objects with
        # the live observation (same convention as CoordinatedPolicyWithParams).
        reset_dic = {"task_state": copy.deepcopy(agent_observation.task_state)}

        self.simulation_bundle.reset(dic=reset_dic)
        self.simulation_bundle.step(turn=2)

        # Return a copy so later simulation steps cannot alter the action
        # handed back to the caller.
        _action_value = copy.copy(self.simulation_bundle.user.action)

        return _action_value, 0


class CoordinatedPolicyWithParams(CoordinatedPolicy):
    """CoordinatedPolicyWithParams

    Same as CoordinatedPolicy, but the simulation bundle is additionally reset with a user parameter 'p0' taken from the observed assistant state ('user_p0').
    """

    def sample(self, agent_observation=None, agent_state=None):
        """sample

        Resets the simulation bundle with copies of the observed task state and of ``assistant_state.user_p0`` (as user state 'p0'), steps it with ``turn=2`` and returns a copy of the simulated user's action.

        :param agent_observation: observation exposing ``task_state`` and ``assistant_state.user_p0``; when None, ``self.observation`` is used
        :param agent_state: not used by this method
        :return: action value, reward (always 0)
        :rtype: tuple
        """
        observation = self.observation if agent_observation is None else agent_observation

        # Copies keep the simulation independent of the observed state objects.
        task_state = copy.deepcopy(observation.task_state)
        user_p0 = copy.deepcopy(observation.assistant_state.user_p0)

        bundle = self.simulation_bundle
        bundle.reset(dic={"task_state": task_state, "user_state": {"p0": user_p0}})
        bundle.step(turn=2)

        return copy.copy(bundle.user.action), 0