Source code for coopihc.bundle.BaseBundle

from random import random
from coopihc.base.State import State
from coopihc.base.elements import discrete_array_element, array_element, cat_element
from coopihc.base.elements import discrete_array_element, cat_element

import numpy
import yaml
import matplotlib.pyplot as plt
import copy


[docs]class BaseBundle:
    """Main class for bundles.

    Main class for bundles. This class is subclassed by Bundle, which defines the interface with which to interact.

    A bundle combines a task with a user and an assistant. The bundle creates the ``game_state`` by combining the task, user and assistant states with the turn index and both agent's actions.

    The bundle takes care of all the messaging between classes, making sure the gamestate and all individual states are synchronized at all times.

    The bundle implements a forced reset mechanism, where each state of the bundle can be forced to a particular state via a dictionnary mechanism (see :py:func:reset)

    The bundle also takes care of rendering each of the three component in a single place.


    :param task: (:py:class:`coopihc.interactiontask.InteractionTask.InteractionTask`) A task that inherits from ``InteractionTask``
    :param user: (:py:class:`coopihc.agents.BaseAgent.BaseAgent`) a user which inherits from ``BaseAgent``
    :param assistant: (:py:class:`coopihc.agents.BaseAgent.BaseAgent`) an assistant which inherits from ``BaseAgent``

    :meta public:
    """

    turn_dict = {
        "after_assistant_action": 0,
        "before_user_action": 1,
        "after_user_action": 2,
        "before_assistant_action": 3,
    }

    def __init__(
        self,
        task,
        user,
        assistant,
        *args,
        reset_random=False,
        reset_start_after=-1,
        reset_go_to=0,
        **kwargs,
    ):
        self._reset_random = reset_random
        self._reset_start_after = reset_start_after
        self._reset_go_to = reset_go_to

        self.kwargs = kwargs
        self.task = task
        self.task.bundle = self
        self.user = user
        self.user.bundle = self
        self.assistant = assistant
        self.assistant.bundle = self

        # Form complete game state
        self.game_state = State()

        turn_index = cat_element(
            N=4, init=0, out_of_bounds_mode="raw", dtype=numpy.int8
        )
        round_index = discrete_array_element(
            init=0, low=0, high=numpy.iinfo(numpy.int64).max, out_of_bounds_mode="raw"
        )

        self.game_state["game_info"] = State()
        self.game_state["game_info"]["turn_index"] = turn_index
        self.game_state["game_info"]["round_index"] = round_index
        self.game_state["task_state"] = task.state
        self.game_state["user_state"] = user.state
        self.game_state["assistant_state"] = assistant.state

        # here there is a small caveat: you can not access action states in the game_state at finit, you have to pass through the agent instead. This is due to the current way of creating the game_state.

        self.task.finit()
        self.user.finit()
        self.assistant.finit()

        if user.policy is not None:
            self.game_state["user_action"] = user.policy.action_state
        else:
            self.game_state["user_action"] = State()
            self.game_state["user_action"]["action"] = array_element()
        if assistant.policy is not None:
            self.game_state["assistant_action"] = assistant.policy.action_state
        else:
            self.game_state["assistant_action"] = State()
            self.game_state["assistant_action"]["action"] = array_element()

        # This will not work sometimes

        # self.task.finit()
        # self.user.finit()
        # self.assistant.finit()

        # Needed for render
        self.active_render_figure = None
        self.figure_layout = [211, 223, 224]
        self.rendered_mode = None
        self.render_perm = False
        self.playspeed = 0.1

    def __repr__(self):
        """__repr__

        Pretty representation for Bundles.

        :return: pretty bundle print
        :rtype: string
        """
        return "{}\n".format(self.__class__.__name__) + yaml.safe_dump(
            self.__content__()
        )

    def __content__(self):
        """__content__

        Custom class representation

        :return: class repr
        :rtype: dictionnary
        """
        return {
            "Task": self.task.__content__(),
            "User": self.user.__content__(),
            "Assistant": self.assistant.__content__(),
        }

    @property
    def parameters(self):
        return {
            **self.task._parameters,
            **self.user._parameters,
            **self.assistant._parameters,
        }

    @property
    def turn_number(self):
        """turn_number

        The turn number in the game (0 to 3)

        :return: turn number
        :rtype: numpy.ndarray
        """
        return self.game_state["game_info"]["turn_index"]

    @turn_number.setter
    def turn_number(self, value):
        self._turn_number = value
        self.game_state["game_info"]["turn_index"] = value

    @property
    def round_number(self):
        """round_number

        The round number in the game (0 to N)

        :return: turn number
        :rtype: numpy.ndarray
        """
        return self.game_state["game_info"]["round_index"]

    @round_number.setter
    def round_number(self, value):
        self._round_number = value
        self.game_state["game_info"]["round_index"] = value

    @property
    def state(self):
        return self.game_state

[docs]    def reset(
        self,
        go_to=None,
        start_after=None,
        task=True,
        user=True,
        assistant=True,
        dic={},
        random_reset=False,
    ):
        """Reset bundle.

        1. Reset the game and start at a specific turn number.
        2. select which components to reset
        3. forced reset mechanism using dictionnaries


        Example:

        .. code-block:: python

            new_target_value = self.game_state["task_state"]["targets"]
            new_fixation_value = self.game_state["task_state"]["fixation"]
            )
            reset_dic = {"task_state": {"targets": new_target_value, "fixation": new_fixation_value}}
            self.reset(dic=reset_dic, turn = 1)

        Will set the substates "targets" and "fixation" of state "task_state" to some value.


        .. note ::

            If subclassing BaseBundle, make sure to call super().reset() in the new reset method.


        :param turn: game turn number. Can also be set globally at the bundle level by passing the "reset_turn" keyword argument, defaults to 0
        :type turn: int, optional
        :param start_after: which turn to start at (allows skipping some turns during reset), defaults to 0
        :type start_after: int, optional
        :param task: reset task?, defaults to True
        :type task: bool, optional
        :param user: reset user?, defaults to True
        :type user: bool, optional
        :param assistant: reset assistant?, defaults to True
        :type assistant: bool, optional
        :param dic: reset_dic, defaults to {}
        :type dic: dict, optional
        :param random_reset: whether during resetting values should be randomized or not if not set by a reset dic, default to False
        :type random_reset: bool, optional
        :return: new game state
        :rtype: :py:class:`State<coopihc.base.State.State>`
        """

        if go_to is None:
            go_to = self._reset_go_to

        if start_after is None:
            start_after = self._reset_start_after

        random_reset = self._reset_random or random_reset

        if task:
            task_dic = dic.get("task_state")
            self.task._base_reset(
                dic=task_dic,
                random=random_reset,
            )

        if user:
            user_dic = dic.get("user_state")
            self.user._base_reset(
                dic=user_dic,
                random=random_reset,
            )

        if assistant:
            assistant_dic = dic.get("assistant_state")
            self.assistant._base_reset(
                dic=assistant_dic,
                random=random_reset,
            )

        self.round_number = 0

        if not isinstance(go_to, (numpy.integer, int)):
            go_to = self.turn_dict[go_to]
        if not isinstance(start_after, (numpy.integer, int)):
            start_after = self.turn_dict[start_after]

        self.turn_number = go_to

        if go_to == 0 and start_after + 1 == 0:
            return self.game_state
        if start_after <= go_to:
            if go_to >= 1 and start_after + 1 <= 1:
                self._user_first_half_step()
            if go_to >= 2 and start_after + 1 <= 2:
                user_action, _ = self.user.take_action(increment_turn=False)
                self.user.action = user_action
                self._user_second_half_step(user_action)
            if go_to >= 3 and start_after + 1 <= 3:
                self._assistant_first_half_step()
        else:
            raise ValueError(
                f"start_after ({start_after}) can not be after go_to ({go_to}). You can likely use a combination of reset and step to achieve what you are looking for"
            )

        return self.game_state

    def quarter_step(self, user_action=None, assistant_action=None, **kwargs):
        return self.step(
            user_action=user_action,
            assistant_action=assistant_action,
            go_to=(int(self.turn_number) + 1) % 4,
        )

[docs]    def step(self, user_action=None, assistant_action=None, go_to=None, **kwargs):
        """Play a round

        Play a round of the game. A round consists in 4 turns. If go_to is not None, the round is only played until that turn.
        If a user action and assistant action are passed as arguments, then these are used as actions to play the round. Otherwise, these actions are sampled from each agent's policy.

        :param user action: user action
        :type: any
        :param assistant action: assistant action
        :type: any
        :param go_to: turn at which round stops, defaults to None
        :type go_to: int, optional
        :return: gamestate, reward, game finished flag
        :rtype: tuple(:py:class:`State<coopihc.base.State.State>`, collections.OrderedDict, boolean)
        """

        if go_to is None:
            go_to = int(self.turn_number)

        if not isinstance(go_to, (numpy.integer, int)):
            go_to = self.turn_dict[go_to]

        _started = False
        rewards = {}
        rewards["user_observation_reward"] = 0
        rewards["user_inference_reward"] = 0
        rewards["user_policy_reward"] = 0
        rewards["first_task_reward"] = 0
        rewards["assistant_observation_reward"] = 0
        rewards["assistant_inference_reward"] = 0
        rewards["assistant_policy_reward"] = 0
        rewards["second_task_reward"] = 0

        while self.turn_number != go_to or (not _started):

            _started = True
            # User observes and infers
            if self.turn_number == 0 and "no-user" != self.kwargs.get("name"):
                (
                    user_obs_reward,
                    user_infer_reward,
                ) = self._user_first_half_step()
                (
                    rewards["user_observation_reward"],
                    rewards["user_inference_reward"],
                ) = (user_obs_reward, user_infer_reward)

            # User takes action and receives reward from task
            elif self.turn_number == 1 and "no-user" != self.kwargs.get("name"):
                if user_action is None:
                    user_action, user_policy_reward = self.user.take_action(
                        increment_turn=False
                    )
                else:
                    self.user.action = user_action
                    user_policy_reward = 0

                task_reward, is_done = self._user_second_half_step(user_action)
                rewards["user_policy_reward"] = user_policy_reward
                rewards["first_task_reward"] = task_reward
                if is_done:
                    return self.game_state, rewards, is_done

            elif self.turn_number == 2 and "no-assistant" == self.kwargs.get("name"):
                self.round_number = self.round_number + 1

            # Assistant observes and infers
            elif self.turn_number == 2 and "no-assistant" != self.kwargs.get("name"):
                (
                    assistant_obs_reward,
                    assistant_infer_reward,
                ) = self._assistant_first_half_step()
                (
                    rewards["assistant_observation_reward"],
                    rewards["assistant_inference_reward"],
                ) = (assistant_obs_reward, assistant_infer_reward)

            # Assistant takes action and receives reward from task
            elif self.turn_number == 3 and "no-assistant" != self.kwargs.get("name"):
                if assistant_action is None:
                    (
                        assistant_action,
                        assistant_policy_reward,
                    ) = self.assistant.take_action(increment_turn=False)
                else:
                    self.assistant.action = assistant_action
                    assistant_policy_reward = 0

                task_reward, is_done = self._assistant_second_half_step(
                    assistant_action
                )
                rewards["assistant_policy_reward"] = assistant_policy_reward
                rewards["second_task_reward"] = task_reward
                if is_done:
                    return self.game_state, rewards, is_done

                self.round_number = self.round_number + 1

            self.turn_number = (self.turn_number + 1) % 4

        return self.game_state, rewards, False

[docs]    def render(self, mode, *args, **kwargs):
        """render

        Combines all render methods.

        :param mode: "text" or "plot"
        :param type: string

        :meta public:
        """

        self.rendered_mode = mode
        if "text" in mode:
            print("\n")
            print("Round number {}".format(self.round_number.tolist()))
            print("Task Render")
            self.task.render(mode="text", *args, **kwargs)
            print("User Render")
            self.user.render(mode="text", *args, **kwargs)
            print("Assistant Render")
            self.assistant.render(mode="text", *args, **kwargs)
        if "log" in mode:
            self.task.render(mode="log", *args, **kwargs)
            self.user.render(mode="log", *args, **kwargs)
            self.assistant.render(mode="log", *args, **kwargs)
        if "plot" in mode:
            if self.active_render_figure:
                plt.pause(self.playspeed)
                self.task.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.user.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.assistant.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.fig.canvas.draw()
            else:
                self.active_render_figure = True
                self.fig = plt.figure()
                self.axtask = self.fig.add_subplot(self.figure_layout[0])
                self.axtask.set_title("Task State")
                self.axuser = self.fig.add_subplot(self.figure_layout[1])
                self.axuser.set_title("User State")
                self.axassistant = self.fig.add_subplot(self.figure_layout[2])
                self.axassistant.set_title("Assistant State")
                self.task.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.user.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.assistant.render(
                    ax_task=self.axtask,
                    ax_user=self.axuser,
                    ax_assistant=self.axassistant,
                    mode="plot",
                    **kwargs,
                )
                self.fig.show()

            plt.tight_layout()

        if not ("plot" in mode or "text" in mode):
            self.task.render(None, mode=mode, *args, **kwargs)
            self.user.render(None, mode=mode, *args, **kwargs)
            self.assistant.render(None, mode=mode, *args, **kwargs)

[docs]    def close(self):
        """close

        Close the bundle once the game is finished.
        """

        if self.active_render_figure:
            plt.close(self.fig)
            # self.active_render_figure = None

    def _user_first_half_step(self):
        """_user_first_half_step

        Turn 1, where the user observes the game state and updates its state via inference.

        :return: user observation and inference reward
        :rtype: tuple(float, float)
        """

        if not self.kwargs.get("onreset_deterministic_first_half_step"):
            user_obs_reward, user_infer_reward = self.user._agent_step()

        else:
            # Store the probabilistic rules
            store = self.user.observation_engine.extraprobabilisticrules
            # Remove the probabilistic rules
            self.user.observation_engine.extraprobabilisticrules = {}
            # Generate an observation without generating an inference
            user_obs_reward, user_infer_reward = self.user._agent_step(infer=False)
            # Reposition the probabilistic rules, and reset mapping
            self.user.observation_engine.extraprobabilisticrules = store
            self.user.observation_engine.mapping = None

        self.kwargs["onreset_deterministic_first_half_step"] = False

        return user_obs_reward, user_infer_reward

    def _user_second_half_step(self, user_action):
        """_user_second_half_step

        Turn 2, where the operaror takes an action.

        :param user_action: user action
        :param type: Any

        :return: task reward, task done?
        :rtype: tuple(float, boolean)
        """

        # Play user's turn in the task
        task_state, task_reward, is_done = self.task.base_on_user_action(
            user_action=user_action
        )

        return task_reward, is_done

    def _assistant_first_half_step(self):
        """_assistant_first_half_step

        Turn 3, where the assistant observes the game state and updates its state via inference.

        :return: assistant observation and inference reward
        :rtype: tuple(float, float)
        """
        (
            assistant_obs_reward,
            assistant_infer_reward,
        ) = self.assistant._agent_step()

        return assistant_obs_reward, assistant_infer_reward

    def _assistant_second_half_step(self, assistant_action):
        """_assistant_second_half_step

        Turn 4, where the assistant takes an action.

        :param user_action: assistant action
        :param type: Any

        :return: task reward, task done?
        :rtype: tuple(float, boolean)
        """

        # Play assistant's turn in the task

        task_state, task_reward, is_done = self.task.base_on_assistant_action(
            assistant_action=assistant_action
        )

        return task_reward, is_done

    def _on_user_action(self, *args):
        """Turns 1 and 2

        :param \*args: either provide the user action or not. If no action is provided the action is determined by the agent's policy using sample()
        :param type: (None or list)
        :return: user observation, inference, policy and task rewards, game is done flag
        :return type: tuple(float, float, float, float, bool)
        """
        user_obs_reward, user_infer_reward = self._user_first_half_step()
        try:
            # If human input is provided
            user_action = args[0]
        except IndexError:
            # else sample from policy
            user_action, user_policy_reward = self.user.take_action(
                increment_turn=False
            )

        self.user.action = user_action

        task_reward, is_done = self._user_second_half_step(user_action)

        return (
            user_obs_reward,
            user_infer_reward,
            user_policy_reward,
            task_reward,
            is_done,
        )

    def _on_assistant_action(self, *args):
        """Turns 3 and 4

        :param \*args: either provide the assistant action or not. If no action is provided the action is determined by the agent's policy using sample()
        :param type: (None or list)
        :return: assistant observation, inference, policy and task rewards, game is done flag
        :return type: tuple(float, float, float, float, bool)
        """
        (
            assistant_obs_reward,
            assistant_infer_reward,
        ) = self._assistant_first_half_step()

        try:
            # If human input is provided
            assistant_action = args[0]
        except IndexError:
            # else sample from policy
            (
                assistant_action,
                assistant_policy_reward,
            ) = self.assistant.take_action(increment_turn=False)

        self.assistant.action = assistant_action

        task_reward, is_done = self._assistant_second_half_step(assistant_action)
        return (
            assistant_obs_reward,
            assistant_infer_reward,
            assistant_policy_reward,
            task_reward,
            is_done,
        )