Solving behaviors

HierarchyCraft environments come with built-in solutions. For ANY task in ANY HierarchyCraft environment, a solving behavior can be given thanks to the fact that no feature extraction is required.

This behavior can be called on the observation and will return relevant actions, like any agent.

Solving behavior for any task can simply be obtained like this:

behavior = env.solving_behavior(task)
action = behavior(observation)

Solving behaviors can be used for imitation learning, as a teacher or an expert policy.

Example

Let's get a DIAMOND in MineHcraft:

from hcraft.examples import MineHcraftEnv
from hcraft.examples.minecraft.items import DIAMOND
from hcraft.task import GetItemTask

get_diamond = GetItemTask(DIAMOND)
env = MineHcraftEnv(purpose=get_diamond)
solving_behavior = env.solving_behavior(get_diamond)

done = False
observation, _info = env.reset()
while not done:
    action = solving_behavior(observation)
    observation, _reward, terminated, truncated, _info = env.step(action)
    done = terminated or truncated

assert terminated  # Env is successfully terminated
assert get_diamond.is_terminated # DIAMOND has been obtained !
  1"""# Solving behaviors
  2
  3HierarchyCraft environments come with built-in solutions.
  4For ANY task in ANY HierarchyCraft environment, a solving behavior can be given
  5thanks to the fact that no feature extraction is required.
  6
  7This behavior can be called on the observation and will return relevant actions, like any agent.
  8
  9Solving behavior for any task can simply be obtained like this:
 10
 11```python
 12behavior = env.solving_behavior(task)
 13action = behavior(observation)
 14```
 15
 16Solving behaviors can be used for imitation learning, as a teacher or an expert policy.
 17
 18## Example
 19
 20Let's get a DIAMOND in MineHcraft:
 21
 22```python
 23from hcraft.examples import MineHcraftEnv
 24from hcraft.examples.minecraft.items import DIAMOND
 25from hcraft.task import GetItemTask
 26
 27get_diamond = GetItemTask(DIAMOND)
 28env = MineHcraftEnv(purpose=get_diamond)
 29solving_behavior = env.solving_behavior(get_diamond)
 30
 31done = False
 32observation, _info = env.reset()
 33while not done:
 34    action = solving_behavior(observation)
 35    observation, _reward, terminated, truncated, _info = env.step(action)
 36    done = terminated or truncated
 37
 38assert terminated  # Env is successfully terminated
 39assert get_diamond.is_terminated # DIAMOND has been obtained !
 40```
 41
 42
 43"""
 44
 45from typing import TYPE_CHECKING, Dict
 46
 47from hebg import Behavior
 48
 49from hcraft.behaviors.behaviors import (
 50    AbleAndPerformTransformation,
 51    GetItem,
 52    DropItem,
 53    PlaceItem,
 54    ReachZone,
 55)
 56from hcraft.requirements import RequirementNode, req_node_name
 57from hcraft.task import GetItemTask, GoToZoneTask, PlaceItemTask, Task
 58
 59
 60if TYPE_CHECKING:
 61    from hcraft.env import HcraftEnv
 62
 63
 64def build_all_solving_behaviors(env: "HcraftEnv") -> Dict[str, "Behavior"]:
 65    """Return a dictionary of handcrafted behaviors to get each item, zone and property."""
 66    all_behaviors = {}
 67    all_behaviors = _reach_zones_behaviors(env, all_behaviors)
 68    all_behaviors = _get_item_behaviors(env, all_behaviors)
 69    all_behaviors = _drop_item_behaviors(env, all_behaviors)
 70    all_behaviors = _get_zone_item_behaviors(env, all_behaviors)
 71    all_behaviors = _do_transfo_behaviors(env, all_behaviors)
 72
 73    empty_behaviors = []
 74    for name, behavior in all_behaviors.items():
 75        try:
 76            behavior.graph
 77        except ValueError:
 78            empty_behaviors.append(name)
 79    for name in empty_behaviors:
 80        all_behaviors.pop(name)
 81
 82    # TODO: Use learning complexity instead for more generality
 83    requirements_graph = env.world.requirements.graph
 84
 85    for behavior in all_behaviors.values():
 86        if isinstance(behavior, AbleAndPerformTransformation):
 87            behavior.complexity = 1
 88            continue
 89        if isinstance(behavior, GetItem):
 90            req_node = req_node_name(behavior.item, RequirementNode.ITEM)
 91        elif isinstance(behavior, DropItem):
 92            # TODO: this clearly is not general enough,
 93            # it would need requirements for non-accumulative to be fine
 94            req_node = req_node_name(behavior.item, RequirementNode.ITEM)
 95        elif isinstance(behavior, ReachZone):
 96            req_node = req_node_name(behavior.zone, RequirementNode.ZONE)
 97        elif isinstance(behavior, PlaceItem):
 98            req_node = req_node_name(behavior.item, RequirementNode.ZONE_ITEM)
 99        else:
100            raise NotImplementedError
101        behavior.complexity = requirements_graph.nodes[req_node]["level"]
102        continue
103
104    return all_behaviors
105
106
def task_to_behavior_name(task: Task) -> str:
    """Give the name of the behavior solving the given task.

    Args:
        task: Task to be solved.

    Raises:
        NotImplementedError: If task is not supported yet.

    Returns:
        str: Name of the solving behavior.
    """
    if isinstance(task, GetItemTask):
        return GetItem.get_name(task.item_stack.item)
    if isinstance(task, GoToZoneTask):
        return ReachZone.get_name(task.zone)
    if isinstance(task, PlaceItemTask):
        return PlaceItem.get_name(task.item_stack.item, task.zone)
    # No other kind of task has a known solving behavior.
    raise NotImplementedError
128
129
def _reach_zones_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one ReachZone behavior for each zone of the world.

    The given dictionary is updated in place, keyed by behavior name,
    and returned.
    """
    for target_zone in env.world.zones:
        reach_zone = ReachZone(target_zone, env, all_behaviors=all_behaviors)
        all_behaviors[reach_zone.name] = reach_zone
    return all_behaviors
135
136
def _get_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one GetItem behavior for each item of the world.

    The given dictionary is updated in place, keyed by behavior name,
    and returned.
    """
    for wanted_item in env.world.items:
        get_item = GetItem(wanted_item, env, all_behaviors=all_behaviors)
        all_behaviors[get_item.name] = get_item
    return all_behaviors
142
143
def _drop_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one DropItem behavior for each item of the world.

    The given dictionary is updated in place, keyed by behavior name,
    and returned.
    """
    for dropped_item in env.world.items:
        drop_item = DropItem(dropped_item, env, all_behaviors=all_behaviors)
        all_behaviors[drop_item.name] = drop_item
    return all_behaviors
149
150
def _get_zone_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register PlaceItem behaviors for each zone item of the world.

    Each zone item gets one behavior with no zone given (anywhere) and one
    per specific zone. The given dictionary is updated in place, keyed by
    behavior name, and returned.
    """
    # None stands for "anywhere"; it is followed by every specific zone.
    candidate_zones = [None] + env.world.zones
    for where in candidate_zones:
        for zone_item in env.world.zones_items:
            place_item = PlaceItem(
                zone_item, env, all_behaviors=all_behaviors, zone=where
            )
            all_behaviors[place_item.name] = place_item
    return all_behaviors
157
158
def _do_transfo_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one AbleAndPerformTransformation per transformation of the world.

    The given dictionary is updated in place, keyed by behavior name,
    and returned.
    """
    for transformation in env.world.transformations:
        perform = AbleAndPerformTransformation(
            env, transformation, all_behaviors=all_behaviors
        )
        all_behaviors[perform.name] = perform
    return all_behaviors

API Documentation

def build_all_solving_behaviors(env: hcraft.HcraftEnv) -> Dict[str, hebg.behavior.Behavior]:
 65def build_all_solving_behaviors(env: "HcraftEnv") -> Dict[str, "Behavior"]:
 66    """Return a dictionary of handcrafted behaviors to get each item, zone and property."""
 67    all_behaviors = {}
 68    all_behaviors = _reach_zones_behaviors(env, all_behaviors)
 69    all_behaviors = _get_item_behaviors(env, all_behaviors)
 70    all_behaviors = _drop_item_behaviors(env, all_behaviors)
 71    all_behaviors = _get_zone_item_behaviors(env, all_behaviors)
 72    all_behaviors = _do_transfo_behaviors(env, all_behaviors)
 73
 74    empty_behaviors = []
 75    for name, behavior in all_behaviors.items():
 76        try:
 77            behavior.graph
 78        except ValueError:
 79            empty_behaviors.append(name)
 80    for name in empty_behaviors:
 81        all_behaviors.pop(name)
 82
 83    # TODO: Use learning complexity instead for more generality
 84    requirements_graph = env.world.requirements.graph
 85
 86    for behavior in all_behaviors.values():
 87        if isinstance(behavior, AbleAndPerformTransformation):
 88            behavior.complexity = 1
 89            continue
 90        if isinstance(behavior, GetItem):
 91            req_node = req_node_name(behavior.item, RequirementNode.ITEM)
 92        elif isinstance(behavior, DropItem):
 93            # TODO: this clearly is not general enough,
 94            # it would need requirements for non-accumulative to be fine
 95            req_node = req_node_name(behavior.item, RequirementNode.ITEM)
 96        elif isinstance(behavior, ReachZone):
 97            req_node = req_node_name(behavior.zone, RequirementNode.ZONE)
 98        elif isinstance(behavior, PlaceItem):
 99            req_node = req_node_name(behavior.item, RequirementNode.ZONE_ITEM)
100        else:
101            raise NotImplementedError
102        behavior.complexity = requirements_graph.nodes[req_node]["level"]
103        continue
104
105    return all_behaviors

Return a dictionary of handcrafted behaviors to get each item, zone and property.

def task_to_behavior_name(task: hcraft.task.Task) -> str:
def task_to_behavior_name(task: Task) -> str:
    """Give the name of the behavior solving the given task.

    Args:
        task: Task to be solved.

    Raises:
        NotImplementedError: If task is not supported yet.

    Returns:
        str: Name of the solving behavior.
    """
    if isinstance(task, GetItemTask):
        return GetItem.get_name(task.item_stack.item)
    if isinstance(task, GoToZoneTask):
        return ReachZone.get_name(task.zone)
    if isinstance(task, PlaceItemTask):
        return PlaceItem.get_name(task.item_stack.item, task.zone)
    # No other kind of task has a known solving behavior.
    raise NotImplementedError

Get the behavior name that will solve the given task.

Arguments:
  • task: Task to be solved.
Raises:
  • NotImplementedError: If task is not supported yet.
Returns:
  • str: Name of the solving behavior.