Solving behaviors
HierarchyCraft environments come with built-in solutions. For ANY task in ANY HierarchyCraft environment, a solving behavior can be given thanks to the fact that no feature extraction is required.
This behavior can be called on the observation and will return relevant actions, like any agent.
Solving behavior for any task can simply be obtained like this:
# Ask the environment for the behavior that solves `task`.
behavior = env.solving_behavior(task)
# Call the behavior on the current observation to get the action to take.
action = behavior(observation)
Solving behaviors can be used for imitation learning, as a teacher or an expert policy.
Example
Let's get a DIAMOND in MineHcraft:
from hcraft.examples import MineHcraftEnv
from hcraft.examples.minecraft.items import DIAMOND
from hcraft.task import GetItemTask

# The task to solve: obtain a DIAMOND.
get_diamond = GetItemTask(DIAMOND)
env = MineHcraftEnv(purpose=get_diamond)
solving_behavior = env.solving_behavior(get_diamond)

done = False
observation, _info = env.reset()
while not done:
    # The solving behavior maps the current observation to the next action.
    action = solving_behavior(observation)
    observation, _reward, terminated, truncated, _info = env.step(action)
    done = terminated or truncated

assert terminated  # Env is successfully terminated
assert get_diamond.is_terminated  # DIAMOND has been obtained !
"""# Solving behaviors

HierarchyCraft environments come with built-in solutions.
For ANY task in ANY HierarchyCraft environment, a solving behavior can be given
thanks to the fact that no feature extraction is required.

This behavior can be called on the observation and will return relevant actions, like any agent.

Solving behavior for any task can simply be obtained like this:

```python
behavior = env.solving_behavior(task)
action = behavior(observation)
```

Solving behaviors can be used for imitation learning, as a teacher or an expert policy.

## Example

Let's get a DIAMOND in MineHcraft:

```python
from hcraft.examples import MineHcraftEnv
from hcraft.examples.minecraft.items import DIAMOND
from hcraft.task import GetItemTask

get_diamond = GetItemTask(DIAMOND)
env = MineHcraftEnv(purpose=get_diamond)
solving_behavior = env.solving_behavior(get_diamond)

done = False
observation, _info = env.reset()
while not done:
    action = solving_behavior(observation)
    observation, _reward, terminated, truncated, _info = env.step(action)
    done = terminated or truncated

assert terminated  # Env is successfully terminated
assert get_diamond.is_terminated  # DIAMOND has been obtained !
```


"""

from typing import TYPE_CHECKING, Dict

from hebg import Behavior

from hcraft.behaviors.behaviors import (
    AbleAndPerformTransformation,
    GetItem,
    DropItem,
    PlaceItem,
    ReachZone,
)
from hcraft.requirements import RequirementNode, req_node_name
from hcraft.task import GetItemTask, GoToZoneTask, PlaceItemTask, Task


if TYPE_CHECKING:
    from hcraft.env import HcraftEnv


def build_all_solving_behaviors(env: "HcraftEnv") -> Dict[str, "Behavior"]:
    """Return a dictionary of handcrafted behaviors to get each item, zone and property.

    Behaviors are created family by family (zones, items, drops, zone items,
    transformations), all sharing the same dictionary. Behaviors whose
    ``graph`` access raises ``ValueError`` are then discarded, and every
    remaining behavior is given a ``complexity``.

    Args:
        env: Environment whose world the behaviors are built for.

    Raises:
        NotImplementedError: If a behavior is of an unsupported kind.

    Returns:
        Mapping from behavior name to behavior.
    """
    behaviors = {}
    # Order matters: each builder receives the dictionary filled by the previous ones.
    builders = (
        _reach_zones_behaviors,
        _get_item_behaviors,
        _drop_item_behaviors,
        _get_zone_item_behaviors,
        _do_transfo_behaviors,
    )
    for build in builders:
        behaviors = build(env, behaviors)

    # Collect names first, the dictionary cannot shrink while it is iterated.
    empty_names = []
    for name, candidate in behaviors.items():
        try:
            candidate.graph
        except ValueError:
            empty_names.append(name)
    for name in empty_names:
        del behaviors[name]

    # TODO: Use learning complexity instead for more generality
    graph = env.world.requirements.graph

    for solver in behaviors.values():
        if isinstance(solver, AbleAndPerformTransformation):
            solver.complexity = 1
            continue

        if isinstance(solver, (GetItem, DropItem)):
            # TODO: for DropItem this clearly is not general enough,
            # it would need requirements for non-accumulative to be fine
            node = req_node_name(solver.item, RequirementNode.ITEM)
        elif isinstance(solver, ReachZone):
            node = req_node_name(solver.zone, RequirementNode.ZONE)
        elif isinstance(solver, PlaceItem):
            node = req_node_name(solver.item, RequirementNode.ZONE_ITEM)
        else:
            raise NotImplementedError
        # Complexity is the "level" stored on the matching requirements node.
        solver.complexity = graph.nodes[node]["level"]

    return behaviors


def task_to_behavior_name(task: Task) -> str:
    """Get the behavior name that will solve the given task.

    Args:
        task: Task to be solved.

    Raises:
        NotImplementedError: If task is not supported yet.

    Returns:
        str: Name of the solving behavior.
    """
    if isinstance(task, GetItemTask):
        return GetItem.get_name(task.item_stack.item)
    if isinstance(task, GoToZoneTask):
        return ReachZone.get_name(task.zone)
    if isinstance(task, PlaceItemTask):
        return PlaceItem.get_name(task.item_stack.item, task.zone)
    raise NotImplementedError


def _reach_zones_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one ReachZone behavior per zone of the world."""
    for zone in env.world.zones:
        reach = ReachZone(zone, env, all_behaviors=all_behaviors)
        all_behaviors[reach.name] = reach
    return all_behaviors


def _get_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one GetItem behavior per item of the world."""
    for item in env.world.items:
        getter = GetItem(item, env, all_behaviors=all_behaviors)
        all_behaviors[getter.name] = getter
    return all_behaviors


def _drop_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one DropItem behavior per item of the world."""
    for item in env.world.items:
        dropper = DropItem(item, env, all_behaviors=all_behaviors)
        all_behaviors[dropper.name] = dropper
    return all_behaviors


def _get_zone_item_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register PlaceItem behaviors for every zone item, anywhere and per zone."""
    # None stands for "anywhere"; then every specific zone.
    candidate_zones = [None] + env.world.zones
    for zone in candidate_zones:
        for item in env.world.zones_items:
            placer = PlaceItem(item, env, all_behaviors=all_behaviors, zone=zone)
            all_behaviors[placer.name] = placer
    return all_behaviors


def _do_transfo_behaviors(env: "HcraftEnv", all_behaviors: Dict[str, "Behavior"]):
    """Register one AbleAndPerformTransformation behavior per transformation."""
    for transfo in env.world.transformations:
        performer = AbleAndPerformTransformation(
            env, transfo, all_behaviors=all_behaviors
        )
        all_behaviors[performer.name] = performer
    return all_behaviors
API Documentation
def build_all_solving_behaviors(env: "HcraftEnv") -> Dict[str, "Behavior"]:
    """Return a dictionary of handcrafted behaviors to get each item, zone and property.

    Behaviors are created family by family (zones, items, drops, zone items,
    transformations), all sharing the same dictionary. Behaviors whose
    ``graph`` access raises ``ValueError`` are then discarded, and every
    remaining behavior is given a ``complexity``.

    Args:
        env: Environment whose world the behaviors are built for.

    Raises:
        NotImplementedError: If a behavior is of an unsupported kind.

    Returns:
        Mapping from behavior name to behavior.
    """
    behaviors = {}
    # Order matters: each builder receives the dictionary filled by the previous ones.
    builders = (
        _reach_zones_behaviors,
        _get_item_behaviors,
        _drop_item_behaviors,
        _get_zone_item_behaviors,
        _do_transfo_behaviors,
    )
    for build in builders:
        behaviors = build(env, behaviors)

    # Collect names first, the dictionary cannot shrink while it is iterated.
    empty_names = []
    for name, candidate in behaviors.items():
        try:
            candidate.graph
        except ValueError:
            empty_names.append(name)
    for name in empty_names:
        del behaviors[name]

    # TODO: Use learning complexity instead for more generality
    graph = env.world.requirements.graph

    for solver in behaviors.values():
        if isinstance(solver, AbleAndPerformTransformation):
            solver.complexity = 1
            continue

        if isinstance(solver, (GetItem, DropItem)):
            # TODO: for DropItem this clearly is not general enough,
            # it would need requirements for non-accumulative to be fine
            node = req_node_name(solver.item, RequirementNode.ITEM)
        elif isinstance(solver, ReachZone):
            node = req_node_name(solver.zone, RequirementNode.ZONE)
        elif isinstance(solver, PlaceItem):
            node = req_node_name(solver.item, RequirementNode.ZONE_ITEM)
        else:
            raise NotImplementedError
        # Complexity is the "level" stored on the matching requirements node.
        solver.complexity = graph.nodes[node]["level"]

    return behaviors
Return a dictionary of handcrafted behaviors to get each item, zone and property.
def task_to_behavior_name(task: hcraft.task.Task) -> str:
def task_to_behavior_name(task: Task) -> str:
    """Get the behavior name that will solve the given task.

    Args:
        task: Task to be solved.

    Raises:
        NotImplementedError: If task is not supported yet.

    Returns:
        str: Name of the solving behavior.
    """
    # Each supported task kind maps to the naming scheme of one behavior class.
    if isinstance(task, GetItemTask):
        return GetItem.get_name(task.item_stack.item)
    if isinstance(task, GoToZoneTask):
        return ReachZone.get_name(task.zone)
    if isinstance(task, PlaceItemTask):
        return PlaceItem.get_name(task.item_stack.item, task.zone)
    raise NotImplementedError
Get the behavior name that will solve the given task.
Arguments:
- task: Task to be solved.
Raises:
- NotImplementedError: If task is not supported yet.
Returns:
str: Name of the solving behavior.