Prerequisites

Before you begin, make sure you have:

  • Installed the SDK (see Installation)
  • Set up your API key (via environment variable or .env file)

Basic Usage

Here’s a complete example that creates an environment, runs a task, and records a trajectory:

import os
import asyncio
from dotenv import load_dotenv
load_dotenv()

import hud
from hud import gym

async def main():
    """Run a single task from start to finish.

    Creates the "OSWorld-Ubuntu" environment, resets it with the first task
    of the "osworld_tasks" TaskSet, evaluates the resulting state, and prints
    the URL of the trajectory video. The environment is closed on exit even
    if one of the steps raises.
    """
    # Create an environment instance using the gym module
    env = gym.make("OSWorld-Ubuntu")
    try:
        await env.wait_for_ready()

        # Load a TaskSet containing tasks
        taskset = hud.TaskSet.load("osworld_tasks")

        # Get the first task from the TaskSet
        task = taskset.tasks[0]

        # Initialize the environment with the task
        obs = await env.reset(task=task)
        print(f"Task: {obs.text}")

        #
        # ! define your agent loop here !
        #

        # Evaluate environment state
        result = await env.evaluate()
        print(f"Evaluation result: {result}")

        # When finished, a trajectory is created automatically
        trajectory = await env.get_trajectory()

        # Generate a video of the agent's interactions
        video_url = await trajectory.get_video()
        print(f"Video available at: {video_url}")
    finally:
        # Close in `finally` so the environment is released even when a
        # step above fails, not only on the success path.
        await env.close()


if __name__ == "__main__":
    asyncio.run(main())

Example agent: Claude Computer Use

To try Claude Computer Use, clone the full GitHub repository; its agent folder includes a sample implementation.

import os
import asyncio
from dotenv import load_dotenv
load_dotenv()

import hud
from hud import gym
from hud.adapters.claude.adapter import ClaudeAdapter
from agent.claude import ClaudeAgent
from anthropic import Anthropic

async def main():
    """Run one task with the sample Claude Computer Use agent.

    Creates the "OSWorld-Ubuntu" environment, resets it with the first task
    of the "osworld_tasks" TaskSet, and lets the agent act for a bounded
    number of steps. It then evaluates the environment and prints the URL of
    the trajectory video. The environment is closed on exit even if one of
    the steps raises.
    """
    # Upper bound on agent iterations, so a run always ends
    max_steps = 8

    # Create an environment instance using the gym module
    env = gym.make("OSWorld-Ubuntu")
    try:
        await env.wait_for_ready()

        # Load a TaskSet containing tasks
        taskset = hud.TaskSet.load("osworld_tasks")

        # Get the first task from the TaskSet
        task = taskset.tasks[0]

        # Initialize the environment with the task
        obs = await env.reset(task=task)
        print(f"Task: {obs.text}")

        # Initialize Claude Computer Use agent
        anthropic = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
        agent = ClaudeAgent(anthropic)

        # Initialize adapter to interact with the environment
        cua_adapter = ClaudeAdapter()

        # Agent loop
        for i in range(max_steps):
            # Rescale screenshot to Claude's resolution
            screenshot = cua_adapter.rescale(obs.screenshot)

            # Agent's next action
            done, response = await agent.predict(screenshot, obs.text)
            if done:
                # Record the agent's final answer on the environment
                # before leaving the loop.
                env.final_response = str(response)
                break

            # Convert to HUD action space
            actions = cua_adapter.adapt_list([response])
            print(f"Agent's action: {response}")

            # Step the environment forward
            obs, reward, terminated, info = await env.step(actions)

            # Drop out if terminated
            if terminated:
                break
            print(f"Step {i+1} completed")

        # Evaluate environment state
        result = await env.evaluate()
        print(f"Evaluation result: {result}")

        # Generate a trajectory video
        trajectory = await env.get_trajectory()
        video_url = await trajectory.get_video()
        print(f"Video available at: {video_url}")
    finally:
        # Close in `finally` so the environment is released even when the
        # agent loop or evaluation fails, not only on the success path.
        await env.close()


if __name__ == "__main__":
    asyncio.run(main())

For a complete example of implementing a custom agent, check out the Custom Agent Guide.

Next Steps

  • Explore the Core Concepts to understand the SDK architecture
  • Check out the Example section for more detailed examples
  • Review the API Reference for comprehensive documentation