refactor(agent)!: Use Pydantic models for Agent process output (#7116)

* Introduce `BaseAgentActionProposal`, `OneShotAgentActionProposal`, and `AssistantThoughts` models to replace `ThoughtProcessResponse`, `DEFAULT_RESPONSE_SCHEMA`
* Refactor and clean up code because now we don't need to do as much type checking everywhere
* Tweak `OneShot` response format instruction

Granular:

* `autogpt.agents.prompt_strategies.one_shot`
  * Replace `ThoughtProcessResponse`, `DEFAULT_RESPONSE_SCHEMA` and parsing logic by `AssistantThoughts` and `OneShotAgentActionProposal`
  * (TANGENTIAL) Move response format instruction into main system prompt message
  * (TANGENTIAL) Adjust response format instruction

* `autogpt.agents.base`
  * Add `BaseAgentActionProposal` base model -> replace `ThoughtProcessOutput`
  * Change signature of `execute` method to accept `BaseAgentActionProposal` instead of separate `command_name` and `command_args`
  * Add `do_not_execute(proposal, feedback)` abstract method, replacing `execute("human_feedback", ..., feedback)`

* Move `history` definition from `BaseAgentSettings` to `AgentSettings` (the only place where it's used anyway)

* `autogpt.models`
  * Add `.utils` > `ModelWithSummary` base model
  * Make the models in `.action_history` (`Episode`, `EpisodicActionHistory`) generic with a type parameter for the type of `Episode.action`

* `autogpt.core.resource.model_providers.schema`
  * Add `__str__` to `AssistantFunctionCall` which pretty-prints the function call

All other changes are a direct result of the changes above.

## BREAKING CHANGE:
* Due to the change in `autogpt.models.action_history`, the application after this change will be unable to load/resume agents from before this change and vice versa.
* The `additional_output` field in the response of `execute_step` has changed slightly:
  * Type of `.thoughts.plan` has changed from `str` to `list[str]`
  * `.command` -> `.use_tool`
  * `.command.args` -> `.use_tool.arguments`
This commit is contained in:
Reinier van der Leer 2024-05-02 00:43:11 +02:00 committed by GitHub
parent a7c7a5e18b
commit ada2e19829
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 307 additions and 440 deletions

View File

@ -1,4 +1,9 @@
from .agent import Agent
from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName
from .agent import Agent, OneShotAgentActionProposal
from .base import BaseAgent, BaseAgentActionProposal
__all__ = ["BaseAgent", "Agent", "CommandName", "CommandArgs", "AgentThoughts"]
__all__ = [
"BaseAgent",
"Agent",
"BaseAgentActionProposal",
"OneShotAgentActionProposal",
]

View File

@ -8,7 +8,6 @@ from typing import TYPE_CHECKING, Optional
import sentry_sdk
from pydantic import Field
from autogpt.agents.prompt_strategies.one_shot import OneShotAgentPromptStrategy
from autogpt.commands.execute_code import CodeExecutorComponent
from autogpt.commands.git_operations import GitOperationsComponent
from autogpt.commands.image_gen import ImageGeneratorComponent
@ -19,9 +18,11 @@ from autogpt.commands.web_selenium import WebSeleniumComponent
from autogpt.components.event_history import EventHistoryComponent
from autogpt.core.configuration import Configurable
from autogpt.core.prompting import ChatPrompt
from autogpt.core.resource.model_providers import ChatMessage, ChatModelProvider
from autogpt.core.resource.model_providers.schema import (
from autogpt.core.resource.model_providers import (
AssistantChatMessage,
AssistantFunctionCall,
ChatMessage,
ChatModelProvider,
ChatModelResponse,
)
from autogpt.core.runner.client_lib.logging.helpers import dump_prompt
@ -33,12 +34,12 @@ from autogpt.logs.log_cycle import (
USER_INPUT_FILE_NAME,
LogCycleHandler,
)
from autogpt.logs.utils import fmt_kwargs
from autogpt.models.action_history import (
ActionErrorResult,
ActionInterruptedByHuman,
ActionResult,
ActionSuccessResult,
EpisodicActionHistory,
)
from autogpt.models.command import Command, CommandOutput
from autogpt.utils.exceptions import (
@ -49,15 +50,14 @@ from autogpt.utils.exceptions import (
UnknownCommandError,
)
from .base import (
BaseAgent,
BaseAgentConfiguration,
BaseAgentSettings,
ThoughtProcessOutput,
)
from .base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings
from .features.agent_file_manager import FileManagerComponent
from .features.context import ContextComponent
from .features.watchdog import WatchdogComponent
from .prompt_strategies.one_shot import (
OneShotAgentActionProposal,
OneShotAgentPromptStrategy,
)
from .protocols import (
AfterExecute,
AfterParse,
@ -79,6 +79,11 @@ class AgentConfiguration(BaseAgentConfiguration):
class AgentSettings(BaseAgentSettings):
config: AgentConfiguration = Field(default_factory=AgentConfiguration)
history: EpisodicActionHistory[OneShotAgentActionProposal] = Field(
default_factory=EpisodicActionHistory[OneShotAgentActionProposal]
)
"""(STATE) The action history of the agent."""
class Agent(BaseAgent, Configurable[AgentSettings]):
default_settings: AgentSettings = AgentSettings(
@ -137,7 +142,7 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
self.event_history = settings.history
self.legacy_config = legacy_config
async def propose_action(self) -> ThoughtProcessOutput:
async def propose_action(self) -> OneShotAgentActionProposal:
"""Proposes the next action to execute, based on the task and current state.
Returns:
@ -188,12 +193,12 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
async def complete_and_parse(
self, prompt: ChatPrompt, exception: Optional[Exception] = None
) -> ThoughtProcessOutput:
) -> OneShotAgentActionProposal:
if exception:
prompt.messages.append(ChatMessage.system(f"Error: {exception}"))
response: ChatModelResponse[
ThoughtProcessOutput
OneShotAgentActionProposal
] = await self.llm_provider.create_chat_completion(
prompt.messages,
model_name=self.llm.name,
@ -210,7 +215,7 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
self.state.ai_profile.ai_name,
self.created_at,
self.config.cycle_count,
result.thoughts,
result.thoughts.dict(),
NEXT_ACTION_FILE_NAME,
)
@ -220,13 +225,13 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
def parse_and_validate_response(
self, llm_response: AssistantChatMessage
) -> ThoughtProcessOutput:
) -> OneShotAgentActionProposal:
parsed_response = self.prompt_strategy.parse_response_content(llm_response)
# Validate command arguments
command_name = parsed_response.command_name
command_name = parsed_response.use_tool.name
command = self._get_command(command_name)
if arg_errors := command.validate_args(parsed_response.command_args)[1]:
if arg_errors := command.validate_args(parsed_response.use_tool.arguments)[1]:
fmt_errors = [
f"{'.'.join(str(p) for p in f.path)}: {f.message}"
if f.path
@ -242,74 +247,70 @@ class Agent(BaseAgent, Configurable[AgentSettings]):
async def execute(
self,
command_name: str,
command_args: dict[str, str] = {},
user_input: str = "",
proposal: OneShotAgentActionProposal,
user_feedback: str = "",
) -> ActionResult:
result: ActionResult
tool = proposal.use_tool
if command_name == "human_feedback":
result = ActionInterruptedByHuman(feedback=user_input)
self.log_cycle_handler.log_cycle(
self.state.ai_profile.ai_name,
self.created_at,
self.config.cycle_count,
user_input,
USER_INPUT_FILE_NAME,
# Get commands
self.commands = await self.run_pipeline(CommandProvider.get_commands)
self._remove_disabled_commands()
try:
return_value = await self._execute_tool(tool)
result = ActionSuccessResult(outputs=return_value)
except AgentTerminated:
raise
except AgentException as e:
result = ActionErrorResult.from_exception(e)
logger.warning(f"{tool} raised an error: {e}")
sentry_sdk.capture_exception(e)
result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name)
if result_tlength > self.send_token_limit // 3:
result = ActionErrorResult(
reason=f"Command {tool.name} returned too much output. "
"Do not execute this command again with the same arguments."
)
else:
# Get commands
self.commands = await self.run_pipeline(CommandProvider.get_commands)
self._remove_disabled_commands()
try:
return_value = await self._execute_command(
command_name=command_name,
arguments=command_args,
)
result = ActionSuccessResult(outputs=return_value)
except AgentTerminated:
raise
except AgentException as e:
result = ActionErrorResult.from_exception(e)
logger.warning(
f"{command_name}({fmt_kwargs(command_args)}) raised an error: {e}"
)
sentry_sdk.capture_exception(e)
result_tlength = self.llm_provider.count_tokens(str(result), self.llm.name)
if result_tlength > self.send_token_limit // 3:
result = ActionErrorResult(
reason=f"Command {command_name} returned too much output. "
"Do not execute this command again with the same arguments."
)
await self.run_pipeline(AfterExecute.after_execute, result)
logger.debug("\n".join(self.trace))
return result
async def _execute_command(
self,
command_name: str,
arguments: dict[str, str],
) -> CommandOutput:
async def do_not_execute(
    self, denied_proposal: OneShotAgentActionProposal, user_feedback: str
) -> ActionResult:
    """Handle the user rejecting a proposed action.

    Instead of executing `denied_proposal`, records the user's feedback as an
    `ActionInterruptedByHuman` result and runs the post-execution pipeline so
    components (e.g. the event history) still see a result for this cycle.

    Args:
        denied_proposal: The action proposal that the user declined.
        user_feedback: The feedback or alternative instruction from the user.

    Returns:
        ActionResult: An `ActionInterruptedByHuman` carrying the feedback.
    """
    result = ActionInterruptedByHuman(feedback=user_feedback)
    # Persist the user's input to this cycle's log files for traceability.
    self.log_cycle_handler.log_cycle(
        self.state.ai_profile.ai_name,
        self.created_at,
        self.config.cycle_count,
        user_feedback,
        USER_INPUT_FILE_NAME,
    )
    # Same post-processing as execute(): notify AfterExecute components.
    await self.run_pipeline(AfterExecute.after_execute, result)
    logger.debug("\n".join(self.trace))
    return result
async def _execute_tool(self, tool_call: AssistantFunctionCall) -> CommandOutput:
"""Execute the command and return the result
Args:
command_name (str): The name of the command to execute
arguments (dict): The arguments for the command
tool_call (AssistantFunctionCall): The tool call to execute
Returns:
str: The result of the command
str: The execution result
"""
# Execute a native command with the same name or alias, if it exists
command = self._get_command(command_name)
command = self._get_command(tool_call.name)
try:
result = command(**arguments)
result = command(**tool_call.arguments)
if inspect.isawaitable(result):
return await result
return result

View File

@ -22,6 +22,7 @@ if TYPE_CHECKING:
from autogpt.core.resource.model_providers.schema import (
ChatModelInfo,
)
from autogpt.models.action_history import ActionResult
from autogpt.agents import protocols as _protocols
from autogpt.agents.components import (
@ -38,11 +39,12 @@ from autogpt.core.configuration import (
SystemSettings,
UserConfigurable,
)
from autogpt.core.resource.model_providers import AssistantFunctionCall
from autogpt.core.resource.model_providers.openai import (
OPEN_AI_CHAT_MODELS,
OpenAIModelName,
)
from autogpt.models.action_history import ActionResult, EpisodicActionHistory
from autogpt.models.utils import ModelWithSummary
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
logger = logging.getLogger(__name__)
@ -50,10 +52,6 @@ logger = logging.getLogger(__name__)
T = TypeVar("T")
P = ParamSpec("P")
CommandName = str
CommandArgs = dict[str, str]
AgentThoughts = dict[str, Any]
class BaseAgentConfiguration(SystemConfiguration):
allow_fs_access: bool = UserConfigurable(default=False)
@ -131,9 +129,6 @@ class BaseAgentSettings(SystemSettings):
config: BaseAgentConfiguration = Field(default_factory=BaseAgentConfiguration)
"""The configuration for this BaseAgent subsystem instance."""
history: EpisodicActionHistory = Field(default_factory=EpisodicActionHistory)
"""(STATE) The action history of the agent."""
class AgentMeta(ABCMeta):
def __call__(cls, *args, **kwargs):
@ -144,13 +139,9 @@ class AgentMeta(ABCMeta):
return instance
class ThoughtProcessOutput(BaseModel):
command_name: str = ""
command_args: dict[str, Any] = Field(default_factory=dict)
thoughts: dict[str, Any] = Field(default_factory=dict)
def to_tuple(self) -> tuple[CommandName, CommandArgs, AgentThoughts]:
return self.command_name, self.command_args, self.thoughts
class BaseAgentActionProposal(BaseModel):
    """Base model for an action proposed by an agent.

    Pairs the agent's thought process with the tool call it wants to make.
    Subclasses may narrow `thoughts` to a concrete model (see
    `OneShotAgentActionProposal`).
    """

    # Either free-form text or any model that can summarize itself.
    thoughts: str | ModelWithSummary
    # NOTE(review): the `= None` default presumably makes this field optional
    # at runtime despite the non-Optional annotation (pydantic treats a None
    # default as Optional) — confirm; callers do check `if not result.use_tool`.
    use_tool: AssistantFunctionCall = None
class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta):
@ -190,15 +181,22 @@ class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta):
return self.config.send_token_limit or self.llm.max_tokens * 3 // 4
@abstractmethod
async def propose_action(self) -> ThoughtProcessOutput:
async def propose_action(self) -> BaseAgentActionProposal:
...
@abstractmethod
async def execute(
self,
command_name: str,
command_args: dict[str, str] = {},
user_input: str = "",
proposal: BaseAgentActionProposal,
user_feedback: str = "",
) -> ActionResult:
...
@abstractmethod
async def do_not_execute(
self,
denied_proposal: BaseAgentActionProposal,
user_feedback: str,
) -> ActionResult:
...

View File

@ -1,13 +1,11 @@
import logging
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.base import BaseAgentActionProposal, BaseAgentConfiguration
from autogpt.agents.components import ComponentSystemError
from autogpt.agents.features.context import ContextComponent
from autogpt.agents.protocols import AfterParse
from autogpt.models.action_history import EpisodicActionHistory
from ..base import BaseAgentConfiguration
logger = logging.getLogger(__name__)
@ -20,13 +18,15 @@ class WatchdogComponent(AfterParse):
run_after = [ContextComponent]
def __init__(
self, config: BaseAgentConfiguration, event_history: EpisodicActionHistory
self,
config: BaseAgentConfiguration,
event_history: EpisodicActionHistory[BaseAgentActionProposal],
):
self.config = config
self.event_history = event_history
self.revert_big_brain = False
def after_parse(self, result: ThoughtProcessOutput) -> None:
def after_parse(self, result: BaseAgentActionProposal) -> None:
if self.revert_big_brain:
self.config.big_brain = False
self.revert_big_brain = False
@ -38,18 +38,18 @@ class WatchdogComponent(AfterParse):
previous_cycle = self.event_history.episodes[
self.event_history.cursor - 1
]
previous_command = previous_cycle.action.name
previous_command_args = previous_cycle.action.args
previous_command = previous_cycle.action.use_tool.name
previous_command_args = previous_cycle.action.use_tool.arguments
rethink_reason = ""
if not result.command_name:
if not result.use_tool:
rethink_reason = "AI did not specify a command"
elif (
result.command_name == previous_command
and result.command_args == previous_command_args
result.use_tool.name == previous_command
and result.use_tool.arguments == previous_command_args
):
rethink_reason = f"Repititive command detected ({result.command_name})"
rethink_reason = f"Repititive command detected ({result.use_tool.name})"
if rethink_reason:
logger.info(f"{rethink_reason}, re-thinking with SMART_LLM...")

View File

@ -6,8 +6,9 @@ import re
from logging import Logger
import distro
from pydantic import Field
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.base import BaseAgentActionProposal
from autogpt.config import AIDirectives, AIProfile
from autogpt.core.configuration.schema import SystemConfiguration, UserConfigurable
from autogpt.core.prompting import (
@ -22,9 +23,32 @@ from autogpt.core.resource.model_providers.schema import (
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import extract_dict_from_json
from autogpt.models.utils import ModelWithSummary
from autogpt.prompts.utils import format_numbered_list
from autogpt.utils.exceptions import InvalidAgentResponseError
_RESPONSE_INTERFACE_NAME = "AssistantResponse"
class AssistantThoughts(ModelWithSummary):
    """Structured thought process the LLM must produce each cycle.

    NOTE(review): field declaration order presumably determines the order of
    properties in the generated response schema (and thus the prompt) —
    confirm before reordering.
    """

    observations: str = Field(
        ..., description="Relevant observations from your last action (if any)"
    )
    text: str = Field(..., description="Thoughts")
    reasoning: str = Field(..., description="Reasoning behind the thoughts")
    self_criticism: str = Field(..., description="Constructive self-criticism")
    plan: list[str] = Field(
        ..., description="Short list that conveys the long-term plan"
    )
    speak: str = Field(..., description="Summary of thoughts, to say to user")

    def summary(self) -> str:
        """Return the main thought text as this model's summary."""
        return self.text
class OneShotAgentActionProposal(BaseAgentActionProposal):
    """Action proposal produced by the one-shot prompt strategy.

    Narrows the base class's `thoughts` field to the structured
    `AssistantThoughts` model.
    """

    thoughts: AssistantThoughts
class OneShotAgentPromptConfiguration(SystemConfiguration):
DEFAULT_BODY_TEMPLATE: str = (
@ -51,70 +75,7 @@ class OneShotAgentPromptConfiguration(SystemConfiguration):
"and respond using the JSON schema specified previously:"
)
DEFAULT_RESPONSE_SCHEMA = JSONSchema(
type=JSONSchema.Type.OBJECT,
properties={
"thoughts": JSONSchema(
type=JSONSchema.Type.OBJECT,
required=True,
properties={
"observations": JSONSchema(
description=(
"Relevant observations from your last action (if any)"
),
type=JSONSchema.Type.STRING,
required=False,
),
"text": JSONSchema(
description="Thoughts",
type=JSONSchema.Type.STRING,
required=True,
),
"reasoning": JSONSchema(
type=JSONSchema.Type.STRING,
required=True,
),
"self_criticism": JSONSchema(
description="Constructive self-criticism",
type=JSONSchema.Type.STRING,
required=True,
),
"plan": JSONSchema(
description=(
"Short markdown-style bullet list that conveys the "
"long-term plan"
),
type=JSONSchema.Type.STRING,
required=True,
),
"speak": JSONSchema(
description="Summary of thoughts, to say to user",
type=JSONSchema.Type.STRING,
required=True,
),
},
),
"command": JSONSchema(
type=JSONSchema.Type.OBJECT,
required=True,
properties={
"name": JSONSchema(
type=JSONSchema.Type.STRING,
required=True,
),
"args": JSONSchema(
type=JSONSchema.Type.OBJECT,
required=True,
),
},
),
},
)
body_template: str = UserConfigurable(default=DEFAULT_BODY_TEMPLATE)
response_schema: dict = UserConfigurable(
default_factory=DEFAULT_RESPONSE_SCHEMA.to_dict
)
choose_action_instruction: str = UserConfigurable(
default=DEFAULT_CHOOSE_ACTION_INSTRUCTION
)
@ -139,7 +100,7 @@ class OneShotAgentPromptStrategy(PromptStrategy):
logger: Logger,
):
self.config = configuration
self.response_schema = JSONSchema.from_dict(configuration.response_schema)
self.response_schema = JSONSchema.from_dict(OneShotAgentActionProposal.schema())
self.logger = logger
@property
@ -168,19 +129,12 @@ class OneShotAgentPromptStrategy(PromptStrategy):
include_os_info=include_os_info,
)
user_task = f'"""{task}"""'
response_format_instr = self.response_format_instruction(
self.config.use_functions_api
)
messages.append(ChatMessage.system(response_format_instr))
final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction)
prompt = ChatPrompt(
messages=[
ChatMessage.system(system_prompt),
ChatMessage.user(user_task),
ChatMessage.user(f'"""{task}"""'),
*messages,
final_instruction_msg,
],
@ -215,6 +169,10 @@ class OneShotAgentPromptStrategy(PromptStrategy):
" in the next message. Your job is to complete the task while following"
" your directives as given above, and terminate when your task is done."
]
+ [
"## RESPONSE FORMAT\n"
+ self.response_format_instruction(self.config.use_functions_api)
]
)
# Join non-empty parts together into paragraph format
@ -225,27 +183,21 @@ class OneShotAgentPromptStrategy(PromptStrategy):
if (
use_functions_api
and response_schema.properties
and "command" in response_schema.properties
and "use_tool" in response_schema.properties
):
del response_schema.properties["command"]
del response_schema.properties["use_tool"]
# Unindent for performance
response_format = re.sub(
r"\n\s+",
"\n",
response_schema.to_typescript_object_interface("Response"),
)
instruction = (
"Respond with pure JSON containing your thoughts, " "and invoke a tool."
if use_functions_api
else "Respond with pure JSON."
response_schema.to_typescript_object_interface(_RESPONSE_INTERFACE_NAME),
)
return (
f"{instruction} "
"The JSON object should be compatible with the TypeScript type `Response` "
f"from the following:\n{response_format}"
f"YOU MUST ALWAYS RESPOND WITH A JSON OBJECT OF THE FOLLOWING TYPE:\n"
f"{response_format}"
+ ("\n\nYOU MUST ALSO INVOKE A TOOL!" if use_functions_api else "")
)
def _generate_intro_prompt(self, ai_profile: AIProfile) -> list[str]:
@ -309,7 +261,7 @@ class OneShotAgentPromptStrategy(PromptStrategy):
def parse_response_content(
self,
response: AssistantChatMessage,
) -> ThoughtProcessOutput:
) -> OneShotAgentActionProposal:
if not response.content:
raise InvalidAgentResponseError("Assistant response has no text content")
@ -323,92 +275,13 @@ class OneShotAgentPromptStrategy(PromptStrategy):
)
assistant_reply_dict = extract_dict_from_json(response.content)
self.logger.debug(
"Validating object extracted from LLM response:\n"
"Parsing object extracted from LLM response:\n"
f"{json.dumps(assistant_reply_dict, indent=4)}"
)
response_schema = self.response_schema.copy(deep=True)
if (
self.config.use_functions_api
and response_schema.properties
and "command" in response_schema.properties
):
del response_schema.properties["command"]
_, errors = response_schema.validate_object(assistant_reply_dict)
if errors:
raise InvalidAgentResponseError(
"Validation of response failed:\n "
+ ";\n ".join([str(e) for e in errors])
)
# Get command name and arguments
command_name, arguments = extract_command(
assistant_reply_dict, response, self.config.use_functions_api
)
return ThoughtProcessOutput(
command_name=command_name,
command_args=arguments,
thoughts=assistant_reply_dict,
)
#############
# Utilities #
#############
def extract_command(
assistant_reply_json: dict,
assistant_reply: AssistantChatMessage,
use_openai_functions_api: bool,
) -> tuple[str, dict[str, str]]:
"""Parse the response and return the command name and arguments
Args:
assistant_reply_json (dict): The response object from the AI
assistant_reply (AssistantChatMessage): The model response from the AI
config (Config): The config object
Returns:
tuple: The command name and arguments
Raises:
json.decoder.JSONDecodeError: If the response is not valid JSON
Exception: If any other error occurs
"""
if use_openai_functions_api:
if not assistant_reply.tool_calls:
raise InvalidAgentResponseError("Assistant did not use any tools")
assistant_reply_json["command"] = {
"name": assistant_reply.tool_calls[0].function.name,
"args": assistant_reply.tool_calls[0].function.arguments,
}
try:
if not isinstance(assistant_reply_json, dict):
raise InvalidAgentResponseError(
f"The previous message sent was not a dictionary {assistant_reply_json}"
)
if "command" not in assistant_reply_json:
raise InvalidAgentResponseError("Missing 'command' object in JSON")
command = assistant_reply_json["command"]
if not isinstance(command, dict):
raise InvalidAgentResponseError("'command' object is not a dictionary")
if "name" not in command:
raise InvalidAgentResponseError("Missing 'name' field in 'command' object")
command_name = command["name"]
# Use an empty dictionary if 'args' field is not present in 'command' object
arguments = command.get("args", {})
return command_name, arguments
except json.decoder.JSONDecodeError:
raise InvalidAgentResponseError("Invalid JSON")
except Exception as e:
raise InvalidAgentResponseError(str(e))
parsed_response = OneShotAgentActionProposal.parse_obj(assistant_reply_dict)
if self.config.use_functions_api:
if not response.tool_calls:
raise InvalidAgentResponseError("Assistant did not use a tool")
parsed_response.use_tool = response.tool_calls[0].function
return parsed_response

View File

@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Iterator
from autogpt.agents.components import AgentComponent
if TYPE_CHECKING:
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.base import BaseAgentActionProposal
from autogpt.core.resource.model_providers.schema import ChatMessage
from autogpt.models.action_history import ActionResult
from autogpt.models.command import Command
@ -35,7 +35,7 @@ class MessageProvider(AgentComponent):
class AfterParse(AgentComponent):
@abstractmethod
def after_parse(self, result: "ThoughtProcessOutput") -> None:
def after_parse(self, result: "BaseAgentActionProposal") -> None:
...

View File

@ -33,11 +33,9 @@ from autogpt.agent_factory.generators import generate_agent_for_task
from autogpt.agent_manager import AgentManager
from autogpt.app.utils import is_port_free
from autogpt.config import Config
from autogpt.core.resource.model_providers import ChatModelProvider
from autogpt.core.resource.model_providers import ChatModelProvider, ModelProviderBudget
from autogpt.core.resource.model_providers.openai import OpenAIProvider
from autogpt.core.resource.model_providers.schema import ModelProviderBudget
from autogpt.file_storage import FileStorage
from autogpt.logs.utils import fmt_kwargs
from autogpt.models.action_history import ActionErrorResult, ActionSuccessResult
from autogpt.utils.exceptions import AgentFinished
from autogpt.utils.utils import DEFAULT_ASK_COMMAND, DEFAULT_FINISH_COMMAND
@ -201,7 +199,7 @@ class AgentProtocolServer:
# To prevent this from interfering with the agent's process, we ignore the input
# of this first step request, and just generate the first step proposal.
is_init_step = not bool(agent.event_history)
execute_command, execute_command_args, execute_result = None, None, None
last_proposal, tool_result = None, None
execute_approved = False
# HACK: only for compatibility with AGBenchmark
@ -215,13 +213,11 @@ class AgentProtocolServer:
and agent.event_history.current_episode
and not agent.event_history.current_episode.result
):
execute_command = agent.event_history.current_episode.action.name
execute_command_args = agent.event_history.current_episode.action.args
last_proposal = agent.event_history.current_episode.action
execute_approved = not user_input
logger.debug(
f"Agent proposed command"
f" {execute_command}({fmt_kwargs(execute_command_args)})."
f"Agent proposed command {last_proposal.use_tool}."
f" User input/feedback: {repr(user_input)}"
)
@ -229,24 +225,25 @@ class AgentProtocolServer:
step = await self.db.create_step(
task_id=task_id,
input=step_request,
is_last=execute_command == DEFAULT_FINISH_COMMAND and execute_approved,
is_last=(
last_proposal is not None
and last_proposal.use_tool.name == DEFAULT_FINISH_COMMAND
and execute_approved
),
)
agent.llm_provider = self._get_task_llm_provider(task, step.step_id)
# Execute previously proposed action
if execute_command:
assert execute_command_args is not None
if last_proposal:
agent.file_manager.workspace.on_write_file = (
lambda path: self._on_agent_write_file(
task=task, step=step, relative_path=path
)
)
if execute_command == DEFAULT_ASK_COMMAND:
execute_result = ActionSuccessResult(outputs=user_input)
agent.event_history.register_result(execute_result)
elif not execute_command:
execute_result = None
if last_proposal.use_tool.name == DEFAULT_ASK_COMMAND:
tool_result = ActionSuccessResult(outputs=user_input)
agent.event_history.register_result(tool_result)
elif execute_approved:
step = await self.db.update_step(
task_id=task_id,
@ -256,10 +253,7 @@ class AgentProtocolServer:
try:
# Execute previously proposed action
execute_result = await agent.execute(
command_name=execute_command,
command_args=execute_command_args,
)
tool_result = await agent.execute(last_proposal)
except AgentFinished:
additional_output = {}
task_total_cost = agent.llm_provider.get_incurred_cost()
@ -273,25 +267,20 @@ class AgentProtocolServer:
step = await self.db.update_step(
task_id=task_id,
step_id=step.step_id,
output=execute_command_args["reason"],
output=last_proposal.use_tool.arguments["reason"],
additional_output=additional_output,
)
await agent.file_manager.save_state()
return step
else:
assert user_input
execute_result = await agent.execute(
command_name="human_feedback", # HACK
command_args={},
user_input=user_input,
)
tool_result = await agent.do_not_execute(last_proposal, user_input)
# Propose next action
try:
next_command, next_command_args, raw_output = (
await agent.propose_action()
).to_tuple()
logger.debug(f"AI output: {raw_output}")
assistant_response = await agent.propose_action()
next_tool_to_use = assistant_response.use_tool
logger.debug(f"AI output: {assistant_response.thoughts}")
except Exception as e:
step = await self.db.update_step(
task_id=task_id,
@ -304,44 +293,44 @@ class AgentProtocolServer:
# Format step output
output = (
(
f"`{execute_command}({fmt_kwargs(execute_command_args)})` returned:"
+ ("\n\n" if "\n" in str(execute_result) else " ")
+ f"{execute_result}\n\n"
f"`{last_proposal.use_tool}` returned:"
+ ("\n\n" if "\n" in str(tool_result) else " ")
+ f"{tool_result}\n\n"
)
if execute_command_args and execute_command != DEFAULT_ASK_COMMAND
if last_proposal and last_proposal.use_tool.name != DEFAULT_ASK_COMMAND
else ""
)
output += f"{raw_output['thoughts']['speak']}\n\n"
output += f"{assistant_response.thoughts.speak}\n\n"
output += (
f"Next Command: {next_command}({fmt_kwargs(next_command_args)})"
if next_command != DEFAULT_ASK_COMMAND
else next_command_args["question"]
f"Next Command: {next_tool_to_use}"
if next_tool_to_use.name != DEFAULT_ASK_COMMAND
else next_tool_to_use.arguments["question"]
)
additional_output = {
**(
{
"last_action": {
"name": execute_command,
"args": execute_command_args,
"name": last_proposal.use_tool.name,
"args": last_proposal.use_tool.arguments,
"result": (
""
if execute_result is None
if tool_result is None
else (
orjson.loads(execute_result.json())
if not isinstance(execute_result, ActionErrorResult)
orjson.loads(tool_result.json())
if not isinstance(tool_result, ActionErrorResult)
else {
"error": str(execute_result.error),
"reason": execute_result.reason,
"error": str(tool_result.error),
"reason": tool_result.reason,
}
)
),
},
}
if not is_init_step
if last_proposal and tool_result
else {}
),
**raw_output,
**assistant_response.dict(),
}
task_cumulative_cost = agent.llm_provider.get_incurred_cost()

View File

@ -18,11 +18,12 @@ from forge.sdk.db import AgentDB
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from autogpt.agents.base import BaseAgentActionProposal
from autogpt.agent_factory.configurators import configure_agent_with_state, create_agent
from autogpt.agent_factory.profile_generator import generate_agent_profile_for_task
from autogpt.agent_manager import AgentManager
from autogpt.agents import AgentThoughts, CommandArgs, CommandName
from autogpt.agents.prompt_strategies.one_shot import AssistantThoughts
from autogpt.commands.execute_code import (
is_docker_available,
we_are_running_in_a_docker_container,
@ -40,6 +41,7 @@ from autogpt.file_storage import FileStorageBackendName, get_storage
from autogpt.logs.config import configure_logging
from autogpt.logs.helpers import print_attribute, speak
from autogpt.models.action_history import ActionInterruptedByHuman
from autogpt.models.utils import ModelWithSummary
from autogpt.utils.exceptions import AgentTerminated, InvalidAgentResponseError
from autogpt.utils.utils import DEFAULT_FINISH_COMMAND
@ -227,13 +229,12 @@ async def run_auto_gpt(
)
if (
agent.event_history.current_episode
and agent.event_history.current_episode.action.name
== DEFAULT_FINISH_COMMAND
and not agent.event_history.current_episode.result
(current_episode := agent.event_history.current_episode)
and current_episode.action.use_tool.name == DEFAULT_FINISH_COMMAND
and not current_episode.result
):
# Agent was resumed after `finish` -> rewrite result of `finish` action
finish_reason = agent.event_history.current_episode.action.args["reason"]
finish_reason = current_episode.action.use_tool.arguments["reason"]
print(f"Agent previously self-terminated; reason: '{finish_reason}'")
new_assignment = clean_input(
config, "Please give a follow-up question or assignment:"
@ -531,11 +532,7 @@ async def run_interaction_loop(
# Have the agent determine the next action to take.
with spinner:
try:
(
command_name,
command_args,
assistant_reply_dict,
) = (await agent.propose_action()).to_tuple()
action_proposal = await agent.propose_action()
except InvalidAgentResponseError as e:
logger.warning(f"The agent's thoughts could not be parsed: {e}")
consecutive_failures += 1
@ -558,9 +555,7 @@ async def run_interaction_loop(
# Print the assistant's thoughts and the next command to the user.
update_user(
ai_profile,
command_name,
command_args,
assistant_reply_dict,
action_proposal,
speak_mode=legacy_config.tts_config.speak_mode,
)
@ -569,12 +564,12 @@ async def run_interaction_loop(
##################
handle_stop_signal()
if cycles_remaining == 1: # Last cycle
user_feedback, user_input, new_cycles_remaining = await get_user_feedback(
feedback_type, feedback, new_cycles_remaining = await get_user_feedback(
legacy_config,
ai_profile,
)
if user_feedback == UserFeedback.AUTHORIZE:
if feedback_type == UserFeedback.AUTHORIZE:
if new_cycles_remaining is not None:
# Case 1: User is altering the cycle budget.
if cycle_budget > 1:
@ -598,13 +593,13 @@ async def run_interaction_loop(
"-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=",
extra={"color": Fore.MAGENTA},
)
elif user_feedback == UserFeedback.EXIT:
elif feedback_type == UserFeedback.EXIT:
logger.warning("Exiting...")
exit()
else: # user_feedback == UserFeedback.TEXT
command_name = "human_feedback"
pass
else:
user_input = ""
feedback = ""
# First log new-line so user can differentiate sections better in console
print()
if cycles_remaining != math.inf:
@ -619,33 +614,31 @@ async def run_interaction_loop(
# Decrement the cycle counter first to reduce the likelihood of a SIGINT
# happening during command execution, setting the cycles remaining to 1,
# and then having the decrement set it to 0, exiting the application.
if command_name != "human_feedback":
if not feedback:
cycles_remaining -= 1
if not command_name:
if not action_proposal.use_tool:
continue
handle_stop_signal()
if command_name:
result = await agent.execute(command_name, command_args, user_input)
if not feedback:
result = await agent.execute(action_proposal)
else:
result = await agent.do_not_execute(action_proposal, feedback)
if result.status == "success":
logger.info(
result, extra={"title": "SYSTEM:", "title_color": Fore.YELLOW}
)
elif result.status == "error":
logger.warning(
f"Command {command_name} returned an error: "
f"{result.error or result.reason}"
)
if result.status == "success":
logger.info(result, extra={"title": "SYSTEM:", "title_color": Fore.YELLOW})
elif result.status == "error":
logger.warning(
f"Command {action_proposal.use_tool.name} returned an error: "
f"{result.error or result.reason}"
)
def update_user(
ai_profile: AIProfile,
command_name: CommandName,
command_args: CommandArgs,
assistant_reply_dict: AgentThoughts,
action_proposal: "BaseAgentActionProposal",
speak_mode: bool = False,
) -> None:
"""Prints the assistant's thoughts and the next command to the user.
@ -661,18 +654,19 @@ def update_user(
print_assistant_thoughts(
ai_name=ai_profile.ai_name,
assistant_reply_json_valid=assistant_reply_dict,
thoughts=action_proposal.thoughts,
speak_mode=speak_mode,
)
if speak_mode:
speak(f"I want to execute {command_name}")
speak(f"I want to execute {action_proposal.use_tool.name}")
# First log new-line so user can differentiate sections better in console
print()
safe_tool_name = remove_ansi_escape(action_proposal.use_tool.name)
logger.info(
f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{command_args}{Style.RESET_ALL}",
f"COMMAND = {Fore.CYAN}{safe_tool_name}{Style.RESET_ALL} "
f"ARGUMENTS = {Fore.CYAN}{action_proposal.use_tool.arguments}{Style.RESET_ALL}",
extra={
"title": "NEXT ACTION:",
"title_color": Fore.CYAN,
@ -741,56 +735,59 @@ async def get_user_feedback(
def print_assistant_thoughts(
ai_name: str,
assistant_reply_json_valid: dict,
thoughts: str | ModelWithSummary | AssistantThoughts,
speak_mode: bool = False,
) -> None:
logger = logging.getLogger(__name__)
assistant_thoughts_reasoning = None
assistant_thoughts_plan = None
assistant_thoughts_speak = None
assistant_thoughts_criticism = None
assistant_thoughts = assistant_reply_json_valid.get("thoughts", {})
assistant_thoughts_text = remove_ansi_escape(assistant_thoughts.get("text", ""))
if assistant_thoughts:
assistant_thoughts_reasoning = remove_ansi_escape(
assistant_thoughts.get("reasoning", "")
)
assistant_thoughts_plan = remove_ansi_escape(assistant_thoughts.get("plan", ""))
assistant_thoughts_criticism = remove_ansi_escape(
assistant_thoughts.get("self_criticism", "")
)
assistant_thoughts_speak = remove_ansi_escape(
assistant_thoughts.get("speak", "")
)
print_attribute(
f"{ai_name.upper()} THOUGHTS", assistant_thoughts_text, title_color=Fore.YELLOW
thoughts_text = remove_ansi_escape(
thoughts.text
if isinstance(thoughts, AssistantThoughts)
else thoughts.summary()
if isinstance(thoughts, ModelWithSummary)
else thoughts
)
print_attribute("REASONING", assistant_thoughts_reasoning, title_color=Fore.YELLOW)
if assistant_thoughts_plan:
print_attribute("PLAN", "", title_color=Fore.YELLOW)
# If it's a list, join it into a string
if isinstance(assistant_thoughts_plan, list):
assistant_thoughts_plan = "\n".join(assistant_thoughts_plan)
elif isinstance(assistant_thoughts_plan, dict):
assistant_thoughts_plan = str(assistant_thoughts_plan)
# Split the input_string using the newline character and dashes
lines = assistant_thoughts_plan.split("\n")
for line in lines:
line = line.lstrip("- ")
logger.info(line.strip(), extra={"title": "- ", "title_color": Fore.GREEN})
print_attribute(
"CRITICISM", f"{assistant_thoughts_criticism}", title_color=Fore.YELLOW
f"{ai_name.upper()} THOUGHTS", thoughts_text, title_color=Fore.YELLOW
)
# Speak the assistant's thoughts
if assistant_thoughts_speak:
if speak_mode:
speak(assistant_thoughts_speak)
else:
print_attribute("SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW)
if isinstance(thoughts, AssistantThoughts):
print_attribute(
"REASONING", remove_ansi_escape(thoughts.reasoning), title_color=Fore.YELLOW
)
if assistant_thoughts_plan := remove_ansi_escape(
"\n".join(f"- {p}" for p in thoughts.plan)
):
print_attribute("PLAN", "", title_color=Fore.YELLOW)
# If it's a list, join it into a string
if isinstance(assistant_thoughts_plan, list):
assistant_thoughts_plan = "\n".join(assistant_thoughts_plan)
elif isinstance(assistant_thoughts_plan, dict):
assistant_thoughts_plan = str(assistant_thoughts_plan)
# Split the input_string using the newline character and dashes
lines = assistant_thoughts_plan.split("\n")
for line in lines:
line = line.lstrip("- ")
logger.info(
line.strip(), extra={"title": "- ", "title_color": Fore.GREEN}
)
print_attribute(
"CRITICISM",
remove_ansi_escape(thoughts.self_criticism),
title_color=Fore.YELLOW,
)
# Speak the assistant's thoughts
if assistant_thoughts_speak := remove_ansi_escape(thoughts.speak):
if speak_mode:
speak(assistant_thoughts_speak)
else:
print_attribute(
"SPEAK", assistant_thoughts_speak, title_color=Fore.YELLOW
)
else:
speak(thoughts_text)
def remove_ansi_escape(s: str) -> str:

View File

@ -1,12 +1,11 @@
from typing import Callable, Iterator, Optional
from typing import Callable, Generic, Iterator, Optional
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.features.watchdog import WatchdogComponent
from autogpt.agents.protocols import AfterExecute, AfterParse, MessageProvider
from autogpt.config.config import Config
from autogpt.core.resource.model_providers.schema import ChatMessage, ChatModelProvider
from autogpt.models.action_history import (
Action,
AP,
ActionResult,
Episode,
EpisodicActionHistory,
@ -14,14 +13,14 @@ from autogpt.models.action_history import (
from autogpt.prompts.utils import indent
class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute):
class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute, Generic[AP]):
"""Keeps track of the event history and provides a summary of the steps."""
run_after = [WatchdogComponent]
def __init__(
self,
event_history: EpisodicActionHistory,
event_history: EpisodicActionHistory[AP],
max_tokens: int,
count_tokens: Callable[[str], int],
legacy_config: Config,
@ -41,15 +40,8 @@ class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute):
):
yield ChatMessage.system(f"## Progress on your Task so far\n\n{progress}")
def after_parse(self, result: ThoughtProcessOutput) -> None:
if result.command_name:
self.event_history.register_action(
Action(
name=result.command_name,
args=result.command_args,
reasoning=result.thoughts["thoughts"]["reasoning"],
)
)
def after_parse(self, result: AP) -> None:
    """Record the freshly parsed action proposal in the event history.

    Called after the agent's LLM response has been parsed into an action
    proposal; registering it opens a new (unfinished) episode.
    """
    self.event_history.register_action(result)
async def after_execute(self, result: ActionResult) -> None:
    """Attach the execution result to the current (open) episode."""
    self.event_history.register_result(result)

View File

@ -26,6 +26,7 @@ from autogpt.core.resource.schema import (
ResourceType,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.logs.utils import fmt_kwargs
class ModelProviderService(str, enum.Enum):
@ -72,6 +73,9 @@ class AssistantFunctionCall(BaseModel):
name: str
arguments: dict[str, Any]
def __str__(self) -> str:
    """Pretty-print the function call, e.g. ``my_tool(arg1=1, arg2='x')``."""
    return f"{self.name}({fmt_kwargs(self.arguments)})"
class AssistantFunctionCallDict(TypedDict):
name: str

View File

@ -1,10 +1,13 @@
from __future__ import annotations
import asyncio
from typing import TYPE_CHECKING, Any, Iterator, Literal, Optional
from typing import TYPE_CHECKING, Any, Generic, Iterator, Literal, Optional, TypeVar
from pydantic import BaseModel, Field
from pydantic.generics import GenericModel
from autogpt.agents.base import BaseAgentActionProposal
from autogpt.models.utils import ModelWithSummary
from autogpt.processing.text import summarize_text
from autogpt.prompts.utils import format_numbered_list, indent
@ -13,18 +16,6 @@ if TYPE_CHECKING:
from autogpt.core.resource.model_providers import ChatModelProvider
class Action(BaseModel):
    """A tool invocation proposed by the agent, with its stated rationale."""

    # Name of the command/tool to invoke
    name: str
    # Keyword arguments to pass to the command
    args: dict[str, Any]
    # The agent's reasoning for choosing this action
    reasoning: str

    def format_call(self) -> str:
        """Render the action as a call expression, e.g. ``read_file(path='a.txt')``."""
        rendered_args = ", ".join(
            f"{key}={value!r}" for key, value in self.args.items()
        )
        return f"{self.name}({rendered_args})"
class ActionSuccessResult(BaseModel):
outputs: Any
status: Literal["success"] = "success"
@ -86,15 +77,22 @@ class ActionInterruptedByHuman(BaseModel):
ActionResult = ActionSuccessResult | ActionErrorResult | ActionInterruptedByHuman
AP = TypeVar("AP", bound=BaseAgentActionProposal)
class Episode(BaseModel):
action: Action
class Episode(GenericModel, Generic[AP]):
action: AP
result: ActionResult | None
summary: str | None = None
def format(self):
step = f"Executed `{self.action.format_call()}`\n"
step += f'- **Reasoning:** "{self.action.reasoning}"\n'
step = f"Executed `{self.action.use_tool}`\n"
reasoning = (
_r.summary()
if isinstance(_r := self.action.thoughts, ModelWithSummary)
else _r
)
step += f'- **Reasoning:** "{reasoning}"\n'
step += (
"- **Status:** "
f"`{self.result.status if self.result else 'did_not_finish'}`\n"
@ -113,28 +111,28 @@ class Episode(BaseModel):
return step
def __str__(self) -> str:
executed_action = f"Executed `{self.action.format_call()}`"
executed_action = f"Executed `{self.action.use_tool}`"
action_result = f": {self.result}" if self.result else "."
return executed_action + action_result
class EpisodicActionHistory(BaseModel):
class EpisodicActionHistory(GenericModel, Generic[AP]):
"""Utility container for an action history"""
episodes: list[Episode] = Field(default_factory=list)
episodes: list[Episode[AP]] = Field(default_factory=list)
cursor: int = 0
_lock = asyncio.Lock()
@property
def current_episode(self) -> Episode | None:
def current_episode(self) -> Episode[AP] | None:
if self.cursor == len(self):
return None
return self[self.cursor]
def __getitem__(self, key: int) -> Episode:
def __getitem__(self, key: int) -> Episode[AP]:
return self.episodes[key]
def __iter__(self) -> Iterator[Episode]:
def __iter__(self) -> Iterator[Episode[AP]]:
return iter(self.episodes)
def __len__(self) -> int:
@ -143,7 +141,7 @@ class EpisodicActionHistory(BaseModel):
def __bool__(self) -> bool:
return len(self.episodes) > 0
def register_action(self, action: Action) -> None:
def register_action(self, action: AP) -> None:
if not self.current_episode:
self.episodes.append(Episode(action=action, result=None))
assert self.current_episode

View File

@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
from pydantic import BaseModel
class ModelWithSummary(BaseModel, ABC):
    """Base class for Pydantic models that can summarize their own content."""

    @abstractmethod
    def summary(self) -> str:
        """Should produce a human readable summary of the model content."""
        pass