From e4aeb391c1c5112b16f12ccc2ed2812679c661a1 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 17:53:10 +0530 Subject: [PATCH 01/40] user deny instructions --- src/wcgw/client/mcp_server/server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index e038597..ead593f 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -302,6 +302,8 @@ def try_json(x: str) -> Any: Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd. Always write production ready, syntactically correct code. + +Important note: as soon as you encounter "The user has chosen to disallow the tool call.", immediately stop doing everything and ask user for the reason. """ content.append(types.TextContent(type="text", text=output_or_done)) From 4958d26c1e60336794fe5fe0594aafbb8dc4d7ec Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 18:04:04 +0530 Subject: [PATCH 02/40] Initial plan set up --- src/wcgw/client/modes.py | 32 ++++++++++++++++++++++++++++++++ src/wcgw/types_.py | 20 ++++++++++++++------ 2 files changed, 46 insertions(+), 6 deletions(-) create mode 100644 src/wcgw/client/modes.py diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py new file mode 100644 index 0000000..d753ffa --- /dev/null +++ b/src/wcgw/client/modes.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass +from typing import Literal, Protocol + +from ..types_ import Modes + + +@dataclass +class RestrictedCommands: + allowed_commands: list[str] + + +@dataclass +class RestrictedGlobs: + allowed_globs: list[str] + + +Skills = ( + Literal["file_edits", "write_new_files", "all_commands"] + | RestrictedCommands + | RestrictedGlobs +) + + +class ModeImpl(Protocol): + prompt: str + allowed_skills: set[Skills] + + +MODES_IMPL = dict[Modes, ModeImpl]() + + +# Add all modes' implementations here diff --git 
a/src/wcgw/types_.py b/src/wcgw/types_.py index 0c39128..c3e36bc 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import Literal, Optional, Sequence from pydantic import BaseModel as PydanticBaseModel @@ -11,6 +12,19 @@ class Config: BaseModel = NoExtraArgs +class Modes(Enum): + wcgw = "wcgw" + architect = "architect" + coder = "coder" + test_writer = "test_writer" + + +class Initialize(BaseModel): + any_workspace_path: str + initial_files_to_read: list[str] + task_id_to_resume: str + + class BashCommand(BaseModel): command: str wait_for_seconds: Optional[int] = None @@ -56,12 +70,6 @@ class FileEdit(BaseModel): file_edit_using_search_replace_blocks: str -class Initialize(BaseModel): - any_workspace_path: str - initial_files_to_read: list[str] - task_id_to_resume: str - - class GetScreenInfo(BaseModel): docker_image_id: str From 27ff7d96a8485c8181e75de585f0aaa5b90c6dbe Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 18:26:22 +0530 Subject: [PATCH 03/40] Initial plan update --- src/wcgw/client/modes.py | 55 ++++++++++++++++++++++++++++++++-------- src/wcgw/types_.py | 3 +-- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index d753ffa..32aa60a 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Literal, Protocol +from typing import Literal, NamedTuple from ..types_ import Modes @@ -14,19 +14,52 @@ class RestrictedGlobs: allowed_globs: list[str] -Skills = ( - Literal["file_edits", "write_new_files", "all_commands"] - | RestrictedCommands - | RestrictedGlobs -) +class BashCommandMode(NamedTuple): + bash_mode: Literal[ + "normal_mode", "restricted_mode" + ] # restricted_mode runs 'bash --restricted' + allowed_commands: ( + Literal["all"] | list[str] + ) # Allows all or a set of commands. Leave it empty to disable BashCommand. 
-class ModeImpl(Protocol): - prompt: str - allowed_skills: set[Skills] +class FileEditMode(NamedTuple): + allowed_globs: ( + Literal["all"] | list[str] + ) # Allows all or a set of globs. Leave it empty to disable FileEdit. + + +class WriteIfEmptyMode(NamedTuple): + allowed_globs: ( + Literal["all"] | list[str] + ) # Allows all or a set of globs. Leave it empty to disable WriteIfEmpty. -MODES_IMPL = dict[Modes, ModeImpl]() +@dataclass +class ModeImpl: + prompt: str + bash_command_mode: BashCommandMode + file_edit_mode: FileEditMode + write_if_empty_mode: WriteIfEmptyMode -# Add all modes' implementations here +DEFAULT_MODES: dict[Modes, ModeImpl] = { + Modes.wcgw: ModeImpl( + prompt="", + bash_command_mode=BashCommandMode("normal_mode", "all"), + write_if_empty_mode=WriteIfEmptyMode("all"), + file_edit_mode=FileEditMode("all"), + ), + Modes.architect: ModeImpl( + prompt="", + bash_command_mode=BashCommandMode("restricted_mode", "all"), + write_if_empty_mode=WriteIfEmptyMode([]), + file_edit_mode=FileEditMode([]), + ), + Modes.code_writer: ModeImpl( + prompt="", + bash_command_mode=BashCommandMode("restricted_mode", "all"), + write_if_empty_mode=WriteIfEmptyMode("all"), + file_edit_mode=FileEditMode("all"), + ), +} diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index c3e36bc..24b2201 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -15,8 +15,7 @@ class Config: class Modes(Enum): wcgw = "wcgw" architect = "architect" - coder = "coder" - test_writer = "test_writer" + code_writer = "code_writer" class Initialize(BaseModel): From c3c765358fa7632a236cd7885d9d54d8d9f697c4 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 18:37:03 +0530 Subject: [PATCH 04/40] Updated plan --- src/wcgw/client/modes.py | 9 +-------- src/wcgw/types_.py | 9 +++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 32aa60a..9f5c061 100644 --- a/src/wcgw/client/modes.py +++ 
b/src/wcgw/client/modes.py @@ -4,11 +4,6 @@ from ..types_ import Modes -@dataclass -class RestrictedCommands: - allowed_commands: list[str] - - @dataclass class RestrictedGlobs: allowed_globs: list[str] @@ -18,9 +13,7 @@ class BashCommandMode(NamedTuple): bash_mode: Literal[ "normal_mode", "restricted_mode" ] # restricted_mode runs 'bash --restricted' - allowed_commands: ( - Literal["all"] | list[str] - ) # Allows all or a set of commands. Leave it empty to disable BashCommand. + allowed_commands: Literal["all", "none"] # Allows all or none class FileEditMode(NamedTuple): diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index 24b2201..e882fef 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -18,10 +18,19 @@ class Modes(Enum): code_writer = "code_writer" +class CodeWriterMode: + allowed_globs: Literal["all"] | list[str] + allowed_commands: Literal["all"] | list[str] + + +ModesConfig = Literal["wcgw", "architect"] | CodeWriterMode + + class Initialize(BaseModel): any_workspace_path: str initial_files_to_read: list[str] task_id_to_resume: str + mode: ModesConfig = "wcgw" class BashCommand(BaseModel): From 7a1901ba344441e55a5b2c541906e8e8b20c24be Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 19:11:29 +0530 Subject: [PATCH 05/40] Relative globs --- src/wcgw/client/anthropic_client.py | 2 +- src/wcgw/client/openai_client.py | 2 +- src/wcgw/client/tools.py | 82 ++++++++++++++++++++++++++- src/wcgw/types_.py | 15 ++++- tests/client/tools/test_write_file.py | 6 ++ 5 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/wcgw/client/anthropic_client.py b/src/wcgw/client/anthropic_client.py index b5f9adf..8274898 100644 --- a/src/wcgw/client/anthropic_client.py +++ b/src/wcgw/client/anthropic_client.py @@ -282,7 +282,7 @@ def loop( ] initial_info = initialize( - os.getcwd(), [], resume if (memory and resume) else "", 8000 + os.getcwd(), [], resume if (memory and resume) else "", max_tokens=8000 ) system = f""" You're an expert 
software engineer with shell and code knowledge. diff --git a/src/wcgw/client/openai_client.py b/src/wcgw/client/openai_client.py index bc35fea..86715e5 100644 --- a/src/wcgw/client/openai_client.py +++ b/src/wcgw/client/openai_client.py @@ -227,7 +227,7 @@ def loop( ] initial_info = initialize( - os.getcwd(), [], resume if (memory and resume) else "", 8000 + os.getcwd(), [], resume if (memory and resume) else "", max_tokens=8000 ) system = f""" You're an expert software engineer with shell and code knowledge. diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 70f877b..27fea4d 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -1,5 +1,6 @@ import base64 import datetime +import fnmatch import glob import importlib.metadata import json @@ -39,12 +40,15 @@ from ..types_ import ( BashCommand, BashInteraction, + CodeWriterMode, ContextSave, FileEdit, FileEditFindReplace, GetScreenInfo, Initialize, Keyboard, + Modes, + ModesConfig, Mouse, ReadFiles, ReadImage, @@ -55,6 +59,13 @@ from .computer_use import run_computer_tool from .file_ops.search_replace import search_replace_edit from .memory import load_memory, save_memory +from .modes import ( + DEFAULT_MODES, + BashCommandMode, + FileEditMode, + ModeImpl, + WriteIfEmptyMode, +) from .repo_ops.repo_context import get_repo_context from .sys_utils import command_run @@ -125,8 +136,12 @@ def ask_confirmation(prompt: Confirmation) -> str: def start_shell() -> pexpect.spawn: # type: ignore try: + cmd = "/bin/bash" + if BASH_STATE.bash_command_mode.bash_mode == "restricted_mode": + cmd += " -r" + shell = pexpect.spawn( - "/bin/bash", + cmd, env={**os.environ, **{"PS1": PROMPT}}, # type: ignore[arg-type] echo=False, encoding="utf-8", @@ -204,8 +219,46 @@ def _ensure_env_and_bg_jobs(shell: pexpect.spawn) -> Optional[int]: # type: ign class BashState: def __init__(self) -> None: + self._bash_command_mode: BashCommandMode = BashCommandMode("normal_mode", "all") + self._file_edit_mode: 
FileEditMode = FileEditMode("all") + self._write_if_empty_mode: WriteIfEmptyMode = WriteIfEmptyMode("all") self._init() + @property + def bash_command_mode(self) -> BashCommandMode: + return self._bash_command_mode + + @property + def file_edit_mode(self) -> FileEditMode: + return self._file_edit_mode + + @property + def write_if_empty_mode(self) -> WriteIfEmptyMode: + return self._write_if_empty_mode + + def set_modes(self, mode: ModesConfig) -> None: + # First get default mode config + if isinstance(mode, str): + mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum + else: + # For CodeWriterMode, use code_writer as base and override + mode_impl = DEFAULT_MODES[Modes.code_writer] + # Override with custom settings from CodeWriterMode + mode_impl = ModeImpl( + prompt=mode_impl.prompt, + bash_command_mode=BashCommandMode( + mode_impl.bash_command_mode.bash_mode, + "all" if mode.allowed_commands == "all" else "none", + ), + file_edit_mode=FileEditMode(mode.allowed_globs), + write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), + ) + + # Set the individual mode components + self._bash_command_mode = mode_impl.bash_command_mode + self._file_edit_mode = mode_impl.file_edit_mode + self._write_if_empty_mode = mode_impl.write_if_empty_mode + def _init(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" @@ -298,6 +351,7 @@ def initialize( read_files_: list[str], task_id_to_resume: str, max_tokens: Optional[int], + mode: ModesConfig, ) -> str: reset_shell() @@ -331,11 +385,20 @@ def initialize( BASH_STATE.update_cwd() repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n" + + # update modes if they're relative + if isinstance(mode, CodeWriterMode): + mode.update_relative_globs(any_workspace_path) + else: + assert isinstance(mode, str) else: repo_context = ( f"\nInfo: Workspace path {any_workspace_path} does not exist\n" ) + # Set mode for the shell + BASH_STATE.set_modes(mode) + 
initial_files_context = "" if read_files_: initial_files = read_files(read_files_, max_tokens) @@ -472,6 +535,8 @@ def execute_bash( try: is_interrupt = False if isinstance(bash_arg, BashCommand): + if BASH_STATE.bash_command_mode.allowed_commands == "none": + return "Error: BashCommand not allowed in current mode", 0.0 updated_repl_mode = update_repl_prompt(bash_arg.command) if updated_repl_mode: BASH_STATE.set_repl() @@ -766,6 +831,13 @@ def write_file( path_ = expand_user(writefile.file_path, BASH_STATE.is_in_docker) error_on_exist_ = error_on_exist and path_ not in BASH_STATE.whitelist_for_overwrite + + # Validate using write_if_empty_mode after checking whitelist + allowed_globs = BASH_STATE.write_if_empty_mode.allowed_globs + if allowed_globs != "all" and not any( + fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs + ): + return "Error: File path not allowed in current mode" add_overwrite_warning = "" if not BASH_STATE.is_in_docker: if (error_on_exist or error_on_exist_) and os.path.exists(path_): @@ -897,6 +969,13 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int]) -> str: else: path_ = expand_user(fedit.file_path, BASH_STATE.is_in_docker) + # Validate using file_edit_mode + allowed_globs = BASH_STATE.file_edit_mode.allowed_globs + if allowed_globs != "all" and not any( + fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs + ): + raise Exception("Error: File path not allowed in current mode") + # The LLM is now aware that the file exists BASH_STATE.add_to_whitelist_for_overwrite(path_) @@ -1106,6 +1185,7 @@ def get_tool_output( arg.initial_files_to_read, arg.task_id_to_resume, max_tokens, + arg.mode, ), 0.0, ) diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index e882fef..bf3a697 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -1,5 +1,6 @@ +import os from enum import Enum -from typing import Literal, Optional, Sequence +from typing import Literal, Optional, Sequence, Union from pydantic import BaseModel as 
PydanticBaseModel @@ -18,12 +19,20 @@ class Modes(Enum): code_writer = "code_writer" -class CodeWriterMode: +class CodeWriterMode(BaseModel): allowed_globs: Literal["all"] | list[str] allowed_commands: Literal["all"] | list[str] + def update_relative_globs(self, workspace_root: str) -> None: + """Update globs if they're relative paths""" + if self.allowed_globs != "all": + self.allowed_globs = [ + glob if os.path.isabs(glob) else os.path.join(workspace_root, glob) + for glob in self.allowed_globs + ] -ModesConfig = Literal["wcgw", "architect"] | CodeWriterMode + +ModesConfig = Union[Literal["wcgw", "architect"], CodeWriterMode] class Initialize(BaseModel): diff --git a/tests/client/tools/test_write_file.py b/tests/client/tools/test_write_file.py index 1f1ebe5..f893a37 100644 --- a/tests/client/tools/test_write_file.py +++ b/tests/client/tools/test_write_file.py @@ -22,6 +22,8 @@ def test_write_file_success(self, mock_isabs, mock_exists, mock_bash_state, mock mock_exists.return_value = False mock_bash_state.is_in_docker = None mock_bash_state.whitelist_for_overwrite = set() + mock_bash_state.write_if_empty_mode = MagicMock() + mock_bash_state.write_if_empty_mode.allowed_globs = "all" # Setup Path mocking mock_path_instance = MagicMock(spec=Path) @@ -58,6 +60,8 @@ def test_write_file_with_existing_file(self, mock_isabs, mock_exists, mock_bash_ mock_exists.return_value = True mock_bash_state.is_in_docker = None mock_bash_state.whitelist_for_overwrite = set() + mock_bash_state.write_if_empty_mode = MagicMock() + mock_bash_state.write_if_empty_mode.allowed_globs = "all" # Setup Path with existing content mock_path_instance = MagicMock(spec=Path) @@ -93,6 +97,8 @@ def test_write_file_with_whitelist(self, mock_isabs, mock_exists, mock_bash_stat mock_exists.return_value = True mock_bash_state.is_in_docker = None mock_bash_state.whitelist_for_overwrite = {self.test_path} + mock_bash_state.write_if_empty_mode = MagicMock() + mock_bash_state.write_if_empty_mode.allowed_globs 
= "all" # Setup Path mocking mock_path_instance = MagicMock(spec=Path) From d3495b10fb8f54402689781763cce8d416acdf16 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 19:27:14 +0530 Subject: [PATCH 06/40] Not really any change just a minor refactor --- src/wcgw/client/mcp_server/server.py | 48 +++++++++++++++------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index ead593f..e1508bd 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -45,28 +45,13 @@ async def handle_read_resource(uri: AnyUrl) -> str: raise ValueError("No resources available") -@server.list_prompts() # type: ignore -async def handle_list_prompts() -> list[types.Prompt]: - return [ +PROMPTS = { + "KnowledgeTransfer": ( types.Prompt( name="KnowledgeTransfer", description="Prompt for invoking ContextSave tool in order to do a comprehensive knowledge transfer of a coding task. Prompts to save detailed error log and instructions.", - ) - ] - - -@server.get_prompt() # type: ignore -async def handle_get_prompt( - name: str, arguments: dict[str, str] | None -) -> types.GetPromptResult: - messages = [] - if name == "KnowledgeTransfer": - messages = [ - types.PromptMessage( - role="user", - content=types.TextContent( - type="text", - text="""Use `ContextSave` tool to do a knowledge transfer of the task in hand. + ), + """Use `ContextSave` tool to do a knowledge transfer of the task in hand. Write detailed description in order to do a KT. Save all information necessary for a person to understand the task and the problems. 
@@ -84,9 +69,28 @@ async def handle_get_prompt( (Note to self: this conversation can then be resumed later asking "Resume ``" which should call Initialize tool) """, - ), - ) - ] + ) +} + + +@server.list_prompts() # type: ignore +async def handle_list_prompts() -> list[types.Prompt]: + return [x[0] for x in PROMPTS.values()] + + +@server.get_prompt() # type: ignore +async def handle_get_prompt( + name: str, arguments: dict[str, str] | None +) -> types.GetPromptResult: + messages = [ + types.PromptMessage( + role="user", + content=types.TextContent( + type="text", + text=PROMPTS[name][1], + ), + ) + ] return types.GetPromptResult(messages=messages) From 9389dd1e9452664a18fff1f1a0217d4390e2a43f Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 19:53:09 +0530 Subject: [PATCH 07/40] Memory saving bash state --- src/wcgw/client/memory.py | 23 +++++++++++++++++++---- src/wcgw/client/modes.py | 37 ++++++++++++++++++++++++++----------- src/wcgw/client/tools.py | 39 +++++++++++++++++++++++++++++++++++---- 3 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/wcgw/client/memory.py b/src/wcgw/client/memory.py index ed4a403..dcd45dd 100644 --- a/src/wcgw/client/memory.py +++ b/src/wcgw/client/memory.py @@ -1,7 +1,8 @@ +import json import os import re import shlex -from typing import Callable, Optional +from typing import Any, Callable, Optional from ..types_ import ContextSave @@ -30,7 +31,7 @@ def format_memory(task_memory: ContextSave, relevant_files: str) -> str: return memory_data -def save_memory(task_memory: ContextSave, relevant_files: str) -> str: +def save_memory(task_memory: ContextSave, relevant_files: str, bash_state_dict: Optional[dict[str, Any]] = None) -> str: app_dir = get_app_dir_xdg() memory_dir = os.path.join(app_dir, "memory") os.makedirs(memory_dir, exist_ok=True) @@ -45,6 +46,12 @@ def save_memory(task_memory: ContextSave, relevant_files: str) -> str: with open(memory_file_full, "w") as f: f.write(memory_data) + # Save bash 
state if provided + if bash_state_dict is not None: + state_file = os.path.join(memory_dir, f"{task_id}_bash_state.json") + with open(state_file, "w") as f: + json.dump(bash_state_dict, f, indent=2) + return memory_file_full @@ -53,7 +60,7 @@ def load_memory[T]( max_tokens: Optional[int], encoder: Callable[[str], list[T]], decoder: Callable[[list[T]], str], -) -> tuple[str, str]: +) -> tuple[str, str, dict[str, Any]]: app_dir = get_app_dir_xdg() memory_dir = os.path.join(app_dir, "memory") memory_file = os.path.join(memory_dir, f"{task_id}.txt") @@ -75,4 +82,12 @@ def load_memory[T]( parsed_ = shlex.split(matched_path) if parsed_ and len(parsed_) == 1: project_root_path = parsed_[0] - return project_root_path, data + + # Try to load bash state if exists + state_file = os.path.join(memory_dir, f"{task_id}_bash_state.json") + bash_state = None + if os.path.exists(state_file): + with open(state_file) as f: + bash_state: dict[str, Any] = json.load(f) + + return project_root_path, data, bash_state diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 9f5c061..8bf1677 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Literal, NamedTuple +from typing import Any, Literal, NamedTuple from ..types_ import Modes @@ -10,22 +10,37 @@ class RestrictedGlobs: class BashCommandMode(NamedTuple): - bash_mode: Literal[ - "normal_mode", "restricted_mode" - ] # restricted_mode runs 'bash --restricted' - allowed_commands: Literal["all", "none"] # Allows all or none + bash_mode: Literal["normal_mode", "restricted_mode"] + allowed_commands: Literal["all", "none"] + + def serialize(self) -> dict[str, Any]: + return {"bash_mode": self.bash_mode, "allowed_commands": self.allowed_commands} + + @classmethod + def deserialize(cls, data: dict[str, Any])-> "BashCommandMode": + return cls(data["bash_mode"], data["allowed_commands"]) class FileEditMode(NamedTuple): - allowed_globs: ( - 
Literal["all"] | list[str] - ) # Allows all or a set of globs. Leave it empty to disable FileEdit. + allowed_globs: Literal["all"] | list[str] + + def serialize(self) -> dict[str, Any]: + return {"allowed_globs": self.allowed_globs} + + @classmethod + def deserialize(cls, data: dict[str, Any])-> "FileEditMode": + return cls(data["allowed_globs"]) class WriteIfEmptyMode(NamedTuple): - allowed_globs: ( - Literal["all"] | list[str] - ) # Allows all or a set of globs. Leave it empty to disable WriteIfEmpty. + allowed_globs: Literal["all"] | list[str] + + def serialize(self) -> dict[str, Any]: + return {"allowed_globs": self.allowed_globs} + + @classmethod + def deserialize(cls, data: dict[str, Any])-> "WriteIfEmptyMode": + return cls(data["allowed_globs"]) @dataclass diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 27fea4d..a2b3c02 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -15,6 +15,7 @@ from pathlib import Path from tempfile import NamedTemporaryFile, TemporaryDirectory from typing import ( + Any, Callable, Literal, Optional, @@ -315,6 +316,29 @@ def reset(self) -> None: self.shell.close(True) self._init() + def serialize(self) -> dict[str, Any]: + """Serialize BashState to a dictionary for saving""" + return { + "bash_command_mode": self._bash_command_mode.serialize(), + "file_edit_mode": self._file_edit_mode.serialize(), + "write_if_empty_mode": self._write_if_empty_mode.serialize(), + "whitelist_for_overwrite": list(self._whitelist_for_overwrite), + } + + @classmethod + def deserialize(cls, state: dict[str, Any]) -> "BashState": + """Create a new BashState instance from a serialized state dictionary""" + instance = cls() + instance._bash_command_mode = BashCommandMode.deserialize( + state["bash_command_mode"] + ) + instance._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) + instance._write_if_empty_mode = WriteIfEmptyMode.deserialize( + state["write_if_empty_mode"] + ) + 
instance._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) + return instance + def get_pending_for(self) -> str: if isinstance(self._state, datetime.datetime): timedelta = datetime.datetime.now() - self._state @@ -353,16 +377,17 @@ def initialize( max_tokens: Optional[int], mode: ModesConfig, ) -> str: - reset_shell() + global BASH_STATE # Expand the workspace path - any_workspace_path = expand_user(any_workspace_path, BASH_STATE.is_in_docker) + any_workspace_path = expand_user(any_workspace_path, None) repo_context = "" memory = "" + bash_state = None if task_id_to_resume: try: - project_root_path, task_mem = load_memory( + project_root_path, task_mem, bash_state = load_memory( task_id_to_resume, max_tokens, lambda x: default_enc.encode(x).ids, @@ -373,9 +398,15 @@ def initialize( not any_workspace_path or not os.path.exists(any_workspace_path) ) and os.path.exists(project_root_path): any_workspace_path = project_root_path + except Exception: memory = f'Error: Unable to load task with ID "{task_id_to_resume}" ' + # Restore bash state if available + if bash_state is not None: + BASH_STATE = BashState.deserialize(bash_state) + else: + reset_shell() if any_workspace_path: if os.path.exists(any_workspace_path): repo_context, folder_to_start = get_repo_context(any_workspace_path, 200) @@ -1238,7 +1269,7 @@ def get_tool_output( if not globs: warnings += f"Warning: No files found for the glob: {fglob}\n" relevant_files_data = read_files(relevant_files[:10_000], None) - output_ = save_memory(arg, relevant_files_data) + output_ = save_memory(arg, relevant_files_data, BASH_STATE.serialize()) if not relevant_files and arg.relevant_file_globs: output_ = f'Error: No files found for the given globs. Context file successfully saved at "{output_}", but please fix the error.' 
elif warnings: From 4135319d83af453c52123a1811bcdcc061c25d17 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 20:19:21 +0530 Subject: [PATCH 08/40] Modes update --- src/wcgw/client/modes.py | 49 ++++++++++++++++++++++++++++++++++------ src/wcgw/client/tools.py | 45 ++++++++++++++++++++++++++++-------- 2 files changed, 77 insertions(+), 17 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 8bf1677..cc70052 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -17,7 +17,7 @@ def serialize(self) -> dict[str, Any]: return {"bash_mode": self.bash_mode, "allowed_commands": self.allowed_commands} @classmethod - def deserialize(cls, data: dict[str, Any])-> "BashCommandMode": + def deserialize(cls, data: dict[str, Any]) -> "BashCommandMode": return cls(data["bash_mode"], data["allowed_commands"]) @@ -28,7 +28,7 @@ def serialize(self) -> dict[str, Any]: return {"allowed_globs": self.allowed_globs} @classmethod - def deserialize(cls, data: dict[str, Any])-> "FileEditMode": + def deserialize(cls, data: dict[str, Any]) -> "FileEditMode": return cls(data["allowed_globs"]) @@ -39,33 +39,68 @@ def serialize(self) -> dict[str, Any]: return {"allowed_globs": self.allowed_globs} @classmethod - def deserialize(cls, data: dict[str, Any])-> "WriteIfEmptyMode": + def deserialize(cls, data: dict[str, Any]) -> "WriteIfEmptyMode": return cls(data["allowed_globs"]) @dataclass class ModeImpl: - prompt: str bash_command_mode: BashCommandMode file_edit_mode: FileEditMode write_if_empty_mode: WriteIfEmptyMode +def code_writer_prompt( + allowed_globs: Literal["all"] | list[str], + allowed_commands: Literal["all"] | list[str], +) -> str: + base = """You have to run in "code_writer" mode. This means +""" + + path_prompt = """ + - You are allowed to create and update files in the provided repository only. 
+ """ + + if allowed_globs != "all" and allowed_globs: + path_prompt = f""" +- You are allowed to create and update files in the following globs: {', '.join(allowed_globs)} +""" + base += path_prompt + + command_prompt = """ +- You are only allowed to run commands for project setup, code writing, testing, running and debugging related to the proejct. +- Do not run anything that adds or removes packages, changes system configuration or environment. +""" + if allowed_commands != "all": + command_prompt = f""" +- You are only allowed to run the following commands: {', '.join(allowed_commands)} +""" + + base += command_prompt + return base + + +ARCHITECT_PROMPT = """You have to run in "architect" mode. This means +- You are not allowed to edit or update any file. You are not allowed to create any file. +- You are not allowed to run any commands that may change disk, system configuration, packages or environment. Only read-only commands are allowed. +- Only run commands that allows you to explore the repository, understand the system or read anything of relevance. + +Your response should be in self-critique and brainstorm style. +- Read as many relevant files as possible. +- Be comprehensive in your understanding and search of relevant files. 
+""" DEFAULT_MODES: dict[Modes, ModeImpl] = { Modes.wcgw: ModeImpl( - prompt="", bash_command_mode=BashCommandMode("normal_mode", "all"), write_if_empty_mode=WriteIfEmptyMode("all"), file_edit_mode=FileEditMode("all"), ), Modes.architect: ModeImpl( - prompt="", bash_command_mode=BashCommandMode("restricted_mode", "all"), write_if_empty_mode=WriteIfEmptyMode([]), file_edit_mode=FileEditMode([]), ), Modes.code_writer: ModeImpl( - prompt="", bash_command_mode=BashCommandMode("restricted_mode", "all"), write_if_empty_mode=WriteIfEmptyMode("all"), file_edit_mode=FileEditMode("all"), diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index a2b3c02..a8f3de9 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -61,11 +61,13 @@ from .file_ops.search_replace import search_replace_edit from .memory import load_memory, save_memory from .modes import ( + ARCHITECT_PROMPT, DEFAULT_MODES, BashCommandMode, FileEditMode, ModeImpl, WriteIfEmptyMode, + code_writer_prompt, ) from .repo_ops.repo_context import get_repo_context from .sys_utils import command_run @@ -223,8 +225,13 @@ def __init__(self) -> None: self._bash_command_mode: BashCommandMode = BashCommandMode("normal_mode", "all") self._file_edit_mode: FileEditMode = FileEditMode("all") self._write_if_empty_mode: WriteIfEmptyMode = WriteIfEmptyMode("all") + self._mode = Modes.wcgw self._init() + @property + def mode(self) -> Modes: + return self._mode + @property def bash_command_mode(self) -> BashCommandMode: return self._bash_command_mode @@ -241,12 +248,12 @@ def set_modes(self, mode: ModesConfig) -> None: # First get default mode config if isinstance(mode, str): mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum + mode_name = Modes[mode] else: # For CodeWriterMode, use code_writer as base and override mode_impl = DEFAULT_MODES[Modes.code_writer] # Override with custom settings from CodeWriterMode mode_impl = ModeImpl( - prompt=mode_impl.prompt, 
bash_command_mode=BashCommandMode( mode_impl.bash_command_mode.bash_mode, "all" if mode.allowed_commands == "all" else "none", @@ -254,11 +261,13 @@ def set_modes(self, mode: ModesConfig) -> None: file_edit_mode=FileEditMode(mode.allowed_globs), write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), ) + mode_name = Modes.code_writer # Set the individual mode components self._bash_command_mode = mode_impl.bash_command_mode self._file_edit_mode = mode_impl.file_edit_mode self._write_if_empty_mode = mode_impl.write_if_empty_mode + self._mode = mode_name def _init(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" @@ -323,20 +332,27 @@ def serialize(self) -> dict[str, Any]: "file_edit_mode": self._file_edit_mode.serialize(), "write_if_empty_mode": self._write_if_empty_mode.serialize(), "whitelist_for_overwrite": list(self._whitelist_for_overwrite), + "mode": self._mode, } @classmethod def deserialize(cls, state: dict[str, Any]) -> "BashState": - """Create a new BashState instance from a serialized state dictionary""" instance = cls() - instance._bash_command_mode = BashCommandMode.deserialize( - state["bash_command_mode"] - ) - instance._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) - instance._write_if_empty_mode = WriteIfEmptyMode.deserialize( - state["write_if_empty_mode"] - ) - instance._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) + try: + """Create a new BashState instance from a serialized state dictionary""" + instance._bash_command_mode = BashCommandMode.deserialize( + state["bash_command_mode"] + ) + instance._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) + instance._write_if_empty_mode = WriteIfEmptyMode.deserialize( + state["write_if_empty_mode"] + ) + instance._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) + instance._mode = Modes[str(state["mode"])] + except Exception: + console.print(traceback.format_exc()) + console.print("Error deserializing 
BashState") + return instance def get_pending_for(self) -> str: @@ -438,7 +454,16 @@ def initialize( uname_sysname = os.uname().sysname uname_machine = os.uname().machine + mode_prompt = "" + if isinstance(BASH_STATE.mode, CodeWriterMode): + mode_prompt = code_writer_prompt( + BASH_STATE.mode.allowed_globs, mode.allowed_commands + ) + elif BASH_STATE.mode == Modes.architect: + mode_prompt = ARCHITECT_PROMPT output = f""" +{mode_prompt} + # Environment System: {uname_sysname} Machine: {uname_machine} From b33fb3eec1c15340c874f523213632def9e3e002 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 20:25:12 +0530 Subject: [PATCH 09/40] Updated messages --- src/wcgw/client/tools.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index a8f3de9..acc19ae 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -137,10 +137,10 @@ def ask_confirmation(prompt: Confirmation) -> str: PROMPT = PROMPT_CONST -def start_shell() -> pexpect.spawn: # type: ignore +def start_shell(is_restricted_mode: bool) -> pexpect.spawn: # type: ignore try: cmd = "/bin/bash" - if BASH_STATE.bash_command_mode.bash_mode == "restricted_mode": + if is_restricted_mode: cmd += " -r" shell = pexpect.spawn( @@ -273,7 +273,9 @@ def _init(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" self._cwd: str = os.getcwd() - self._shell = start_shell() + self._shell = start_shell( + self._bash_command_mode.bash_mode == "restricted_mode" + ) self._whitelist_for_overwrite: set[str] = set() self._pending_output = "" @@ -893,7 +895,7 @@ def write_file( if allowed_globs != "all" and not any( fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs ): - return "Error: File path not allowed in current mode" + return f"Error: updating file {path_} not allowed in current mode. 
Matches restricted glob: {allowed_globs}" add_overwrite_warning = "" if not BASH_STATE.is_in_docker: if (error_on_exist or error_on_exist_) and os.path.exists(path_): @@ -1030,7 +1032,9 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int]) -> str: if allowed_globs != "all" and not any( fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs ): - raise Exception("Error: File path not allowed in current mode") + raise Exception( + f"Error: updating file {path_} not allowed in current mode. Matches restricted glob: {allowed_globs}" + ) # The LLM is now aware that the file exists BASH_STATE.add_to_whitelist_for_overwrite(path_) From e97ea16f11f5d2cac6194cdc070f32371caffb51 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 20:36:20 +0530 Subject: [PATCH 10/40] Bash state fix --- src/wcgw/client/tools.py | 52 ++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index acc19ae..d9ffb9c 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -221,11 +221,13 @@ def _ensure_env_and_bg_jobs(shell: pexpect.spawn) -> Optional[int]: # type: ign class BashState: - def __init__(self) -> None: + def __init__(self, mode: Optional[ModesConfig]) -> None: self._bash_command_mode: BashCommandMode = BashCommandMode("normal_mode", "all") self._file_edit_mode: FileEditMode = FileEditMode("all") self._write_if_empty_mode: WriteIfEmptyMode = WriteIfEmptyMode("all") self._mode = Modes.wcgw + if mode: + self._set_modes(mode) self._init() @property @@ -244,7 +246,7 @@ def file_edit_mode(self) -> FileEditMode: def write_if_empty_mode(self) -> WriteIfEmptyMode: return self._write_if_empty_mode - def set_modes(self, mode: ModesConfig) -> None: + def _set_modes(self, mode: ModesConfig) -> None: # First get default mode config if isinstance(mode, str): mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum @@ -337,26 +339,24 @@ def 
serialize(self) -> dict[str, Any]: "mode": self._mode, } - @classmethod - def deserialize(cls, state: dict[str, Any]) -> "BashState": - instance = cls() + def load_state(self, state: dict[str, Any]) -> None: try: """Create a new BashState instance from a serialized state dictionary""" - instance._bash_command_mode = BashCommandMode.deserialize( - state["bash_command_mode"] - ) - instance._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) - instance._write_if_empty_mode = WriteIfEmptyMode.deserialize( + _bash_command_mode = BashCommandMode.deserialize(state["bash_command_mode"]) + if _bash_command_mode != self._bash_command_mode: + self._bash_command_mode = _bash_command_mode + self.reset() + + self._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) + self._write_if_empty_mode = WriteIfEmptyMode.deserialize( state["write_if_empty_mode"] ) - instance._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) - instance._mode = Modes[str(state["mode"])] + self._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) + self._mode = Modes[str(state["mode"])] except Exception: console.print(traceback.format_exc()) console.print("Error deserializing BashState") - return instance - def get_pending_for(self) -> str: if isinstance(self._state, datetime.datetime): timedelta = datetime.datetime.now() - self._state @@ -385,7 +385,7 @@ def pending_output(self) -> str: return self._pending_output -BASH_STATE = BashState() +BASH_STATE = BashState(None) def initialize( @@ -420,19 +420,11 @@ def initialize( except Exception: memory = f'Error: Unable to load task with ID "{task_id_to_resume}" ' - # Restore bash state if available - if bash_state is not None: - BASH_STATE = BashState.deserialize(bash_state) - else: - reset_shell() + folder_to_start = None if any_workspace_path: if os.path.exists(any_workspace_path): repo_context, folder_to_start = get_repo_context(any_workspace_path, 200) - BASH_STATE.shell.sendline(f"cd 
{shlex.quote(str(folder_to_start))}") - BASH_STATE.shell.expect(PROMPT, timeout=0.2) - BASH_STATE.update_cwd() - repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n" # update modes if they're relative @@ -445,8 +437,16 @@ def initialize( f"\nInfo: Workspace path {any_workspace_path} does not exist\n" ) - # Set mode for the shell - BASH_STATE.set_modes(mode) + # Restore bash state if available + if bash_state is not None: + BASH_STATE.load_state(bash_state) + else: + BASH_STATE = BashState(mode) + + if folder_to_start: + BASH_STATE.shell.sendline(f"cd {shlex.quote(str(folder_to_start))}") + BASH_STATE.shell.expect(PROMPT, timeout=0.2) + BASH_STATE.update_cwd() initial_files_context = "" if read_files_: From d734bd8937ef21fc98d6a2b3d06fc5b324e2918b Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 20:38:32 +0530 Subject: [PATCH 11/40] Made initialization must --- src/wcgw/client/tools.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index d9ffb9c..b7e3cf8 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -386,6 +386,7 @@ def pending_output(self) -> str: BASH_STATE = BashState(None) +INITIALIZED = False def initialize( @@ -480,6 +481,9 @@ def initialize( {memory} """ + global INITIALIZED + INITIALIZED = True + return output @@ -1202,7 +1206,7 @@ def get_tool_output( loop_call: Callable[[str, float], tuple[str, float]], max_tokens: Optional[int], ) -> tuple[list[str | ImageData | DoneFlag], float]: - global IS_IN_DOCKER, TOOL_CALLS + global IS_IN_DOCKER, TOOL_CALLS, INITIALIZED if isinstance(args, dict): adapter = TypeAdapter[TOOLS](TOOLS, config={"extra": "forbid"}) arg = adapter.validate_python(args) @@ -1210,6 +1214,10 @@ def get_tool_output( arg = args output: tuple[str | DoneFlag | ImageData, float] TOOL_CALLS.append(arg) + + if not isinstance(arg, Initialize) and not INITIALIZED: + raise Exception("Initialize tool not called yet.") 
+ if isinstance(arg, Confirmation): console.print("Calling ask confirmation tool") output = ask_confirmation(arg), 0.0 From 2ef247c7130dd316e0ee8ab23889bfc777bbf188 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 20:44:33 +0530 Subject: [PATCH 12/40] Fixed serialisation --- src/wcgw/client/tools.py | 12 +++++++++--- src/wcgw/types_.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index b7e3cf8..ed7aba4 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -1215,20 +1215,26 @@ def get_tool_output( output: tuple[str | DoneFlag | ImageData, float] TOOL_CALLS.append(arg) - if not isinstance(arg, Initialize) and not INITIALIZED: - raise Exception("Initialize tool not called yet.") - if isinstance(arg, Confirmation): console.print("Calling ask confirmation tool") output = ask_confirmation(arg), 0.0 elif isinstance(arg, (BashCommand | BashInteraction)): console.print("Calling execute bash tool") + if not INITIALIZED: + raise Exception("Initialize tool not called yet.") + output = execute_bash(enc, arg, max_tokens, arg.wait_for_seconds) elif isinstance(arg, WriteIfEmpty): console.print("Calling write file tool") + if not INITIALIZED: + raise Exception("Initialize tool not called yet.") + output = write_file(arg, True, max_tokens), 0 elif isinstance(arg, FileEdit): console.print("Calling full file edit tool") + if not INITIALIZED: + raise Exception("Initialize tool not called yet.") + output = do_diff_edit(arg, max_tokens), 0.0 elif isinstance(arg, DoneFlag): console.print("Calling mark finish tool") diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index bf3a697..7c82071 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -13,7 +13,7 @@ class Config: BaseModel = NoExtraArgs -class Modes(Enum): +class Modes(str, Enum): wcgw = "wcgw" architect = "architect" code_writer = "code_writer" From 1f9eb9e63ea902e6632e53eff124bdd8b0f9253f Mon Sep 17 00:00:00 2001 
From: Aman Rusia Date: Fri, 10 Jan 2025 23:13:36 +0530 Subject: [PATCH 13/40] Saving state update for proper override --- src/wcgw/client/modes.py | 22 ++++++- src/wcgw/client/tools.py | 136 +++++++++++++++++++++++---------------- 2 files changed, 102 insertions(+), 56 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index cc70052..edddf5f 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Any, Literal, NamedTuple -from ..types_ import Modes +from ..types_ import Modes, ModesConfig @dataclass @@ -106,3 +106,23 @@ def code_writer_prompt( file_edit_mode=FileEditMode("all"), ), } + +def modes_to_state(mode: ModesConfig) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes]: + # First get default mode config + if isinstance(mode, str): + mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum + mode_name = Modes[mode] + else: + # For CodeWriterMode, use code_writer as base and override + mode_impl = DEFAULT_MODES[Modes.code_writer] + # Override with custom settings from CodeWriterMode + mode_impl = ModeImpl( + bash_command_mode=BashCommandMode( + mode_impl.bash_command_mode.bash_mode, + "all" if mode.allowed_commands == "all" else "none", + ), + file_edit_mode=FileEditMode(mode.allowed_globs), + write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), + ) + mode_name = Modes.code_writer + return (mode_impl.bash_command_mode, mode_impl.file_edit_mode, mode_impl.write_if_empty_mode, mode_name) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index ed7aba4..46ea288 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -62,12 +62,11 @@ from .memory import load_memory, save_memory from .modes import ( ARCHITECT_PROMPT, - DEFAULT_MODES, BashCommandMode, FileEditMode, - ModeImpl, WriteIfEmptyMode, code_writer_prompt, + modes_to_state, ) from .repo_ops.repo_context import get_repo_context from .sys_utils 
import command_run @@ -221,13 +220,24 @@ def _ensure_env_and_bg_jobs(shell: pexpect.spawn) -> Optional[int]: # type: ign class BashState: - def __init__(self, mode: Optional[ModesConfig]) -> None: - self._bash_command_mode: BashCommandMode = BashCommandMode("normal_mode", "all") - self._file_edit_mode: FileEditMode = FileEditMode("all") - self._write_if_empty_mode: WriteIfEmptyMode = WriteIfEmptyMode("all") - self._mode = Modes.wcgw - if mode: - self._set_modes(mode) + def __init__( + self, + bash_command_mode: Optional[BashCommandMode], + file_edit_mode: Optional[FileEditMode], + write_if_empty_mode: Optional[WriteIfEmptyMode], + mode: Optional[Modes], + whitelist_for_overwrite: Optional[set[str]] = None, + ) -> None: + self._bash_command_mode: BashCommandMode = bash_command_mode or BashCommandMode( + "normal_mode", "all" + ) + self._file_edit_mode: FileEditMode = file_edit_mode or FileEditMode("all") + self._write_if_empty_mode: WriteIfEmptyMode = ( + write_if_empty_mode or WriteIfEmptyMode("all") + ) + self._mode = mode or Modes.wcgw + self._whitelist_for_overwrite: set[str] = whitelist_for_overwrite or set() + self._init() @property @@ -246,31 +256,6 @@ def file_edit_mode(self) -> FileEditMode: def write_if_empty_mode(self) -> WriteIfEmptyMode: return self._write_if_empty_mode - def _set_modes(self, mode: ModesConfig) -> None: - # First get default mode config - if isinstance(mode, str): - mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum - mode_name = Modes[mode] - else: - # For CodeWriterMode, use code_writer as base and override - mode_impl = DEFAULT_MODES[Modes.code_writer] - # Override with custom settings from CodeWriterMode - mode_impl = ModeImpl( - bash_command_mode=BashCommandMode( - mode_impl.bash_command_mode.bash_mode, - "all" if mode.allowed_commands == "all" else "none", - ), - file_edit_mode=FileEditMode(mode.allowed_globs), - write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), - ) - mode_name = Modes.code_writer - - # Set 
the individual mode components - self._bash_command_mode = mode_impl.bash_command_mode - self._file_edit_mode = mode_impl.file_edit_mode - self._write_if_empty_mode = mode_impl.write_if_empty_mode - self._mode = mode_name - def _init(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" @@ -278,7 +263,7 @@ def _init(self) -> None: self._shell = start_shell( self._bash_command_mode.bash_mode == "restricted_mode" ) - self._whitelist_for_overwrite: set[str] = set() + self._pending_output = "" # Get exit info to ensure shell is ready @@ -339,23 +324,35 @@ def serialize(self) -> dict[str, Any]: "mode": self._mode, } - def load_state(self, state: dict[str, Any]) -> None: - try: - """Create a new BashState instance from a serialized state dictionary""" - _bash_command_mode = BashCommandMode.deserialize(state["bash_command_mode"]) - if _bash_command_mode != self._bash_command_mode: - self._bash_command_mode = _bash_command_mode - self.reset() - - self._file_edit_mode = FileEditMode.deserialize(state["file_edit_mode"]) - self._write_if_empty_mode = WriteIfEmptyMode.deserialize( - state["write_if_empty_mode"] - ) - self._whitelist_for_overwrite = set(state["whitelist_for_overwrite"]) - self._mode = Modes[str(state["mode"])] - except Exception: - console.print(traceback.format_exc()) - console.print("Error deserializing BashState") + @staticmethod + def parse_state( + state: dict[str, Any], + ) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes, list[str]]: + return ( + BashCommandMode.deserialize(state["bash_command_mode"]), + FileEditMode.deserialize(state["file_edit_mode"]), + WriteIfEmptyMode.deserialize(state["write_if_empty_mode"]), + Modes[str(state["mode"])], + state["whitelist_for_overwrite"], + ) + + def load_state( + self, + bash_command_mode: BashCommandMode, + file_edit_mode: FileEditMode, + write_if_empty_mode: WriteIfEmptyMode, + mode: Modes, + whitelist_for_overwrite: list[str], + ) -> 
None: + """Create a new BashState instance from a serialized state dictionary""" + if bash_command_mode != self._bash_command_mode: + self._bash_command_mode = bash_command_mode + self.reset() + + self._file_edit_mode = file_edit_mode + self._write_if_empty_mode = write_if_empty_mode + self._whitelist_for_overwrite = set(whitelist_for_overwrite) + self._mode = mode def get_pending_for(self) -> str: if isinstance(self._state, datetime.datetime): @@ -385,7 +382,7 @@ def pending_output(self) -> str: return self._pending_output -BASH_STATE = BashState(None) +BASH_STATE = BashState(None, None, None, None) INITIALIZED = False @@ -440,9 +437,38 @@ def initialize( # Restore bash state if available if bash_state is not None: - BASH_STATE.load_state(bash_state) + try: + parsed_state = BashState.parse_state(bash_state) + if mode == "wcgw": + BASH_STATE.load_state( + parsed_state[0], + parsed_state[1], + parsed_state[2], + parsed_state[3], + parsed_state[4], + ) + else: + state = modes_to_state(mode) + BASH_STATE.load_state( + state[0], + state[1], + state[2], + state[3], + parsed_state[4] + list(BASH_STATE.whitelist_for_overwrite), + ) + except ValueError: + console.print(traceback.format_exc()) + console.print("Error: couldn't load bash state") + pass else: - BASH_STATE = BashState(mode) + state = modes_to_state(mode) + BASH_STATE.load_state( + state[0], + state[1], + state[2], + state[3], + list(BASH_STATE.whitelist_for_overwrite), + ) if folder_to_start: BASH_STATE.shell.sendline(f"cd {shlex.quote(str(folder_to_start))}") From 630ba7c7dcfe2810ee52cd6fd61062632f37debd Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 23:19:31 +0530 Subject: [PATCH 14/40] Bug fix in try json loading --- src/wcgw/client/mcp_server/server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index e1508bd..4c93c0f 100644 --- a/src/wcgw/client/mcp_server/server.py +++ 
b/src/wcgw/client/mcp_server/server.py @@ -267,6 +267,8 @@ async def handle_call_tool( except ValidationError: def try_json(x: str) -> Any: + if not isinstance(x, str): + return x try: return json.loads(x) except json.JSONDecodeError: From 6fbe704ebb7dc7ffaf86865b2c2bf86e40706b39 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 23:30:06 +0530 Subject: [PATCH 15/40] Fixed loading state --- src/wcgw/client/modes.py | 31 +++++++++++++++++++++++++------ src/wcgw/client/tools.py | 9 +++++++-- 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index edddf5f..6666930 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -51,19 +51,30 @@ class ModeImpl: def code_writer_prompt( - allowed_globs: Literal["all"] | list[str], + allowed_file_edit_globs: Literal["all"] | list[str], + all_write_new_globs: Literal["all"] | list[str], allowed_commands: Literal["all"] | list[str], ) -> str: base = """You have to run in "code_writer" mode. This means """ path_prompt = """ - - You are allowed to create and update files in the provided repository only. + - You are allowed to edit or update files in the provided repository only. """ - if allowed_globs != "all" and allowed_globs: + if allowed_file_edit_globs != "all" and allowed_file_edit_globs: path_prompt = f""" -- You are allowed to create and update files in the following globs: {', '.join(allowed_globs)} +- You are allowed to edit and update files only in the following globs: {', '.join(allowed_file_edit_globs)} +""" + base += path_prompt + + path_prompt = """ + - You are allowed to create new files in the provided repository only. 
+ """ + + if all_write_new_globs != "all" and all_write_new_globs: + path_prompt = f""" +- You are allowed to create new files only in the following globs: {', '.join(allowed_file_edit_globs)} """ base += path_prompt @@ -107,7 +118,10 @@ def code_writer_prompt( ), } -def modes_to_state(mode: ModesConfig) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes]: + +def modes_to_state( + mode: ModesConfig, +) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes]: # First get default mode config if isinstance(mode, str): mode_impl = DEFAULT_MODES[Modes[mode]] # converts str to Modes enum @@ -125,4 +139,9 @@ def modes_to_state(mode: ModesConfig) -> tuple[BashCommandMode, FileEditMode, Wr write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), ) mode_name = Modes.code_writer - return (mode_impl.bash_command_mode, mode_impl.file_edit_mode, mode_impl.write_if_empty_mode, mode_name) + return ( + mode_impl.bash_command_mode, + mode_impl.file_edit_mode, + mode_impl.write_if_empty_mode, + mode_name, + ) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 46ea288..8cab0db 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -469,6 +469,7 @@ def initialize( state[3], list(BASH_STATE.whitelist_for_overwrite), ) + del mode if folder_to_start: BASH_STATE.shell.sendline(f"cd {shlex.quote(str(folder_to_start))}") @@ -484,9 +485,13 @@ def initialize( uname_machine = os.uname().machine mode_prompt = "" - if isinstance(BASH_STATE.mode, CodeWriterMode): + if BASH_STATE.mode == Modes.code_writer: mode_prompt = code_writer_prompt( - BASH_STATE.mode.allowed_globs, mode.allowed_commands + BASH_STATE.file_edit_mode.allowed_globs, + BASH_STATE.write_if_empty_mode.allowed_globs, + BASH_STATE.bash_command_mode.allowed_commands + if BASH_STATE.bash_command_mode.allowed_commands == "all" + else [], ) elif BASH_STATE.mode == Modes.architect: mode_prompt = ARCHITECT_PROMPT From 8578a2893cc1dac592d7431db0bdd9ff1d1bc8ac Mon Sep 17 00:00:00 
2001 From: Aman Rusia Date: Fri, 10 Jan 2025 23:41:43 +0530 Subject: [PATCH 16/40] Fixed allowed commands handling --- src/wcgw/client/modes.py | 2 +- src/wcgw/client/tools.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 6666930..64e4e6f 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -133,7 +133,7 @@ def modes_to_state( mode_impl = ModeImpl( bash_command_mode=BashCommandMode( mode_impl.bash_command_mode.bash_mode, - "all" if mode.allowed_commands == "all" else "none", + "all" if mode.allowed_commands else "none", ), file_edit_mode=FileEditMode(mode.allowed_globs), write_if_empty_mode=WriteIfEmptyMode(mode.allowed_globs), diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 8cab0db..8238b6a 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -489,9 +489,7 @@ def initialize( mode_prompt = code_writer_prompt( BASH_STATE.file_edit_mode.allowed_globs, BASH_STATE.write_if_empty_mode.allowed_globs, - BASH_STATE.bash_command_mode.allowed_commands - if BASH_STATE.bash_command_mode.allowed_commands == "all" - else [], + "all" if BASH_STATE.bash_command_mode.allowed_commands else [], ) elif BASH_STATE.mode == Modes.architect: mode_prompt = ARCHITECT_PROMPT From 5324ecf81a9bf58665bfa74cbb11e92d40981fea Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Fri, 10 Jan 2025 23:47:03 +0530 Subject: [PATCH 17/40] Fixed mypy issues --- src/wcgw/client/memory.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/wcgw/client/memory.py b/src/wcgw/client/memory.py index dcd45dd..2e8a7ed 100644 --- a/src/wcgw/client/memory.py +++ b/src/wcgw/client/memory.py @@ -31,7 +31,11 @@ def format_memory(task_memory: ContextSave, relevant_files: str) -> str: return memory_data -def save_memory(task_memory: ContextSave, relevant_files: str, bash_state_dict: Optional[dict[str, Any]] = None) -> str: +def save_memory( + 
task_memory: ContextSave, + relevant_files: str, + bash_state_dict: Optional[dict[str, Any]] = None, +) -> str: app_dir = get_app_dir_xdg() memory_dir = os.path.join(app_dir, "memory") os.makedirs(memory_dir, exist_ok=True) @@ -60,7 +64,7 @@ def load_memory[T]( max_tokens: Optional[int], encoder: Callable[[str], list[T]], decoder: Callable[[list[T]], str], -) -> tuple[str, str, dict[str, Any]]: +) -> tuple[str, str, Optional[dict[str, Any]]]: app_dir = get_app_dir_xdg() memory_dir = os.path.join(app_dir, "memory") memory_file = os.path.join(memory_dir, f"{task_id}.txt") @@ -85,9 +89,9 @@ def load_memory[T]( # Try to load bash state if exists state_file = os.path.join(memory_dir, f"{task_id}_bash_state.json") - bash_state = None + bash_state: Optional[dict[str, Any]] = None if os.path.exists(state_file): with open(state_file) as f: - bash_state: dict[str, Any] = json.load(f) + bash_state = json.load(f) return project_root_path, data, bash_state From 585149a2e0c2e9aebbcf4c22dfbb70ba10bc24c6 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Mon, 13 Jan 2025 22:15:17 +0530 Subject: [PATCH 18/40] Added base instructions to modes to avoid duplicate prompts --- src/wcgw/client/anthropic_client.py | 23 +++++++-------------- src/wcgw/client/mcp_server/server.py | 19 ----------------- src/wcgw/client/modes.py | 31 ++++++++++++++++++++++++++++ src/wcgw/client/openai_client.py | 24 +++++++-------------- 4 files changed, 45 insertions(+), 52 deletions(-) diff --git a/src/wcgw/client/anthropic_client.py b/src/wcgw/client/anthropic_client.py index 8274898..94147b3 100644 --- a/src/wcgw/client/anthropic_client.py +++ b/src/wcgw/client/anthropic_client.py @@ -130,7 +130,7 @@ def loop( memory = None if resume: try: - _, memory = load_memory( + _, memory, _ = load_memory( resume, 8000, lambda x: default_enc.encode(x).ids, @@ -281,22 +281,13 @@ def loop( ), ] - initial_info = initialize( - os.getcwd(), [], resume if (memory and resume) else "", max_tokens=8000 + system = initialize( + 
os.getcwd(), + [], + resume if (memory and resume) else "", + max_tokens=8000, + mode="wcgw", ) - system = f""" -You're an expert software engineer with shell and code knowledge. - -Instructions: - - - You should use the provided bash execution, reading and writing file tools to complete objective. - - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) - - Always read relevant files before editing. - - Do not provide code snippets unless asked by the user, instead directly add/edit the code. - - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists. - -{initial_info} -""" with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f: system += f.read() diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index 4c93c0f..a6309e8 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -290,25 +290,6 @@ def try_json(x: str) -> Any: if isinstance(output_or_done, str): if issubclass(tool_type, Initialize): output_or_done += """ ---- -You're an expert software engineer with shell and code knowledge. - -Instructions: - - - You should use the provided bash execution, reading and writing file tools to complete objective. - - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) - - Always read relevant files before editing. - - Do not provide code snippets unless asked by the user, instead directly add/edit the code. - - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists. - - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using shell tools. 
- - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the program don't show any update but they still are running. - - Do not use echo to write multi-line files, always use FileEdit tool to update a code. - -Additional instructions: - Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd. - - Always write production ready, syntactically correct code. - Important note: as soon as you encounter "The user has chosen to disallow the tool call.", immediately stop doing everything and ask user for the reason. """ diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 64e4e6f..a2d3d6b 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -85,21 +85,52 @@ def code_writer_prompt( if allowed_commands != "all": command_prompt = f""" - You are only allowed to run the following commands: {', '.join(allowed_commands)} +- Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. +- Do not use echo to write multi-line files, always use FileEdit tool to update a code. +- Do not provide code snippets unless asked by the user, instead directly add/edit the code. +- You should use the provided bash execution, reading and writing file tools to complete objective. +- First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) +- Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools. """ base += command_prompt return base +WCGW_PROMPT = """ +--- +You're an expert software engineer with shell and code knowledge. + +Instructions: + + - You should use the provided bash execution, reading and writing file tools to complete objective. 
+ - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) + - Do not provide code snippets unless asked by the user, instead directly add/edit the code. + - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists. + - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools + - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. + - Do not use echo to write multi-line files, always use FileEdit tool to update a code. + +Additional instructions: + Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd. + + Always write production ready, syntactically correct code. + + +""" ARCHITECT_PROMPT = """You have to run in "architect" mode. This means - You are not allowed to edit or update any file. You are not allowed to create any file. - You are not allowed to run any commands that may change disk, system configuration, packages or environment. Only read-only commands are allowed. - Only run commands that allows you to explore the repository, understand the system or read anything of relevance. +- Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. Your response should be in self-critique and brainstorm style. - Read as many relevant files as possible. - Be comprehensive in your understanding and search of relevant files. +- First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) 
""" + + DEFAULT_MODES: dict[Modes, ModeImpl] = { Modes.wcgw: ModeImpl( bash_command_mode=BashCommandMode("normal_mode", "all"), diff --git a/src/wcgw/client/openai_client.py b/src/wcgw/client/openai_client.py index 86715e5..aaab351 100644 --- a/src/wcgw/client/openai_client.py +++ b/src/wcgw/client/openai_client.py @@ -125,7 +125,7 @@ def loop( memory = None if resume: try: - _, memory = load_memory( + _, memory, _ = load_memory( resume, 8000, lambda x: default_enc.encode(x).ids, @@ -226,23 +226,13 @@ def loop( ), ] - initial_info = initialize( - os.getcwd(), [], resume if (memory and resume) else "", max_tokens=8000 + system = initialize( + os.getcwd(), + [], + resume if (memory and resume) else "", + max_tokens=8000, + mode="wcgw", ) - system = f""" -You're an expert software engineer with shell and code knowledge. - -Instructions: - - - You should use the provided bash execution, reading and writing file tools to complete objective. - - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) - - Always read relevant files before editing. - - Do not provide code snippets unless asked by the user, instead directly add/edit the code. - - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists. 
- -{initial_info} - -""" with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f: system += f.read() From 06c41f66b74d8993748d0615d735075dcc4b7f64 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Mon, 13 Jan 2025 22:33:02 +0530 Subject: [PATCH 19/40] More test fixes --- tests/client/test_memory.py | 112 ++++++++++++++++++++++---- tests/client/test_tools_extended.py | 49 ++++++++++- tests/client/test_tools_file_ops.py | 9 +++ tests/client/test_tools_shell.py | 11 ++- tests/client/test_tools_validation.py | 20 +++++ 5 files changed, 180 insertions(+), 21 deletions(-) diff --git a/tests/client/test_memory.py b/tests/client/test_memory.py index d803b03..bba0a61 100644 --- a/tests/client/test_memory.py +++ b/tests/client/test_memory.py @@ -1,3 +1,4 @@ +import json import os import unittest from unittest.mock import mock_open, patch @@ -7,6 +8,18 @@ class TestMemory(unittest.TestCase): + class MockTokens: + def __init__(self, ids): + self.ids = ids + + def __len__(self): + return len(self.ids) + + def __getitem__(self, key): + if isinstance(key, slice): + return TestMemory.MockTokens(self.ids[key]) + return self.ids[key] + def test_get_app_dir_xdg(self): with patch.dict("os.environ", {"XDG_DATA_HOME": "/custom/data"}): result = get_app_dir_xdg() @@ -47,31 +60,100 @@ def test_save_memory(self): mock_file().write.assert_called_once() def test_load_memory(self): + """Test loading of memory data including bash state""" task_id = "test-id" memory_data = "# PROJECT ROOT = /project\nTest description\n*.py\nfile1.py" - mock_encoder = lambda x: [1, 2, 3] # Simulate token encoding - mock_decoder = lambda x: "Decoded text" # Simulate token decoding + bash_state_data = { + "bash_command_mode": { + "bash_mode": "normal_mode", + "allowed_commands": "all", + }, + "file_edit_mode": {"allowed_globs": "all"}, + "write_if_empty_mode": {"allowed_globs": "all"}, + "whitelist_for_overwrite": [], + "mode": "wcgw", + } + mock_encoder = lambda x: self.MockTokens([1, 2, 
3]) # Simulate tokenizer output + mock_decoder = lambda x: "Decoded text" # Return fixed text for any input IDs + + # Mock both the memory file and bash state file + from wcgw.client.memory import get_app_dir_xdg + + app_dir = get_app_dir_xdg() + memory_dir = os.path.join(app_dir, "memory") + mock_files = { + os.path.join(memory_dir, f"{task_id}.txt"): memory_data, + os.path.join(memory_dir, f"{task_id}_bash_state.json"): json.dumps( + bash_state_data + ), + } + + def mock_open_file(filename, *args, **kwargs): + content = mock_files.get(filename) + if content is None: + raise FileNotFoundError(filename) + return mock_open(read_data=content)() - with patch("builtins.open", mock_open(read_data=memory_data)): - project_root, data = load_memory( - task_id, max_tokens=None, encoder=mock_encoder, decoder=mock_decoder + with patch("builtins.open", side_effect=mock_open_file): + project_root, data, bash_state = load_memory( + task_id, max_tokens=2, encoder=mock_encoder, decoder=mock_decoder ) - self.assertEqual(project_root, "/project") - self.assertIn("Test description", data) + self.assertEqual(project_root, "") + self.assertEqual(data, "Decoded text\n(... 
truncated)") + self.assertEqual( + bash_state, bash_state_data + ) # Verify bash state was loaded def test_load_memory_with_tokens(self): + """Test loading of memory data with token limit""" task_id = "test-id" memory_data = "# PROJECT ROOT = '/project'\nTest description\n*.py\nfile1.py" - mock_encoder = lambda x: [1, 2, 3] # Simulate token encoding - mock_decoder = lambda x: x # Don't decode in test + bash_state_data = { + "bash_command_mode": { + "bash_mode": "normal_mode", + "allowed_commands": "all", + }, + "file_edit_mode": {"allowed_globs": "all"}, + "write_if_empty_mode": {"allowed_globs": "all"}, + "whitelist_for_overwrite": [], + "mode": "wcgw", + } - with patch("builtins.open", mock_open(read_data=memory_data)): - project_root, data = load_memory( - task_id, max_tokens=10, encoder=mock_encoder, decoder=mock_decoder + mock_encoder = lambda x: self.MockTokens([1, 2, 3]) # Simulate tokenizer output + mock_decoder = lambda x: "truncated text" # Return fixed text for any input IDs + + from wcgw.client.memory import get_app_dir_xdg + + app_dir = get_app_dir_xdg() + memory_dir = os.path.join(app_dir, "memory") + mock_files = { + os.path.join(memory_dir, f"{task_id}.txt"): memory_data, + os.path.join(memory_dir, f"{task_id}_bash_state.json"): json.dumps( + bash_state_data + ), + } + + def mock_open_file(filename, *args, **kwargs): + content = mock_files.get(filename) + if content is None: + raise FileNotFoundError(filename) + return mock_open(read_data=content)() + + with patch("builtins.open", side_effect=mock_open_file): + project_root, data, bash_state = load_memory( + task_id, max_tokens=2, encoder=mock_encoder, decoder=mock_decoder ) - self.assertEqual(project_root, "/project") - # Mock decoder returns input unchanged, so we expect full data - self.assertEqual(data, memory_data) + + # Since encoder returns [1, 2, 3] and decoder returns input unchanged, + # only the first chunk (after truncation) should be returned plus the truncation message + 
self.assertEqual(project_root, "") + # Our test decoder returns unchanged input, so we get first token plus truncation message + self.assertEqual( + data, "truncated text\n(... truncated)" + ) # Use the actual mock_decoder output + self.assertEqual( + bash_state, bash_state_data + ) # Verify bash state was loaded if __name__ == "__main__": diff --git a/tests/client/test_tools_extended.py b/tests/client/test_tools_extended.py index 294319b..a672a76 100644 --- a/tests/client/test_tools_extended.py +++ b/tests/client/test_tools_extended.py @@ -18,9 +18,33 @@ class TestToolsExtended(unittest.TestCase): def setUp(self): self.maxDiff = None - from wcgw.client.tools import BASH_STATE - + from wcgw.client.tools import BASH_STATE, initialize, INITIALIZED, TOOL_CALLS + global INITIALIZED, TOOL_CALLS + INITIALIZED = False + TOOL_CALLS = [] BASH_STATE._is_in_docker = "" # Reset Docker state without shell reset + + # Properly initialize tools for testing + initialize( + any_workspace_path="", + read_files_=[], + task_id_to_resume="", + max_tokens=None, + mode="wcgw" + ) + + def tearDown(self): + from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + global INITIALIZED, TOOL_CALLS + INITIALIZED = False # Reset initialization state + TOOL_CALLS = [] # Clear tool calls + try: + BASH_STATE.reset() # Reset bash state + except Exception as e: + print(f"Warning: Failed to reset BASH_STATE: {e}") + # Clean up any temporary files or directories + if hasattr(self, '_saved_filepath') and os.path.exists(getattr(self, '_saved_filepath')): + os.remove(self._saved_filepath) def test_get_incremental_output(self): old_output = ["line1", "line2"] @@ -196,7 +220,7 @@ def test_start_shell(self, mock_spawn): mock_spawn.return_value = mock_shell # Test successful shell start - shell = start_shell() + shell = start_shell(is_restricted_mode=False) self.assertEqual(shell, mock_shell) # Verify shell initialization @@ -206,6 +230,11 @@ def test_start_shell(self, mock_spawn): 
mock_shell.sendline.assert_any_call("set +o pipefail") mock_shell.sendline.assert_any_call("export GIT_PAGER=cat PAGER=cat") + # Test restricted mode + mock_shell.reset_mock() + shell = start_shell(is_restricted_mode=True) + self.assertEqual(shell, mock_shell) + def test_save_out_of_context(self): from wcgw.client.tools import save_out_of_context @@ -455,10 +484,24 @@ def test_get_tool_output_ai_assistant(self, mock_ai_helper): def test_get_tool_output_invalid_tool(self): """Test get_tool_output function with invalid tool""" + from wcgw.client.tools import get_tool_output mock_enc = MagicMock() mock_loop_call = MagicMock() + # Test with None + with self.assertRaises(ValueError) as cm: + get_tool_output(None, mock_enc, 1.0, mock_loop_call, 100) + self.assertEqual(str(cm.exception), "Unknown tool: None") + + # Test with invalid tool type + with self.assertRaises(ValueError) as cm: + get_tool_output(123, mock_enc, 1.0, mock_loop_call, 100) + + # Test with empty dict + with self.assertRaises(ValueError) as cm: + result, cost = get_tool_output({}, mock_enc, 1.0, mock_loop_call, 100) + def test_get_tool_output_exception_handling(self): """Test error handling in get_tool_output""" from wcgw.client.tools import get_tool_output diff --git a/tests/client/test_tools_file_ops.py b/tests/client/test_tools_file_ops.py index 75545e1..975899a 100644 --- a/tests/client/test_tools_file_ops.py +++ b/tests/client/test_tools_file_ops.py @@ -17,6 +17,15 @@ class TestToolsFileOps(unittest.TestCase): def setUp(self): + from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + global INITIALIZED, TOOL_CALLS + INITIALIZED = False + TOOL_CALLS = [] + if hasattr(BASH_STATE, 'reset'): + try: + BASH_STATE.reset() + except Exception: + pass self.mock_tokenizer = MagicMock() self.mock_tokenizer.encode.return_value.ids = [1, 2, 3] self.mock_tokenizer.decode.return_value = "decoded text" diff --git a/tests/client/test_tools_shell.py b/tests/client/test_tools_shell.py index 
784bb1b..a3482be 100644 --- a/tests/client/test_tools_shell.py +++ b/tests/client/test_tools_shell.py @@ -62,8 +62,8 @@ def test_start_shell_error_handling(self, mock_os, mock_spawn): mock_shell.before = "" mock_spawn.return_value = mock_shell - # Test successful shell start - shell = start_shell() + # Test successful shell start with non-restricted mode + shell = start_shell(is_restricted_mode=False) self.assertEqual(shell, mock_shell) # Verify shell initialization commands @@ -72,8 +72,13 @@ def test_start_shell_error_handling(self, mock_os, mock_spawn): mock_shell.sendline.assert_any_call("set +o pipefail") # Test error handling with fallback + # Test error handling with fallback in both modes mock_spawn.side_effect = [Exception("Failed"), mock_shell] - shell = start_shell() + shell = start_shell(is_restricted_mode=False) + self.assertEqual(shell, mock_shell) + + mock_spawn.side_effect = [Exception("Failed"), mock_shell] + shell = start_shell(is_restricted_mode=True) self.assertEqual(shell, mock_shell) def test_is_int_validation(self): diff --git a/tests/client/test_tools_validation.py b/tests/client/test_tools_validation.py index 61c7f68..dbc94ec 100644 --- a/tests/client/test_tools_validation.py +++ b/tests/client/test_tools_validation.py @@ -22,7 +22,27 @@ class TestToolsValidation(unittest.TestCase): def setUp(self): self.maxDiff = None + from wcgw.client.tools import BASH_STATE, initialize + BASH_STATE.reset() + # Properly initialize tools for testing + initialize( + any_workspace_path="", + read_files_=[], + task_id_to_resume="", + max_tokens=None, + mode="wcgw" + ) + + def tearDown(self): + from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + global INITIALIZED, TOOL_CALLS + INITIALIZED = False # Reset initialization state + TOOL_CALLS = [] # Clear tool calls + try: + BASH_STATE.reset() # Reset bash state + except: + pass def test_ensure_no_previous_output_decorator(self): """Test ensure_no_previous_output decorator""" From 
7845048ffc55b6c6f913ebc490da174247ee0826 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Mon, 13 Jan 2025 22:51:28 +0530 Subject: [PATCH 20/40] Fixed all tests --- tests/client/test_memory.py | 6 ++++-- tests/client/test_tools_extended.py | 21 +++++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tests/client/test_memory.py b/tests/client/test_memory.py index bba0a61..b9f2cb4 100644 --- a/tests/client/test_memory.py +++ b/tests/client/test_memory.py @@ -94,7 +94,8 @@ def mock_open_file(filename, *args, **kwargs): raise FileNotFoundError(filename) return mock_open(read_data=content)() - with patch("builtins.open", side_effect=mock_open_file): + with patch("builtins.open", side_effect=mock_open_file), \ + patch("os.path.exists", lambda x: x.endswith("_bash_state.json")): project_root, data, bash_state = load_memory( task_id, max_tokens=2, encoder=mock_encoder, decoder=mock_decoder ) @@ -139,7 +140,8 @@ def mock_open_file(filename, *args, **kwargs): raise FileNotFoundError(filename) return mock_open(read_data=content)() - with patch("builtins.open", side_effect=mock_open_file): + with patch("builtins.open", side_effect=mock_open_file), \ + patch("os.path.exists", lambda x: x.endswith("_bash_state.json")): project_root, data, bash_state = load_memory( task_id, max_tokens=2, encoder=mock_encoder, decoder=mock_decoder ) diff --git a/tests/client/test_tools_extended.py b/tests/client/test_tools_extended.py index a672a76..fa7152b 100644 --- a/tests/client/test_tools_extended.py +++ b/tests/client/test_tools_extended.py @@ -18,23 +18,25 @@ class TestToolsExtended(unittest.TestCase): def setUp(self): self.maxDiff = None - from wcgw.client.tools import BASH_STATE, initialize, INITIALIZED, TOOL_CALLS + from wcgw.client.tools import BASH_STATE, INITIALIZED, TOOL_CALLS, initialize + global INITIALIZED, TOOL_CALLS INITIALIZED = False TOOL_CALLS = [] BASH_STATE._is_in_docker = "" # Reset Docker state without shell reset - + # Properly initialize 
tools for testing initialize( any_workspace_path="", read_files_=[], task_id_to_resume="", max_tokens=None, - mode="wcgw" + mode="wcgw", ) - + def tearDown(self): - from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + from wcgw.client.tools import BASH_STATE, INITIALIZED, TOOL_CALLS + global INITIALIZED, TOOL_CALLS INITIALIZED = False # Reset initialization state TOOL_CALLS = [] # Clear tool calls @@ -43,7 +45,9 @@ def tearDown(self): except Exception as e: print(f"Warning: Failed to reset BASH_STATE: {e}") # Clean up any temporary files or directories - if hasattr(self, '_saved_filepath') and os.path.exists(getattr(self, '_saved_filepath')): + if hasattr(self, "_saved_filepath") and os.path.exists( + getattr(self, "_saved_filepath") + ): os.remove(self._saved_filepath) def test_get_incremental_output(self): @@ -499,8 +503,9 @@ def test_get_tool_output_invalid_tool(self): get_tool_output(123, mock_enc, 1.0, mock_loop_call, 100) # Test with empty dict - with self.assertRaises(ValueError) as cm: - result, cost = get_tool_output({}, mock_enc, 1.0, mock_loop_call, 100) + result, cost = get_tool_output({}, mock_enc, 1.0, mock_loop_call, 100) + self.assertIn("Failure:", result[0]) + self.assertEqual(cost, 0) def test_get_tool_output_exception_handling(self): """Test error handling in get_tool_output""" From 3567333c83947077005d3a58dc6ffc9f95233241 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Mon, 13 Jan 2025 22:59:34 +0530 Subject: [PATCH 21/40] KT prompts different for architect mode --- pyproject.toml | 2 +- src/wcgw/client/mcp_server/server.py | 22 ++----------- src/wcgw/client/modes.py | 47 ++++++++++++++++++++++++++++ uv.lock | 2 +- 4 files changed, 52 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 92b9cb3..acdd9dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }] name = "wcgw" -version = "2.7.2" +version = "2.8.0" description 
= "Shell and coding agent on claude and chatgpt" readme = "README.md" requires-python = ">=3.11, <3.13" diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index a6309e8..639091b 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -28,6 +28,7 @@ ) from .. import tools from ..computer_use import SLEEP_TIME_MAX_S +from ..modes import get_kt_prompt from ..tools import DoneFlag, default_enc, get_tool_output, which_tool_name COMPUTER_USE_ON_DOCKER_ENABLED = False @@ -51,24 +52,7 @@ async def handle_read_resource(uri: AnyUrl) -> str: name="KnowledgeTransfer", description="Prompt for invoking ContextSave tool in order to do a comprehensive knowledge transfer of a coding task. Prompts to save detailed error log and instructions.", ), - """Use `ContextSave` tool to do a knowledge transfer of the task in hand. -Write detailed description in order to do a KT. -Save all information necessary for a person to understand the task and the problems. - -Format the `description` field using Markdown with the following sections. -- "# Objective" section containing project and task objective. -- "# All user instructions" section should be provided containing all instructions user shared in the conversation. -- "# Current status of the task" should be provided containing only what is already achieved, not what's remaining. -- "# All issues with snippets" section containing snippets of error, traceback, file snippets, commands, etc. But no comments or solutions. -- Be very verbose in the all issues with snippets section providing as much error context as possible. -- "# Build and development instructions" section containing instructions to build or run project or run tests, or envrionment related information. Only include what's known. Leave empty if unknown. -- After the tool completes succesfully, tell me the task id and the file path the tool generated (important!) 
-- This tool marks end of your conversation, do not run any further tools after calling this. - -Provide all relevant file paths in order to understand and solve the the task. Err towards providing more file paths than fewer. - -(Note to self: this conversation can then be resumed later asking "Resume ``" which should call Initialize tool) -""", + get_kt_prompt, ) } @@ -87,7 +71,7 @@ async def handle_get_prompt( role="user", content=types.TextContent( type="text", - text=PROMPTS[name][1], + text=PROMPTS[name][1](), ), ) ] diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index a2d3d6b..c5d00ea 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -176,3 +176,50 @@ def modes_to_state( mode_impl.write_if_empty_mode, mode_name, ) + + +WCGW_KT = """Use `ContextSave` tool to do a knowledge transfer of the task in hand. +Write detailed description in order to do a KT. +Save all information necessary for a person to understand the task and the problems. + +Format the `description` field using Markdown with the following sections. +- "# Objective" section containing project and task objective. +- "# All user instructions" section should be provided containing all instructions user shared in the conversation. +- "# Current status of the task" should be provided containing only what is already achieved, not what's remaining. +- "# All issues with snippets" section containing snippets of error, traceback, file snippets, commands, etc. But no comments or solutions. +- Be very verbose in the all issues with snippets section providing as much error context as possible. +- "# Build and development instructions" section containing instructions to build or run project or run tests, or envrionment related information. Only include what's known. Leave empty if unknown. +- Any other relevant sections following the above. +- After the tool completes succesfully, tell me the task id and the file path the tool generated (important!) 
+- This tool marks end of your conversation, do not run any further tools after calling this. + +Provide all relevant file paths in order to understand and solve the the task. Err towards providing more file paths than fewer. + +(Note to self: this conversation can then be resumed later asking "Resume ``" which should call Initialize tool) +""" + + +ARCHITECT_KT = """Use `ContextSave` tool to do a knowledge transfer of the task in hand. +Write detailed description in order to do a KT. +Save all information necessary for a person to understand the task and the problems. + +Format the `description` field using Markdown with the following sections. +- "# Objective" section containing project and task objective. +- "# All user instructions" section should be provided containing all instructions user shared in the conversation. +- "# Designed plan" should be provided containing the designed plan as discussed. +- Any other relevant sections following the above. +- After the tool completes succesfully, tell me the task id and the file path the tool generated (important!) +- This tool marks end of your conversation, do not run any further tools after calling this. + +Provide all relevant file paths in order to understand and solve the the task. Err towards providing more file paths than fewer. + +(Note to self: this conversation can then be resumed later asking "Resume ``" which should call Initialize tool) +""" + +KTS = {Modes.wcgw: WCGW_KT, Modes.architect: ARCHITECT_KT, Modes.code_writer: WCGW_KT} + + +def get_kt_prompt() -> str: + from .tools import BASH_STATE + + return KTS[BASH_STATE.mode] diff --git a/uv.lock b/uv.lock index 3b0c88f..fb8af8a 100644 --- a/uv.lock +++ b/uv.lock @@ -1044,7 +1044,7 @@ wheels = [ [[package]] name = "wcgw" -version = "2.7.2" +version = "2.8.0" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, From 5db062cbe1b7ebd2fd1ecd7973aee64cb55bccfa Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 00:30:15 +0530 Subject: [PATCH 22/40] Updated initialisation instructions --- src/wcgw/client/mcp_server/server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index 639091b..2637ea9 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -105,6 +105,8 @@ async def handle_list_tools() -> list[types.Tool]: - If user has mentioned any files use `initial_files_to_read` to read, use absolute paths only. - If `any_workspace_path` is provided, a tree structure of the workspace will be shown. - Leave `any_workspace_path` as empty if no file or folder is mentioned. +- Do not set a mode, unless user asks for a specific mode. +- In code-writer mode, set the commands and globs which user asked to set, otherwise use 'all'. """, ), ToolParam( From e616f41b6146227228c8c3c49ccafad5566f49ed Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 13:22:22 +0530 Subject: [PATCH 23/40] Fixed missing wcgw prompt --- src/wcgw/client/tools.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 8238b6a..b9bdbae 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -62,6 +62,7 @@ from .memory import load_memory, save_memory from .modes import ( ARCHITECT_PROMPT, + WCGW_PROMPT, BashCommandMode, FileEditMode, WriteIfEmptyMode, @@ -493,6 +494,9 @@ def initialize( ) elif BASH_STATE.mode == Modes.architect: mode_prompt = ARCHITECT_PROMPT + else: + mode_prompt = WCGW_PROMPT + output = f""" {mode_prompt} From 93b56dadbe16dfb627f08905294e8d558f6ec10f Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 16:11:08 +0530 Subject: [PATCH 24/40] Made mode argument non optional because it wouldn't set to architect many times --- 
src/wcgw/client/anthropic_client.py | 1 + src/wcgw/client/mcp_server/server.py | 2 +- src/wcgw/client/openai_client.py | 1 + src/wcgw/types_.py | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/wcgw/client/anthropic_client.py b/src/wcgw/client/anthropic_client.py index 94147b3..6311b5b 100644 --- a/src/wcgw/client/anthropic_client.py +++ b/src/wcgw/client/anthropic_client.py @@ -166,6 +166,7 @@ def loop( - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". - Use longer wait_for_seconds if the command is expected to run for a long time. +- Do not use 'cat' to read files, use ReadFiles tool instead. """, ), ToolParam( diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index 2637ea9..ee1e657 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -105,7 +105,7 @@ async def handle_list_tools() -> list[types.Tool]: - If user has mentioned any files use `initial_files_to_read` to read, use absolute paths only. - If `any_workspace_path` is provided, a tree structure of the workspace will be shown. - Leave `any_workspace_path` as empty if no file or folder is mentioned. -- Do not set a mode, unless user asks for a specific mode. +- By default use mode `wcgw` - In code-writer mode, set the commands and globs which user asked to set, otherwise use 'all'. """, ), diff --git a/src/wcgw/client/openai_client.py b/src/wcgw/client/openai_client.py index aaab351..b963fc2 100644 --- a/src/wcgw/client/openai_client.py +++ b/src/wcgw/client/openai_client.py @@ -176,6 +176,7 @@ def loop( - Always run `pwd` if you get any file or directory not found error to make sure you're not lost. - The control will return to you in 5 seconds regardless of the status. 
For heavy commands, keep checking status using BashInteraction till they are finished. - Run long running commands in background using screen instead of "&". +- Do not use 'cat' to read files, use ReadFiles tool instead. """, ), openai.pydantic_function_tool( diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index 7c82071..4269891 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -39,7 +39,7 @@ class Initialize(BaseModel): any_workspace_path: str initial_files_to_read: list[str] task_id_to_resume: str - mode: ModesConfig = "wcgw" + mode: ModesConfig class BashCommand(BaseModel): From 1d9638c04d72c832b6de9ae26a8801758cd3fe55 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 18:43:13 +0530 Subject: [PATCH 25/40] Fix loading state in restricted mode disallowed working from project dir --- src/wcgw/client/tools.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index b9bdbae..b696aef 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -137,7 +137,7 @@ def ask_confirmation(prompt: Confirmation) -> str: PROMPT = PROMPT_CONST -def start_shell(is_restricted_mode: bool) -> pexpect.spawn: # type: ignore +def start_shell(is_restricted_mode: bool, initial_dir: str) -> pexpect.spawn: # type: ignore try: cmd = "/bin/bash" if is_restricted_mode: @@ -149,6 +149,7 @@ def start_shell(is_restricted_mode: bool) -> pexpect.spawn: # type: ignore echo=False, encoding="utf-8", timeout=TIMEOUT, + cwd=initial_dir, ) shell.sendline(f"export PS1={PROMPT}") except Exception as e: @@ -223,12 +224,14 @@ def _ensure_env_and_bg_jobs(shell: pexpect.spawn) -> Optional[int]: # type: ign class BashState: def __init__( self, + working_dir: str, bash_command_mode: Optional[BashCommandMode], file_edit_mode: Optional[FileEditMode], write_if_empty_mode: Optional[WriteIfEmptyMode], mode: Optional[Modes], whitelist_for_overwrite: Optional[set[str]] = None, ) -> None: + 
self._cwd = working_dir or os.getcwd() self._bash_command_mode: BashCommandMode = bash_command_mode or BashCommandMode( "normal_mode", "all" ) @@ -260,9 +263,9 @@ def write_if_empty_mode(self) -> WriteIfEmptyMode: def _init(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" - self._cwd: str = os.getcwd() self._shell = start_shell( - self._bash_command_mode.bash_mode == "restricted_mode" + self._bash_command_mode.bash_mode == "restricted_mode", + self._cwd, ) self._pending_output = "" @@ -344,12 +347,13 @@ def load_state( write_if_empty_mode: WriteIfEmptyMode, mode: Modes, whitelist_for_overwrite: list[str], + cwd: str, ) -> None: """Create a new BashState instance from a serialized state dictionary""" if bash_command_mode != self._bash_command_mode: self._bash_command_mode = bash_command_mode - self.reset() - + self._cwd = cwd or self._cwd + self.reset() self._file_edit_mode = file_edit_mode self._write_if_empty_mode = write_if_empty_mode self._whitelist_for_overwrite = set(whitelist_for_overwrite) @@ -383,7 +387,7 @@ def pending_output(self) -> str: return self._pending_output -BASH_STATE = BashState(None, None, None, None) +BASH_STATE = BashState(os.getcwd(), None, None, None, None) INITIALIZED = False @@ -447,6 +451,7 @@ def initialize( parsed_state[2], parsed_state[3], parsed_state[4], + str(folder_to_start) if folder_to_start else "", ) else: state = modes_to_state(mode) @@ -456,6 +461,7 @@ def initialize( state[2], state[3], parsed_state[4] + list(BASH_STATE.whitelist_for_overwrite), + str(folder_to_start) if folder_to_start else "", ) except ValueError: console.print(traceback.format_exc()) @@ -469,6 +475,7 @@ def initialize( state[2], state[3], list(BASH_STATE.whitelist_for_overwrite), + str(folder_to_start) if folder_to_start else "", ) del mode From c8355dc763a2ec6cf3a4e19791ce4609675b5bc5 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 20:09:16 +0530 Subject: [PATCH 26/40] 
Tests fix --- tests/client/test_tools_extended.py | 4 ++-- tests/client/test_tools_shell.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/client/test_tools_extended.py b/tests/client/test_tools_extended.py index fa7152b..175adc4 100644 --- a/tests/client/test_tools_extended.py +++ b/tests/client/test_tools_extended.py @@ -224,7 +224,7 @@ def test_start_shell(self, mock_spawn): mock_spawn.return_value = mock_shell # Test successful shell start - shell = start_shell(is_restricted_mode=False) + shell = start_shell(is_restricted_mode=False, initial_dir="/") self.assertEqual(shell, mock_shell) # Verify shell initialization @@ -236,7 +236,7 @@ def test_start_shell(self, mock_spawn): # Test restricted mode mock_shell.reset_mock() - shell = start_shell(is_restricted_mode=True) + shell = start_shell(is_restricted_mode=True, initial_dir="/") self.assertEqual(shell, mock_shell) def test_save_out_of_context(self): diff --git a/tests/client/test_tools_shell.py b/tests/client/test_tools_shell.py index a3482be..74f7e27 100644 --- a/tests/client/test_tools_shell.py +++ b/tests/client/test_tools_shell.py @@ -63,7 +63,7 @@ def test_start_shell_error_handling(self, mock_os, mock_spawn): mock_spawn.return_value = mock_shell # Test successful shell start with non-restricted mode - shell = start_shell(is_restricted_mode=False) + shell = start_shell(is_restricted_mode=False, initial_dir="/") self.assertEqual(shell, mock_shell) # Verify shell initialization commands @@ -74,11 +74,11 @@ def test_start_shell_error_handling(self, mock_os, mock_spawn): # Test error handling with fallback # Test error handling with fallback in both modes mock_spawn.side_effect = [Exception("Failed"), mock_shell] - shell = start_shell(is_restricted_mode=False) + shell = start_shell(is_restricted_mode=False, initial_dir="/") self.assertEqual(shell, mock_shell) mock_spawn.side_effect = [Exception("Failed"), mock_shell] - shell = start_shell(is_restricted_mode=True) + shell = 
start_shell(is_restricted_mode=True, initial_dir="/") self.assertEqual(shell, mock_shell) def test_is_int_validation(self): From be9da7ffc9d4e9d3718a5e202e2b1ebbe56d94f7 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 20:13:16 +0530 Subject: [PATCH 27/40] error description fix, specifiying how to change mode --- src/wcgw/client/mcp_server/server.py | 1 + src/wcgw/client/tools.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index ee1e657..0b4c360 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -107,6 +107,7 @@ async def handle_list_tools() -> list[types.Tool]: - Leave `any_workspace_path` as empty if no file or folder is mentioned. - By default use mode `wcgw` - In code-writer mode, set the commands and globs which user asked to set, otherwise use 'all'. +- In order to change the mode later, call this tool again but be sure to not provide any other argument like task_id_to_resume unnecessarily. """, ), ToolParam( diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index b696aef..f3cb545 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -939,7 +939,7 @@ def write_file( if allowed_globs != "all" and not any( fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs ): - return f"Error: updating file {path_} not allowed in current mode. Matches restricted glob: {allowed_globs}" + return f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}" add_overwrite_warning = "" if not BASH_STATE.is_in_docker: if (error_on_exist or error_on_exist_) and os.path.exists(path_): @@ -1077,7 +1077,7 @@ def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int]) -> str: fnmatch.fnmatch(path_, pattern) for pattern in allowed_globs ): raise Exception( - f"Error: updating file {path_} not allowed in current mode. 
Matches restricted glob: {allowed_globs}" + f"Error: updating file {path_} not allowed in current mode. Doesn't match allowed globs: {allowed_globs}" ) # The LLM is now aware that the file exists From ba236e406edb8e5a47259979a1aa79195b8a5c5e Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 20:22:33 +0530 Subject: [PATCH 28/40] Prompt improvement --- src/wcgw/client/modes.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index c5d00ea..2469121 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -59,27 +59,27 @@ def code_writer_prompt( """ path_prompt = """ - - You are allowed to edit or update files in the provided repository only. + - You are allowed to run FileEdit in the provided repository only. """ if allowed_file_edit_globs != "all" and allowed_file_edit_globs: path_prompt = f""" -- You are allowed to edit and update files only in the following globs: {', '.join(allowed_file_edit_globs)} +- You are allowed to run FileEdit for files matching only the following globs: {', '.join(allowed_file_edit_globs)} """ base += path_prompt path_prompt = """ - - You are allowed to create new files in the provided repository only. + - You are allowed to run WriteIfEmpty in the provided repository only. """ if all_write_new_globs != "all" and all_write_new_globs: path_prompt = f""" -- You are allowed to create new files only in the following globs: {', '.join(allowed_file_edit_globs)} +- You are allowed to run WriteIfEmpty files matching only the following globs: {', '.join(allowed_file_edit_globs)} """ base += path_prompt command_prompt = """ -- You are only allowed to run commands for project setup, code writing, testing, running and debugging related to the proejct. +- You are only allowed to run commands for project setup, code writing, editing, updating, testing, running and debugging related to the project. 
- Do not run anything that adds or removes packages, changes system configuration or environment. """ if allowed_commands != "all": From cb17a7008f06e097d19b403ce08191e464745dcb Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 22:27:37 +0530 Subject: [PATCH 29/40] Remove unused code --- src/wcgw/client/tools.py | 28 ++-- tests/client/test_tools_extended.py | 2 +- tests/client/test_tools_file_ops.py | 133 +++++++++++------- tests/client/test_tools_validation.py | 11 +- tests/client/tools/test_command_validation.py | 2 +- tests/client/tools/test_user_interaction.py | 42 +++--- 6 files changed, 125 insertions(+), 93 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index f3cb545..d2daabc 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -242,7 +242,7 @@ def __init__( self._mode = mode or Modes.wcgw self._whitelist_for_overwrite: set[str] = whitelist_for_overwrite or set() - self._init() + self._init_shell() @property def mode(self) -> Modes: @@ -260,7 +260,7 @@ def file_edit_mode(self) -> FileEditMode: def write_if_empty_mode(self) -> WriteIfEmptyMode: return self._write_if_empty_mode - def _init(self) -> None: + def _init_shell(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" self._shell = start_shell( @@ -314,9 +314,9 @@ def update_cwd(self) -> str: self._cwd = current_dir return current_dir - def reset(self) -> None: + def reset_shell(self) -> None: self.shell.close(True) - self._init() + self._init_shell() def serialize(self) -> dict[str, Any]: """Serialize BashState to a dictionary for saving""" @@ -350,14 +350,13 @@ def load_state( cwd: str, ) -> None: """Create a new BashState instance from a serialized state dictionary""" - if bash_command_mode != self._bash_command_mode: - self._bash_command_mode = bash_command_mode + self._bash_command_mode = bash_command_mode self._cwd = cwd or self._cwd - self.reset() self._file_edit_mode = 
file_edit_mode self._write_if_empty_mode = write_if_empty_mode self._whitelist_for_overwrite = set(whitelist_for_overwrite) self._mode = mode + self.reset_shell() def get_pending_for(self) -> str: if isinstance(self._state, datetime.datetime): @@ -415,9 +414,7 @@ def initialize( lambda x: default_enc.decode(x), ) memory = "Following is the retrieved task:\n" + task_mem - if ( - not any_workspace_path or not os.path.exists(any_workspace_path) - ) and os.path.exists(project_root_path): + if os.path.exists(project_root_path): any_workspace_path = project_root_path except Exception: @@ -450,7 +447,7 @@ def initialize( parsed_state[1], parsed_state[2], parsed_state[3], - parsed_state[4], + parsed_state[4] + list(BASH_STATE.whitelist_for_overwrite), str(folder_to_start) if folder_to_start else "", ) else: @@ -479,11 +476,6 @@ def initialize( ) del mode - if folder_to_start: - BASH_STATE.shell.sendline(f"cd {shlex.quote(str(folder_to_start))}") - BASH_STATE.shell.expect(PROMPT, timeout=0.2) - BASH_STATE.update_cwd() - initial_files_context = "" if read_files_: initial_files = read_files(read_files_, max_tokens) @@ -528,7 +520,7 @@ def initialize( def reset_shell() -> str: - BASH_STATE.reset() + BASH_STATE.reset_shell() return "Reset successful" + get_status() @@ -814,7 +806,7 @@ def execute_bash( console.print(traceback.format_exc()) console.print("Malformed output, restarting shell", style="red") # Malformed output, restart shell - BASH_STATE.reset() + BASH_STATE.reset_shell() output = "(exit shell has restarted)" return output, 0 diff --git a/tests/client/test_tools_extended.py b/tests/client/test_tools_extended.py index 175adc4..9be059c 100644 --- a/tests/client/test_tools_extended.py +++ b/tests/client/test_tools_extended.py @@ -41,7 +41,7 @@ def tearDown(self): INITIALIZED = False # Reset initialization state TOOL_CALLS = [] # Clear tool calls try: - BASH_STATE.reset() # Reset bash state + BASH_STATE.reset_shell() # Reset bash state except Exception as e: 
print(f"Warning: Failed to reset BASH_STATE: {e}") # Clean up any temporary files or directories diff --git a/tests/client/test_tools_file_ops.py b/tests/client/test_tools_file_ops.py index 975899a..268435f 100644 --- a/tests/client/test_tools_file_ops.py +++ b/tests/client/test_tools_file_ops.py @@ -1,49 +1,51 @@ -import unittest -from unittest.mock import MagicMock, patch, mock_open import os -import websockets -from websockets.exceptions import ConnectionClosedError +import unittest +from unittest.mock import MagicMock, mock_open, patch + from websockets import frames -from pathlib import Path +from websockets.exceptions import ConnectionClosedError + from wcgw.client.tools import ( BASH_STATE, - read_image_from_shell, - write_file, + ImageData, expand_user, + read_image_from_shell, serve_image_in_bg, - ImageData, + write_file, ) from wcgw.types_ import WriteIfEmpty + class TestToolsFileOps(unittest.TestCase): def setUp(self): - from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + from wcgw.client.tools import BASH_STATE, INITIALIZED, TOOL_CALLS + global INITIALIZED, TOOL_CALLS INITIALIZED = False TOOL_CALLS = [] - if hasattr(BASH_STATE, 'reset'): + if hasattr(BASH_STATE, "reset"): try: - BASH_STATE.reset() + BASH_STATE.reset_shell() except Exception: pass self.mock_tokenizer = MagicMock() self.mock_tokenizer.encode.return_value.ids = [1, 2, 3] self.mock_tokenizer.decode.return_value = "decoded text" - @patch('os.path.exists') + @patch("os.path.exists") def test_read_image_with_docker(self, mock_exists): # Set up docker environment BASH_STATE.set_in_docker("test_container") - + mock_exists.return_value = True - with patch('builtins.open', mock_open(read_data=b'test_image_data')): - with patch('os.system', return_value=0): + with patch("builtins.open", mock_open(read_data=b"test_image_data")): + with patch("os.system", return_value=0): result = read_image_from_shell("/test/image.png") self.assertIsInstance(result, ImageData) 
self.assertEqual(result.media_type, "image/png") # Test docker copy failure - with patch('os.system', return_value=1): + with patch("os.system", return_value=1): with self.assertRaises(Exception): read_image_from_shell("/test/image.png") @@ -63,7 +65,7 @@ def test_expand_user(self): result = expand_user("/absolute/path", None) self.assertEqual(result, "/absolute/path") - @patch('wcgw.client.tools.syncconnect') + @patch("wcgw.client.tools.syncconnect") def test_serve_image_in_bg(self, mock_connect): # Test successful image serving mock_websocket1 = MagicMock() @@ -73,13 +75,17 @@ def test_serve_image_in_bg(self, mock_connect): # First successful case mock_connect.side_effect = [ - type('Context', (), { - '__enter__': lambda x: mock_websocket1, - '__exit__': lambda x, exc_type, exc_val, exc_tb: None - })() + type( + "Context", + (), + { + "__enter__": lambda x: mock_websocket1, + "__exit__": lambda x, exc_type, exc_val, exc_tb: None, + }, + )() ] - with patch('builtins.open', mock_open(read_data=b'test_image_data')): + with patch("builtins.open", mock_open(read_data=b"test_image_data")): serve_image_in_bg("/test/image.jpg", "test-uuid", "test-name") mock_websocket1.send.assert_called_once() @@ -87,50 +93,66 @@ def test_serve_image_in_bg(self, mock_connect): mock_websocket3 = MagicMock() rcvd_frame = frames.Close(code=1006, reason="Connection closed abnormally") sent_frame = frames.Close(code=1000, reason="Normal close") - mock_websocket3.send.side_effect = ConnectionClosedError(rcvd_frame, sent_frame, True) + mock_websocket3.send.side_effect = ConnectionClosedError( + rcvd_frame, sent_frame, True + ) mock_websocket4 = MagicMock() mock_websocket4.send = MagicMock() - + mock_connect.side_effect = [ - type('Context', (), { - '__enter__': lambda x: mock_websocket3, - '__exit__': lambda x, exc_type, exc_val, exc_tb: None - })(), - type('Context', (), { - '__enter__': lambda x: mock_websocket4, - '__exit__': lambda x, exc_type, exc_val, exc_tb: None - })() + type( + 
"Context", + (), + { + "__enter__": lambda x: mock_websocket3, + "__exit__": lambda x, exc_type, exc_val, exc_tb: None, + }, + )(), + type( + "Context", + (), + { + "__enter__": lambda x: mock_websocket4, + "__exit__": lambda x, exc_type, exc_val, exc_tb: None, + }, + )(), ] - with patch('builtins.open', mock_open(read_data=b'test_image_data')): + with patch("builtins.open", mock_open(read_data=b"test_image_data")): serve_image_in_bg("/test/image.jpg", "test-uuid", "test-name") self.assertEqual(mock_websocket4.send.call_count, 1) - @patch('pathlib.Path.mkdir') - @patch('pathlib.Path.open', new_callable=mock_open) + @patch("pathlib.Path.mkdir") + @patch("pathlib.Path.open", new_callable=mock_open) def test_write_file_with_overwrite(self, mock_file, mock_mkdir): # Test overwriting whitelisted file - test_file = WriteIfEmpty(file_path="/test/file.txt", file_content="test content") + test_file = WriteIfEmpty( + file_path="/test/file.txt", file_content="test content" + ) BASH_STATE.add_to_whitelist_for_overwrite("/test/file.txt") - - with patch('os.path.exists', return_value=True): - with patch('pathlib.Path.read_text', return_value="old content"): + + with patch("os.path.exists", return_value=True): + with patch("pathlib.Path.read_text", return_value="old content"): result = write_file(test_file, error_on_exist=True, max_tokens=100) self.assertIn("Success", result) # Test overwriting non-whitelisted file - test_file = WriteIfEmpty(file_path="/test/new_file.txt", file_content="test content") - with patch('os.path.exists', return_value=True): - with patch('pathlib.Path.read_text', return_value="existing content"): + test_file = WriteIfEmpty( + file_path="/test/new_file.txt", file_content="test content" + ) + with patch("os.path.exists", return_value=True): + with patch("pathlib.Path.read_text", return_value="existing content"): result = write_file(test_file, error_on_exist=True, max_tokens=100) self.assertIn("Error: can't write to existing file", result) - 
@patch('os.system') + @patch("os.system") def test_write_file_docker(self, mock_system): # Set up docker environment BASH_STATE.set_in_docker("test_container") - test_file = WriteIfEmpty(file_path="/test/file.txt", file_content="test content") + test_file = WriteIfEmpty( + file_path="/test/file.txt", file_content="test content" + ) # Test successful write mock_system.return_value = 0 @@ -147,18 +169,25 @@ def test_write_file_docker(self, mock_system): def test_write_file_validation(self): # Test relative path - test_file = WriteIfEmpty(file_path="relative/path.txt", file_content="test content") + test_file = WriteIfEmpty( + file_path="relative/path.txt", file_content="test content" + ) result = write_file(test_file, error_on_exist=False, max_tokens=100) self.assertIn("Failure: file_path should be absolute path", result) # Test OSError handling - test_file = WriteIfEmpty(file_path="/test/file.txt", file_content="test content") - with patch('pathlib.Path.open', side_effect=OSError("Permission denied")): - with patch('pathlib.Path.mkdir'): - with patch('os.path.exists', return_value=False): - with patch('os.path.isabs', return_value=True): - result = write_file(test_file, error_on_exist=False, max_tokens=100) + test_file = WriteIfEmpty( + file_path="/test/file.txt", file_content="test content" + ) + with patch("pathlib.Path.open", side_effect=OSError("Permission denied")): + with patch("pathlib.Path.mkdir"): + with patch("os.path.exists", return_value=False): + with patch("os.path.isabs", return_value=True): + result = write_file( + test_file, error_on_exist=False, max_tokens=100 + ) self.assertIn("Error: Permission denied", result) -if __name__ == '__main__': + +if __name__ == "__main__": unittest.main() diff --git a/tests/client/test_tools_validation.py b/tests/client/test_tools_validation.py index dbc94ec..5d6de31 100644 --- a/tests/client/test_tools_validation.py +++ b/tests/client/test_tools_validation.py @@ -23,24 +23,25 @@ class 
TestToolsValidation(unittest.TestCase): def setUp(self): self.maxDiff = None from wcgw.client.tools import BASH_STATE, initialize - - BASH_STATE.reset() + + BASH_STATE.reset_shell() # Properly initialize tools for testing initialize( any_workspace_path="", read_files_=[], task_id_to_resume="", max_tokens=None, - mode="wcgw" + mode="wcgw", ) def tearDown(self): - from wcgw.client.tools import INITIALIZED, TOOL_CALLS, BASH_STATE + from wcgw.client.tools import BASH_STATE, INITIALIZED, TOOL_CALLS + global INITIALIZED, TOOL_CALLS INITIALIZED = False # Reset initialization state TOOL_CALLS = [] # Clear tool calls try: - BASH_STATE.reset() # Reset bash state + BASH_STATE.reset_shell() # Reset bash state except: pass diff --git a/tests/client/tools/test_command_validation.py b/tests/client/tools/test_command_validation.py index 9ae1a25..1d918dd 100644 --- a/tests/client/tools/test_command_validation.py +++ b/tests/client/tools/test_command_validation.py @@ -23,7 +23,7 @@ class TestCommandValidation(unittest.TestCase): def setUp(self): self.maxDiff = None - BASH_STATE.reset() + BASH_STATE.reset_shell() def test_which_tool(self): """Test tool type determination""" diff --git a/tests/client/tools/test_user_interaction.py b/tests/client/tools/test_user_interaction.py index 3561232..54055db 100644 --- a/tests/client/tools/test_user_interaction.py +++ b/tests/client/tools/test_user_interaction.py @@ -1,24 +1,27 @@ """Tests for user interaction functionality in tools.py""" -import unittest -from unittest.mock import patch, MagicMock + import os +import unittest from pathlib import Path +from unittest.mock import MagicMock, patch + from wcgw.client.tools import ( - update_repl_prompt, + BASH_STATE, get_status, save_out_of_context, truncate_if_over, - BASH_STATE, + update_repl_prompt, ) + class TestUserInteraction(unittest.TestCase): def setUp(self): self.maxDiff = None - BASH_STATE.reset() + BASH_STATE.reset_shell() def test_update_repl_prompt(self): """Test REPL prompt 
updating""" - with patch('wcgw.client.tools.BASH_STATE') as mock_state: + with patch("wcgw.client.tools.BASH_STATE") as mock_state: mock_state.shell = MagicMock() mock_state.shell.before = "new_prompt" mock_state.shell.expect.return_value = 1 @@ -33,7 +36,7 @@ def test_update_repl_prompt(self): def test_get_status(self): """Test status reporting""" - with patch('wcgw.client.tools.BASH_STATE') as mock_state: + with patch("wcgw.client.tools.BASH_STATE") as mock_state: # Test pending state mock_state.state = "pending" mock_state.cwd = "/test/dir" @@ -47,9 +50,11 @@ def test_get_status(self): # Test completed state mock_state.state = "repl" mock_state.update_cwd.return_value = "/test/dir2" - with patch('wcgw.client.tools._ensure_env_and_bg_jobs', return_value=2): + with patch("wcgw.client.tools._ensure_env_and_bg_jobs", return_value=2): status = get_status() - self.assertIn("status = process exited; 2 background jobs running", status) + self.assertIn( + "status = process exited; 2 background jobs running", status + ) self.assertIn("cwd = /test/dir2", status) def test_save_out_of_context(self): @@ -80,13 +85,17 @@ def test_save_out_of_context(self): def test_truncate_if_over(self): """Test content truncation based on token limits""" - with patch('wcgw.client.tools.default_enc') as mock_enc: + with patch("wcgw.client.tools.default_enc") as mock_enc: # Test content under limit content = "short content" mock_enc.encode.return_value = MagicMock(ids=list(range(5))) # Under limit result = truncate_if_over(content, max_tokens=10) - self.assertEqual(result, content) # Should return original content when under limit - self.assertEqual(mock_enc.decode.call_count, 0) # Decode shouldn't be called + self.assertEqual( + result, content + ) # Should return original content when under limit + self.assertEqual( + mock_enc.decode.call_count, 0 + ) # Decode shouldn't be called # Test with content over limit long_content = "very long content" * 50 @@ -94,10 +103,10 @@ def 
test_truncate_if_over(self): mock_encoding.ids = list(range(200)) # Over limit mock_encoding.__len__.return_value = 200 # Make len(tokens) return 200 mock_enc.encode.return_value = mock_encoding - mock_enc.decode.return_value = "truncated content" + mock_enc.decode.return_value = "truncated content" result = truncate_if_over(long_content, max_tokens=100) - + # In truncate_if_over: max(0, max_tokens - 100) = max(0, 100-100) truncated_ids = [] # Since 100-100 = 0, max(0, 0) = 0 mock_enc.decode.assert_called_once_with(truncated_ids) @@ -112,5 +121,6 @@ def test_truncate_if_over(self): result = truncate_if_over(content, max_tokens=0) self.assertEqual(result, content) -if __name__ == '__main__': - unittest.main() \ No newline at end of file + +if __name__ == "__main__": + unittest.main() From d040c24910343b71b7651d9102167fe2e94b7f2e Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 22:38:06 +0530 Subject: [PATCH 30/40] updated code writer prompt --- src/wcgw/client/modes.py | 62 +++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 2469121..57cfa72 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -4,11 +4,6 @@ from ..types_ import Modes, ModesConfig -@dataclass -class RestrictedGlobs: - allowed_globs: list[str] - - class BashCommandMode(NamedTuple): bash_mode: Literal["normal_mode", "restricted_mode"] allowed_commands: Literal["all", "none"] @@ -55,16 +50,22 @@ def code_writer_prompt( all_write_new_globs: Literal["all"] | list[str], allowed_commands: Literal["all"] | list[str], ) -> str: - base = """You have to run in "code_writer" mode. This means + base = """ +You have to run in "code_writer" mode. """ path_prompt = """ - You are allowed to run FileEdit in the provided repository only. 
""" - if allowed_file_edit_globs != "all" and allowed_file_edit_globs: - path_prompt = f""" -- You are allowed to run FileEdit for files matching only the following globs: {', '.join(allowed_file_edit_globs)} + if allowed_file_edit_globs != "all": + if allowed_file_edit_globs: + path_prompt = f""" + - You are allowed to run FileEdit for files matching only the following globs: {', '.join(allowed_file_edit_globs)} +""" + else: + path_prompt = """ + - You are not allowed to run FileEdit. """ base += path_prompt @@ -72,25 +73,40 @@ def code_writer_prompt( - You are allowed to run WriteIfEmpty in the provided repository only. """ - if all_write_new_globs != "all" and all_write_new_globs: - path_prompt = f""" -- You are allowed to run WriteIfEmpty files matching only the following globs: {', '.join(allowed_file_edit_globs)} + if all_write_new_globs != "all": + if all_write_new_globs: + path_prompt = f""" + - You are allowed to run WriteIfEmpty files matching only the following globs: {', '.join(allowed_file_edit_globs)} +""" + else: + path_prompt = """ + - You are not allowed to run WriteIfEmpty. """ base += path_prompt - command_prompt = """ -- You are only allowed to run commands for project setup, code writing, editing, updating, testing, running and debugging related to the project. -- Do not run anything that adds or removes packages, changes system configuration or environment. + run_command_common = """ + - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. + - Do not use echo to write multi-line files, always use FileEdit tool to update a code. + - Do not provide code snippets unless asked by the user, instead directly add/edit the code. + - You should use the provided bash execution, reading and writing file tools to complete objective. + - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) 
+ - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools. +""" + + command_prompt = f""" + - You are only allowed to run commands for project setup, code writing, editing, updating, testing, running and debugging related to the project. + - Do not run anything that adds or removes packages, changes system configuration or environment. +{run_command_common} """ if allowed_commands != "all": - command_prompt = f""" -- You are only allowed to run the following commands: {', '.join(allowed_commands)} -- Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. -- Do not use echo to write multi-line files, always use FileEdit tool to update a code. -- Do not provide code snippets unless asked by the user, instead directly add/edit the code. -- You should use the provided bash execution, reading and writing file tools to complete objective. -- First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.) -- Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using wcgw tools. + if allowed_commands: + command_prompt = f""" + - You are only allowed to run the following commands: {', '.join(allowed_commands)} +{run_command_common} +""" + else: + command_prompt = """ + - You are not allowed to run any commands. 
""" base += command_prompt From 27bd575c4b9fe3ab321f39014758e14ce740b06e Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 22:41:34 +0530 Subject: [PATCH 31/40] In initialize read files could be relative --- src/wcgw/client/tools.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index d2daabc..b4a84ea 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -478,6 +478,11 @@ def initialize( initial_files_context = "" if read_files_: + if folder_to_start: + read_files_ = [ + os.path.join(folder_to_start, f) if not os.path.isabs(f) else f + for f in read_files_ + ] initial_files = read_files(read_files_, max_tokens) initial_files_context = f"---\n# Requested files\n{initial_files}\n---\n" From 537553b4050362409048cddfadeb746f13e88627 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 22:46:33 +0530 Subject: [PATCH 32/40] remove unused code --- src/wcgw/client/tools.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index b4a84ea..182fe63 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -816,26 +816,6 @@ def execute_bash( return output, 0 -def serve_image_in_bg(file_path: str, client_uuid: str, name: str) -> None: - if not client_uuid: - client_uuid = str(uuid.uuid4()) - - server_url = "wss://wcgw.arcfu.com/register_serve_image" - - with open(file_path, "rb") as image_file: - image_bytes = image_file.read() - media_type = mimetypes.guess_type(file_path)[0] - image_b64 = base64.b64encode(image_bytes).decode("utf-8") - uu = {"name": name, "image_b64": image_b64, "media_type": media_type} - - with syncconnect(f"{server_url}/{client_uuid}") as websocket: - try: - websocket.send(json.dumps(uu)) - except websockets.ConnectionClosed: - console.print(f"Connection closed for UUID: {client_uuid}, retrying") - serve_image_in_bg(file_path, client_uuid, name) - - MEDIA_TYPES = 
Literal["image/jpeg", "image/png", "image/gif", "image/webp"] From 5ffee5cec99b683b358a2eba6547fcafcd1b31a9 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 22:58:52 +0530 Subject: [PATCH 33/40] Creating working folder in shell init --- src/wcgw/client/tools.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 182fe63..7150dc9 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -263,6 +263,8 @@ def write_if_empty_mode(self) -> WriteIfEmptyMode: def _init_shell(self) -> None: self._state: Literal["repl"] | datetime.datetime = "repl" self._is_in_docker: Optional[str] = "" + # Ensure self._cwd exists + os.makedirs(self._cwd, exist_ok=True) self._shell = start_shell( self._bash_command_mode.bash_mode == "restricted_mode", self._cwd, From 718a938eb23f4f146e061cc2fd1b1dff6dd64acc Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 23:08:11 +0530 Subject: [PATCH 34/40] Changed the structure of types_ since gpt wasn't able to pick it up --- gpt_action_json_schema.json | 64 +++++++++++++++++++++++++++++++++++++ gpt_instructions.txt | 4 +++ src/wcgw/types_.py | 23 +++++++++++-- 3 files changed, 89 insertions(+), 2 deletions(-) diff --git a/gpt_action_json_schema.json b/gpt_action_json_schema.json index 9d299e3..cd652b8 100644 --- a/gpt_action_json_schema.json +++ b/gpt_action_json_schema.json @@ -408,6 +408,47 @@ ], "title": "BashInteractionWithUUID" }, + "CodeWriterMode": { + "properties": { + "allowed_globs": { + "anyOf": [ + { + "type": "string", + "const": "all" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Allowed Globs" + }, + "allowed_commands": { + "anyOf": [ + { + "type": "string", + "const": "all" + }, + { + "items": { + "type": "string" + }, + "type": "array" + } + ], + "title": "Allowed Commands" + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "allowed_globs", + "allowed_commands" + ], 
+ "title": "CodeWriterMode" + }, "CommandWithUUID": { "properties": { "command": { @@ -520,6 +561,19 @@ "type": "string", "title": "Task Id To Resume" }, + "mode_name": { + "$ref": "#/components/schemas/Modes" + }, + "code_writer_config": { + "anyOf": [ + { + "$ref": "#/components/schemas/CodeWriterMode" + }, + { + "type": "null" + } + ] + }, "user_id": { "type": "string", "format": "uuid", @@ -532,10 +586,20 @@ "any_workspace_path", "initial_files_to_read", "task_id_to_resume", + "mode_name", "user_id" ], "title": "InitializeWithUUID" }, + "Modes": { + "type": "string", + "enum": [ + "wcgw", + "architect", + "code_writer" + ], + "title": "Modes" + }, "ReadFileWithUUID": { "properties": { "file_paths": { diff --git a/gpt_instructions.txt b/gpt_instructions.txt index 92bc34e..b539049 100644 --- a/gpt_instructions.txt +++ b/gpt_instructions.txt @@ -15,6 +15,10 @@ Instructions for `Initialize`: - If the user has mentioned a folder or file with unclear project root, use the file or folder as `any_workspace_path`. - If user has mentioned any files use `initial_files_to_read` to read, use absolute paths only. - If `any_workspace_path` is provided, a tree structure of the workspace will be shown. + - Leave `any_workspace_path` as empty if no file or folder is mentioned. + - By default use mode `wcgw` + - In code-writer mode, set the commands and globs which user asked to set, otherwise use 'all'. + - In order to change the mode later, call this tool again but be sure to not provide any other argument like task_id_to_resume unnecessarily. Instructions for `BashCommand`: - Execute a bash command. This is stateful (beware with subsequent calls). 
diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index 4269891..0d6e07d 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -1,6 +1,6 @@ import os from enum import Enum -from typing import Literal, Optional, Sequence, Union +from typing import Any, Literal, Optional, Sequence, Union from pydantic import BaseModel as PydanticBaseModel @@ -39,7 +39,26 @@ class Initialize(BaseModel): any_workspace_path: str initial_files_to_read: list[str] task_id_to_resume: str - mode: ModesConfig + mode_name: Modes + code_writer_config: Optional[CodeWriterMode] = None + + def model_post_init(self, __context: Any) -> None: + if self.mode_name == Modes.code_writer: + assert ( + self.code_writer_config is not None + ), "code_writer_config can't be null when the mode is code_writer" + return super().model_post_init(__context) + + @property + def mode(self) -> ModesConfig: + if self.mode_name == Modes.wcgw: + return "wcgw" + if self.mode_name == Modes.architect: + return "architect" + assert ( + self.code_writer_config is not None + ), "code_writer_config can't be null when the mode is code_writer" + return self.code_writer_config class BashCommand(BaseModel): From 77681cb1b029e60686941668c4b930ef9024b3c2 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 23:34:10 +0530 Subject: [PATCH 35/40] Prompt improvements --- src/wcgw/client/modes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index 57cfa72..b38547f 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -52,6 +52,7 @@ def code_writer_prompt( ) -> str: base = """ You have to run in "code_writer" mode. + - You are not allowed to change directory (bash will run in -r mode) """ path_prompt = """ @@ -139,6 +140,7 @@ def code_writer_prompt( - You are not allowed to run any commands that may change disk, system configuration, packages or environment. Only read-only commands are allowed. 
- Only run commands that allows you to explore the repository, understand the system or read anything of relevance. - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the programs don't show any update but they still are running. +- You are not allowed to change directory (bash will run in -r mode) Your response should be in self-critique and brainstorm style. - Read as many relevant files as possible. From e69f3961777326183115e6ed501762939de1c6eb Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 23:39:08 +0530 Subject: [PATCH 36/40] Code writer operates in normal bash mode --- src/wcgw/client/modes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wcgw/client/modes.py b/src/wcgw/client/modes.py index b38547f..5430d3d 100644 --- a/src/wcgw/client/modes.py +++ b/src/wcgw/client/modes.py @@ -52,7 +52,6 @@ def code_writer_prompt( ) -> str: base = """ You have to run in "code_writer" mode. - - You are not allowed to change directory (bash will run in -r mode) """ path_prompt = """ @@ -161,7 +160,7 @@ def code_writer_prompt( file_edit_mode=FileEditMode([]), ), Modes.code_writer: ModeImpl( - bash_command_mode=BashCommandMode("restricted_mode", "all"), + bash_command_mode=BashCommandMode("normal_mode", "all"), write_if_empty_mode=WriteIfEmptyMode("all"), file_edit_mode=FileEditMode("all"), ), From b7fd6aa1bb6482c1e81bbba96b1c170c90da592a Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Tue, 14 Jan 2025 23:52:07 +0530 Subject: [PATCH 37/40] Test case remove unused --- tests/client/test_tools_file_ops.py | 61 ----------------------------- 1 file changed, 61 deletions(-) diff --git a/tests/client/test_tools_file_ops.py b/tests/client/test_tools_file_ops.py index 268435f..beb9840 100644 --- a/tests/client/test_tools_file_ops.py +++ b/tests/client/test_tools_file_ops.py @@ -2,15 +2,11 @@ import unittest from unittest.mock import MagicMock, mock_open, patch -from websockets import frames -from 
websockets.exceptions import ConnectionClosedError - from wcgw.client.tools import ( BASH_STATE, ImageData, expand_user, read_image_from_shell, - serve_image_in_bg, write_file, ) from wcgw.types_ import WriteIfEmpty @@ -65,63 +61,6 @@ def test_expand_user(self): result = expand_user("/absolute/path", None) self.assertEqual(result, "/absolute/path") - @patch("wcgw.client.tools.syncconnect") - def test_serve_image_in_bg(self, mock_connect): - # Test successful image serving - mock_websocket1 = MagicMock() - mock_websocket1.send = MagicMock() - mock_websocket2 = MagicMock() - mock_websocket2.send = MagicMock() - - # First successful case - mock_connect.side_effect = [ - type( - "Context", - (), - { - "__enter__": lambda x: mock_websocket1, - "__exit__": lambda x, exc_type, exc_val, exc_tb: None, - }, - )() - ] - - with patch("builtins.open", mock_open(read_data=b"test_image_data")): - serve_image_in_bg("/test/image.jpg", "test-uuid", "test-name") - mock_websocket1.send.assert_called_once() - - # Test retry case - first connection fails, second succeeds - mock_websocket3 = MagicMock() - rcvd_frame = frames.Close(code=1006, reason="Connection closed abnormally") - sent_frame = frames.Close(code=1000, reason="Normal close") - mock_websocket3.send.side_effect = ConnectionClosedError( - rcvd_frame, sent_frame, True - ) - mock_websocket4 = MagicMock() - mock_websocket4.send = MagicMock() - - mock_connect.side_effect = [ - type( - "Context", - (), - { - "__enter__": lambda x: mock_websocket3, - "__exit__": lambda x, exc_type, exc_val, exc_tb: None, - }, - )(), - type( - "Context", - (), - { - "__enter__": lambda x: mock_websocket4, - "__exit__": lambda x, exc_type, exc_val, exc_tb: None, - }, - )(), - ] - - with patch("builtins.open", mock_open(read_data=b"test_image_data")): - serve_image_in_bg("/test/image.jpg", "test-uuid", "test-name") - self.assertEqual(mock_websocket4.send.call_count, 1) - @patch("pathlib.Path.mkdir") @patch("pathlib.Path.open", new_callable=mock_open) 
def test_write_file_with_overwrite(self, mock_file, mock_mkdir): From 87c14e3656a327756463a7f6944de4a21f16fff3 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Wed, 15 Jan 2025 00:03:10 +0530 Subject: [PATCH 38/40] Enum not working well with types_, changed to union of literal --- src/wcgw/client/mcp_server/server.py | 4 ++-- src/wcgw/types_.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/wcgw/client/mcp_server/server.py b/src/wcgw/client/mcp_server/server.py index 0b4c360..a6509d2 100644 --- a/src/wcgw/client/mcp_server/server.py +++ b/src/wcgw/client/mcp_server/server.py @@ -105,8 +105,8 @@ async def handle_list_tools() -> list[types.Tool]: - If user has mentioned any files use `initial_files_to_read` to read, use absolute paths only. - If `any_workspace_path` is provided, a tree structure of the workspace will be shown. - Leave `any_workspace_path` as empty if no file or folder is mentioned. -- By default use mode `wcgw` -- In code-writer mode, set the commands and globs which user asked to set, otherwise use 'all'. +- By default use mode "wcgw" +- In "code-writer" mode, set the commands and globs which user asked to set, otherwise use 'all'. - In order to change the mode later, call this tool again but be sure to not provide any other argument like task_id_to_resume unnecessarily. 
""", ), diff --git a/src/wcgw/types_.py b/src/wcgw/types_.py index 0d6e07d..24f13b7 100644 --- a/src/wcgw/types_.py +++ b/src/wcgw/types_.py @@ -39,11 +39,11 @@ class Initialize(BaseModel): any_workspace_path: str initial_files_to_read: list[str] task_id_to_resume: str - mode_name: Modes + mode_name: Literal["wcgw", "architect", "code_writer"] code_writer_config: Optional[CodeWriterMode] = None def model_post_init(self, __context: Any) -> None: - if self.mode_name == Modes.code_writer: + if self.mode_name == "code_writer": assert ( self.code_writer_config is not None ), "code_writer_config can't be null when the mode is code_writer" @@ -51,9 +51,9 @@ def model_post_init(self, __context: Any) -> None: @property def mode(self) -> ModesConfig: - if self.mode_name == Modes.wcgw: + if self.mode_name == "wcgw": return "wcgw" - if self.mode_name == Modes.architect: + if self.mode_name == "architect": return "architect" assert ( self.code_writer_config is not None From 92d87f9970d854feb44e660b7b27c2f4fb386b0a Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Wed, 15 Jan 2025 00:06:04 +0530 Subject: [PATCH 39/40] Fixed gpt json schema --- gpt_action_json_schema.json | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/gpt_action_json_schema.json b/gpt_action_json_schema.json index cd652b8..edacd0b 100644 --- a/gpt_action_json_schema.json +++ b/gpt_action_json_schema.json @@ -562,7 +562,13 @@ "title": "Task Id To Resume" }, "mode_name": { - "$ref": "#/components/schemas/Modes" + "type": "string", + "enum": [ + "wcgw", + "architect", + "code_writer" + ], + "title": "Mode Name" }, "code_writer_config": { "anyOf": [ @@ -591,15 +597,6 @@ ], "title": "InitializeWithUUID" }, - "Modes": { - "type": "string", - "enum": [ - "wcgw", - "architect", - "code_writer" - ], - "title": "Modes" - }, "ReadFileWithUUID": { "properties": { "file_paths": { From 9bf30fa08d39ca0e513428119487e8cd157ce606 Mon Sep 17 00:00:00 2001 From: Aman Rusia Date: Wed, 15 Jan 2025 
14:25:33 +0530 Subject: [PATCH 40/40] Making repo path in init

Create the workspace directory during initialize when the given path does not exist, but only when that path is absolute (os.path.isabs). os.path.abspath() always returns a non-empty string, so it cannot be used as the condition: it would make the "does not exist" branch unreachable and create relative paths under the current working directory. A non-existent relative path is now reported as missing instead.

--- src/wcgw/client/tools.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/wcgw/client/tools.py b/src/wcgw/client/tools.py index 7150dc9..3baa9e0 100644 --- a/src/wcgw/client/tools.py +++ b/src/wcgw/client/tools.py @@ -435,10 +435,13 @@ def initialize( else: assert isinstance(mode, str) else: - repo_context = ( - f"\nInfo: Workspace path {any_workspace_path} does not exist\n" - ) - + if os.path.isabs(any_workspace_path): + os.makedirs(any_workspace_path, exist_ok=True) + repo_context = f"\nInfo: Workspace path {any_workspace_path} did not exist. I've created it for you.\n" + else: + repo_context = ( + f"\nInfo: Workspace path {any_workspace_path} does not exist." + ) # Restore bash state if available if bash_state is not None: try: