mirror of
https://github.com/Significant-Gravitas/Auto-GPT.git
synced 2025-01-09 04:19:02 +08:00
feat(blocks): Add Code extraction Block (#8778)
This adds a code extraction block, this was originally made by https://github.com/SerchioSD I simply updated it and made it into a PR ### Changes 🏗️ Adds a new ``code_extraction_block.py`` block which has the code ![image](https://github.com/user-attachments/assets/f7e61390-94e1-49e3-b8ee-b2dc7ea03bfe) ### Updated video to show it working with latest mapped aliases https://github.com/user-attachments/assets/a96aa708-f06f-4a00-a581-9f64d72f9ee8 --------- Co-authored-by: SerchioSD <69461657+serchiosd@users.noreply.github.com> Co-authored-by: Nicholas Tindle <nicholas.tindle@agpt.co> Co-authored-by: Nicholas Tindle <nicktindle@outlook.com>
This commit is contained in:
parent
29c771ba1b
commit
f090f4ca4a
110
autogpt_platform/backend/backend/blocks/code_extraction_block.py
Normal file
110
autogpt_platform/backend/backend/blocks/code_extraction_block.py
Normal file
@ -0,0 +1,110 @@
|
||||
import re
|
||||
|
||||
from backend.data.block import Block, BlockCategory, BlockOutput, BlockSchema
|
||||
from backend.data.model import SchemaField
|
||||
|
||||
|
||||
class CodeExtractionBlock(Block):
|
||||
class Input(BlockSchema):
|
||||
text: str = SchemaField(
|
||||
description="Text containing code blocks to extract (e.g., AI response)",
|
||||
placeholder="Enter text containing code blocks",
|
||||
)
|
||||
|
||||
class Output(BlockSchema):
|
||||
html: str = SchemaField(description="Extracted HTML code")
|
||||
css: str = SchemaField(description="Extracted CSS code")
|
||||
javascript: str = SchemaField(description="Extracted JavaScript code")
|
||||
python: str = SchemaField(description="Extracted Python code")
|
||||
sql: str = SchemaField(description="Extracted SQL code")
|
||||
java: str = SchemaField(description="Extracted Java code")
|
||||
cpp: str = SchemaField(description="Extracted C++ code")
|
||||
csharp: str = SchemaField(description="Extracted C# code")
|
||||
json_code: str = SchemaField(description="Extracted JSON code")
|
||||
bash: str = SchemaField(description="Extracted Bash code")
|
||||
php: str = SchemaField(description="Extracted PHP code")
|
||||
ruby: str = SchemaField(description="Extracted Ruby code")
|
||||
yaml: str = SchemaField(description="Extracted YAML code")
|
||||
markdown: str = SchemaField(description="Extracted Markdown code")
|
||||
typescript: str = SchemaField(description="Extracted TypeScript code")
|
||||
xml: str = SchemaField(description="Extracted XML code")
|
||||
remaining_text: str = SchemaField(
|
||||
description="Remaining text after code extraction"
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(
|
||||
id="d3a7d896-3b78-4f44-8b4b-48fbf4f0bcd8",
|
||||
description="Extracts code blocks from text and identifies their programming languages",
|
||||
categories={BlockCategory.TEXT},
|
||||
input_schema=CodeExtractionBlock.Input,
|
||||
output_schema=CodeExtractionBlock.Output,
|
||||
test_input={
|
||||
"text": "Here's a Python example:\n```python\nprint('Hello World')\n```\nAnd some HTML:\n```html\n<h1>Title</h1>\n```"
|
||||
},
|
||||
test_output=[
|
||||
("html", "<h1>Title</h1>"),
|
||||
("python", "print('Hello World')"),
|
||||
("remaining_text", "Here's a Python example:\nAnd some HTML:"),
|
||||
],
|
||||
)
|
||||
|
||||
def run(self, input_data: Input, **kwargs) -> BlockOutput:
|
||||
# List of supported programming languages with mapped aliases
|
||||
language_aliases = {
|
||||
"html": ["html", "htm"],
|
||||
"css": ["css"],
|
||||
"javascript": ["javascript", "js"],
|
||||
"python": ["python", "py"],
|
||||
"sql": ["sql"],
|
||||
"java": ["java"],
|
||||
"cpp": ["cpp", "c++"],
|
||||
"csharp": ["csharp", "c#", "cs"],
|
||||
"json_code": ["json"],
|
||||
"bash": ["bash", "shell", "sh"],
|
||||
"php": ["php"],
|
||||
"ruby": ["ruby", "rb"],
|
||||
"yaml": ["yaml", "yml"],
|
||||
"markdown": ["markdown", "md"],
|
||||
"typescript": ["typescript", "ts"],
|
||||
"xml": ["xml"],
|
||||
}
|
||||
|
||||
# Extract code for each language
|
||||
for canonical_name, aliases in language_aliases.items():
|
||||
code = ""
|
||||
# Try each alias for the language
|
||||
for alias in aliases:
|
||||
code_for_alias = self.extract_code(input_data.text, alias)
|
||||
if code_for_alias:
|
||||
code = code + "\n\n" + code_for_alias if code else code_for_alias
|
||||
|
||||
if code: # Only yield if there's actual code content
|
||||
yield canonical_name, code
|
||||
|
||||
# Remove all code blocks from the text to get remaining text
|
||||
pattern = (
|
||||
r"```(?:"
|
||||
+ "|".join(
|
||||
re.escape(alias)
|
||||
for aliases in language_aliases.values()
|
||||
for alias in aliases
|
||||
)
|
||||
+ r")\s+[\s\S]*?```"
|
||||
)
|
||||
|
||||
remaining_text = re.sub(pattern, "", input_data.text).strip()
|
||||
remaining_text = re.sub(r"\n\s*\n", "\n", remaining_text)
|
||||
|
||||
if remaining_text: # Only yield if there's remaining text
|
||||
yield "remaining_text", remaining_text
|
||||
|
||||
def extract_code(self, text: str, language: str) -> str:
|
||||
# Escape special regex characters in the language string
|
||||
language = re.escape(language)
|
||||
# Extract all code blocks enclosed in ```language``` blocks
|
||||
pattern = re.compile(rf"```{language}\s+(.*?)```", re.DOTALL | re.IGNORECASE)
|
||||
matches = pattern.finditer(text)
|
||||
# Combine all code blocks for this language with newlines between them
|
||||
code_blocks = [match.group(1).strip() for match in matches]
|
||||
return "\n\n".join(code_blocks) if code_blocks else ""
|
Loading…
Reference in New Issue
Block a user