Merge branch 'Torantulino:master' into add_ingest_documents_script

This commit is contained in:
Maiko Bossuyt 2023-04-13 13:50:41 +02:00 committed by GitHub
commit 334400edd1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 288 additions and 77 deletions

View File

@ -1,6 +1,7 @@
PINECONE_API_KEY=your-pinecone-api-key
PINECONE_ENV=your-pinecone-region
OPENAI_API_KEY=your-openai-api-key
TEMPERATURE=1
ELEVENLABS_API_KEY=your-elevenlabs-api-key
ELEVENLABS_VOICE_1_ID=your-voice-id
ELEVENLABS_VOICE_2_ID=your-voice-id
@ -9,11 +10,7 @@ FAST_LLM_MODEL=gpt-3.5-turbo
GOOGLE_API_KEY=
CUSTOM_SEARCH_ENGINE_ID=
USE_AZURE=False
OPENAI_AZURE_API_BASE=your-base-url-for-azure
OPENAI_AZURE_API_VERSION=api-version-for-azure
OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat
OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs
EXECUTE_LOCAL_COMMANDS=False
IMAGE_PROVIDER=dalle
HUGGINGFACE_API_TOKEN=
USE_MAC_OS_TTS=False

5
.gitignore vendored
View File

@ -7,9 +7,11 @@ package-lock.json
auto_gpt_workspace/*
*.mpeg
.env
azure.yaml
*venv/*
outputs/*
ai_settings.yaml
last_run_ai_settings.yaml
.vscode
.idea/*
auto-gpt.json
@ -20,3 +22,6 @@ log-ingestion.txt
.coverage
coverage.xml
htmlcov/
# For Macs Dev Environs: ignoring .Desktop Services_Store
.DS_Store

View File

@ -97,10 +97,15 @@ pip install -r requirements.txt
```
4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVEN_LABS_API_KEY` as well.
- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively
- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then:
- Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section:
- `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id
- `smart_llm_model_deployment_id` - your gpt-4 deployment id
- `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id
- Please specify all of these values as double quoted strings
- details can be found here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section and here: https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line for the embedding model.
## 🔧 Usage
@ -208,7 +213,7 @@ MEMORY_INDEX=whatever
Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time.
1. Go to app.pinecone.io and make an account if you don't already have one.
1. Go to [pinecone](https://app.pinecone.io/) and make an account if you don't already have one.
2. Choose the `Starter` plan to avoid being charged.
3. Find your API key and region under the default project in the left sidebar.
@ -391,4 +396,4 @@ flake8 scripts/ tests/
# Or, if you want to run flake8 with the same configuration as the CI:
flake8 scripts/ tests/ --select E303,W293,W291,W292,E305
```
```

View File

@ -1,7 +0,0 @@
ai_goals:
- Increase net worth.
- Develop and manage multiple businesses autonomously.
- Play to your strengths as a Large Language Model.
ai_name: Entrepreneur-GPT
ai_role: an AI designed to autonomously develop and run businesses with the sole goal
of increasing your net worth.

6
azure.yaml.template Normal file
View File

@ -0,0 +1,6 @@
azure_api_base: your-base-url-for-azure
azure_api_version: api-version-for-azure
azure_model_map:
fast_llm_model_deployment_id: gpt35-deployment-id-for-azure
smart_llm_model_deployment_id: gpt4-deployment-id-for-azure
embedding_model_deployment_id: embedding-deployment-id-for-azure

View File

@ -45,6 +45,7 @@ def improve_code(suggestions: List[str], code: str) -> str:
result_string = call_ai_function(function_string, args, description_string)
return result_string
def write_tests(code: str, focus: List[str]) -> str:
"""
A function that takes in code and focus topics and returns a response from create chat completion api call.

View File

@ -6,6 +6,7 @@ from urllib.parse import urlparse, urljoin
cfg = Config()
# Function to check if the URL is valid
def is_valid_url(url):
try:
@ -14,49 +15,51 @@ def is_valid_url(url):
except ValueError:
return False
# Function to sanitize the URL
def sanitize_url(url):
return urljoin(url, urlparse(url).path)
# Function to make a request with a specified timeout and handle exceptions
def make_request(url, timeout=10):
try:
response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout)
response.raise_for_status()
return response
except requests.exceptions.RequestException as e:
return "Error: " + str(e)
# Define and check for local file address prefixes
def check_local_file_access(url):
local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost']
return any(url.startswith(prefix) for prefix in local_prefixes)
def get_response(url, headers=cfg.user_agent_header, timeout=10):
try:
# Restrict access to local files
if check_local_file_access(url):
raise ValueError('Access to local files is restricted')
# Most basic check if the URL is valid:
if not url.startswith('http://') and not url.startswith('https://'):
raise ValueError('Invalid URL format')
sanitized_url = sanitize_url(url)
response = requests.get(sanitized_url, headers=headers, timeout=timeout)
# Check if the response contains an HTTP error
if response.status_code >= 400:
return None, "Error: HTTP " + str(response.status_code) + " error"
return response, None
except ValueError as ve:
# Handle invalid URL format
return None, "Error: " + str(ve)
except requests.exceptions.RequestException as re:
# Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.)
return None, "Error: " + str(re)
def scrape_text(url):
"""Scrape text from a webpage"""
# Basic check if the URL is valid
if not url.startswith('http'):
return "Error: Invalid URL"
# Restrict access to local files
if check_local_file_access(url):
return "Error: Access to local files is restricted"
# Validate the input URL
if not is_valid_url(url):
# Sanitize the input URL
sanitized_url = sanitize_url(url)
# Make the request with a timeout and handle exceptions
response = make_request(sanitized_url)
if isinstance(response, str):
return response
else:
# Sanitize the input URL
sanitized_url = sanitize_url(url)
response = requests.get(sanitized_url, headers=cfg.user_agent_header)
response, error_message = get_response(url)
if error_message:
return error_message
soup = BeautifulSoup(response.text, "html.parser")
@ -89,11 +92,9 @@ def format_hyperlinks(hyperlinks):
def scrape_links(url):
"""Scrape links from a webpage"""
response = requests.get(url, headers=cfg.user_agent_header)
# Check if the response contains an HTTP error
if response.status_code >= 400:
return "error"
response, error_message = get_response(url)
if error_message:
return error_message
soup = BeautifulSoup(response.text, "html.parser")
@ -131,6 +132,7 @@ def create_message(chunk, question):
"content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
}
def summarize_text(text, question):
"""Summarize text using the LLM model"""
if not text:

View File

@ -7,7 +7,7 @@ import speak
from config import Config
import ai_functions as ai
from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files
from execute_code import execute_python_file
from execute_code import execute_python_file, execute_shell
from json_parser import fix_and_parse_json
from image_gen import generate_image
from duckduckgo_search import ddg
@ -103,6 +103,11 @@ def execute_command(command_name, arguments):
return ai.write_tests(arguments["code"], arguments.get("focus"))
elif command_name == "execute_python_file": # Add this command
return execute_python_file(arguments["file"])
elif command_name == "execute_shell":
if cfg.execute_local_commands:
return execute_shell(arguments["command_line"])
else:
return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction."
elif command_name == "generate_image":
return generate_image(arguments["prompt"])
elif command_name == "do_nothing":

View File

@ -1,6 +1,7 @@
import abc
import os
import openai
import yaml
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
@ -43,14 +44,13 @@ class Config(metaclass=Singleton):
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
self.openai_api_key = os.getenv("OPENAI_API_KEY")
self.temperature = int(os.getenv("TEMPERATURE", "1"))
self.use_azure = False
self.use_azure = os.getenv("USE_AZURE") == 'True'
self.execute_local_commands = os.getenv('EXECUTE_LOCAL_COMMANDS', 'False') == 'True'
if self.use_azure:
self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE")
self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION")
self.openai_deployment_id = os.getenv("OPENAI_AZURE_DEPLOYMENT_ID")
self.azure_chat_deployment_id = os.getenv("OPENAI_AZURE_CHAT_DEPLOYMENT_ID")
self.azure_embeddigs_deployment_id = os.getenv("OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID")
self.load_azure_config()
openai.api_type = "azure"
openai.api_base = self.openai_api_base
openai.api_version = self.openai_api_version
@ -85,6 +85,46 @@ class Config(metaclass=Singleton):
# Initialize the OpenAI API client
openai.api_key = self.openai_api_key
def get_azure_deployment_id_for_model(self, model: str) -> str:
"""
Returns the relevant deployment id for the model specified.
Parameters:
model(str): The model to map to the deployment id.
Returns:
The matching deployment id if found, otherwise an empty string.
"""
if model == self.fast_llm_model:
return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"]
elif model == self.smart_llm_model:
return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"]
elif model == "text-embedding-ada-002":
return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"]
else:
return ""
AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml')
def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None:
"""
Loads the configuration parameters for Azure hosting from the specified file path as a yaml file.
Parameters:
config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml"
Returns:
None
"""
try:
with open(config_file) as file:
config_params = yaml.load(file, Loader=yaml.FullLoader)
except FileNotFoundError:
config_params = {}
self.openai_api_base = config_params.get("azure_api_base", "")
self.openai_api_version = config_params.get("azure_api_version", "")
self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", [])
def set_continuous_mode(self, value: bool):
"""Set the continuous mode value."""
self.continuous_mode = value

View File

@ -22,9 +22,10 @@ COMMANDS:
16. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
17. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
18. Execute Python File: "execute_python_file", args: "file": "<file>"
19. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
20. Generate Image: "generate_image", args: "prompt": "<prompt>"
21. Do Nothing: "do_nothing", args: ""
19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "<command_line>".
20. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
21. Generate Image: "generate_image", args: "prompt": "<prompt>"
22. Do Nothing: "do_nothing", args: ""
RESOURCES:

View File

@ -1,17 +1,20 @@
import docker
import os
import subprocess
WORKSPACE_FOLDER = "auto_gpt_workspace"
def execute_python_file(file):
"""Execute a Python file in a Docker container and return the output"""
workspace_folder = "auto_gpt_workspace"
print (f"Executing file '{file}' in workspace '{workspace_folder}'")
print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'")
if not file.endswith(".py"):
return "Error: Invalid file type. Only .py files are allowed."
file_path = os.path.join(workspace_folder, file)
file_path = os.path.join(WORKSPACE_FOLDER, file)
if not os.path.isfile(file_path):
return f"Error: File '{file}' does not exist."
@ -19,14 +22,31 @@ def execute_python_file(file):
try:
client = docker.from_env()
image_name = 'python:3.10'
try:
client.images.get(image_name)
print(f"Image '{image_name}' found locally")
except docker.errors.ImageNotFound:
print(f"Image '{image_name}' not found locally, pulling from Docker Hub")
# Use the low-level API to stream the pull response
low_level_client = docker.APIClient()
for line in low_level_client.pull(image_name, stream=True, decode=True):
# Print the status and progress, if available
status = line.get('status')
progress = line.get('progress')
if status and progress:
print(f"{status}: {progress}")
elif status:
print(status)
# You can replace 'python:3.8' with the desired Python image/version
# You can find available Python images on Docker Hub:
# https://hub.docker.com/_/python
container = client.containers.run(
'python:3.10',
image_name,
f'python {file}',
volumes={
os.path.abspath(workspace_folder): {
os.path.abspath(WORKSPACE_FOLDER): {
'bind': '/workspace',
'mode': 'ro'}},
working_dir='/workspace',
@ -46,3 +66,22 @@ def execute_python_file(file):
except Exception as e:
return f"Error: {str(e)}"
def execute_shell(command_line):
current_dir = os.getcwd()
if not WORKSPACE_FOLDER in current_dir: # Change dir into workspace if necessary
work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER)
os.chdir(work_dir)
print (f"Executing command '{command_line}' in working directory '{os.getcwd()}'")
result = subprocess.run(command_line, capture_output=True, shell=True)
output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
# Change back to whatever the prior working dir was
os.chdir(current_dir)
return output

View File

@ -5,11 +5,11 @@ cfg = Config()
openai.api_key = cfg.openai_api_key
# Overly simple abstraction until we create something better
def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str:
def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str:
"""Create a chat completion using the OpenAI API"""
if cfg.use_azure:
response = openai.ChatCompletion.create(
deployment_id=cfg.azure_chat_deployment_id,
deployment_id=cfg.get_azure_deployment_id_for_model(model),
model=model,
messages=messages,
temperature=temperature,

View File

@ -164,8 +164,6 @@ class ConsoleHandler(logging.StreamHandler):
Allows to handle custom placeholders 'title_color' and 'message_no_color'.
To use this formatter, make sure to pass 'color', 'title' as log extras.
'''
class AutoGptFormatter(logging.Formatter):
def format(self, record: LogRecord) -> str:
if (hasattr(record, 'color')):

View File

@ -318,7 +318,6 @@ def parse_arguments():
# TODO: fill in llm values here
check_openai_api_key()
cfg = Config()
parse_arguments()
logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO)
ai_name = ""

View File

@ -4,11 +4,12 @@ from config import AbstractSingleton, Config
import openai
cfg = Config()
cfg = Config()
def get_ada_embedding(text):
text = text.replace("\n", " ")
if cfg.use_azure:
return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"]
return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"]
else:
return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]

View File

@ -3,6 +3,6 @@ import unittest
if __name__ == "__main__":
# Load all tests from the 'scripts/tests' package
suite = unittest.defaultTestLoader.discover('scripts/tests')
# Run the tests
unittest.TextTestRunner().run(suite)

View File

@ -0,0 +1,118 @@
# Generated by CodiumAI
# Dependencies:
# pip install pytest-mock
import pytest
from scripts.browse import scrape_links
"""
Code Analysis
Objective:
The objective of the 'scrape_links' function is to scrape hyperlinks from a
given URL and return them in a formatted way.
Inputs:
- url: a string representing the URL to be scraped.
Flow:
1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
2. Check if the response contains an HTTP error. If it does, return "error".
3. Parse the HTML content of the response using the BeautifulSoup library.
4. Remove any script and style tags from the parsed HTML.
5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
7. Return the formatted hyperlinks.
Outputs:
- A list of formatted hyperlinks.
Additional aspects:
- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
requests and parse HTML content, respectively.
- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
- The function checks for HTTP errors and returns "error" if any are found.
"""
class TestScrapeLinks:
# Tests that the function returns a list of formatted hyperlinks when
# provided with a valid url that returns a webpage with hyperlinks.
def test_valid_url_with_hyperlinks(self):
url = "https://www.google.com"
result = scrape_links(url)
assert len(result) > 0
assert isinstance(result, list)
assert isinstance(result[0], str)
# Tests that the function returns correctly formatted hyperlinks when given a valid url.
def test_valid_url(self, mocker):
# Mock the requests.get() function to return a response with sample HTML containing hyperlinks
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
mocker.patch('requests.get', return_value=mock_response)
# Call the function with a valid URL
result = scrape_links("https://www.example.com")
# Assert that the function returns correctly formatted hyperlinks
assert result == ["Google (https://www.google.com)"]
# Tests that the function returns "error" when given an invalid url.
def test_invalid_url(self, mocker):
# Mock the requests.get() function to return an HTTP error response
mock_response = mocker.Mock()
mock_response.status_code = 404
mocker.patch('requests.get', return_value=mock_response)
# Call the function with an invalid URL
result = scrape_links("https://www.invalidurl.com")
# Assert that the function returns "error"
assert "Error:" in result
# Tests that the function returns an empty list when the html contains no hyperlinks.
def test_no_hyperlinks(self, mocker):
# Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
mocker.patch('requests.get', return_value=mock_response)
# Call the function with a URL containing no hyperlinks
result = scrape_links("https://www.example.com")
# Assert that the function returns an empty list
assert result == []
# Tests that scrape_links() correctly extracts and formats hyperlinks from
# a sample HTML containing a few hyperlinks.
def test_scrape_links_with_few_hyperlinks(self, mocker):
# Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.text = """
<html>
<body>
<div id="google-link"><a href="https://www.google.com">Google</a></div>
<div id="github"><a href="https://github.com">GitHub</a></div>
<div id="CodiumAI"><a href="https://www.codium.ai">CodiumAI</a></div>
</body>
</html>
"""
mocker.patch('requests.get', return_value=mock_response)
# Call the function being tested
result = scrape_links("https://www.example.com")
# Assert that the function returns a list of formatted hyperlinks
assert isinstance(result, list)
assert len(result) == 3
assert result[0] == "Google (https://www.google.com)"
assert result[1] == "GitHub (https://github.com)"
assert result[2] == "CodiumAI (https://www.codium.ai)"

View File

@ -2,7 +2,6 @@
# Generated by CodiumAI
import requests
import tests.context
from scripts.browse import scrape_text
@ -10,7 +9,8 @@ from scripts.browse import scrape_text
Code Analysis
Objective:
The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
The objective of the "scrape_text" function is to scrape the text content from
a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
Inputs:
- url: a string representing the URL of the webpage to be scraped.
@ -33,6 +33,7 @@ Additional aspects:
- The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text.
"""
class TestScrapeText:
# Tests that scrape_text() returns the expected text when given a valid URL.