mirror of
https://github.com/Significant-Gravitas/Auto-GPT.git
synced 2025-01-08 11:57:32 +08:00
Merge branch 'Torantulino:master' into add_ingest_documents_script
This commit is contained in:
commit
334400edd1
@ -1,6 +1,7 @@
|
||||
PINECONE_API_KEY=your-pinecone-api-key
|
||||
PINECONE_ENV=your-pinecone-region
|
||||
OPENAI_API_KEY=your-openai-api-key
|
||||
TEMPERATURE=1
|
||||
ELEVENLABS_API_KEY=your-elevenlabs-api-key
|
||||
ELEVENLABS_VOICE_1_ID=your-voice-id
|
||||
ELEVENLABS_VOICE_2_ID=your-voice-id
|
||||
@ -9,11 +10,7 @@ FAST_LLM_MODEL=gpt-3.5-turbo
|
||||
GOOGLE_API_KEY=
|
||||
CUSTOM_SEARCH_ENGINE_ID=
|
||||
USE_AZURE=False
|
||||
OPENAI_AZURE_API_BASE=your-base-url-for-azure
|
||||
OPENAI_AZURE_API_VERSION=api-version-for-azure
|
||||
OPENAI_AZURE_DEPLOYMENT_ID=deployment-id-for-azure
|
||||
OPENAI_AZURE_CHAT_DEPLOYMENT_ID=deployment-id-for-azure-chat
|
||||
OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID=deployment-id-for-azure-embeddigs
|
||||
EXECUTE_LOCAL_COMMANDS=False
|
||||
IMAGE_PROVIDER=dalle
|
||||
HUGGINGFACE_API_TOKEN=
|
||||
USE_MAC_OS_TTS=False
|
||||
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -7,9 +7,11 @@ package-lock.json
|
||||
auto_gpt_workspace/*
|
||||
*.mpeg
|
||||
.env
|
||||
azure.yaml
|
||||
*venv/*
|
||||
outputs/*
|
||||
ai_settings.yaml
|
||||
last_run_ai_settings.yaml
|
||||
.vscode
|
||||
.idea/*
|
||||
auto-gpt.json
|
||||
@ -20,3 +22,6 @@ log-ingestion.txt
|
||||
.coverage
|
||||
coverage.xml
|
||||
htmlcov/
|
||||
|
||||
# macOS development environments: ignore Finder's .DS_Store (Desktop Services Store) files
|
||||
.DS_Store
|
||||
|
17
README.md
17
README.md
@ -97,10 +97,15 @@ pip install -r requirements.txt
|
||||
```
|
||||
|
||||
4. Rename `.env.template` to `.env` and fill in your `OPENAI_API_KEY`. If you plan to use Speech Mode, fill in your `ELEVENLABS_API_KEY` as well.
|
||||
|
||||
- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
|
||||
- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
|
||||
- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and provide the `OPENAI_AZURE_API_BASE`, `OPENAI_AZURE_API_VERSION` and `OPENAI_AZURE_DEPLOYMENT_ID` values as explained here: https://pypi.org/project/openai/ in the `Microsoft Azure Endpoints` section. Additionally you need separate deployments for both embeddings and chat. Add their ID values to `OPENAI_AZURE_CHAT_DEPLOYMENT_ID` and `OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID` respectively
|
||||
- Obtain your OpenAI API key from: https://platform.openai.com/account/api-keys.
|
||||
- Obtain your ElevenLabs API key from: https://elevenlabs.io. You can view your xi-api-key using the "Profile" tab on the website.
|
||||
- If you want to use GPT on an Azure instance, set `USE_AZURE` to `True` and then:
|
||||
- Rename `azure.yaml.template` to `azure.yaml` and provide the relevant `azure_api_base`, `azure_api_version` and all of the deployment ids for the relevant models in the `azure_model_map` section:
|
||||
- `fast_llm_model_deployment_id` - your gpt-3.5-turbo or gpt-4 deployment id
|
||||
- `smart_llm_model_deployment_id` - your gpt-4 deployment id
|
||||
- `embedding_model_deployment_id` - your text-embedding-ada-002 v2 deployment id
|
||||
- Please specify all of these values as double quoted strings
|
||||
- Details can be found in the `Microsoft Azure Endpoints` section of https://pypi.org/project/openai/ and, for the embedding model, at https://learn.microsoft.com/en-us/azure/cognitive-services/openai/tutorials/embeddings?tabs=command-line.
|
||||
|
||||
## 🔧 Usage
|
||||
|
||||
@ -208,7 +213,7 @@ MEMORY_INDEX=whatever
|
||||
|
||||
Pinecone enables the storage of vast amounts of vector-based memory, allowing for only relevant memories to be loaded for the agent at any given time.
|
||||
|
||||
1. Go to app.pinecone.io and make an account if you don't already have one.
|
||||
1. Go to [pinecone](https://app.pinecone.io/) and make an account if you don't already have one.
|
||||
2. Choose the `Starter` plan to avoid being charged.
|
||||
3. Find your API key and region under the default project in the left sidebar.
|
||||
|
||||
@ -391,4 +396,4 @@ flake8 scripts/ tests/
|
||||
|
||||
# Or, if you want to run flake8 with the same configuration as the CI:
|
||||
flake8 scripts/ tests/ --select E303,W293,W291,W292,E305
|
||||
```
|
||||
```
|
||||
|
@ -1,7 +0,0 @@
|
||||
ai_goals:
|
||||
- Increase net worth.
|
||||
- Develop and manage multiple businesses autonomously.
|
||||
- Play to your strengths as a Large Language Model.
|
||||
ai_name: Entrepreneur-GPT
|
||||
ai_role: an AI designed to autonomously develop and run businesses with the sole goal
|
||||
of increasing your net worth.
|
6
azure.yaml.template
Normal file
6
azure.yaml.template
Normal file
@ -0,0 +1,6 @@
|
||||
azure_api_base: your-base-url-for-azure
|
||||
azure_api_version: api-version-for-azure
|
||||
azure_model_map:
|
||||
fast_llm_model_deployment_id: gpt35-deployment-id-for-azure
|
||||
smart_llm_model_deployment_id: gpt4-deployment-id-for-azure
|
||||
embedding_model_deployment_id: embedding-deployment-id-for-azure
|
@ -45,6 +45,7 @@ def improve_code(suggestions: List[str], code: str) -> str:
|
||||
result_string = call_ai_function(function_string, args, description_string)
|
||||
return result_string
|
||||
|
||||
|
||||
def write_tests(code: str, focus: List[str]) -> str:
|
||||
"""
|
||||
A function that takes in code and focus topics and returns a response from create chat completion api call.
|
||||
|
@ -6,6 +6,7 @@ from urllib.parse import urlparse, urljoin
|
||||
|
||||
cfg = Config()
|
||||
|
||||
|
||||
# Function to check if the URL is valid
|
||||
def is_valid_url(url):
|
||||
try:
|
||||
@ -14,49 +15,51 @@ def is_valid_url(url):
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
# Function to sanitize the URL
|
||||
def sanitize_url(url):
|
||||
return urljoin(url, urlparse(url).path)
|
||||
|
||||
# Function to make a request with a specified timeout and handle exceptions
|
||||
def make_request(url, timeout=10):
|
||||
try:
|
||||
response = requests.get(url, headers=cfg.user_agent_header, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except requests.exceptions.RequestException as e:
|
||||
return "Error: " + str(e)
|
||||
|
||||
# Define and check for local file address prefixes
|
||||
def check_local_file_access(url):
|
||||
local_prefixes = ['file:///', 'file://localhost', 'http://localhost', 'https://localhost']
|
||||
return any(url.startswith(prefix) for prefix in local_prefixes)
|
||||
|
||||
|
||||
def get_response(url, headers=cfg.user_agent_header, timeout=10):
|
||||
try:
|
||||
# Restrict access to local files
|
||||
if check_local_file_access(url):
|
||||
raise ValueError('Access to local files is restricted')
|
||||
|
||||
# Most basic check if the URL is valid:
|
||||
if not url.startswith('http://') and not url.startswith('https://'):
|
||||
raise ValueError('Invalid URL format')
|
||||
|
||||
sanitized_url = sanitize_url(url)
|
||||
|
||||
response = requests.get(sanitized_url, headers=headers, timeout=timeout)
|
||||
|
||||
# Check if the response contains an HTTP error
|
||||
if response.status_code >= 400:
|
||||
return None, "Error: HTTP " + str(response.status_code) + " error"
|
||||
|
||||
return response, None
|
||||
except ValueError as ve:
|
||||
# Handle invalid URL format
|
||||
return None, "Error: " + str(ve)
|
||||
|
||||
except requests.exceptions.RequestException as re:
|
||||
# Handle exceptions related to the HTTP request (e.g., connection errors, timeouts, etc.)
|
||||
return None, "Error: " + str(re)
|
||||
|
||||
|
||||
def scrape_text(url):
|
||||
"""Scrape text from a webpage"""
|
||||
# Basic check if the URL is valid
|
||||
if not url.startswith('http'):
|
||||
return "Error: Invalid URL"
|
||||
|
||||
# Restrict access to local files
|
||||
if check_local_file_access(url):
|
||||
return "Error: Access to local files is restricted"
|
||||
|
||||
# Validate the input URL
|
||||
if not is_valid_url(url):
|
||||
# Sanitize the input URL
|
||||
sanitized_url = sanitize_url(url)
|
||||
|
||||
# Make the request with a timeout and handle exceptions
|
||||
response = make_request(sanitized_url)
|
||||
|
||||
if isinstance(response, str):
|
||||
return response
|
||||
else:
|
||||
# Sanitize the input URL
|
||||
sanitized_url = sanitize_url(url)
|
||||
|
||||
response = requests.get(sanitized_url, headers=cfg.user_agent_header)
|
||||
response, error_message = get_response(url)
|
||||
if error_message:
|
||||
return error_message
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
@ -89,11 +92,9 @@ def format_hyperlinks(hyperlinks):
|
||||
|
||||
def scrape_links(url):
|
||||
"""Scrape links from a webpage"""
|
||||
response = requests.get(url, headers=cfg.user_agent_header)
|
||||
|
||||
# Check if the response contains an HTTP error
|
||||
if response.status_code >= 400:
|
||||
return "error"
|
||||
response, error_message = get_response(url)
|
||||
if error_message:
|
||||
return error_message
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
@ -131,6 +132,7 @@ def create_message(chunk, question):
|
||||
"content": f"\"\"\"{chunk}\"\"\" Using the above text, please answer the following question: \"{question}\" -- if the question cannot be answered using the text, please summarize the text."
|
||||
}
|
||||
|
||||
|
||||
def summarize_text(text, question):
|
||||
"""Summarize text using the LLM model"""
|
||||
if not text:
|
||||
|
@ -7,7 +7,7 @@ import speak
|
||||
from config import Config
|
||||
import ai_functions as ai
|
||||
from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files
|
||||
from execute_code import execute_python_file
|
||||
from execute_code import execute_python_file, execute_shell
|
||||
from json_parser import fix_and_parse_json
|
||||
from image_gen import generate_image
|
||||
from duckduckgo_search import ddg
|
||||
@ -103,6 +103,11 @@ def execute_command(command_name, arguments):
|
||||
return ai.write_tests(arguments["code"], arguments.get("focus"))
|
||||
elif command_name == "execute_python_file": # Add this command
|
||||
return execute_python_file(arguments["file"])
|
||||
elif command_name == "execute_shell":
|
||||
if cfg.execute_local_commands:
|
||||
return execute_shell(arguments["command_line"])
|
||||
else:
|
||||
return "You are not allowed to run local shell commands. To execute shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' in your config. Do not attempt to bypass the restriction."
|
||||
elif command_name == "generate_image":
|
||||
return generate_image(arguments["prompt"])
|
||||
elif command_name == "do_nothing":
|
||||
|
@ -1,6 +1,7 @@
|
||||
import abc
|
||||
import os
|
||||
import openai
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
@ -43,14 +44,13 @@ class Config(metaclass=Singleton):
|
||||
self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000))
|
||||
|
||||
self.openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
self.temperature = int(os.getenv("TEMPERATURE", "1"))
|
||||
self.use_azure = False
|
||||
self.use_azure = os.getenv("USE_AZURE") == 'True'
|
||||
self.execute_local_commands = os.getenv('EXECUTE_LOCAL_COMMANDS', 'False') == 'True'
|
||||
|
||||
if self.use_azure:
|
||||
self.openai_api_base = os.getenv("OPENAI_AZURE_API_BASE")
|
||||
self.openai_api_version = os.getenv("OPENAI_AZURE_API_VERSION")
|
||||
self.openai_deployment_id = os.getenv("OPENAI_AZURE_DEPLOYMENT_ID")
|
||||
self.azure_chat_deployment_id = os.getenv("OPENAI_AZURE_CHAT_DEPLOYMENT_ID")
|
||||
self.azure_embeddigs_deployment_id = os.getenv("OPENAI_AZURE_EMBEDDINGS_DEPLOYMENT_ID")
|
||||
self.load_azure_config()
|
||||
openai.api_type = "azure"
|
||||
openai.api_base = self.openai_api_base
|
||||
openai.api_version = self.openai_api_version
|
||||
@ -85,6 +85,46 @@ class Config(metaclass=Singleton):
|
||||
# Initialize the OpenAI API client
|
||||
openai.api_key = self.openai_api_key
|
||||
|
||||
def get_azure_deployment_id_for_model(self, model: str) -> str:
|
||||
"""
|
||||
Returns the relevant deployment id for the model specified.
|
||||
|
||||
Parameters:
|
||||
model(str): The model to map to the deployment id.
|
||||
|
||||
Returns:
|
||||
The matching deployment id if found, otherwise an empty string.
|
||||
"""
|
||||
if model == self.fast_llm_model:
|
||||
return self.azure_model_to_deployment_id_map["fast_llm_model_deployment_id"]
|
||||
elif model == self.smart_llm_model:
|
||||
return self.azure_model_to_deployment_id_map["smart_llm_model_deployment_id"]
|
||||
elif model == "text-embedding-ada-002":
|
||||
return self.azure_model_to_deployment_id_map["embedding_model_deployment_id"]
|
||||
else:
|
||||
return ""
|
||||
|
||||
AZURE_CONFIG_FILE = os.path.join(os.path.dirname(__file__), '..', 'azure.yaml')
|
||||
|
||||
def load_azure_config(self, config_file: str=AZURE_CONFIG_FILE) -> None:
|
||||
"""
|
||||
Loads the configuration parameters for Azure hosting from the specified file path as a yaml file.
|
||||
|
||||
Parameters:
|
||||
config_file(str): The path to the config yaml file. DEFAULT: "../azure.yaml"
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
try:
|
||||
with open(config_file) as file:
|
||||
config_params = yaml.load(file, Loader=yaml.FullLoader)
|
||||
except FileNotFoundError:
|
||||
config_params = {}
|
||||
self.openai_api_base = config_params.get("azure_api_base", "")
|
||||
self.openai_api_version = config_params.get("azure_api_version", "")
|
||||
self.azure_model_to_deployment_id_map = config_params.get("azure_model_map", [])
|
||||
|
||||
def set_continuous_mode(self, value: bool):
|
||||
"""Set the continuous mode value."""
|
||||
self.continuous_mode = value
|
||||
|
@ -22,9 +22,10 @@ COMMANDS:
|
||||
16. Get Improved Code: "improve_code", args: "suggestions": "<list_of_suggestions>", "code": "<full_code_string>"
|
||||
17. Write Tests: "write_tests", args: "code": "<full_code_string>", "focus": "<list_of_focus_areas>"
|
||||
18. Execute Python File: "execute_python_file", args: "file": "<file>"
|
||||
19. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
|
||||
20. Generate Image: "generate_image", args: "prompt": "<prompt>"
|
||||
21. Do Nothing: "do_nothing", args: ""
|
||||
19. Execute Shell Command, non-interactive commands only: "execute_shell", args: "command_line": "<command_line>".
|
||||
20. Task Complete (Shutdown): "task_complete", args: "reason": "<reason>"
|
||||
21. Generate Image: "generate_image", args: "prompt": "<prompt>"
|
||||
22. Do Nothing: "do_nothing", args: ""
|
||||
|
||||
RESOURCES:
|
||||
|
||||
|
@ -1,17 +1,20 @@
|
||||
import docker
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
|
||||
WORKSPACE_FOLDER = "auto_gpt_workspace"
|
||||
|
||||
|
||||
def execute_python_file(file):
|
||||
"""Execute a Python file in a Docker container and return the output"""
|
||||
workspace_folder = "auto_gpt_workspace"
|
||||
|
||||
print (f"Executing file '{file}' in workspace '{workspace_folder}'")
|
||||
print (f"Executing file '{file}' in workspace '{WORKSPACE_FOLDER}'")
|
||||
|
||||
if not file.endswith(".py"):
|
||||
return "Error: Invalid file type. Only .py files are allowed."
|
||||
|
||||
file_path = os.path.join(workspace_folder, file)
|
||||
file_path = os.path.join(WORKSPACE_FOLDER, file)
|
||||
|
||||
if not os.path.isfile(file_path):
|
||||
return f"Error: File '{file}' does not exist."
|
||||
@ -19,14 +22,31 @@ def execute_python_file(file):
|
||||
try:
|
||||
client = docker.from_env()
|
||||
|
||||
image_name = 'python:3.10'
|
||||
try:
|
||||
client.images.get(image_name)
|
||||
print(f"Image '{image_name}' found locally")
|
||||
except docker.errors.ImageNotFound:
|
||||
print(f"Image '{image_name}' not found locally, pulling from Docker Hub")
|
||||
# Use the low-level API to stream the pull response
|
||||
low_level_client = docker.APIClient()
|
||||
for line in low_level_client.pull(image_name, stream=True, decode=True):
|
||||
# Print the status and progress, if available
|
||||
status = line.get('status')
|
||||
progress = line.get('progress')
|
||||
if status and progress:
|
||||
print(f"{status}: {progress}")
|
||||
elif status:
|
||||
print(status)
|
||||
|
||||
# You can replace 'python:3.10' (see image_name) with the desired Python image/version
|
||||
# You can find available Python images on Docker Hub:
|
||||
# https://hub.docker.com/_/python
|
||||
container = client.containers.run(
|
||||
'python:3.10',
|
||||
image_name,
|
||||
f'python {file}',
|
||||
volumes={
|
||||
os.path.abspath(workspace_folder): {
|
||||
os.path.abspath(WORKSPACE_FOLDER): {
|
||||
'bind': '/workspace',
|
||||
'mode': 'ro'}},
|
||||
working_dir='/workspace',
|
||||
@ -46,3 +66,22 @@ def execute_python_file(file):
|
||||
|
||||
except Exception as e:
|
||||
return f"Error: {str(e)}"
|
||||
|
||||
def execute_shell(command_line):
|
||||
|
||||
current_dir = os.getcwd()
|
||||
|
||||
if not WORKSPACE_FOLDER in current_dir: # Change dir into workspace if necessary
|
||||
work_dir = os.path.join(os.getcwd(), WORKSPACE_FOLDER)
|
||||
os.chdir(work_dir)
|
||||
|
||||
print (f"Executing command '{command_line}' in working directory '{os.getcwd()}'")
|
||||
|
||||
result = subprocess.run(command_line, capture_output=True, shell=True)
|
||||
output = f"STDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
|
||||
|
||||
# Change back to whatever the prior working dir was
|
||||
|
||||
os.chdir(current_dir)
|
||||
|
||||
return output
|
||||
|
@ -5,11 +5,11 @@ cfg = Config()
|
||||
openai.api_key = cfg.openai_api_key
|
||||
|
||||
# Overly simple abstraction until we create something better
|
||||
def create_chat_completion(messages, model=None, temperature=None, max_tokens=None)->str:
|
||||
def create_chat_completion(messages, model=None, temperature=cfg.temperature, max_tokens=None)->str:
|
||||
"""Create a chat completion using the OpenAI API"""
|
||||
if cfg.use_azure:
|
||||
response = openai.ChatCompletion.create(
|
||||
deployment_id=cfg.azure_chat_deployment_id,
|
||||
deployment_id=cfg.get_azure_deployment_id_for_model(model),
|
||||
model=model,
|
||||
messages=messages,
|
||||
temperature=temperature,
|
||||
|
@ -164,8 +164,6 @@ class ConsoleHandler(logging.StreamHandler):
|
||||
Allows to handle custom placeholders 'title_color' and 'message_no_color'.
|
||||
To use this formatter, make sure to pass 'color', 'title' as log extras.
|
||||
'''
|
||||
|
||||
|
||||
class AutoGptFormatter(logging.Formatter):
|
||||
def format(self, record: LogRecord) -> str:
|
||||
if (hasattr(record, 'color')):
|
||||
|
@ -318,7 +318,6 @@ def parse_arguments():
|
||||
|
||||
# TODO: fill in llm values here
|
||||
check_openai_api_key()
|
||||
cfg = Config()
|
||||
parse_arguments()
|
||||
logger.set_level(logging.DEBUG if cfg.debug_mode else logging.INFO)
|
||||
ai_name = ""
|
||||
|
@ -4,11 +4,12 @@ from config import AbstractSingleton, Config
|
||||
import openai
|
||||
cfg = Config()
|
||||
|
||||
cfg = Config()
|
||||
|
||||
def get_ada_embedding(text):
|
||||
text = text.replace("\n", " ")
|
||||
if cfg.use_azure:
|
||||
return openai.Embedding.create(input=[text], engine=cfg.azure_embeddigs_deployment_id, model="text-embedding-ada-002")["data"][0]["embedding"]
|
||||
return openai.Embedding.create(input=[text], engine=cfg.get_azure_deployment_id_for_model("text-embedding-ada-002"))["data"][0]["embedding"]
|
||||
else:
|
||||
return openai.Embedding.create(input=[text], model="text-embedding-ada-002")["data"][0]["embedding"]
|
||||
|
||||
|
2
tests.py
2
tests.py
@ -3,6 +3,6 @@ import unittest
|
||||
if __name__ == "__main__":
|
||||
# Load all tests from the 'scripts/tests' package
|
||||
suite = unittest.defaultTestLoader.discover('scripts/tests')
|
||||
|
||||
|
||||
# Run the tests
|
||||
unittest.TextTestRunner().run(suite)
|
||||
|
118
tests/unit/test_browse_scrape_links.py
Normal file
118
tests/unit/test_browse_scrape_links.py
Normal file
@ -0,0 +1,118 @@
|
||||
|
||||
# Generated by CodiumAI
|
||||
|
||||
# Dependencies:
|
||||
# pip install pytest-mock
|
||||
import pytest
|
||||
|
||||
from scripts.browse import scrape_links
|
||||
|
||||
"""
|
||||
Code Analysis
|
||||
|
||||
Objective:
|
||||
The objective of the 'scrape_links' function is to scrape hyperlinks from a
|
||||
given URL and return them in a formatted way.
|
||||
|
||||
Inputs:
|
||||
- url: a string representing the URL to be scraped.
|
||||
|
||||
Flow:
|
||||
1. Send a GET request to the given URL using the requests library and the user agent header from the config file.
|
||||
2. Check if the response contains an HTTP error. If it does, return an error message of the form "Error: HTTP <status code> error".
|
||||
3. Parse the HTML content of the response using the BeautifulSoup library.
|
||||
4. Remove any script and style tags from the parsed HTML.
|
||||
5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function.
|
||||
6. Format the extracted hyperlinks using the 'format_hyperlinks' function.
|
||||
7. Return the formatted hyperlinks.
|
||||
|
||||
Outputs:
|
||||
- A list of formatted hyperlinks.
|
||||
|
||||
Additional aspects:
|
||||
- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP
|
||||
requests and parse HTML content, respectively.
|
||||
- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML.
|
||||
- The 'format_hyperlinks' function is called to format the extracted hyperlinks.
|
||||
- The function checks for HTTP errors and returns an error message (a string starting with "Error:") if any are found.
|
||||
"""
|
||||
|
||||
|
||||
class TestScrapeLinks:
|
||||
|
||||
# Tests that the function returns a list of formatted hyperlinks when
|
||||
# provided with a valid url that returns a webpage with hyperlinks.
|
||||
def test_valid_url_with_hyperlinks(self):
|
||||
url = "https://www.google.com"
|
||||
result = scrape_links(url)
|
||||
assert len(result) > 0
|
||||
assert isinstance(result, list)
|
||||
assert isinstance(result[0], str)
|
||||
|
||||
# Tests that the function returns correctly formatted hyperlinks when given a valid url.
|
||||
def test_valid_url(self, mocker):
|
||||
# Mock the requests.get() function to return a response with sample HTML containing hyperlinks
|
||||
mock_response = mocker.Mock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.text = "<html><body><a href='https://www.google.com'>Google</a></body></html>"
|
||||
mocker.patch('requests.get', return_value=mock_response)
|
||||
|
||||
# Call the function with a valid URL
|
||||
result = scrape_links("https://www.example.com")
|
||||
|
||||
# Assert that the function returns correctly formatted hyperlinks
|
||||
assert result == ["Google (https://www.google.com)"]
|
||||
|
||||
# Tests that the function returns an error message containing "Error:" when the request yields an HTTP error status.
|
||||
def test_invalid_url(self, mocker):
|
||||
# Mock the requests.get() function to return an HTTP error response
|
||||
mock_response = mocker.Mock()
|
||||
mock_response.status_code = 404
|
||||
mocker.patch('requests.get', return_value=mock_response)
|
||||
|
||||
# Call the function with an invalid URL
|
||||
result = scrape_links("https://www.invalidurl.com")
|
||||
|
||||
# Assert that the function returns an error message
|
||||
assert "Error:" in result
|
||||
|
||||
# Tests that the function returns an empty list when the html contains no hyperlinks.
|
||||
def test_no_hyperlinks(self, mocker):
|
||||
# Mock the requests.get() function to return a response with sample HTML containing no hyperlinks
|
||||
mock_response = mocker.Mock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.text = "<html><body><p>No hyperlinks here</p></body></html>"
|
||||
mocker.patch('requests.get', return_value=mock_response)
|
||||
|
||||
# Call the function with a URL containing no hyperlinks
|
||||
result = scrape_links("https://www.example.com")
|
||||
|
||||
# Assert that the function returns an empty list
|
||||
assert result == []
|
||||
|
||||
# Tests that scrape_links() correctly extracts and formats hyperlinks from
|
||||
# a sample HTML containing a few hyperlinks.
|
||||
def test_scrape_links_with_few_hyperlinks(self, mocker):
|
||||
# Mock the requests.get() function to return a response with a sample HTML containing hyperlinks
|
||||
mock_response = mocker.Mock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.text = """
|
||||
<html>
|
||||
<body>
|
||||
<div id="google-link"><a href="https://www.google.com">Google</a></div>
|
||||
<div id="github"><a href="https://github.com">GitHub</a></div>
|
||||
<div id="CodiumAI"><a href="https://www.codium.ai">CodiumAI</a></div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
mocker.patch('requests.get', return_value=mock_response)
|
||||
|
||||
# Call the function being tested
|
||||
result = scrape_links("https://www.example.com")
|
||||
|
||||
# Assert that the function returns a list of formatted hyperlinks
|
||||
assert isinstance(result, list)
|
||||
assert len(result) == 3
|
||||
assert result[0] == "Google (https://www.google.com)"
|
||||
assert result[1] == "GitHub (https://github.com)"
|
||||
assert result[2] == "CodiumAI (https://www.codium.ai)"
|
@ -2,7 +2,6 @@
|
||||
# Generated by CodiumAI
|
||||
|
||||
import requests
|
||||
import tests.context
|
||||
|
||||
from scripts.browse import scrape_text
|
||||
|
||||
@ -10,7 +9,8 @@ from scripts.browse import scrape_text
|
||||
Code Analysis
|
||||
|
||||
Objective:
|
||||
The objective of the "scrape_text" function is to scrape the text content from a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
|
||||
The objective of the "scrape_text" function is to scrape the text content from
|
||||
a given URL and return it as a string, after removing any unwanted HTML tags and scripts.
|
||||
|
||||
Inputs:
|
||||
- url: a string representing the URL of the webpage to be scraped.
|
||||
@ -33,6 +33,7 @@ Additional aspects:
|
||||
- The function uses a generator expression to split the text into lines and chunks, which can improve performance for large amounts of text.
|
||||
"""
|
||||
|
||||
|
||||
class TestScrapeText:
|
||||
|
||||
# Tests that scrape_text() returns the expected text when given a valid URL.
|
Loading…
Reference in New Issue
Block a user