feat(froge): Improvement in web components (#7068)

- Add `duckduckgo_backend` field to `WebSearchComponent` configuration
- Add `selenium_proxy` to `WebSeleniumComponent` configuration
- Update docs
This commit is contained in:
A.Daee 2024-07-25 12:32:34 +03:30 committed by GitHub
parent 22b6dbbf6a
commit 3b0cd9518d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 7 deletions

View File

@ -105,6 +105,7 @@
## HUGGINGFACE_API_TOKEN - HuggingFace API token (Default: None)
# HUGGINGFACE_API_TOKEN=
### Stable Diffusion (IMAGE_PROVIDER=sdwebui)
## SD_WEBUI_AUTH - Stable Diffusion Web UI username:password pair (Default: None)

View File

@ -155,11 +155,12 @@ Allows agent to search the web. Google credentials aren't required for DuckDuckG
### `WebSearchConfiguration`
| Config variable | Details | Type | Default |
| -------------------------------- | ----------------------------------------------------------------------- | ----- | ------- |
| `google_api_key` | Google API key, *ENV:* `GOOGLE_API_KEY` | `str` | `None` |
| `google_custom_search_engine_id` | Google Custom Search Engine ID, *ENV:* `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` | `str` | `None` |
| `duckduckgo_max_attempts` | Maximum number of attempts to search using DuckDuckGo | `int` | `3` |
| Config variable | Details | Type | Default |
| -------------------------------- | ----------------------------------------------------------------------- | --------------------------- | ------- |
| `google_api_key` | Google API key, *ENV:* `GOOGLE_API_KEY` | `str` | `None` |
| `google_custom_search_engine_id` | Google Custom Search Engine ID, *ENV:* `GOOGLE_CUSTOM_SEARCH_ENGINE_ID` | `str` | `None` |
| `duckduckgo_max_attempts` | Maximum number of attempts to search using DuckDuckGo | `int` | `3` |
| `duckduckgo_backend` | Backend to be used for DDG sdk | `"api" \| "html" \| "lite"` | `"api"` |
### DirectiveProvider
@ -183,6 +184,7 @@ Allows agent to read websites using Selenium.
| `headless` | Run browser in headless mode | `bool` | `True` |
| `user_agent` | User agent used by the browser | `str` | `"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"` |
| `browse_spacy_language_model` | Spacy language model used for chunking text | `str` | `"en_core_web_sm"` |
| `selenium_proxy` | Http proxy to use with Selenium | `str` | `None` |
### DirectiveProvider

View File

@ -1,7 +1,7 @@
import json
import logging
import time
from typing import Iterator, Optional
from typing import Iterator, Literal, Optional
from duckduckgo_search import DDGS
from pydantic import BaseModel, SecretStr
@ -24,6 +24,7 @@ class WebSearchConfiguration(BaseModel):
None, from_env="GOOGLE_CUSTOM_SEARCH_ENGINE_ID", exclude=True
)
duckduckgo_max_attempts: int = 3
duckduckgo_backend: Literal["api", "html", "lite"] = "api"
class WebSearchComponent(
@ -89,7 +90,9 @@ class WebSearchComponent(
if not query:
return json.dumps(search_results)
search_results = DDGS().text(query, max_results=num_results)
search_results = DDGS().text(
query, max_results=num_results, backend=self.config.duckduckgo_backend
)
if search_results:
break

View File

@ -68,6 +68,8 @@ class WebSeleniumConfiguration(BaseModel):
"""User agent used by the browser"""
browse_spacy_language_model: str = "en_core_web_sm"
"""Spacy language model used for chunking text"""
selenium_proxy: Optional[str] = None
"""Http proxy to use with Selenium"""
class WebSeleniumComponent(
@ -301,6 +303,9 @@ class WebSeleniumComponent(
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
if self.config.selenium_proxy:
options.add_argument(f"--proxy-server={self.config.selenium_proxy}")
self._sideload_chrome_extensions(options, self.data_dir / "assets" / "crx")
if (chromium_driver_path := Path("/usr/bin/chromedriver")).exists():