# SPDX-License-Identifier: AGPL-3.0-or-later
"""
DuckDuckGo Instant Answer API
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The `DDG-API <https://duckduckgo.com/api>`__ is no longer documented, but from
reverse engineering we can see that some services (e.g. instant answers) are
still in use by the DDG search engine.
As far as we can say, the *instant answers* API does not support languages, or
at least we could not find out how language support should work.  It seems that
most of the features are based on English terms.
"""
import typing as t
from urllib.parse import urlencode, urlparse, urljoin
from lxml import html
from searx.data import WIKIDATA_UNITS
from searx.utils import extract_text, html_to_text, get_string_replaces_function
from searx.external_urls import (
get_external_url,
get_earth_coordinates_url,
area_to_osm_zoom,
)
from searx.result_types import EngineResults
if t.TYPE_CHECKING:
from searx.extended_types import SXNG_Response
from searx.search.processors import OnlineParams
about = {
"website": "https://duckduckgo.com/",
"wikidata_id": "Q12805",
"official_api_documentation": "https://duckduckgo.com/api",
"use_official_api": True,
"require_api_key": False,
"results": "JSON",
}
URL = "https://api.duckduckgo.com/" + "?{query}&format=json&pretty=0&no_redirect=1&d=1"
WIKIDATA_PREFIX = ["http://www.wikidata.org/entity/", "https://www.wikidata.org/entity/"]
replace_http_by_https = get_string_replaces_function({"http:": "https:"})
def is_broken_text(text: str) -> bool:
    """duckduckgo may return something like ``<a href="xxxx">http://somewhere Related website<a/>``

    The href URL is broken, the "Related website" may contain some HTML.

    The best solution seems to be to ignore these results.
    """
    # A bare URL never contains a space; a space after "http..." indicates the
    # anchor text was fused into the URL by DDG's broken markup.
    return text.startswith("http") and " " in text
def result_to_text(text: str, htmlResult: str) -> str | None:
    """Return a plain-text representation of a DDG result.

    Prefer the text of the first ``<a>`` element found in *htmlResult*;
    fall back to *text*.  Empty or broken (see :py:func:`is_broken_text`)
    results yield ``None``.
    """
    # TODO : remove result ending with "Meaning" or "Category" # pylint: disable=fixme
    dom = html.fromstring(htmlResult)
    anchors = dom.xpath("//a")
    candidate = extract_text(anchors[0]) if anchors else text
    if not candidate or is_broken_text(candidate):
        return None
    return candidate
def request(query: str, params: "OnlineParams") -> None:
    """Assemble the Instant-Answer API request URL for *query*."""
    encoded_query = urlencode({"q": query})
    params["url"] = URL.format(query=encoded_query)
def response(resp: "SXNG_Response") -> EngineResults:
    """Parse the Instant-Answer JSON payload into answers, link results,
    suggestions and (optionally) one infobox result."""
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    results = EngineResults()
    search_res: dict[str, str] = resp.json()

    # search_res.get("Entity") possible values (not exhaustive) :
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * company

    # Accumulators for the infobox built at the end of this function.
    content: str = ""
    heading: str = search_res.get("Heading", "")
    attributes: list[dict[str, str | dict[str, str]]] = []
    urls: list[dict[str, str | bool]] = []
    infobox_id = None
    relatedTopics: list[dict[str, str | list[str]]] = []

    # add answer if there is one
    answer: str = search_res.get("Answer", "")
    if answer:
        answer_type = search_res.get("AnswerType")
        # NOTE(review): ``logger`` is not defined in this module -- presumably
        # injected into the engine's namespace by the searx engine loader;
        # confirm before relying on it in isolation.
        logger.debug("AnswerType='%s' Answer='%s'", answer_type, answer)
        # Skip the "calc" and "ip" answer types; everything else becomes an
        # Answer result (HTML stripped to plain text).
        if isinstance(answer, str) and answer_type not in ["calc", "ip"]:
            results.add(
                results.types.Answer(
                    answer=html_to_text(answer),
                    url=search_res.get("AbstractURL", ""),
                )
            )

    # add infobox
    if "Definition" in search_res:
        content = content + search_res.get("Definition", "")

    if "Abstract" in search_res:
        content = content + search_res.get("Abstract", "")

    # image
    image = search_res.get("Image")
    image = None if image == "" else image
    if image is not None and urlparse(image).netloc == "":
        # relative image path: resolve against the DDG origin
        image = urljoin("https://duckduckgo.com", image)

    # Official website, Wikipedia page
    _result_list: list[dict[str, str]] = search_res.get("Results", [])  # pyright: ignore[reportAssignmentType]
    for ddg_result in _result_list:
        firstURL = ddg_result.get("FirstURL")
        text = ddg_result.get("Text")
        if firstURL is not None and text is not None:
            # collected for the infobox AND emitted as a standalone result
            urls.append({"title": text, "url": firstURL})
            results.add(results.types.LegacyResult({"title": heading, "url": firstURL}))

    # related topics
    _result_list = search_res.get("RelatedTopics", [])  # pyright: ignore[reportAssignmentType]
    for ddg_result in _result_list:
        if "FirstURL" in ddg_result:
            # flat related topic -> one suggestion result
            firstURL = ddg_result.get("FirstURL")
            text = ddg_result.get("Text", "")
            if not is_broken_text(text):
                suggestion = result_to_text(text, ddg_result.get("Result", ""))
                # skip suggestions that merely repeat the page heading
                if suggestion != heading and suggestion is not None:
                    results.add(results.types.LegacyResult({"suggestion": suggestion}))
        elif "Topics" in ddg_result:
            # grouped topics -> collected into the infobox's relatedTopics
            suggestions: list[str] = []
            relatedTopics.append({"name": ddg_result.get("Name", ""), "suggestions": suggestions})
            _topic_results: list[dict[str, str]] = ddg_result.get("Topics", [])  # pyright: ignore[reportAssignmentType]
            for topic_result in _topic_results:
                suggestion = result_to_text(topic_result.get("Text", ""), topic_result.get("Result", ""))
                if suggestion != heading and suggestion is not None:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get("AbstractURL", "")
    if abstractURL != "":
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({"title": search_res.get("AbstractSource", ""), "url": abstractURL, "official": True})
        results.add(results.types.LegacyResult({"url": abstractURL, "title": heading}))

    # definition
    definitionURL = search_res.get("DefinitionURL", "")
    if definitionURL != "":
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
        urls.append({"title": search_res.get("DefinitionSource", ""), "url": definitionURL})

    # to merge with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # attributes
    # some will be converted to urls
    if "Infobox" in search_res:
        infobox: dict[str, t.Any] = search_res.get("Infobox", {})  # pyright: ignore[reportAssignmentType]
        if "content" in infobox:
            osm_zoom = 17  # default OSM zoom, refined below from the "area" attribute
            coordinates = None
            for info in infobox.get("content", {}):
                data_type: str = info.get("data_type", "")
                data_label = info.get("label")
                data_value = info.get("value")

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL ?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url: str | None = get_external_url(data_type, data_value)  # type: ignore
                if external_url is not None:
                    urls.append({"title": data_label, "url": external_url})
                elif data_type in ["instance", "wiki_maps_trigger", "google_play_artist_id"]:
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == "string" and data_label == "Website":
                    # There is already an URL for the website
                    pass
                elif data_type == "area":
                    attributes.append({"label": data_label, "value": area_to_str(data_value), "entity": "P2046"})
                    osm_zoom = area_to_osm_zoom(data_value.get("amount"))
                elif data_type == "coordinates":
                    if data_value.get("globe") == "http://www.wikidata.org/entity/Q2":
                        # coordinate on Earth
                        # get the zoom information from the area
                        coordinates = info
                    else:
                        # coordinate NOT on Earth
                        attributes.append({"label": data_label, "value": data_value, "entity": "P625"})
                elif data_type == "string":
                    attributes.append({"label": data_label, "value": data_value})

            if coordinates:
                # deferred so that osm_zoom from a later "area" attribute is applied
                data_label = coordinates.get("label")
                data_value = coordinates.get("value")
                latitude = data_value.get("latitude")
                longitude = data_value.get("longitude")
                _url: str = get_earth_coordinates_url(latitude, longitude, osm_zoom)  # type: ignore
                urls.append({"title": "OpenStreetMap", "url": _url, "entity": "P625"})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label="article_title" # pylint: disable=fixme
        # With only a single URL and no other infobox material, emit a plain
        # result instead of an infobox.
        if image is None and len(attributes) == 0 and len(urls) == 1 and len(relatedTopics) == 0 and len(content) == 0:
            results.add(results.types.LegacyResult({"url": urls[0]["url"], "title": heading, "content": content}))
        else:
            results.add(
                results.types.LegacyResult(
                    {
                        "infobox": heading,
                        "id": infobox_id,
                        "content": content,
                        "img_src": image,
                        "attributes": attributes,
                        "urls": urls,
                        "relatedTopics": relatedTopics,
                    }
                )
            )
    return results
def unit_to_str(unit: str) -> str:
    """Translate a Wikidata entity URL into its unit symbol.

    When *unit* starts with a known Wikidata entity prefix and the entity is
    listed in ``WIKIDATA_UNITS``, the unit's symbol is returned; in every
    other case *unit* is returned unchanged.
    """
    for wd_prefix in WIKIDATA_PREFIX:
        if not unit.startswith(wd_prefix):
            continue
        entity_id = unit[len(wd_prefix):]
        known_unit = WIKIDATA_UNITS.get(entity_id)
        return unit if known_unit is None else known_unit["symbol"]
    return unit
def area_to_str(area: dict[str, str]) -> str:
    """parse ``{"unit": "https://www.wikidata.org/entity/Q712226", "amount": "+20.99"}``

    Returns ``"<amount> <unit symbol>"`` when the unit is known and the amount
    parses as a number, otherwise falls back to the raw amount and unit URL.
    """
    unit = unit_to_str(area.get("unit", ""))
    if unit:
        try:
            amount = float(area.get("amount", ""))
            return "{} {}".format(amount, unit)
        except ValueError:
            # amount is not numeric -> fall through to the raw representation
            pass
    return "{} {}".format(area.get("amount", ""), area.get("unit", ""))