Source code for searx.result_types._base

# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=too-few-public-methods, missing-module-docstring
"""Basic types for the typification of results.

- :py:obj:`Result` base class
- :py:obj:`LegacyResult` for internal use only

----

.. autoclass:: Result
   :members:

.. autoclass:: LegacyResult
   :members:
"""


from __future__ import annotations

__all__ = ["Result"]

import re
import urllib.parse
import warnings

import msgspec


[docs] class Result(msgspec.Struct, kw_only=True): """Base class of all result types :ref:`result types`.""" url: str | None = None """A link related to this *result*""" template: str = "default.html" """Name of the template used to render the result. By default :origin:`result_templates/default.html <searx/templates/simple/result_templates/default.html>` is used. """ engine: str | None = "" """Name of the engine *this* result comes from. In case of *plugins* a prefix ``plugin:`` is set, in case of *answerer* prefix ``answerer:`` is set. The field is optional and is initialized from the context if necessary. """ parsed_url: urllib.parse.ParseResult | None = None """:py:obj:`urllib.parse.ParseResult` of :py:obj:`Result.url`. The field is optional and is initialized from the context if necessary. """
[docs] def normalize_result_fields(self): """Normalize a result .. - if field ``url`` is set and field ``parse_url`` is unset, init ``parse_url`` from field ``url``. This method can be extended in the inheritance. """ if not self.parsed_url and self.url: self.parsed_url = urllib.parse.urlparse(self.url) # if the result has no scheme, use http as default if not self.parsed_url.scheme: self.parsed_url = self.parsed_url._replace(scheme="http") self.url = self.parsed_url.geturl()
def __post_init__(self): pass def __hash__(self) -> int: """Generates a hash value that uniquely identifies the content of *this* result. The method can be adapted in the inheritance to compare results from different sources. If two result objects are not identical but have the same content, their hash values should also be identical. The hash value is used in contexts, e.g. when checking for equality to identify identical results from different sources (engines). """ return id(self) def __eq__(self, other): """py:obj:`Result` objects are equal if the hash values of the two objects are equal. If needed, its recommended to overwrite "py:obj:`Result.__hash__`.""" return hash(self) == hash(other) # for legacy code where a result is treated as a Python dict def __setitem__(self, field_name, value): return setattr(self, field_name, value) def __getitem__(self, field_name): if field_name not in self.__struct_fields__: raise KeyError(f"{field_name}") return getattr(self, field_name) def __iter__(self): return iter(self.__struct_fields__) def as_dict(self): return {f: getattr(self, f) for f in self.__struct_fields__}
class MainResult(Result): # pylint: disable=missing-class-docstring # open_group and close_group should not manged in the Result class (we should rop it from here!) open_group: bool = False close_group: bool = False title: str = "" """Link title of the result item.""" content: str = "" """Extract or description of the result item""" img_src: str = "" """URL of a image that is displayed in the result item.""" thumbnail: str = "" """URL of a thumbnail that is displayed in the result item."""
[docs] class LegacyResult(dict): """A wrapper around a legacy result item. The SearXNG core uses this class for untyped dictionaries / to be downward compatible. This class is needed until we have implemented an :py:obj:`Result` class for each result type and the old usages in the codebase have been fully migrated. There is only one place where this class is used, in the :py:obj:`searx.results.ResultContainer`. .. attention:: Do not use this class in your own implementations! """ UNSET = object() WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) def as_dict(self): return self def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # Init fields with defaults / compare with defaults of the fields in class Result self.engine = self.get("engine", "") self.template = self.get("template", "default.html") self.url = self.get("url", None) self.parsed_url = self.get("parsed_url", None) self.content = self.get("content", "") self.title = self.get("title", "") # Legacy types that have already been ported to a type .. if "answer" in self: warnings.warn( f"engine {self.engine} is using deprecated `dict` for answers" f" / use a class from searx.result_types.answer", DeprecationWarning, ) self.template = "answer/legacy.html" def __hash__(self) -> int: # type: ignore if "answer" in self: return hash(self["answer"]) if not any(cls in self for cls in ["suggestion", "correction", "infobox", "number_of_results", "engine_data"]): # it is a commun url-result .. return hash(self.url) return id(self) def __eq__(self, other): return hash(self) == hash(other) def __repr__(self) -> str: return f"LegacyResult: {super().__repr__()}" def __getattr__(self, name: str, default=UNSET): if default == self.UNSET and name not in self: raise AttributeError(f"LegacyResult object has no field named: {name}") return self[name] def __setattr__(self, name: str, val): self[name] = val def normalize_result_fields(self): self.title = self.WHITESPACE_REGEX.sub(" ", self.title) if not self.parsed_url and self.url: self.parsed_url = urllib.parse.urlparse(self.url) # if the result has no scheme, use http as default if not self.parsed_url.scheme: self.parsed_url = self.parsed_url._replace(scheme="http") self.url = self.parsed_url.geturl() if self.content: self.content = self.WHITESPACE_REGEX.sub(" ", self.content) if self.content == self.title: # avoid duplicate content between the content and title fields self.content = ""