Source code for searx.result_types.code

# SPDX-License-Identifier: AGPL-3.0-or-later
"""Typification of the *code* results.  Results of this type are rendered in
the :origin:`code.html <searx/templates/simple/result_templates/code.html>`
template.  For highlighting the code passages, Pygments_ is used.

.. _Pygments:  https://pygments.org

----

.. autoclass:: Code
   :members:
   :show-inheritance:

"""
# pylint: disable=too-few-public-methods, disable=invalid-name

# Struct fields aren't discovered in Python 3.14
# - https://github.com/searxng/searxng/issues/5284
from __future__ import annotations

__all__ = ["Code"]

import typing as t

from pygments import highlight  # pyright: ignore[reportUnknownVariableType]
from pygments.lexers._mapping import LEXERS  # pyright: ignore[reportMissingTypeStubs]
from pygments.lexers import guess_lexer, get_lexer_by_name, guess_lexer_for_filename
from pygments.util import ClassNotFound
from pygments.formatters import HtmlFormatter  # pylint: disable=no-name-in-module

from ._base import MainResult


# Cache of the (lower-cased) alias names of all lexers known to Pygments.  The
# list starts empty and is filled on the first call of is_valid_language().
_pygments_languages: list[str] = []


def is_valid_language(code_language: str) -> bool:
    """Tell whether ``code_language`` is an alias name of a Pygments lexer.

    The comparison is case-insensitive.  The list of known alias names is
    collected from Pygments' lexer mapping on the first call and reused by all
    subsequent calls.
    """
    if not _pygments_languages:
        # Each value of the LEXERS mapping is a tuple, the item at index 2 is
        # the tuple with the alias names of the lexer.
        _pygments_languages.extend(
            alias_name.lower() for lexer_info in LEXERS.values() for alias_name in lexer_info[2]
        )
    return code_language.lower() in _pygments_languages



[docs]
@t.final
class Code(MainResult, kw_only=True):
    """Result type suitable for displaying code passages."""

    template: str = "code.html"

    repository: str | None = None
    """A link related to a repository related to the *result*."""

    codelines: list[tuple[int, str]] = []
    """A list of two-item tuples where the first item is the line number and
    the second item is the code line."""

    hl_lines: set[int] = set()
    """A set of line numbers to highlight."""

    code_language: str = "<guess>"
    """Pygment's short name of the lexer, e.g. ``text`` for the
    :py:obj:`pygments.lexers.special.TextLexer`.  For a list of available
    languages consult: `Pygments languages`_.  If the language is not in this
    list, a :py:obj:`ValueError` is raised.

    The default is ``<guess>`` which has a special meaning;

    - If :py:obj:`Code.filename` is set, Pygment's factory method
      :py:obj:`pygments.lexers.guess_lexer_for_filename` is used to determine
      the language of the ``codelines``.

    - else Pygment's :py:obj:`pygments.lexers.guess_lexer` factory is used.

    In case the language can't be detected, the fallback is ``text``.

    .. _Pygments languages:  https://pygments.org/languages/
    """

    filename: str | None = None
    """Optional file name, can help to ``<guess>`` the language of the code (in
    case of ambiguous short code examples).  If :py:obj:`Code.title` is not set,
    its default is the filename."""

    strip_new_lines: bool = True
    """Strip leading and trailing newlines for each returned fragment (default:
    ``True``).  Single file might return multiple code fragments."""

    strip_whitespace: bool = False
    """Strip all leading and trailing whitespace for each returned fragment
    (default: ``False``).  Single file might return multiple code fragments.
    Enabling this might break code indentation."""

    def __post_init__(self):
        """Set the default of the title and validate ``code_language``.

        :raises ValueError: if ``code_language`` is neither ``<guess>`` nor a
           lexer alias known to Pygments.
        """
        super().__post_init__()

        if not self.title and self.filename:
            self.title = self.filename

        if self.code_language != "<guess>" and not is_valid_language(self.code_language):
            raise ValueError(f"unknown code_language: {self.code_language}")

    def __hash__(self):
        """The hash value is built from URL and code lines. :py:obj:`Code
        <Result.__eq__>` objects are equal, when the hash values of both objects
        are equal.
        """
        return hash(f"{self.url} {self.codelines}")

    def get_lexer(self):
        """Returns the Pygments lexer used to highlight the ``codelines``.

        If ``code_language`` is not ``<guess>``, the lexer is looked up by this
        name.  Otherwise the lexer is guessed from the filename (if set) and
        the source code, then from the source code alone; if both guesses
        fail, the ``text`` lexer is the fallback.

        The fields ``strip_new_lines`` and ``strip_whitespace`` are passed to
        the lexer as Pygments' ``stripnl`` and ``stripall`` options.
        """
        # The defaults of both fields are identical to Pygments' defaults of
        # the corresponding lexer options.
        lexer_options = {
            "stripnl": self.strip_new_lines,
            "stripall": self.strip_whitespace,
        }

        if self.code_language != "<guess>":
            return get_lexer_by_name(self.code_language, **lexer_options)

        src_code = "\n".join(item[1] for item in self.codelines)
        if self.filename:
            try:
                return guess_lexer_for_filename(self.filename, src_code, **lexer_options)
            except ClassNotFound:
                # no lexer matches the filename, try to guess from the code
                pass
        try:
            return guess_lexer(src_code, **lexer_options)
        except ClassNotFound:
            # language can't be detected, fall through to the text lexer
            pass
        return get_lexer_by_name("text", **lexer_options)

    def HTML(self, **options) -> str:  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
        """Rendered HTML, additional options are accepted, for more details have
        a look at HtmlFormatter_.

        Consecutive line numbers in ``codelines`` form one code block; a gap in
        the line numbers starts a new block.  Each block is highlighted
        separately and the HTML of all blocks is joined by newlines.  Options
        given by the caller take precedence over the defaults set here
        (``linenos``, ``linenostart``, ``cssclass``, ``hl_lines`` and
        ``wrapcode``).

        .. _HtmlFormatter: https://pygments.org/docs/formatters/#HtmlFormatter
        """
        lexer = self.get_lexer()

        line_no: int = 0  # current line number
        code_block_start: int = 0  # line where the current code block starts
        code_block_end: int | None = None  # line where the current code ends
        code_block: list[str] = []  # lines of the current code block
        html_code_blocks: list[str] = []  # HTML representation of all code blocks

        def _render(**kwargs):  # pyright: ignore[reportUnknownParameterType, reportMissingParameterType]
            # Reads code_block and code_block_start of the enclosing scope at
            # call time, i.e. it always renders the *current* code block.
            for k, default in [
                ("linenos", "inline"),
                ("linenostart", code_block_start),
                ("cssclass", "code-highlight"),
                # hl_lines holds line numbers of the source, the formatter
                # gets numbers relative to the first line of the block
                ("hl_lines", [hl - code_block_start + 1 for hl in self.hl_lines]),
            ]:
                kwargs[k] = kwargs.get(k, default)  # pyright: ignore[reportUnknownMemberType]

            # Wrap the code inside <pre> blocks using <code>, as recommended by
            # the HTML5 specification (default is False).  Do we need this?
            kwargs["wrapcode"] = kwargs.get("wrapcode", True)

            html_code_blocks.append(
                highlight(
                    "\n".join(code_block),
                    lexer,
                    HtmlFormatter(**kwargs),  # pyright: ignore[reportUnknownArgumentType]
                )
            )

        for line_no, code_line in self.codelines:
            if code_block_end is None:
                # initial start condition
                code_block_start = line_no

            if code_block_end is not None and code_block_end + 1 != line_no:
                # new code block is detected, render current code block
                _render(**options)  # pyright: ignore[reportUnknownArgumentType]
                # reset conditions for next code block, whose first line is the
                # current code line
                code_block = [code_line]
                code_block_start = line_no
                code_block_end = line_no
                continue

            # add line to the current code block and update last line number
            code_block.append(code_line)
            code_block_end = line_no

        # highlight (last) code block
        _render(**options)  # pyright: ignore[reportUnknownArgumentType]
        return "\n".join(html_code_blocks)
Source code for searx.result_types.code

Table of Contents

Project Links

Navigation