Update ashboard, dashboard, memory +1 more (+2 ~3)
115
venv/lib/python3.12/site-packages/pypdf/generic/__init__.py
Normal file
@@ -0,0 +1,115 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from ..constants import OutlineFontFlag
from ._base import (
    BooleanObject,
    ByteStringObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    TextStringObject,
    encode_pdfdocencoding,
    is_null_or_none,
)
from ._data_structures import (
    ArrayObject,
    ContentStream,
    DecodedStreamObject,
    Destination,
    DictionaryObject,
    EncodedStreamObject,
    Field,
    StreamObject,
    TreeObject,
    read_object,
)
from ._files import EmbeddedFile
from ._fit import Fit
from ._link import DirectReferenceLink, NamedReferenceLink, ReferenceLink, extract_links
from ._outline import OutlineItem
from ._rectangle import RectangleObject
from ._utils import (
    create_string_object,
    decode_pdfdocencoding,
    hex_to_rgb,
    read_hex_string_from_stream,
    read_string_from_stream,
)
from ._viewerpref import ViewerPreferences

PAGE_FIT = Fit.fit()


__all__ = [
    "PAGE_FIT",
    "ArrayObject",
    "BooleanObject",
    "ByteStringObject",
    "ContentStream",
    "DecodedStreamObject",
    "Destination",
    "DictionaryObject",
    "DirectReferenceLink",
    "EmbeddedFile",
    "EncodedStreamObject",
    "Field",
    "Fit",
    "FloatObject",
    "IndirectObject",
    "NameObject",
    "NamedReferenceLink",
    "NullObject",
    "NumberObject",
    "OutlineFontFlag",
    "OutlineItem",
    "PdfObject",
    "RectangleObject",
    "ReferenceLink",
    "StreamObject",
    "TextStringObject",
    "TreeObject",
    "ViewerPreferences",
    # Utility functions
    "create_string_object",
    "decode_pdfdocencoding",
    "encode_pdfdocencoding",
    "extract_links",
    "hex_to_rgb",
    "is_null_or_none",
    "read_hex_string_from_stream",
    # Data structures core functions
    "read_object",
    "read_string_from_stream",
]
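As a quick orientation for the exports above, here is a minimal sketch (not part of the committed file) of how the generic primitives are typically combined; the dictionary keys and values are illustrative only:

from pypdf.generic import (
    ArrayObject,
    DictionaryObject,
    NameObject,
    NumberObject,
    TextStringObject,
)

# Build a small annotation-like dictionary from the generic primitives.
annotation = DictionaryObject({
    NameObject("/Type"): NameObject("/Annot"),
    NameObject("/Subtype"): NameObject("/Text"),
    NameObject("/Contents"): TextStringObject("Hello"),
    NameObject("/Rect"): ArrayObject(
        [NumberObject(0), NumberObject(0), NumberObject(100), NumberObject(20)]
    ),
})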
@@ -0,0 +1,547 @@
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import IntEnum
|
||||
from typing import Any, Optional, Union, cast
|
||||
|
||||
from .._codecs import fill_from_encoding
|
||||
from .._codecs.core_fontmetrics import CORE_FONT_METRICS
|
||||
from .._font import Font
|
||||
from .._utils import logger_warning
|
||||
from ..constants import AnnotationDictionaryAttributes, BorderStyles, FieldDictionaryAttributes
|
||||
from ..generic import (
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NameObject,
|
||||
NumberObject,
|
||||
RectangleObject,
|
||||
)
|
||||
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
|
||||
|
||||
DEFAULT_FONT_SIZE_IN_MULTILINE = 12
|
||||
|
||||
|
||||
@dataclass
|
||||
class BaseStreamConfig:
|
||||
"""A container representing the basic layout of an appearance stream."""
|
||||
rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0)
|
||||
border_width: int = 1 # The width of the border in points
|
||||
border_style: str = BorderStyles.SOLID
|
||||
|
||||
|
||||
class BaseStreamAppearance(DecodedStreamObject):
|
||||
"""A class representing the very base of an appearance stream, that is, a rectangle and a border."""
|
||||
|
||||
def __init__(self, layout: Optional[BaseStreamConfig] = None) -> None:
|
||||
"""
|
||||
Takes the appearance stream layout as an argument.
|
||||
|
||||
Args:
|
||||
layout: The basic layout parameters.
|
||||
"""
|
||||
super().__init__()
|
||||
self._layout = layout or BaseStreamConfig()
|
||||
self[NameObject("/Type")] = NameObject("/XObject")
|
||||
self[NameObject("/Subtype")] = NameObject("/Form")
|
||||
self[NameObject("/BBox")] = RectangleObject(self._layout.rectangle)
|
||||
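For orientation (a sketch under the definitions above, not part of the committed file): BaseStreamConfig only bundles the bounding box and border parameters, and BaseStreamAppearance turns them into a Form XObject dictionary.

# Hypothetical values; any rectangle/border combination works the same way.
layout = BaseStreamConfig(rectangle=(0, 0, 120, 20), border_width=1)
appearance = BaseStreamAppearance(layout)
assert appearance["/Type"] == "/XObject"
assert appearance["/Subtype"] == "/Form"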
|
||||
|
||||
class TextAlignment(IntEnum):
|
||||
"""Defines the alignment options for text within a form field's appearance stream."""
|
||||
|
||||
LEFT = 0
|
||||
CENTER = 1
|
||||
RIGHT = 2
|
||||
|
||||
|
||||
class TextStreamAppearance(BaseStreamAppearance):
|
||||
"""
|
||||
A class representing the appearance stream for a text-based form field.
|
||||
|
||||
This class generates the content stream (the `ap_stream_data`) that dictates
|
||||
how text is rendered within a form field's bounding box. It handles properties
|
||||
like font, font size, color, multiline text, and text selection highlighting.
|
||||
"""
|
||||
|
||||
def _scale_text(
|
||||
self,
|
||||
font: Font,
|
||||
font_size: float,
|
||||
leading_factor: float,
|
||||
field_width: float,
|
||||
field_height: float,
|
||||
text: str,
|
||||
min_font_size: float,
|
||||
font_size_step: float = 0.2
|
||||
) -> tuple[list[tuple[float, str]], float]:
|
||||
"""
|
||||
Takes a piece of text and scales it so that it fits within field_width and
field_height for the given font and font_size, wrapping text where necessary.
|
||||
|
||||
Args:
|
||||
font: The font to be used.
|
||||
font_size: The font size in points.
|
||||
leading_factor: The line distance.
|
||||
field_width: The width of the field in which to fit the text.
|
||||
field_height: The height of the field in which to fit the text.
|
||||
text: The text to fit within the field.
|
||||
min_font_size: The minimum font size at which to scale the text.
|
||||
font_size_step: The amount by which to decrement font size per step while scaling.
|
||||
|
||||
Returns:
|
||||
A tuple (lines, font_size): lines is a list of (line_width, line_text) tuples,
and font_size is the size at which these lines fit the field.
|
||||
"""
|
||||
orig_text = text
|
||||
paragraphs = text.replace("\n", "\r").split("\r")
|
||||
wrapped_lines = []
|
||||
current_line_words: list[str] = []
|
||||
current_line_width: float = 0
|
||||
space_width = font.space_width * font_size / 1000
|
||||
for paragraph in paragraphs:
|
||||
if not paragraph.strip():
|
||||
wrapped_lines.append((0.0, ""))
|
||||
continue
|
||||
words = paragraph.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
word_width = font.text_width(word) * font_size / 1000
|
||||
test_width = current_line_width + word_width + (space_width if i else 0)
|
||||
if test_width > field_width and current_line_words:
|
||||
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||
current_line_words = [word]
|
||||
current_line_width = word_width
|
||||
elif not current_line_words and word_width > field_width:
|
||||
wrapped_lines.append((word_width, word))
|
||||
current_line_words = []
|
||||
current_line_width = 0
|
||||
else:
|
||||
if current_line_words:
|
||||
current_line_width += space_width
|
||||
current_line_words.append(word)
|
||||
current_line_width += word_width
|
||||
if current_line_words:
|
||||
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||
current_line_words = []
|
||||
current_line_width = 0
|
||||
# Estimate total height.
|
||||
estimated_total_height = font_size + (len(wrapped_lines) - 1) * leading_factor * font_size
|
||||
if estimated_total_height > field_height:
|
||||
# Text overflows height; Retry with smaller font size.
|
||||
new_font_size = font_size - font_size_step
|
||||
if new_font_size >= min_font_size:
|
||||
return self._scale_text(
|
||||
font,
|
||||
new_font_size,
|
||||
leading_factor,
|
||||
field_width,
|
||||
field_height,
|
||||
orig_text,
|
||||
min_font_size,
|
||||
font_size_step
|
||||
)
|
||||
return wrapped_lines, round(font_size, 1)
|
||||
|
||||
def _generate_appearance_stream_data(
|
||||
self,
|
||||
text: str,
|
||||
selection: Union[list[str], None],
|
||||
font: Font,
|
||||
font_glyph_byte_map: Optional[dict[str, bytes]] = None,
|
||||
font_name: str = "/Helv",
|
||||
font_size: float = 0.0,
|
||||
font_color: str = "0 g",
|
||||
is_multiline: bool = False,
|
||||
alignment: TextAlignment = TextAlignment.LEFT,
|
||||
is_comb: bool = False,
|
||||
max_length: Optional[int] = None
|
||||
) -> bytes:
|
||||
"""
|
||||
Generates the raw bytes of the PDF appearance stream for a text field.
|
||||
|
||||
This private method assembles the PDF content stream operators to draw
|
||||
the provided text within the specified rectangle. It handles text positioning,
|
||||
font application, color, and special formatting like selected text.
|
||||
|
||||
Args:
|
||||
text: The text to be rendered in the form field.
|
||||
selection: An optional list of strings that should be highlighted as selected.
|
||||
font: The font to use.
|
||||
font_glyph_byte_map: An optional dictionary mapping characters to their
|
||||
byte representation for glyph encoding.
|
||||
font_name: The name of the font resource to use (e.g., "/Helv").
|
||||
font_size: The font size. If 0, it is automatically calculated
|
||||
based on whether the field is multiline or not.
|
||||
font_color: The color to apply to the font, represented as a PDF
|
||||
graphics state string (e.g., "0 g" for black).
|
||||
is_multiline: A boolean indicating if the text field is multiline.
|
||||
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
|
||||
is_comb: Boolean that designates fixed-length fields, where every character
|
||||
fills one "cell", such as in a postcode.
|
||||
max_length: Used if is_comb is set. The maximum number of characters for a fixed-
|
||||
length field.
|
||||
|
||||
Returns:
|
||||
A byte string containing the PDF content stream data.
|
||||
|
||||
"""
|
||||
rectangle = self._layout.rectangle
|
||||
font_glyph_byte_map = font_glyph_byte_map or {}
|
||||
if isinstance(rectangle, tuple):
|
||||
rectangle = RectangleObject(rectangle)
|
||||
leading_factor = (font.font_descriptor.bbox[3] - font.font_descriptor.bbox[1]) / 1000.0
|
||||
|
||||
# Set margins based on border width and style, but never less than 1 point
|
||||
factor = 2 if self._layout.border_style in {"/B", "/I"} else 1
|
||||
margin = max(self._layout.border_width * factor, 1)
|
||||
field_height = rectangle.height - 2 * margin
|
||||
field_width = rectangle.width - 4 * margin
|
||||
|
||||
# If font_size is 0, apply the logic for multiline or large-as-possible font
|
||||
if font_size == 0:
|
||||
min_font_size = 4.0  # The minimum font size
|
||||
if selection: # Don't wrap text when dealing with a /Ch field, in order to prevent problems
|
||||
is_multiline = False # with matching "selection" with "line" later on.
|
||||
if is_multiline:
|
||||
font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
|
||||
lines, font_size = self._scale_text(
|
||||
font,
|
||||
font_size,
|
||||
leading_factor,
|
||||
field_width,
|
||||
field_height,
|
||||
text,
|
||||
min_font_size
|
||||
)
|
||||
else:
|
||||
max_vertical_size = field_height / leading_factor
|
||||
text_width_unscaled = font.text_width(text) / 1000
|
||||
max_horizontal_size = field_width / (text_width_unscaled or 1)
|
||||
font_size = round(max(min(max_vertical_size, max_horizontal_size), min_font_size), 1)
|
||||
lines = [(text_width_unscaled * font_size, text)]
|
||||
elif is_comb:
|
||||
if max_length and len(text) > max_length:
|
||||
logger_warning(
|
||||
f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
|
||||
__name__
|
||||
)
|
||||
# We act as if each character is one line, because we draw it separately later on
|
||||
lines = [(
|
||||
font.text_width(char) * font_size / 1000,
|
||||
char
|
||||
) for index, char in enumerate(text) if index < (max_length or len(text))]
|
||||
else:
|
||||
lines = [(
|
||||
font.text_width(line) * font_size / 1000,
|
||||
line
|
||||
) for line in text.replace("\n", "\r").split("\r")]
|
||||
|
||||
# Set the vertical offset
|
||||
if is_multiline:
|
||||
y_offset = rectangle.height + margin - font.font_descriptor.bbox[3] * font_size / 1000.0
|
||||
else:
|
||||
y_offset = margin + ((field_height - font.font_descriptor.ascent * font_size / 1000) / 2)
|
||||
default_appearance = f"{font_name} {font_size} Tf {font_color}"
|
||||
|
||||
ap_stream = (
|
||||
f"q\n/Tx BMC \nq\n{2 * margin} {margin} {field_width} {field_height} "
|
||||
f"re\nW\nBT\n{default_appearance}\n"
|
||||
).encode()
|
||||
current_x_pos: float = 0 # Initial virtual position within the text object.
|
||||
|
||||
for line_number, (line_width, line) in enumerate(lines):
|
||||
if selection and line in selection:
|
||||
# Might be improved, but cannot find how to get fill working => replaced with lined box
|
||||
ap_stream += (
|
||||
f"1 {y_offset - (line_number * font_size * leading_factor) - 1} "
|
||||
f"{rectangle.width - 2} {font_size + 2} re\n"
|
||||
f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
|
||||
).encode()
|
||||
|
||||
# Calculate the desired absolute starting X for the current line
|
||||
desired_abs_x_start: float = 0
|
||||
if is_comb and max_length:
|
||||
# Calculate the width of a cell for one character
|
||||
cell_width = rectangle.width / max_length
|
||||
# Space from the left edge of the cell to the character's baseline start
|
||||
# line_width here is the *actual* character width in points for the single character 'line'
|
||||
centering_offset_in_cell = (cell_width - line_width) / 2
|
||||
# Absolute start X = (Cell Index, i.e., line_number * Cell Width) + Centering Offset
|
||||
desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
|
||||
elif alignment == TextAlignment.RIGHT:
|
||||
desired_abs_x_start = rectangle.width - margin * 2 - line_width
|
||||
elif alignment == TextAlignment.CENTER:
|
||||
desired_abs_x_start = (rectangle.width - line_width) / 2
|
||||
else: # Left aligned; default
|
||||
desired_abs_x_start = margin * 2
|
||||
# Calculate x_rel_offset: how much to move from the current_x_pos
|
||||
# to reach the desired_abs_x_start.
|
||||
x_rel_offset = desired_abs_x_start - current_x_pos
|
||||
|
||||
# Y-offset:
|
||||
y_rel_offset: float = 0
|
||||
if line_number == 0:
|
||||
y_rel_offset = y_offset # Initial vertical position
|
||||
elif is_comb:
|
||||
y_rel_offset = 0.0 # DO NOT move vertically for subsequent characters
|
||||
else:
|
||||
y_rel_offset = -font_size * leading_factor  # Move down by line height
|
||||
|
||||
# Td is a relative translation (Tx and Ty).
|
||||
# It updates the current text position.
|
||||
ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode()
|
||||
# Update current_x_pos based on the Td operation for the next iteration.
|
||||
# This is the X position where the *current line* will start.
|
||||
current_x_pos = desired_abs_x_start
|
||||
|
||||
encoded_line: list[bytes] = [
|
||||
font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
|
||||
]
|
||||
if any(len(c) >= 2 for c in encoded_line):
|
||||
ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n"
|
||||
else:
|
||||
ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n"
|
||||
ap_stream += b"ET\nQ\nEMC\nQ\n"
|
||||
return ap_stream
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
layout: Optional[BaseStreamConfig] = None,
|
||||
text: str = "",
|
||||
selection: Optional[list[str]] = None,
|
||||
font_resource: Optional[DictionaryObject] = None,
|
||||
font_name: str = "/Helv",
|
||||
font_size: float = 0.0,
|
||||
font_color: str = "0 g",
|
||||
is_multiline: bool = False,
|
||||
alignment: TextAlignment = TextAlignment.LEFT,
|
||||
is_comb: bool = False,
|
||||
max_length: Optional[int] = None
|
||||
) -> None:
|
||||
"""
|
||||
Initializes a TextStreamAppearance object.
|
||||
|
||||
This constructor creates a new PDF stream object configured as an XObject
|
||||
of subtype Form. It uses the `_appearance_stream_data` method to generate
|
||||
the content for the stream.
|
||||
|
||||
Args:
|
||||
layout: The basic layout parameters.
|
||||
text: The text to be rendered in the form field.
|
||||
selection: An optional list of strings that should be highlighted as selected.
|
||||
font_resource: An optional variable that represents a PDF font dictionary.
|
||||
font_name: The name of the font resource, e.g., "/Helv".
|
||||
font_size: The font size. If 0, it's auto-calculated.
|
||||
font_color: The font color string.
|
||||
is_multiline: A boolean indicating if the text field is multiline.
|
||||
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
|
||||
is_comb: Boolean that designates fixed-length fields, where every character
|
||||
fills one "cell", such as in a postcode.
|
||||
max_length: Used if is_comb is set. The maximum number of characters for a fixed-
|
||||
length field.
|
||||
|
||||
"""
|
||||
super().__init__(layout)
|
||||
|
||||
# If a font resource was added, get the font character map
|
||||
if font_resource:
|
||||
font_resource = cast(DictionaryObject, font_resource.get_object())
|
||||
font = Font.from_font_resource(font_resource)
|
||||
else:
|
||||
logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
|
||||
font_name = "/Helv"
|
||||
font_resource = DictionaryObject({
|
||||
NameObject("/Subtype"): NameObject("/Type1"),
|
||||
NameObject("/Name"): NameObject("/Helv"),
|
||||
NameObject("/Type"): NameObject("/Font"),
|
||||
NameObject("/BaseFont"): NameObject("/Helvetica"),
|
||||
NameObject("/Encoding"): NameObject("/WinAnsiEncoding")
|
||||
})
|
||||
font_descriptor = CORE_FONT_METRICS["Helvetica"]
|
||||
font_descriptor.character_widths["default"] = 2 * font_descriptor.character_widths[" "]
|
||||
font = Font(
|
||||
name="Helvetica",
|
||||
character_map={},
|
||||
encoding=dict(zip(range(256), fill_from_encoding("cp1252"))), # WinAnsiEncoding
|
||||
sub_type="Type1",
|
||||
font_descriptor=font_descriptor,
character_widths=font_descriptor.character_widths
|
||||
)
|
||||
|
||||
font_glyph_byte_map: dict[str, bytes]
|
||||
if isinstance(font.encoding, str):
|
||||
font_glyph_byte_map = {
|
||||
v: k.encode(font.encoding) for k, v in font.character_map.items()
|
||||
}
|
||||
else:
|
||||
font_glyph_byte_map = {v: bytes((k,)) for k, v in font.encoding.items()}
|
||||
font_encoding_rev = {v: bytes((k,)) for k, v in font.encoding.items()}
|
||||
for key, value in font.character_map.items():
|
||||
font_glyph_byte_map[value] = font_encoding_rev.get(key, key)
|
||||
|
||||
ap_stream_data = self._generate_appearance_stream_data(
|
||||
text,
|
||||
selection,
|
||||
font,
|
||||
font_glyph_byte_map,
|
||||
font_name=font_name,
|
||||
font_size=font_size,
|
||||
font_color=font_color,
|
||||
is_multiline=is_multiline,
|
||||
alignment=alignment,
|
||||
is_comb=is_comb,
|
||||
max_length=max_length
|
||||
)
|
||||
|
||||
self.set_data(ByteStringObject(ap_stream_data))
|
||||
self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
|
||||
# Update Resources with font information
|
||||
self[NameObject("/Resources")] = DictionaryObject({
|
||||
NameObject("/Font"): DictionaryObject({
|
||||
NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
|
||||
})
|
||||
})
|
||||
|
||||
@classmethod
|
||||
def from_text_annotation(
|
||||
cls,
|
||||
acro_form: DictionaryObject, # _root_object[CatalogDictionary.ACRO_FORM]
|
||||
field: DictionaryObject,
|
||||
annotation: DictionaryObject,
|
||||
user_font_name: str = "",
|
||||
user_font_size: float = -1,
|
||||
) -> "TextStreamAppearance":
|
||||
"""
|
||||
Creates a TextStreamAppearance object from a text field annotation.
|
||||
|
||||
This class method is a factory for creating a `TextStreamAppearance`
|
||||
instance by extracting all necessary information (bounding box, font,
|
||||
text content, etc.) from the PDF field and annotation dictionaries.
|
||||
It respects inheritance for properties like default appearance (`/DA`).
|
||||
|
||||
Args:
|
||||
acro_form: The root AcroForm dictionary from the PDF catalog.
|
||||
field: The field dictionary object.
|
||||
annotation: The widget annotation dictionary object associated with the field.
|
||||
user_font_name: An optional user-provided font name to override the
|
||||
default. Defaults to an empty string.
|
||||
user_font_size: An optional user-provided font size to override the
|
||||
default. A value of -1 indicates no override.
|
||||
|
||||
Returns:
|
||||
A new `TextStreamAppearance` instance configured for the given field.
|
||||
|
||||
"""
|
||||
# Calculate rectangle dimensions
|
||||
_rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
|
||||
rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))
|
||||
|
||||
# Get default appearance dictionary from annotation
|
||||
default_appearance = annotation.get_inherited(
|
||||
AnnotationDictionaryAttributes.DA,
|
||||
acro_form.get(AnnotationDictionaryAttributes.DA, None),
|
||||
)
|
||||
if not default_appearance:
|
||||
# Create a default appearance if none was found in the annotation
|
||||
default_appearance = TextStringObject("/Helv 0 Tf 0 g")
|
||||
else:
|
||||
default_appearance = default_appearance.get_object()
|
||||
|
||||
# Derive font name, size and color from the default appearance. Also set
|
||||
# user-provided font name and font size in the default appearance, if given.
|
||||
# For a font name, this presumes that we can find an associated font resource
|
||||
# dictionary. Uses the variable font_properties as an intermediate.
|
||||
# As per the PDF spec:
|
||||
# "At a minimum, the string [that is, default_appearance] shall include a Tf (text
|
||||
# font) operator along with its two operands, font and size" (Section 12.7.4.3
|
||||
# "Variable text" of the PDF 2.0 specification).
|
||||
font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
|
||||
font_name = font_properties.pop(font_properties.index("Tf") - 2)
|
||||
font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
|
||||
font_properties.remove("Tf")
|
||||
font_color = " ".join(font_properties)
|
||||
# Determine the font name to use, prioritizing the user's input
|
||||
if user_font_name:
|
||||
font_name = user_font_name
|
||||
# Determine the font size to use, prioritizing the user's input
|
||||
if user_font_size > 0:
|
||||
font_size = user_font_size
|
||||
|
||||
# Try to find a resource dictionary for the font
|
||||
document_resources: Any = cast(
|
||||
DictionaryObject,
|
||||
cast(
|
||||
DictionaryObject,
|
||||
annotation.get_inherited(
|
||||
"/DR",
|
||||
acro_form.get("/DR", DictionaryObject()),
|
||||
),
|
||||
).get_object(),
|
||||
)
|
||||
document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
|
||||
# CORE_FONT_METRICS is the dict with Standard font metrics
|
||||
if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS:
|
||||
# ...or AcroForm dictionary
|
||||
document_resources = cast(
|
||||
dict[Any, Any],
|
||||
acro_form.get("/DR", {}),
|
||||
)
|
||||
document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
|
||||
font_resource = document_font_resources.get(font_name, None)
|
||||
if not is_null_or_none(font_resource):
|
||||
font_resource = cast(DictionaryObject, font_resource.get_object())
|
||||
|
||||
# Retrieve field text and selected values
|
||||
field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
|
||||
if (
|
||||
field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
|
||||
field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
|
||||
):
|
||||
text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
|
||||
selection = field.get("/V", [])
|
||||
if not isinstance(selection, list):
|
||||
selection = [selection]
|
||||
else: # /Tx
|
||||
text = field.get("/V", "")
|
||||
selection = []
|
||||
|
||||
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
|
||||
text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
|
||||
|
||||
# Retrieve formatting information
|
||||
is_comb = False
|
||||
max_length = None
|
||||
if field_flags & FieldDictionaryAttributes.FfBits.Comb:
|
||||
is_comb = True
|
||||
max_length = annotation.get("/MaxLen")
|
||||
is_multiline = False
|
||||
if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
|
||||
is_multiline = True
|
||||
alignment = field.get("/Q", TextAlignment.LEFT)
|
||||
border_width = 1
|
||||
border_style = BorderStyles.SOLID
|
||||
if "/BS" in field:
|
||||
border_width = cast(DictionaryObject, field["/BS"]).get("/W", border_width)
|
||||
border_style = cast(DictionaryObject, field["/BS"]).get("/S", border_style)
|
||||
|
||||
# Create the TextStreamAppearance instance
|
||||
layout = BaseStreamConfig(rectangle=rectangle, border_width=border_width, border_style=border_style)
|
||||
new_appearance_stream = cls(
|
||||
layout,
|
||||
text,
|
||||
selection,
|
||||
font_resource,
|
||||
font_name=font_name,
|
||||
font_size=font_size,
|
||||
font_color=font_color,
|
||||
is_multiline=is_multiline,
|
||||
alignment=alignment,
|
||||
is_comb=is_comb,
|
||||
max_length=max_length
|
||||
)
|
||||
if AnnotationDictionaryAttributes.AP in annotation:
|
||||
for key, value in (
|
||||
cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
|
||||
):
|
||||
if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
|
||||
new_appearance_stream[key] = value
|
||||
|
||||
return new_appearance_stream
|
||||
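A hedged usage sketch of the factory above (assuming a document whose first page carries a text-field widget annotation merged with its field dictionary; the file name is a placeholder, and the import location of TextStreamAppearance is not shown in this diff):

from pypdf import PdfReader
from pypdf.constants import CatalogDictionary

reader = PdfReader("form.pdf")  # hypothetical input
acro_form = reader.trailer["/Root"][CatalogDictionary.ACRO_FORM].get_object()
annotation = reader.pages[0]["/Annots"][0].get_object()
# For a merged field/widget, the same dictionary serves as field and annotation.
appearance = TextStreamAppearance.from_text_annotation(acro_form, annotation, annotation)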
937
venv/lib/python3.12/site-packages/pypdf/generic/_base.py
Normal file
@@ -0,0 +1,937 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
import binascii
|
||||
import codecs
|
||||
import hashlib
|
||||
import re
|
||||
import sys
|
||||
from binascii import unhexlify
|
||||
from collections.abc import Sequence
|
||||
from math import log10
|
||||
from struct import iter_unpack
|
||||
from typing import Any, Callable, ClassVar, Optional, Union, cast
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
from typing import TypeGuard
|
||||
else:
|
||||
from typing_extensions import TypeGuard # PEP 647
|
||||
|
||||
from .._codecs import _pdfdoc_encoding_rev
|
||||
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
|
||||
from .._utils import (
|
||||
StreamType,
|
||||
classproperty,
|
||||
deprecation_no_replacement,
|
||||
deprecation_with_replacement,
|
||||
logger_warning,
|
||||
read_non_whitespace,
|
||||
read_until_regex,
|
||||
)
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
|
||||
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
|
||||
class PdfObject(PdfObjectProtocol):
|
||||
# function for calculating a hash value
|
||||
hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
|
||||
indirect_reference: Optional["IndirectObject"]
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__.__name__} does not implement .hash_bin() so far"
|
||||
)
|
||||
|
||||
def hash_value_data(self) -> bytes:
|
||||
return f"{self}".encode()
|
||||
|
||||
def hash_value(self) -> bytes:
|
||||
return (
|
||||
f"{self.__class__.__name__}:"
|
||||
f"{self.hash_func(self.hash_value_data()).hexdigest()}"
|
||||
).encode()
|
||||
|
||||
def replicate(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
) -> "PdfObject":
|
||||
"""
|
||||
Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
|
||||
without ensuring links. This is used in clone_document_from_root with incremental = True.
|
||||
|
||||
Args:
|
||||
pdf_dest: Target to clone to.
|
||||
|
||||
Returns:
|
||||
The cloned PdfObject
|
||||
|
||||
"""
|
||||
return self.clone(pdf_dest)
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "PdfObject":
|
||||
"""
|
||||
Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).
|
||||
|
||||
By default, this method will call ``_reference_clone`` (see ``_reference``).
|
||||
|
||||
|
||||
Args:
|
||||
pdf_dest: Target to clone to.
|
||||
force_duplicate: By default, if the object has already been cloned and referenced,
|
||||
the copy will be returned; when ``True``, a new copy will be created.
|
||||
(Default value = ``False``)
|
||||
ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
|
||||
during cloning (applies to children duplication as well). If fields are to be
|
||||
considered for a limited number of levels, you have to add it as integer, for
|
||||
example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
|
||||
level only but ``"/TOTO"`` on all levels.
|
||||
|
||||
Returns:
|
||||
The cloned PdfObject
|
||||
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__.__name__} does not implement .clone so far"
|
||||
)
|
||||
|
||||
def _reference_clone(
|
||||
self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
|
||||
) -> PdfObjectProtocol:
|
||||
"""
|
||||
Reference the object within the _objects of pdf_dest only if the
indirect_reference attribute exists (which means the object was
already identified in xref/xobjstm). If the object has already been
referenced, do nothing.
|
||||
|
||||
Args:
|
||||
clone:
|
||||
pdf_dest:
|
||||
|
||||
Returns:
|
||||
The clone
|
||||
|
||||
"""
|
||||
try:
|
||||
if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
|
||||
return clone
|
||||
except Exception:
|
||||
pass
|
||||
# if hasattr(clone, "indirect_reference"):
|
||||
try:
|
||||
ind = self.indirect_reference
|
||||
except AttributeError:
|
||||
return clone
|
||||
if (
|
||||
pdf_dest.incremental
|
||||
and ind is not None
|
||||
and ind.pdf == pdf_dest._reader
|
||||
and ind.idnum <= len(pdf_dest._objects)
|
||||
):
|
||||
i = ind.idnum
|
||||
else:
|
||||
i = len(pdf_dest._objects) + 1
|
||||
if ind is not None:
|
||||
if id(ind.pdf) not in pdf_dest._id_translated:
|
||||
pdf_dest._id_translated[id(ind.pdf)] = {}
|
||||
pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf # type: ignore[index]
|
||||
if (
|
||||
not force_duplicate
|
||||
and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
|
||||
):
|
||||
obj = pdf_dest.get_object(
|
||||
pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
|
||||
)
|
||||
assert obj is not None
|
||||
return obj
|
||||
pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
|
||||
try:
|
||||
pdf_dest._objects[i - 1] = clone
|
||||
except IndexError:
|
||||
pdf_dest._objects.append(clone)
|
||||
i = len(pdf_dest._objects)
|
||||
clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
|
||||
return clone
|
||||
|
||||
def get_object(self) -> Optional["PdfObject"]:
|
||||
"""Resolve indirect references."""
|
||||
return self
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class NullObject(PdfObject):
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "NullObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"NullObject", self._reference_clone(NullObject(), pdf_dest, force_duplicate)
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__,))
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(b"null")
|
||||
|
||||
@staticmethod
|
||||
def read_from_stream(stream: StreamType) -> "NullObject":
|
||||
nulltxt = stream.read(4)
|
||||
if nulltxt != b"null":
|
||||
raise PdfReadError("Could not read Null object")
|
||||
return NullObject()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "NullObject"
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
return isinstance(other, NullObject)
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return self.hash_bin()
|
||||
|
||||
|
||||
class BooleanObject(PdfObject):
|
||||
def __init__(self, value: Any) -> None:
|
||||
self.value = value
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "BooleanObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"BooleanObject",
|
||||
self._reference_clone(BooleanObject(self.value), pdf_dest, force_duplicate),
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self.value))
|
||||
|
||||
def __eq__(self, o: object, /) -> bool:
|
||||
if isinstance(o, BooleanObject):
|
||||
return self.value == o.value
|
||||
if isinstance(o, bool):
|
||||
return self.value == o
|
||||
return False
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return self.hash_bin()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "True" if self.value else "False"
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
if self.value:
|
||||
stream.write(b"true")
|
||||
else:
|
||||
stream.write(b"false")
|
||||
|
||||
@staticmethod
|
||||
def read_from_stream(stream: StreamType) -> "BooleanObject":
|
||||
word = stream.read(4)
|
||||
if word == b"true":
|
||||
return BooleanObject(True)
|
||||
if word == b"fals":
|
||||
stream.read(1)
|
||||
return BooleanObject(False)
|
||||
raise PdfReadError("Could not read Boolean object")
|
||||
|
||||
|
||||
class IndirectObject(PdfObject):
|
||||
def __init__(self, idnum: int, generation: int, pdf: Any) -> None: # PdfReader
|
||||
self.idnum = idnum
|
||||
self.generation = generation
|
||||
self.pdf = pdf
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return hash((self.idnum, self.generation, id(self.pdf)))
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))
|
||||
|
||||
def replicate(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
) -> "PdfObject":
|
||||
return IndirectObject(self.idnum, self.generation, pdf_dest)
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "IndirectObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
if self.pdf == pdf_dest and not force_duplicate:
|
||||
# Already duplicated and no extra duplication required
|
||||
return self
|
||||
if id(self.pdf) not in pdf_dest._id_translated:
|
||||
pdf_dest._id_translated[id(self.pdf)] = {}
|
||||
pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf # type: ignore[index]
|
||||
|
||||
if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
|
||||
dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
|
||||
if force_duplicate:
|
||||
assert dup is not None
|
||||
assert dup.indirect_reference is not None
|
||||
idref = dup.indirect_reference
|
||||
return IndirectObject(idref.idnum, idref.generation, idref.pdf)
|
||||
else:
|
||||
obj = self.get_object()
|
||||
# Case observed: a pointed object cannot be found.
if obj is None:
# This normally should not happen; fall back to a NullObject.
obj = NullObject()
|
||||
assert isinstance(self, (IndirectObject,))
|
||||
obj.indirect_reference = self
|
||||
dup = pdf_dest._add_object(
|
||||
obj.clone(pdf_dest, force_duplicate, ignore_fields)
|
||||
)
|
||||
assert dup is not None, "mypy"
|
||||
assert dup.indirect_reference is not None, "mypy"
|
||||
return dup.indirect_reference
|
||||
|
||||
@property
|
||||
def indirect_reference(self) -> "IndirectObject": # type: ignore[override]
|
||||
return self
|
||||
|
||||
def get_object(self) -> Optional["PdfObject"]:
|
||||
return self.pdf.get_object(self)
|
||||
|
||||
def __deepcopy__(self, memo: Any) -> "IndirectObject":
|
||||
return IndirectObject(self.idnum, self.generation, self.pdf)
|
||||
|
||||
def _get_object_with_check(self) -> Optional["PdfObject"]:
|
||||
o = self.get_object()
|
||||
# the check is done here to not slow down get_object()
|
||||
if isinstance(o, IndirectObject):
|
||||
raise PdfStreamError(
|
||||
f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
|
||||
)
|
||||
return o
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
# Attribute not found in object: look in pointed object
|
||||
try:
|
||||
return getattr(self._get_object_with_check(), name)
|
||||
except AttributeError:
|
||||
raise AttributeError(
|
||||
f"No attribute {name} found in IndirectObject or pointed object"
|
||||
)
|
||||
|
||||
def __getitem__(self, key: Any) -> Any:
|
||||
# items should be extracted from pointed Object
|
||||
return self._get_object_with_check()[key] # type: ignore
|
||||
|
||||
def __contains__(self, key: Any) -> bool:
|
||||
return key in self._get_object_with_check() # type: ignore
|
||||
|
||||
def __iter__(self) -> Any:
|
||||
return self._get_object_with_check().__iter__() # type: ignore
|
||||
|
||||
def __float__(self) -> float:
|
||||
# in this case we are looking for the pointed data
|
||||
return self.get_object().__float__() # type: ignore
|
||||
|
||||
def __int__(self) -> int:
|
||||
# in this case we are looking for the pointed data
|
||||
return self.get_object().__int__() # type: ignore
|
||||
|
||||
def __str__(self) -> str:
|
||||
# in this case we are looking for the pointed data
|
||||
return self.get_object().__str__()
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"
|
||||
|
||||
def __eq__(self, other: object) -> bool:
|
||||
return (
|
||||
other is not None
|
||||
and isinstance(other, IndirectObject)
|
||||
and self.idnum == other.idnum
|
||||
and self.generation == other.generation
|
||||
and self.pdf is other.pdf
|
||||
)
|
||||
|
||||
def __ne__(self, other: object) -> bool:
|
||||
return not self.__eq__(other)
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(f"{self.idnum} {self.generation} R".encode())
|
||||
|
||||
@staticmethod
|
||||
def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject": # PdfReader
|
||||
idnum = b""
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if not tok:
|
||||
raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
|
||||
if tok.isspace():
|
||||
break
|
||||
idnum += tok
|
||||
generation = b""
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if not tok:
|
||||
raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
|
||||
if tok.isspace():
|
||||
if not generation:
|
||||
continue
|
||||
break
|
||||
generation += tok
|
||||
r = read_non_whitespace(stream)
|
||||
if r != b"R":
|
||||
raise PdfReadError(
|
||||
f"Error reading indirect object reference at byte {hex(stream.tell())}"
|
||||
)
|
||||
return IndirectObject(int(idnum), int(generation), pdf)
|
||||
|
||||
|
||||
FLOAT_WRITE_PRECISION = 8 # shall be min 5 digits max, allow user adj
|
||||
|
||||
|
||||
class FloatObject(float, PdfObject):
|
||||
def __new__(
|
||||
cls, value: Any = "0.0", context: Optional[Any] = None
|
||||
) -> "FloatObject":
|
||||
try:
|
||||
value = float(value)
|
||||
return float.__new__(cls, value)
|
||||
except Exception as e:
|
||||
# If this isn't a valid decimal (happens in malformed PDFs)
|
||||
# fallback to 0
|
||||
logger_warning(
|
||||
f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
|
||||
)
|
||||
return float.__new__(cls, 0.0)
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "FloatObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"FloatObject",
|
||||
self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self.as_numeric))
|
||||
|
||||
def myrepr(self) -> str:
|
||||
if self == 0:
|
||||
return "0.0"
|
||||
nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
|
||||
return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return self.myrepr() # repr(float(self))
|
||||
|
||||
def as_numeric(self) -> float:
|
||||
return float(self)
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(self.myrepr().encode("utf8"))
|
||||
|
||||
|
||||
class NumberObject(int, PdfObject):
|
||||
NumberPattern = re.compile(b"[^+-.0-9]")
|
||||
|
||||
def __new__(cls, value: Any) -> "NumberObject":
|
||||
try:
|
||||
return int.__new__(cls, int(value))
|
||||
except ValueError:
|
||||
logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
|
||||
return int.__new__(cls, 0)
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "NumberObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"NumberObject",
|
||||
self._reference_clone(NumberObject(self), pdf_dest, force_duplicate),
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self.as_numeric()))
|
||||
|
||||
def as_numeric(self) -> int:
|
||||
return int(repr(self).encode("utf8"))
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(repr(self).encode("utf8"))
|
||||
|
||||
@staticmethod
|
||||
def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
|
||||
num = read_until_regex(stream, NumberObject.NumberPattern)
|
||||
if b"." in num:
|
||||
return FloatObject(num)
|
||||
return NumberObject(num)
|
||||
|
||||
|
||||
class ByteStringObject(bytes, PdfObject):
|
||||
"""
|
||||
Represents a string object where the text encoding could not be determined.
|
||||
|
||||
This occurs quite often, as the PDF spec doesn't provide an alternate way to
|
||||
represent strings -- for example, the encryption data stored in files (like
|
||||
/O) is clearly not text, but is still stored in a "String" object.
|
||||
"""
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "ByteStringObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"ByteStringObject",
|
||||
self._reference_clone(
|
||||
ByteStringObject(bytes(self)), pdf_dest, force_duplicate
|
||||
),
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, bytes(self)))
|
||||
|
||||
@property
|
||||
def original_bytes(self) -> bytes:
|
||||
"""For compatibility with TextStringObject.original_bytes."""
|
||||
return self
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(b"<")
|
||||
stream.write(binascii.hexlify(self))
|
||||
stream.write(b">")
|
||||
|
||||
def __str__(self) -> str:
|
||||
charset_to_try = ["utf-16", *list(NameObject.CHARSETS)]
|
||||
for enc in charset_to_try:
|
||||
try:
|
||||
return self.decode(enc)
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
raise PdfReadError("Cannot decode ByteStringObject.")
|
||||
|
||||
|
||||
class TextStringObject(str, PdfObject): # noqa: SLOT000
|
||||
"""
|
||||
A string object that has been decoded into a real unicode string.
|
||||
|
||||
If read from a PDF document, this string appeared to match the
|
||||
PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
|
||||
to occur.
|
||||
"""
|
||||
|
||||
autodetect_pdfdocencoding: bool
|
||||
autodetect_utf16: bool
|
||||
utf16_bom: bytes
|
||||
_original_bytes: Optional[bytes] = None
|
||||
|
||||
def __new__(cls, value: Any) -> "TextStringObject":
|
||||
original_bytes = None
|
||||
if isinstance(value, bytes):
|
||||
original_bytes = value
|
||||
value = value.decode("charmap")
|
||||
text_string_object = str.__new__(cls, value)
|
||||
text_string_object._original_bytes = original_bytes
|
||||
text_string_object.autodetect_utf16 = False
|
||||
text_string_object.autodetect_pdfdocencoding = False
|
||||
text_string_object.utf16_bom = b""
|
||||
if original_bytes is not None and original_bytes[:2] in {codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE}:
|
||||
# The value of `original_bytes` is only set for inputs being `bytes`.
|
||||
# If this is UTF-16 data according to the BOM (first two characters),
|
||||
# perform special handling. All other cases should not need any special conversion
|
||||
# due to already being a string.
|
||||
try:
|
||||
text_string_object = str.__new__(cls, original_bytes.decode("utf-16"))
|
||||
except UnicodeDecodeError as exception:
|
||||
logger_warning(
|
||||
f"{exception!s}\ninitial string:{exception.object!r}",
|
||||
__name__,
|
||||
)
|
||||
text_string_object = str.__new__(cls, exception.object[: exception.start].decode("utf-16"))
|
||||
text_string_object._original_bytes = original_bytes
|
||||
text_string_object.autodetect_utf16 = True
|
||||
text_string_object.utf16_bom = original_bytes[:2]
|
||||
else:
|
||||
try:
|
||||
encode_pdfdocencoding(text_string_object)
|
||||
text_string_object.autodetect_pdfdocencoding = True
|
||||
except UnicodeEncodeError:
|
||||
text_string_object.autodetect_utf16 = True
|
||||
text_string_object.utf16_bom = codecs.BOM_UTF16_BE
|
||||
return text_string_object
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "TextStringObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
obj = TextStringObject(self)
|
||||
obj._original_bytes = self._original_bytes
|
||||
obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
|
||||
obj.autodetect_utf16 = self.autodetect_utf16
|
||||
obj.utf16_bom = self.utf16_bom
|
||||
return cast(
|
||||
"TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self.original_bytes))
|
||||
|
||||
@property
|
||||
def original_bytes(self) -> bytes:
|
||||
"""
|
||||
It is occasionally possible that a text string object gets created where
|
||||
a byte string object was expected due to the autodetection mechanism --
|
||||
if that occurs, this "original_bytes" property can be used to
|
||||
back-calculate what the original encoded bytes were.
|
||||
"""
|
||||
if self._original_bytes is not None:
|
||||
return self._original_bytes
|
||||
return self.get_original_bytes()
|
||||
|
||||
def get_original_bytes(self) -> bytes:
|
||||
# We're a text string object, but the library is trying to get our raw
|
||||
# bytes. This can happen if we auto-detected this string as text, but
|
||||
# we were wrong. It's pretty common. Return the original bytes that
|
||||
# would have been used to create this object, based upon the autodetect
|
||||
# method.
|
||||
if self.autodetect_utf16:
|
||||
if self.utf16_bom == codecs.BOM_UTF16_LE:
|
||||
return codecs.BOM_UTF16_LE + self.encode("utf-16le")
|
||||
if self.utf16_bom == codecs.BOM_UTF16_BE:
|
||||
return codecs.BOM_UTF16_BE + self.encode("utf-16be")
|
||||
return self.encode("utf-16be")
|
||||
if self.autodetect_pdfdocencoding:
|
||||
return encode_pdfdocencoding(self)
|
||||
raise Exception("no information about original bytes") # pragma: no cover
|
||||
|
||||
def get_encoded_bytes(self) -> bytes:
|
||||
# Try to write the string out as a PDFDocEncoding encoded string. It's
|
||||
# nicer to look at in the PDF file. Sadly, we take a performance hit
|
||||
# here for trying...
|
||||
try:
|
||||
if self._original_bytes is not None:
|
||||
return self._original_bytes
|
||||
if self.autodetect_utf16:
|
||||
raise UnicodeEncodeError("", "forced", -1, -1, "")
|
||||
bytearr = encode_pdfdocencoding(self)
|
||||
except UnicodeEncodeError:
|
||||
if self.utf16_bom == codecs.BOM_UTF16_LE:
|
||||
bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
|
||||
elif self.utf16_bom == codecs.BOM_UTF16_BE:
|
||||
bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
|
||||
else:
|
||||
bytearr = self.encode("utf-16be")
|
||||
return bytearr
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
bytearr = self.get_encoded_bytes()
|
||||
stream.write(b"(")
|
||||
for c_ in iter_unpack("c", bytearr):
|
||||
c = cast(bytes, c_[0])
|
||||
if not c.isalnum() and c != b" ":
|
||||
# This:
|
||||
# stream.write(rf"\{c:0>3o}".encode())
|
||||
# gives
|
||||
# https://github.com/davidhalter/parso/issues/207
|
||||
stream.write(b"\\%03o" % ord(c))
|
||||
else:
|
||||
stream.write(c)
|
||||
stream.write(b")")
|
||||
|
||||
|
||||
class NameObject(str, PdfObject): # noqa: SLOT000
|
||||
delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
|
||||
prefix = b"/"
|
||||
renumber_table: ClassVar[dict[str, bytes]] = {
|
||||
**{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
|
||||
**{chr(i): f"#{i:02X}".encode() for i in range(33)},
|
||||
}
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "NameObject":
|
||||
"""Clone object into pdf_dest."""
|
||||
return cast(
|
||||
"NameObject",
|
||||
self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
|
||||
)
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
return hash((self.__class__, self))
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(self.renumber())
|
||||
|
||||
def renumber(self) -> bytes:
|
||||
out = self[0].encode("utf-8")
|
||||
if out != b"/":
|
||||
deprecation_no_replacement(
|
||||
f"Incorrect first char in NameObject, should start with '/': ({self})",
|
||||
"5.0.0",
|
||||
)
|
||||
for c in self[1:]:
|
||||
if c > "~":
|
||||
for x in c.encode("utf-8"):
|
||||
out += f"#{x:02X}".encode()
|
||||
else:
|
||||
try:
|
||||
out += self.renumber_table[c]
|
||||
except KeyError:
|
||||
out += c.encode("utf-8")
|
||||
return out
|
||||
|
||||
def _sanitize(self) -> "NameObject":
|
||||
"""
|
||||
Sanitize the NameObject's name to be a valid PDF name part
|
||||
(alphanumeric, underscore, hyphen). The _sanitize method replaces
|
||||
spaces and any non-alphanumeric/non-underscore/non-hyphen with
|
||||
underscores.
|
||||
|
||||
Returns:
|
||||
NameObject with sanitized name.
|
||||
"""
|
||||
name = str(self).removeprefix("/")
|
||||
name = re.sub(r"\ ", "_", name)
|
||||
name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
|
||||
return NameObject("/" + name)
|
||||
|
||||
@classproperty
|
||||
def surfix(cls) -> bytes: # noqa: N805
|
||||
deprecation_with_replacement("surfix", "prefix", "5.0.0")
|
||||
return b"/"
|
||||
|
||||
@staticmethod
|
||||
def unnumber(sin: bytes) -> bytes:
|
||||
i = sin.find(b"#", 0)
|
||||
while i >= 0:
|
||||
try:
|
||||
sin = sin[:i] + unhexlify(sin[i + 1 : i + 3]) + sin[i + 3 :]
|
||||
i = sin.find(b"#", i + 1)
|
||||
except ValueError:
|
||||
# if the two characters after "#" cannot be converted to hex,
|
||||
# we change nothing and carry on
|
||||
i = i + 1
|
||||
return sin
|
||||
|
||||
CHARSETS = ("utf-8", "gbk", "latin1")
|
||||
|
||||
@staticmethod
|
||||
def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject": # PdfReader
|
||||
name = stream.read(1)
|
||||
if name != NameObject.prefix:
|
||||
raise PdfReadError("Name read error")
|
||||
name += read_until_regex(stream, NameObject.delimiter_pattern)
|
||||
try:
|
||||
# Name objects should represent irregular characters
|
||||
# with a '#' followed by the symbol's hex number
|
||||
name = NameObject.unnumber(name)
|
||||
for enc in NameObject.CHARSETS:
|
||||
try:
|
||||
ret = name.decode(enc)
|
||||
return NameObject(ret)
|
||||
except Exception:
|
||||
pass
|
||||
raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
|
||||
except (UnicodeEncodeError, UnicodeDecodeError) as e:
|
||||
if not pdf.strict:
|
||||
logger_warning(
|
||||
f"Illegal character in NameObject ({name!r}), "
|
||||
"you may need to adjust NameObject.CHARSETS",
|
||||
__name__,
|
||||
)
|
||||
return NameObject(name.decode("charmap"))
|
||||
raise PdfReadError(
|
||||
f"Illegal character in NameObject ({name!r}). "
|
||||
"You may need to adjust NameObject.CHARSETS.",
|
||||
) from e
|
||||
|
||||
|
||||
def encode_pdfdocencoding(unicode_string: str) -> bytes:
|
||||
try:
|
||||
return bytes([_pdfdoc_encoding_rev[k] for k in unicode_string])
|
||||
except KeyError:
|
||||
raise UnicodeEncodeError(
|
||||
"pdfdocencoding",
|
||||
unicode_string,
|
||||
-1,
|
||||
-1,
|
||||
"does not exist in translation table",
|
||||
)
|
||||
|
||||
|
||||
def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
|
||||
"""
|
||||
Returns:
|
||||
True if x is None or NullObject.
|
||||
|
||||
"""
|
||||
return x is None or (
|
||||
isinstance(x, PdfObject)
|
||||
and (x.get_object() is None or isinstance(x.get_object(), NullObject))
|
||||
)
|
||||
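To make the #XX escaping above concrete, here is a minimal round-trip sketch (assuming NameObject is imported from pypdf.generic): renumber() hex-escapes delimiters, "#" and control/space characters when a name is written, and unnumber() reverses that escaping when a name is read back.

from pypdf.generic import NameObject

name = NameObject("/My Attachment (v2)")
encoded = name.renumber()
print(encoded)  # b'/My#20Attachment#20#28v2#29'

decoded = NameObject.unnumber(encoded)
assert decoded.decode("utf-8") == str(name)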
1757
venv/lib/python3.12/site-packages/pypdf/generic/_data_structures.py
Normal file
1757
venv/lib/python3.12/site-packages/pypdf/generic/_data_structures.py
Normal file
File diff suppressed because it is too large
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
@@ -0,0 +1,401 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from pypdf._utils import format_iso8824_date, parse_iso8824_date
|
||||
from pypdf.constants import CatalogAttributes as CA
|
||||
from pypdf.constants import FileSpecificationDictionaryEntries
|
||||
from pypdf.constants import PageAttributes as PG
|
||||
from pypdf.errors import PdfReadError, PyPdfError
|
||||
from pypdf.generic import (
|
||||
ArrayObject,
|
||||
ByteStringObject,
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NameObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
StreamObject,
|
||||
TextStringObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from collections.abc import Generator
|
||||
|
||||
from pypdf._writer import PdfWriter
|
||||
|
||||
|
||||
class EmbeddedFile:
|
||||
"""
|
||||
Container holding the information on an embedded file.
|
||||
|
||||
Attributes are evaluated lazily if possible.
|
||||
|
||||
Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
|
||||
"""
|
||||
def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
|
||||
"""
|
||||
Args:
|
||||
name: The (primary) name as provided in the name tree.
|
||||
pdf_object: The corresponding PDF object to allow retrieving further data.
|
||||
parent: The parent list.
|
||||
"""
|
||||
self._name = name
|
||||
self.pdf_object = pdf_object
|
||||
self._parent = parent
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
"""The (primary) name of the embedded file as provided in the name tree."""
|
||||
return self._name
|
||||
|
||||
@classmethod
|
||||
def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
|
||||
"""
|
||||
Create a new embedded file and add it to the PdfWriter.
|
||||
|
||||
Args:
|
||||
writer: The PdfWriter instance to add the embedded file to.
|
||||
name: The filename to display.
|
||||
content: The data in the file.
|
||||
|
||||
Returns:
|
||||
EmbeddedFile instance for the newly created embedded file.
|
||||
"""
|
||||
# Convert string content to bytes if needed
|
||||
if isinstance(content, str):
|
||||
content = content.encode("latin-1")
|
||||
|
||||
# Create the file entry (the actual embedded file stream)
|
||||
file_entry = DecodedStreamObject()
|
||||
file_entry.set_data(content)
|
||||
file_entry.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})
|
||||
|
||||
# Create the /EF entry
|
||||
ef_entry = DictionaryObject()
|
||||
ef_entry.update({NameObject("/F"): writer._add_object(file_entry)})
|
||||
|
||||
# Create the filespec dictionary
|
||||
from pypdf.generic import create_string_object # noqa: PLC0415
|
||||
filespec = DictionaryObject()
|
||||
filespec_reference = writer._add_object(filespec)
|
||||
name_object = cast(TextStringObject, create_string_object(name))
|
||||
filespec.update(
|
||||
{
|
||||
NameObject(PG.TYPE): NameObject("/Filespec"),
|
||||
NameObject(FileSpecificationDictionaryEntries.F): name_object,
|
||||
NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
|
||||
}
|
||||
)
|
||||
|
||||
# Add the name and filespec to the names array.
|
||||
# We use the inverse order for insertion, as this allows us to re-use the
|
||||
# same index.
|
||||
names_array = cls._get_names_array(writer)
|
||||
insertion_index = cls._get_insertion_index(names_array, name_object)
|
||||
names_array.insert(insertion_index, filespec_reference)
|
||||
names_array.insert(insertion_index, name_object)
|
||||
|
||||
# Return an EmbeddedFile instance
|
||||
return cls(name=name, pdf_object=filespec, parent=names_array)
|
||||
|
||||
@classmethod
|
||||
def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
|
||||
"""Get the names array for embedded files, possibly creating and flattening it."""
|
||||
if CA.NAMES not in writer.root_object:
|
||||
# Add the /Names entry to the catalog.
|
||||
writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())
|
||||
|
||||
names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])
|
||||
if "/EmbeddedFiles" not in names_dict:
|
||||
# We do not yet have an entry for embedded files. Create and return it.
|
||||
names = ArrayObject()
|
||||
embedded_files_names_dictionary = DictionaryObject(
|
||||
{NameObject(CA.NAMES): names}
|
||||
)
|
||||
names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
|
||||
return names
|
||||
|
||||
# We have an existing embedded files entry.
|
||||
embedded_files_names_tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
|
||||
if "/Names" in embedded_files_names_tree:
|
||||
# Simple case: We already have a flat list.
|
||||
return cast(ArrayObject, embedded_files_names_tree[NameObject(CA.NAMES)])
|
||||
if "/Kids" not in embedded_files_names_tree:
|
||||
# Invalid case: This is no name tree.
|
||||
raise PdfReadError("Got neither Names nor Kids in embedded files tree.")
|
||||
|
||||
# Complex case: Convert a /Kids-based name tree to a /Names-based one.
|
||||
# /Names-based ones are much easier to handle and allow us to simplify the
|
||||
# actual insertion logic by only having to consider one case.
|
||||
names = ArrayObject()
|
||||
kids = cast(ArrayObject, embedded_files_names_tree["/Kids"].get_object())
|
||||
embedded_files_names_dictionary = DictionaryObject(
|
||||
{NameObject(CA.NAMES): names}
|
||||
)
|
||||
names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
|
||||
for kid in kids:
|
||||
# Write the flattened file entries. As we do not change the actual files,
|
||||
# this should not have any impact on references to them.
|
||||
# There might be further (nested) kids here.
|
||||
# Wait for an example before evaluating an implementation.
|
||||
for name in kid.get_object().get("/Names", []):
|
||||
names.append(name)
|
||||
return names
|
||||
|
||||
@classmethod
|
||||
def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
|
||||
keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
|
||||
name_bytes = name.encode("utf-8")
|
||||
|
||||
start = bisect.bisect_left(keys, name_bytes)
|
||||
end = bisect.bisect_right(keys, name_bytes)
|
||||
|
||||
if start != end:
|
||||
return end * 2
|
||||
if start == 0:
|
||||
return 0
|
||||
if start == (key_count := len(keys)):
|
||||
return key_count * 2
|
||||
return end * 2
|
||||
|
||||
@property
|
||||
def alternative_name(self) -> str | None:
|
||||
"""Retrieve the alternative name (file specification)."""
|
||||
for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
|
||||
# PDF 2.0 reference, table 43:
|
||||
# > A PDF reader shall use the value of the UF key, when present, instead of the F key.
|
||||
if key in self.pdf_object:
|
||||
value = self.pdf_object[key].get_object()
|
||||
if not is_null_or_none(value):
|
||||
return cast(str, value)
|
||||
return None
|
||||
|
||||
@alternative_name.setter
|
||||
def alternative_name(self, value: TextStringObject | None) -> None:
|
||||
"""Set the alternative name (file specification)."""
|
||||
if value is None:
|
||||
if FileSpecificationDictionaryEntries.UF in self.pdf_object:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = NullObject()
|
||||
if FileSpecificationDictionaryEntries.F in self.pdf_object:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = NullObject()
|
||||
else:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = value
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = value
|
||||
|
||||
@property
|
||||
def description(self) -> str | None:
|
||||
"""Retrieve the description."""
|
||||
value = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@description.setter
|
||||
def description(self, value: TextStringObject | None) -> None:
|
||||
"""Set the description."""
|
||||
if value is None:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = NullObject()
|
||||
else:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = value
|
||||
|
||||
@property
|
||||
def associated_file_relationship(self) -> str:
|
||||
"""Retrieve the relationship of the referring document to this embedded file."""
|
||||
return self.pdf_object.get("/AFRelationship", "/Unspecified")
|
||||
|
||||
@associated_file_relationship.setter
|
||||
def associated_file_relationship(self, value: NameObject) -> None:
|
||||
"""Set the relationship of the referring document to this embedded file."""
|
||||
self.pdf_object[NameObject("/AFRelationship")] = value
|
||||
|
||||
@property
|
||||
def _embedded_file(self) -> StreamObject:
|
||||
"""Retrieve the actual embedded file stream."""
|
||||
if "/EF" not in self.pdf_object:
|
||||
raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
|
||||
ef = cast(DictionaryObject, self.pdf_object["/EF"])
|
||||
for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
|
||||
if key in ef:
|
||||
return cast(StreamObject, ef[key].get_object())
|
||||
raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")
|
||||
|
||||
@property
|
||||
def _params(self) -> DictionaryObject:
|
||||
"""Retrieve the file-specific parameters."""
|
||||
return self._embedded_file.get("/Params", DictionaryObject()).get_object()
|
||||
|
||||
@cached_property
|
||||
def _ensure_params(self) -> DictionaryObject:
|
||||
"""Ensure the /Params dictionary exists and return it."""
|
||||
embedded_file = self._embedded_file
|
||||
if "/Params" not in embedded_file:
|
||||
embedded_file[NameObject("/Params")] = DictionaryObject()
|
||||
return cast(DictionaryObject, embedded_file["/Params"])
|
||||
|
||||
@property
|
||||
def subtype(self) -> str | None:
|
||||
"""Retrieve the subtype. This is a MIME media type, prefixed by a slash."""
|
||||
value = self._embedded_file.get("/Subtype")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@subtype.setter
|
||||
def subtype(self, value: NameObject | None) -> None:
|
||||
"""Set the subtype. This should be a MIME media type, prefixed by a slash."""
|
||||
embedded_file = self._embedded_file
|
||||
if value is None:
|
||||
embedded_file[NameObject("/Subtype")] = NullObject()
|
||||
else:
|
||||
embedded_file[NameObject("/Subtype")] = value
|
||||
|
||||
@property
|
||||
def content(self) -> bytes:
|
||||
"""Retrieve the actual file content."""
|
||||
return self._embedded_file.get_data()
|
||||
|
||||
@content.setter
|
||||
def content(self, value: str | bytes) -> None:
|
||||
"""Set the file content."""
|
||||
if isinstance(value, str):
|
||||
value = value.encode("latin-1")
|
||||
self._embedded_file.set_data(value)
|
||||
|
||||
@property
|
||||
def size(self) -> int | None:
|
||||
"""Retrieve the size of the uncompressed file in bytes."""
|
||||
value = self._params.get("/Size")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@size.setter
|
||||
def size(self, value: NumberObject | None) -> None:
|
||||
"""Set the size of the uncompressed file in bytes."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/Size")] = NullObject()
|
||||
else:
|
||||
params[NameObject("/Size")] = value
|
||||
|
||||
@property
|
||||
def creation_date(self) -> datetime.datetime | None:
|
||||
"""Retrieve the file creation datetime."""
|
||||
return parse_iso8824_date(self._params.get("/CreationDate"))
|
||||
|
||||
@creation_date.setter
|
||||
def creation_date(self, value: datetime.datetime | None) -> None:
|
||||
"""Set the file creation datetime."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/CreationDate")] = NullObject()
|
||||
else:
|
||||
date_str = format_iso8824_date(value)
|
||||
params[NameObject("/CreationDate")] = TextStringObject(date_str)
|
||||
|
||||
@property
|
||||
def modification_date(self) -> datetime.datetime | None:
|
||||
"""Retrieve the datetime of the last file modification."""
|
||||
return parse_iso8824_date(self._params.get("/ModDate"))
|
||||
|
||||
@modification_date.setter
|
||||
def modification_date(self, value: datetime.datetime | None) -> None:
|
||||
"""Set the datetime of the last file modification."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/ModDate")] = NullObject()
|
||||
else:
|
||||
date_str = format_iso8824_date(value)
|
||||
params[NameObject("/ModDate")] = TextStringObject(date_str)
|
||||
|
||||
@property
|
||||
def checksum(self) -> bytes | None:
|
||||
"""Retrieve the MD5 checksum of the (uncompressed) file."""
|
||||
value = self._params.get("/CheckSum")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@checksum.setter
|
||||
def checksum(self, value: ByteStringObject | None) -> None:
|
||||
"""Set the MD5 checksum of the (uncompressed) file."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/CheckSum")] = NullObject()
|
||||
else:
|
||||
params[NameObject("/CheckSum")] = value
|
||||
|
||||
def delete(self) -> None:
|
||||
"""Delete the file from the document."""
|
||||
if not self._parent:
|
||||
raise PyPdfError("Parent required to delete file from document.")
|
||||
if self.pdf_object in self._parent:
|
||||
index = self._parent.index(self.pdf_object)
|
||||
elif (
|
||||
(indirect_reference := getattr(self.pdf_object, "indirect_reference", None)) is not None
|
||||
and indirect_reference in self._parent
|
||||
):
|
||||
index = self._parent.index(indirect_reference)
|
||||
else:
|
||||
raise PyPdfError("File not found in parent object.")
|
||||
self._parent.pop(index) # Reference.
|
||||
self._parent.pop(index - 1) # Name.
|
||||
self.pdf_object = DictionaryObject() # Invalidate.
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.__class__.__name__} name={self.name!r}>"
|
||||
|
||||
@classmethod
|
||||
def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
|
||||
"""
|
||||
Convert the given name tree into class instances.
|
||||
|
||||
Args:
|
||||
names: The name tree to load the data from.
|
||||
|
||||
Returns:
|
||||
Iterable of class instances for the files found.
|
||||
"""
|
||||
# This is a name tree of the format [name_1, reference_1, name_2, reference_2, ...]
|
||||
for i, name in enumerate(names):
|
||||
if not isinstance(name, str):
|
||||
# Skip plain strings and retrieve them as `direct_name` by index.
|
||||
file_dictionary = name.get_object()
|
||||
direct_name = names[i - 1].get_object()
|
||||
yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)
|
||||
|
||||
@classmethod
|
||||
def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
|
||||
"""
|
||||
Load the embedded files for the given document catalog.
|
||||
|
||||
This method and its signature are considered internal API and thus not exposed publicly for now.
|
||||
|
||||
Args:
|
||||
catalog: The document catalog to load from.
|
||||
|
||||
Returns:
|
||||
Iterable of class instances for the files found.
|
||||
"""
|
||||
try:
|
||||
container = cast(
|
||||
DictionaryObject,
|
||||
cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
|
||||
)
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
if "/Kids" in container:
|
||||
for kid in cast(ArrayObject, container["/Kids"].get_object()):
|
||||
# There might be further (nested) kids here.
|
||||
# Wait for an example before evaluating an implementation.
|
||||
kid = kid.get_object()
|
||||
if "/Names" in kid:
|
||||
yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
|
||||
if "/Names" in container:
|
||||
yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
|
||||
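The EmbeddedFile plumbing above is normally reached through the public attachment API rather than instantiated directly. A minimal sketch, assuming PdfWriter.add_attachment and the PdfReader.attachments property behave as in recent pypdf releases; the file name and payload are made up for illustration:

from io import BytesIO

from pypdf import PdfReader, PdfWriter

writer = PdfWriter()
writer.add_blank_page(width=200, height=200)
# add_attachment creates the /Filespec and /EmbeddedFile objects that the
# EmbeddedFile class above models, and registers them in the name tree.
writer.add_attachment("notes.txt", b"hello attachment")

buffer = BytesIO()
writer.write(buffer)
buffer.seek(0)

reader = PdfReader(buffer)
# attachments maps each name to the list of payloads found for it.
print(reader.attachments["notes.txt"])  # [b'hello attachment']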
174
venv/lib/python3.12/site-packages/pypdf/generic/_fit.py
Normal file
174
venv/lib/python3.12/site-packages/pypdf/generic/_fit.py
Normal file
@@ -0,0 +1,174 @@
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from ._base import is_null_or_none
|
||||
|
||||
|
||||
class Fit:
|
||||
def __init__(
|
||||
self, fit_type: str, fit_args: tuple[Union[None, float, Any], ...] = ()
|
||||
) -> None:
|
||||
from ._base import FloatObject, NameObject, NullObject, NumberObject # noqa: PLC0415
|
||||
|
||||
self.fit_type = NameObject(fit_type)
|
||||
self.fit_args: list[Union[NullObject, FloatObject, NumberObject]] = [
|
||||
NullObject() if is_null_or_none(a) else FloatObject(a) for a in fit_args
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def xyz(
|
||||
cls,
|
||||
left: Optional[float] = None,
|
||||
top: Optional[float] = None,
|
||||
zoom: Optional[float] = None,
|
||||
) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with the coordinates (left, top)
|
||||
positioned at the upper-left corner of the window and the contents
|
||||
of the page magnified by the factor zoom.
|
||||
|
||||
A null value for any of the parameters left, top, or zoom specifies
|
||||
that the current value of that parameter is to be retained unchanged.
|
||||
|
||||
A zoom value of 0 has the same meaning as a null value.
|
||||
|
||||
Args:
|
||||
left:
|
||||
top:
|
||||
zoom:
|
||||
|
||||
Returns:
|
||||
The created fit object.
|
||||
|
||||
"""
|
||||
return Fit(fit_type="/XYZ", fit_args=(left, top, zoom))
|
||||
|
||||
@classmethod
|
||||
def fit(cls) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with its contents magnified just
|
||||
enough to fit the entire page within the window both horizontally and
|
||||
vertically.
|
||||
|
||||
If the required horizontal and vertical magnification factors are
|
||||
different, use the smaller of the two, centering the page within the
|
||||
window in the other dimension.
|
||||
"""
|
||||
return Fit(fit_type="/Fit")
|
||||
|
||||
@classmethod
|
||||
def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with the vertical coordinate top
|
||||
positioned at the top edge of the window and the contents of the page
|
||||
magnified just enough to fit the entire width of the page within the
|
||||
window.
|
||||
|
||||
A null value for ``top`` specifies that the current value of that
|
||||
parameter is to be retained unchanged.
|
||||
|
||||
Args:
|
||||
top:
|
||||
|
||||
Returns:
|
||||
The created fit object.
|
||||
|
||||
"""
|
||||
return Fit(fit_type="/FitH", fit_args=(top,))
|
||||
|
||||
@classmethod
|
||||
def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
|
||||
return Fit(fit_type="/FitV", fit_args=(left,))
|
||||
|
||||
@classmethod
|
||||
def fit_rectangle(
|
||||
cls,
|
||||
left: Optional[float] = None,
|
||||
bottom: Optional[float] = None,
|
||||
right: Optional[float] = None,
|
||||
top: Optional[float] = None,
|
||||
) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with its contents magnified
|
||||
just enough to fit the rectangle specified by the coordinates
|
||||
left, bottom, right, and top entirely within the window
|
||||
both horizontally and vertically.
|
||||
|
||||
If the required horizontal and vertical magnification factors are
|
||||
different, use the smaller of the two, centering the rectangle within
|
||||
the window in the other dimension.
|
||||
|
||||
A null value for any of the parameters may result in unpredictable
|
||||
behavior.
|
||||
|
||||
Args:
|
||||
left:
|
||||
bottom:
|
||||
right:
|
||||
top:
|
||||
|
||||
Returns:
|
||||
The created fit object.
|
||||
|
||||
"""
|
||||
return Fit(fit_type="/FitR", fit_args=(left, bottom, right, top))
|
||||
|
||||
@classmethod
|
||||
def fit_box(cls) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with its contents magnified just
|
||||
enough to fit its bounding box entirely within the window both
|
||||
horizontally and vertically.
|
||||
|
||||
If the required horizontal and vertical magnification factors are
|
||||
different, use the smaller of the two, centering the bounding box
|
||||
within the window in the other dimension.
|
||||
"""
|
||||
return Fit(fit_type="/FitB")
|
||||
|
||||
@classmethod
|
||||
def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with the vertical coordinate top
|
||||
positioned at the top edge of the window and the contents of the page
|
||||
magnified just enough to fit the entire width of its bounding box
|
||||
within the window.
|
||||
|
||||
A null value for top specifies that the current value of that parameter
|
||||
is to be retained unchanged.
|
||||
|
||||
Args:
|
||||
top:
|
||||
|
||||
Returns:
|
||||
The created fit object.
|
||||
|
||||
"""
|
||||
return Fit(fit_type="/FitBH", fit_args=(top,))
|
||||
|
||||
@classmethod
|
||||
def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
|
||||
"""
|
||||
Display the page designated by page, with the horizontal coordinate
|
||||
left positioned at the left edge of the window and the contents of the
|
||||
page magnified just enough to fit the entire height of its bounding box
|
||||
within the window.
|
||||
|
||||
A null value for left specifies that the current value of that
|
||||
parameter is to be retained unchanged.
|
||||
|
||||
Args:
|
||||
left:
|
||||
|
||||
Returns:
|
||||
The created fit object.
|
||||
|
||||
"""
|
||||
return Fit(fit_type="/FitBV", fit_args=(left,))
|
||||
|
||||
def __str__(self) -> str:
|
||||
if not self.fit_args:
|
||||
return f"Fit({self.fit_type})"
|
||||
return f"Fit({self.fit_type}, {self.fit_args})"
|
||||
|
||||
|
||||
DEFAULT_FIT = Fit.fit()
|
||||
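Fit objects are mostly consumed through destination-creating APIs rather than used on their own. A minimal sketch of the two most common cases, assuming PdfWriter.add_outline_item accepts a fit keyword as in recent pypdf releases:

from pypdf import PdfWriter
from pypdf.generic import Fit

writer = PdfWriter()
writer.add_blank_page(width=612, height=792)

# /XYZ destination: scroll to the top-left corner; zoom 0 keeps the current zoom.
writer.add_outline_item("Top of page", 0, fit=Fit.xyz(left=0, top=792, zoom=0))

# /FitH destination: magnify so the full page width fits, top edge at y=792.
writer.add_outline_item("Fit width", 0, fit=Fit.fit_horizontally(top=792))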
314
venv/lib/python3.12/site-packages/pypdf/generic/_image_inline.py
Normal file
314
venv/lib/python3.12/site-packages/pypdf/generic/_image_inline.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# Copyright (c) 2024, pypdf contributors
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import IO
|
||||
|
||||
from .._utils import (
|
||||
WHITESPACES,
|
||||
WHITESPACES_AS_BYTES,
|
||||
StreamType,
|
||||
logger_warning,
|
||||
read_non_whitespace,
|
||||
)
|
||||
from ..errors import PdfReadError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# An inline image should be used only for small images (4096 bytes or less),
|
||||
# but allow twice this size for cases where that limit has been exceeded.
|
||||
BUFFER_SIZE = 8192
|
||||
|
||||
|
||||
def _check_end_image_marker(stream: StreamType) -> bool:
|
||||
ei_tok = read_non_whitespace(stream)
|
||||
ei_tok += stream.read(2)
|
||||
stream.seek(-3, 1)
|
||||
return ei_tok[:2] == b"EI" and (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES)
|
||||
|
||||
|
||||
def extract_inline__ascii_hex_decode(stream: StreamType) -> bytes:
|
||||
"""
|
||||
Extract HexEncoded stream from inline image.
|
||||
The stream will be moved onto the EI.
|
||||
"""
|
||||
data_out: bytes = b""
|
||||
# Read data until delimiter > and EI as backup.
|
||||
while True:
|
||||
data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
|
||||
if not data_buffered:
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
pos_tok = data_buffered.find(b">")
|
||||
if pos_tok >= 0: # found >
|
||||
data_out += data_buffered[: pos_tok + 1]
|
||||
stream.seek(-len(data_buffered) + pos_tok + 1, 1)
|
||||
break
|
||||
pos_ei = data_buffered.find(b"EI")
|
||||
if pos_ei >= 0: # found EI
|
||||
stream.seek(-len(data_buffered) + pos_ei - 1, 1)
|
||||
c = stream.read(1)
|
||||
while c in WHITESPACES:
|
||||
stream.seek(-2, 1)
|
||||
c = stream.read(1)
|
||||
pos_ei -= 1
|
||||
data_out += data_buffered[:pos_ei]
|
||||
break
|
||||
if len(data_buffered) == 2:
|
||||
data_out += data_buffered
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
# Neither > nor EI found
|
||||
data_out += data_buffered[:-2]
|
||||
stream.seek(-2, 1)
|
||||
|
||||
if not _check_end_image_marker(stream):
|
||||
raise PdfReadError("EI stream not found")
|
||||
return data_out
|
||||
|
||||
|
||||
def extract_inline__ascii85_decode(stream: StreamType) -> bytes:
|
||||
"""
|
||||
Extract A85 stream from inline image.
|
||||
The stream will be moved onto the EI.
|
||||
"""
|
||||
data_out: bytes = b""
|
||||
# Read data until delimiter ~>
|
||||
while True:
|
||||
data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
|
||||
if not data_buffered:
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
pos_tok = data_buffered.find(b"~>")
|
||||
if pos_tok >= 0: # found!
|
||||
data_out += data_buffered[: pos_tok + 2]
|
||||
stream.seek(-len(data_buffered) + pos_tok + 2, 1)
|
||||
break
|
||||
if len(data_buffered) == 2: # end of buffer
|
||||
data_out += data_buffered
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
data_out += data_buffered[
|
||||
:-2
|
||||
] # back by one char in case of in the middle of ~>
|
||||
stream.seek(-2, 1)
|
||||
|
||||
if not _check_end_image_marker(stream):
|
||||
raise PdfReadError("EI stream not found")
|
||||
return data_out
|
||||
|
||||
|
||||
def extract_inline__run_length_decode(stream: StreamType) -> bytes:
|
||||
"""
|
||||
Extract RL (RunLengthDecode) stream from inline image.
|
||||
The stream will be moved onto the EI.
|
||||
"""
|
||||
data_out: bytes = b""
|
||||
# Read data until delimiter 128
|
||||
while True:
|
||||
data_buffered = stream.read(BUFFER_SIZE)
|
||||
if not data_buffered:
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
pos_tok = data_buffered.find(b"\x80")
|
||||
if pos_tok >= 0: # found
|
||||
# Ideally, we could just use plain run-length decoding here, where 80_16 = 128_10
|
||||
# marks the EOD. But there apparently are cases like in issue #3517, where we have
|
||||
# an inline image with up to 51 EOD markers. In these cases, be resilient here and
|
||||
# use the default `EI` marker detection instead. Please note that this fallback
|
||||
# still omits special `EI` handling within the stream, but for now assume that having
|
||||
# both of these cases occur at the same time is very unlikely (and the image stream
|
||||
# is broken anyway).
|
||||
# For now, do not skip over more than one whitespace character.
|
||||
after_token = data_buffered[pos_tok + 1 : pos_tok + 4]
|
||||
if after_token.startswith(b"EI") or after_token.endswith(b"EI"):
|
||||
data_out += data_buffered[: pos_tok + 1]
|
||||
stream.seek(-len(data_buffered) + pos_tok + 1, 1)
|
||||
else:
|
||||
logger_warning("Early EOD in RunLengthDecode of inline image, using fallback.", __name__)
|
||||
ei_marker = data_buffered.find(b"EI")
|
||||
if ei_marker > 0:
|
||||
data_out += data_buffered[: ei_marker]
|
||||
stream.seek(-len(data_buffered) + ei_marker - 1, 1)
|
||||
break
|
||||
data_out += data_buffered
|
||||
|
||||
if not _check_end_image_marker(stream):
|
||||
raise PdfReadError("EI stream not found")
|
||||
return data_out
|
||||
|
||||
|
||||
def extract_inline__dct_decode(stream: StreamType) -> bytes:
|
||||
"""
|
||||
Extract DCT (JPEG) stream from inline image.
|
||||
The stream will be moved onto the EI.
|
||||
"""
|
||||
def read(length: int) -> bytes:
|
||||
# If 0 bytes are returned, and *size* was not 0, this indicates end of file.
|
||||
# If the object is in non-blocking mode and no bytes are available, `None` is returned.
|
||||
_result = stream.read(length)
|
||||
if _result is None or len(_result) != length:
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
return _result
|
||||
|
||||
data_out: bytes = b""
|
||||
# Read Blocks of data (ID/Size/data) up to ID=FF/D9
|
||||
# https://www.digicamsoft.com/itu/itu-t81-36.html
|
||||
not_first = False
|
||||
while True:
|
||||
c = read(1)
|
||||
if not_first or (c == b"\xff"):
|
||||
data_out += c
|
||||
if c != b"\xff":
|
||||
continue
|
||||
not_first = True
|
||||
c = read(1)
|
||||
data_out += c
|
||||
if c == b"\xff":
|
||||
stream.seek(-1, 1) # pragma: no cover
|
||||
elif c == b"\x00": # stuffing
|
||||
pass
|
||||
elif c == b"\xd9": # end
|
||||
break
|
||||
elif c in (
|
||||
b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
b"\xda\xdb\xdc\xdd\xde\xdf"
|
||||
b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
|
||||
):
|
||||
c = read(2)
|
||||
data_out += c
|
||||
sz = c[0] * 256 + c[1]
|
||||
data_out += read(sz - 2)
|
||||
|
||||
if not _check_end_image_marker(stream):
|
||||
raise PdfReadError("EI stream not found")
|
||||
return data_out
|
||||
|
||||
|
||||
def extract_inline_default(stream: StreamType) -> bytes:
|
||||
"""Legacy method, used by default"""
|
||||
stream_out = BytesIO()
|
||||
# Read the inline image, while checking for EI (End Image) operator.
|
||||
while True:
|
||||
data_buffered = stream.read(BUFFER_SIZE)
|
||||
if not data_buffered:
|
||||
raise PdfReadError("Unexpected end of stream")
|
||||
pos_ei = data_buffered.find(
|
||||
b"E"
|
||||
)  # We cannot look for "EI" directly because it may not have been fully loaded into the buffer
|
||||
|
||||
if pos_ei == -1:
|
||||
stream_out.write(data_buffered)
|
||||
else:
|
||||
# Write out everything including E (the one from EI to be removed)
|
||||
stream_out.write(data_buffered[0 : pos_ei + 1])
|
||||
sav_pos_ei = stream_out.tell() - 1
|
||||
# Seek back in the stream to read the E next
|
||||
stream.seek(pos_ei + 1 - len(data_buffered), 1)
|
||||
saved_pos = stream.tell()
|
||||
# Check for End Image
|
||||
tok2 = stream.read(1) # I of "EI"
|
||||
if tok2 != b"I":
|
||||
stream.seek(saved_pos, 0)
|
||||
continue
|
||||
tok3 = stream.read(1) # possible space after "EI"
|
||||
if tok3 not in WHITESPACES:
|
||||
stream.seek(saved_pos, 0)
|
||||
continue
|
||||
while tok3 in WHITESPACES:
|
||||
tok3 = stream.read(1)
|
||||
if data_buffered[pos_ei - 1 : pos_ei] not in WHITESPACES and tok3 not in {
|
||||
b"Q",
|
||||
b"E",
|
||||
}: # for Q or EMC
|
||||
stream.seek(saved_pos, 0)
|
||||
continue
|
||||
if is_followed_by_binary_data(stream):
|
||||
# Inline image contains `EI ` sequence usually marking the end of it, but
|
||||
# is followed by binary data which does not make sense for the actual end.
|
||||
stream.seek(saved_pos, 0)
|
||||
continue
|
||||
# Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficient
|
||||
# remove E(I) wrongly inserted earlier
|
||||
stream.seek(saved_pos - 1, 0)
|
||||
stream_out.truncate(sav_pos_ei)
|
||||
break
|
||||
|
||||
return stream_out.getvalue()
|
||||
|
||||
|
||||
def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
|
||||
"""
|
||||
Check if the next bytes of the stream look like binary image data or regular page content.
|
||||
|
||||
This is just some heuristics due to the PDF specification being too imprecise about
|
||||
inline images containing the `EI` marker which would end an image. Starting with PDF 2.0,
|
||||
we finally get a mandatory length field, but with (proper) PDF 2.0 support being very limited
|
||||
everywhere, we should not expect to be able to remove such hacks in the near future - especially
|
||||
considering legacy documents as well.
|
||||
|
||||
The actual implementation draws some inspiration from
|
||||
https://github.com/itext/itext-java/blob/9.1.0/kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java
|
||||
"""
|
||||
position = stream.tell()
|
||||
data = stream.read(length)
|
||||
stream.seek(position)
|
||||
if not data:
|
||||
return False
|
||||
operator_start = None
|
||||
operator_end = None
|
||||
|
||||
for index, byte in enumerate(data):
|
||||
if byte < 32 and byte not in WHITESPACES_AS_BYTES:
|
||||
# This covers all characters not being displayable directly, although omitting whitespace
|
||||
# to allow for operator detection.
|
||||
return True
|
||||
is_whitespace = byte in WHITESPACES_AS_BYTES
|
||||
if operator_start is None and not is_whitespace:
|
||||
# Interpret all other non-whitespace characters as the start of an operation.
|
||||
operator_start = index
|
||||
if operator_start is not None and is_whitespace:
|
||||
# A whitespace stops an operation.
|
||||
# Assume that having an inline image with tons of whitespace is rather unlikely.
|
||||
operator_end = index
|
||||
break
|
||||
|
||||
if operator_start is None:
|
||||
# Inline images should not have tons of whitespaces, which would lead to no operator start.
|
||||
return False
|
||||
if operator_end is None:
|
||||
# We probably are inside an operation.
|
||||
operator_end = length
|
||||
operator_length = operator_end - operator_start
|
||||
operator = data[operator_start:operator_end]
|
||||
if operator.startswith(b"/") and operator_length > 1:
|
||||
# Name object.
|
||||
return False
|
||||
if operator.replace(b".", b"").isdigit():
|
||||
# Graphics operator, for example a move. A number (integer or float).
|
||||
return False
|
||||
if operator_length > 3: # noqa: SIM103
|
||||
# Usually, the operators inside a content stream should not have more than three characters,
|
||||
# especially after an inline image.
|
||||
return True
|
||||
return False
|
||||
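The heuristic above can be exercised directly, keeping in mind that pypdf.generic._image_inline is an internal module and not a stable import path. A minimal sketch with two hand-made buffers:

from io import BytesIO

from pypdf.generic._image_inline import is_followed_by_binary_data

# Page-content operators after an "EI" marker look like short text tokens,
# so this is treated as the real end of the inline image.
assert is_followed_by_binary_data(BytesIO(b"Q\nBT /F1 12 Tf")) is False

# Control bytes right after the marker look like more image data, so the
# "EI" that preceded them is assumed to be part of the image stream.
assert is_followed_by_binary_data(BytesIO(b"\x01\x8f\xaa\x00\x10binary")) is True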
118
venv/lib/python3.12/site-packages/pypdf/generic/_link.py
Normal file
118
venv/lib/python3.12/site-packages/pypdf/generic/_link.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
# This module contains code used by _writer.py to track links in pages
|
||||
# being added to the writer until the links can be resolved.
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, Union, cast
|
||||
|
||||
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .._page import PageObject
|
||||
from .._reader import PdfReader
|
||||
from .._writer import PdfWriter
|
||||
|
||||
|
||||
class NamedReferenceLink:
|
||||
"""Named reference link being preserved until we can resolve it correctly."""
|
||||
|
||||
def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
|
||||
"""reference: TextStringObject with named reference"""
|
||||
self._reference = reference
|
||||
self._source_pdf = source_pdf
|
||||
|
||||
def find_referenced_page(self) -> Union[IndirectObject, None]:
|
||||
destination = self._source_pdf.named_destinations.get(str(self._reference))
|
||||
return destination.page if destination else None
|
||||
|
||||
def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
|
||||
"""target_pdf: PdfWriter which the new link went into"""
|
||||
# point named destination in new PDF to the new page
|
||||
if str(self._reference) not in target_pdf.named_destinations:
|
||||
target_pdf.add_named_destination(str(self._reference), new_page.page_number)
|
||||
|
||||
|
||||
class DirectReferenceLink:
|
||||
"""Direct reference link being preserved until we can resolve it correctly."""
|
||||
|
||||
def __init__(self, reference: ArrayObject) -> None:
|
||||
"""reference: an ArrayObject whose first element is the Page indirect object"""
|
||||
self._reference = reference
|
||||
|
||||
def find_referenced_page(self) -> IndirectObject:
|
||||
return self._reference[0]
|
||||
|
||||
def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
|
||||
"""target_pdf: PdfWriter which the new link went into"""
|
||||
self._reference[0] = new_page
|
||||
|
||||
|
||||
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
|
||||
|
||||
|
||||
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
|
||||
"""Extracts links from two pages on the assumption that the two pages are
|
||||
the same. Produces one list of (new link, old link) tuples.
|
||||
"""
|
||||
new_links = [_build_link(link, new_page) for link in new_page.get("/Annots", [])]
|
||||
old_links = [_build_link(link, old_page) for link in old_page.get("/Annots", [])]
|
||||
|
||||
return [
|
||||
(new_link, old_link) for (new_link, old_link)
|
||||
in zip(new_links, old_links)
|
||||
if new_link and old_link
|
||||
]
|
||||
|
||||
|
||||
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
|
||||
src = cast("PdfReader", page.pdf)
|
||||
link = cast(DictionaryObject, indirect_object.get_object())
|
||||
if (not isinstance(link, DictionaryObject)) or link.get("/Subtype") != "/Link":
|
||||
return None
|
||||
|
||||
if "/A" in link:
|
||||
action = cast(DictionaryObject, link["/A"])
|
||||
if action.get("/S") != "/GoTo":
|
||||
return None
|
||||
|
||||
if "/D" not in action:
|
||||
return None
|
||||
return _create_link(action["/D"], src)
|
||||
|
||||
if "/Dest" in link:
|
||||
return _create_link(link["/Dest"], src)
|
||||
|
||||
return None # Nothing to do here
|
||||
|
||||
|
||||
def _create_link(reference: PdfObject, source_pdf: "PdfReader")-> Optional[ReferenceLink]:
|
||||
if isinstance(reference, TextStringObject):
|
||||
return NamedReferenceLink(reference, source_pdf)
|
||||
if isinstance(reference, ArrayObject):
|
||||
return DirectReferenceLink(reference)
|
||||
return None
|
||||
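These link helpers are internal plumbing used while copying pages into a writer; at the public level, their effect is that intra-document /GoTo links keep pointing at the right pages after a copy. A minimal sketch, where sample.pdf is a made-up input document containing internal links:

from pypdf import PdfReader, PdfWriter

reader = PdfReader("sample.pdf")
writer = PdfWriter()
# While appending, the writer records each link (named or direct) and later
# patches its reference so it targets the newly written page objects.
writer.append(reader)
with open("copy.pdf", "wb") as fh:
    writer.write(fh)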
33
venv/lib/python3.12/site-packages/pypdf/generic/_outline.py
Normal file
33
venv/lib/python3.12/site-packages/pypdf/generic/_outline.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from typing import Union
|
||||
|
||||
from .._utils import StreamType, deprecation_no_replacement
|
||||
from ._base import NameObject
|
||||
from ._data_structures import Destination
|
||||
|
||||
|
||||
class OutlineItem(Destination):
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
if encryption_key is not None: # deprecated
|
||||
deprecation_no_replacement(
|
||||
"the encryption_key parameter of write_to_stream", "5.0.0"
|
||||
)
|
||||
stream.write(b"<<\n")
|
||||
for key in [
|
||||
NameObject(x)
|
||||
for x in ["/Title", "/Parent", "/First", "/Last", "/Next", "/Prev"]
|
||||
if x in self
|
||||
]:
|
||||
key.write_to_stream(stream)
|
||||
stream.write(b" ")
|
||||
value = self.raw_get(key)
|
||||
value.write_to_stream(stream)
|
||||
stream.write(b"\n")
|
||||
key = NameObject("/Dest")
|
||||
key.write_to_stream(stream)
|
||||
stream.write(b" ")
|
||||
value = self.dest_array
|
||||
value.write_to_stream(stream)
|
||||
stream.write(b"\n")
|
||||
stream.write(b">>")
|
||||
132
venv/lib/python3.12/site-packages/pypdf/generic/_rectangle.py
Normal file
132
venv/lib/python3.12/site-packages/pypdf/generic/_rectangle.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from typing import Any, Union
|
||||
|
||||
from ._base import FloatObject, NumberObject
|
||||
from ._data_structures import ArrayObject
|
||||
|
||||
|
||||
class RectangleObject(ArrayObject):
|
||||
"""
|
||||
This class is used to represent *page boxes* in pypdf.
|
||||
|
||||
These boxes include:
|
||||
|
||||
* :attr:`artbox <pypdf._page.PageObject.artbox>`
|
||||
* :attr:`bleedbox <pypdf._page.PageObject.bleedbox>`
|
||||
* :attr:`cropbox <pypdf._page.PageObject.cropbox>`
|
||||
* :attr:`mediabox <pypdf._page.PageObject.mediabox>`
|
||||
* :attr:`trimbox <pypdf._page.PageObject.trimbox>`
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, arr: Union["RectangleObject", tuple[float, float, float, float]]
|
||||
) -> None:
|
||||
# must have four points
|
||||
assert len(arr) == 4
|
||||
# automatically convert arr[x] into NumberObject(arr[x]) if necessary
|
||||
ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr])
|
||||
|
||||
def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
|
||||
if not isinstance(value, (FloatObject, NumberObject)):
|
||||
value = FloatObject(value)
|
||||
return value
|
||||
|
||||
def scale(self, sx: float, sy: float) -> "RectangleObject":
|
||||
return RectangleObject(
|
||||
(
|
||||
float(self.left) * sx,
|
||||
float(self.bottom) * sy,
|
||||
float(self.right) * sx,
|
||||
float(self.top) * sy,
|
||||
)
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"RectangleObject({list(self)!r})"
|
||||
|
||||
@property
|
||||
def left(self) -> FloatObject:
|
||||
return self[0]
|
||||
|
||||
@left.setter
|
||||
def left(self, f: float) -> None:
|
||||
self[0] = FloatObject(f)
|
||||
|
||||
@property
|
||||
def bottom(self) -> FloatObject:
|
||||
return self[1]
|
||||
|
||||
@bottom.setter
|
||||
def bottom(self, f: float) -> None:
|
||||
self[1] = FloatObject(f)
|
||||
|
||||
@property
|
||||
def right(self) -> FloatObject:
|
||||
return self[2]
|
||||
|
||||
@right.setter
|
||||
def right(self, f: float) -> None:
|
||||
self[2] = FloatObject(f)
|
||||
|
||||
@property
|
||||
def top(self) -> FloatObject:
|
||||
return self[3]
|
||||
|
||||
@top.setter
|
||||
def top(self, f: float) -> None:
|
||||
self[3] = FloatObject(f)
|
||||
|
||||
@property
|
||||
def lower_left(self) -> tuple[float, float]:
|
||||
"""
|
||||
Property to read and modify the lower left coordinate of this box
|
||||
in (x,y) form.
|
||||
"""
|
||||
return self.left, self.bottom
|
||||
|
||||
@lower_left.setter
|
||||
def lower_left(self, value: tuple[float, float]) -> None:
|
||||
self[0], self[1] = (self._ensure_is_number(x) for x in value)
|
||||
|
||||
@property
|
||||
def lower_right(self) -> tuple[float, float]:
|
||||
"""
|
||||
Property to read and modify the lower right coordinate of this box
|
||||
in (x,y) form.
|
||||
"""
|
||||
return self.right, self.bottom
|
||||
|
||||
@lower_right.setter
|
||||
def lower_right(self, value: tuple[float, float]) -> None:
|
||||
self[2], self[1] = (self._ensure_is_number(x) for x in value)
|
||||
|
||||
@property
|
||||
def upper_left(self) -> tuple[float, float]:
|
||||
"""
|
||||
Property to read and modify the upper left coordinate of this box
|
||||
in (x,y) form.
|
||||
"""
|
||||
return self.left, self.top
|
||||
|
||||
@upper_left.setter
|
||||
def upper_left(self, value: tuple[float, float]) -> None:
|
||||
self[0], self[3] = (self._ensure_is_number(x) for x in value)
|
||||
|
||||
@property
|
||||
def upper_right(self) -> tuple[float, float]:
|
||||
"""
|
||||
Property to read and modify the upper right coordinate of this box
|
||||
in (x,y) form.
|
||||
"""
|
||||
return self.right, self.top
|
||||
|
||||
@upper_right.setter
|
||||
def upper_right(self, value: tuple[float, float]) -> None:
|
||||
self[2], self[3] = (self._ensure_is_number(x) for x in value)
|
||||
|
||||
@property
|
||||
def width(self) -> float:
|
||||
return self.right - self.left
|
||||
|
||||
@property
|
||||
def height(self) -> float:
|
||||
return self.top - self.bottom
|
||||
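RectangleObject is the type backing the page-box properties listed in the docstring above; it can also be built and inspected on its own. A minimal sketch:

from pypdf.generic import RectangleObject

box = RectangleObject((0, 0, 612, 792))  # US Letter media box, in points
assert box.width == 612 and box.height == 792

# The corner properties read and write (x, y) pairs.
box.upper_right = (306, 396)
assert box.right == 306 and box.top == 396

# scale() returns a new box and leaves the original untouched.
half = RectangleObject((0, 0, 612, 792)).scale(0.5, 0.5)
assert half.width == 306 and half.height == 396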
208
venv/lib/python3.12/site-packages/pypdf/generic/_utils.py
Normal file
208
venv/lib/python3.12/site-packages/pypdf/generic/_utils.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import codecs
|
||||
from typing import Union
|
||||
|
||||
from .._codecs import _pdfdoc_encoding
|
||||
from .._utils import StreamType, logger_warning, read_non_whitespace
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
|
||||
from ._base import ByteStringObject, TextStringObject
|
||||
|
||||
|
||||
def hex_to_rgb(value: str) -> tuple[float, float, float]:
|
||||
return tuple(int(value.lstrip("#")[i : i + 2], 16) / 255.0 for i in (0, 2, 4)) # type: ignore
|
||||
|
||||
|
||||
def read_hex_string_from_stream(
|
||||
stream: StreamType,
|
||||
forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
|
||||
) -> Union["TextStringObject", "ByteStringObject"]:
|
||||
stream.read(1)
|
||||
arr = []
|
||||
x = b""
|
||||
while True:
|
||||
tok = read_non_whitespace(stream)
|
||||
if not tok:
|
||||
raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
|
||||
if tok == b">":
|
||||
break
|
||||
x += tok
|
||||
if len(x) == 2:
|
||||
arr.append(int(x, base=16))
|
||||
x = b""
|
||||
if len(x) == 1:
|
||||
x += b"0"
|
||||
if x != b"":
|
||||
arr.append(int(x, base=16))
|
||||
return create_string_object(bytes(arr), forced_encoding)
|
||||
|
||||
|
||||
__ESCAPE_DICT__ = {
|
||||
b"n": ord(b"\n"),
|
||||
b"r": ord(b"\r"),
|
||||
b"t": ord(b"\t"),
|
||||
b"b": ord(b"\b"),
|
||||
b"f": ord(b"\f"),
|
||||
b"(": ord(b"("),
|
||||
b")": ord(b")"),
|
||||
b"/": ord(b"/"),
|
||||
b"\\": ord(b"\\"),
|
||||
b" ": ord(b" "),
|
||||
b"%": ord(b"%"),
|
||||
b"<": ord(b"<"),
|
||||
b">": ord(b">"),
|
||||
b"[": ord(b"["),
|
||||
b"]": ord(b"]"),
|
||||
b"#": ord(b"#"),
|
||||
b"_": ord(b"_"),
|
||||
b"&": ord(b"&"),
|
||||
b"$": ord(b"$"),
|
||||
}
|
||||
__BACKSLASH_CODE__ = 92
|
||||
|
||||
|
||||
def read_string_from_stream(
|
||||
stream: StreamType,
|
||||
forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
|
||||
) -> Union["TextStringObject", "ByteStringObject"]:
|
||||
tok = stream.read(1)
|
||||
parens = 1
|
||||
txt = []
|
||||
while True:
|
||||
tok = stream.read(1)
|
||||
if not tok:
|
||||
raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
|
||||
if tok == b"(":
|
||||
parens += 1
|
||||
elif tok == b")":
|
||||
parens -= 1
|
||||
if parens == 0:
|
||||
break
|
||||
elif tok == b"\\":
|
||||
tok = stream.read(1)
|
||||
try:
|
||||
txt.append(__ESCAPE_DICT__[tok])
|
||||
continue
|
||||
except KeyError:
|
||||
if b"0" <= tok <= b"7":
|
||||
# "The number ddd may consist of one, two, or three
|
||||
# octal digits; high-order overflow shall be ignored.
|
||||
# Three octal digits shall be used, with leading zeros
|
||||
# as needed, if the next character of the string is also
|
||||
# a digit." (PDF reference 7.3.4.2, p 16)
|
||||
sav = stream.tell() - 1
|
||||
for _ in range(2):
|
||||
ntok = stream.read(1)
|
||||
if b"0" <= ntok <= b"7":
|
||||
tok += ntok
|
||||
else:
|
||||
stream.seek(-1, 1) # ntok has to be analyzed
|
||||
break
|
||||
i = int(tok, base=8)
|
||||
if i > 255:
|
||||
txt.append(__BACKSLASH_CODE__)
|
||||
stream.seek(sav)
|
||||
else:
|
||||
txt.append(i)
|
||||
continue
|
||||
if tok in b"\n\r":
|
||||
# This case is hit when a backslash followed by a line
|
||||
# break occurs. If it's a multi-char EOL, consume the
|
||||
# second character:
|
||||
tok = stream.read(1)
|
||||
if tok not in b"\n\r":
|
||||
stream.seek(-1, 1)
|
||||
# Then don't add anything to the actual string, since this
|
||||
# line break was escaped:
|
||||
continue
|
||||
msg = f"Unexpected escaped string: {tok.decode('utf-8', 'ignore')}"
|
||||
logger_warning(msg, __name__)
|
||||
txt.append(__BACKSLASH_CODE__)
|
||||
txt.append(ord(tok))
|
||||
return create_string_object(bytes(txt), forced_encoding)
|
||||
|
||||
|
||||
def create_string_object(
|
||||
string: Union[str, bytes],
|
||||
forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
|
||||
) -> Union[TextStringObject, ByteStringObject]:
|
||||
"""
|
||||
Create a ByteStringObject or a TextStringObject from a string to represent the string.
|
||||
|
||||
Args:
|
||||
string: The data being used
|
||||
forced_encoding: Typically None, or an encoding string
|
||||
|
||||
Returns:
|
||||
A TextStringObject or ByteStringObject, depending on the input.
|
||||
|
||||
Raises:
|
||||
TypeError: If string is not of type str or bytes.
|
||||
|
||||
"""
|
||||
if isinstance(string, str):
|
||||
return TextStringObject(string)
|
||||
if isinstance(string, bytes):
|
||||
if isinstance(forced_encoding, (list, dict)):
|
||||
out = ""
|
||||
for x in string:
|
||||
try:
|
||||
out += forced_encoding[x]
|
||||
except Exception:
|
||||
out += bytes((x,)).decode("charmap")
|
||||
obj = TextStringObject(out)
|
||||
obj._original_bytes = string
|
||||
return obj
|
||||
if isinstance(forced_encoding, str):
|
||||
if forced_encoding == "bytes":
|
||||
return ByteStringObject(string)
|
||||
obj = TextStringObject(string.decode(forced_encoding))
|
||||
obj._original_bytes = string
|
||||
return obj
|
||||
try:
|
||||
if string.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
|
||||
retval = TextStringObject(string.decode("utf-16"))
|
||||
retval._original_bytes = string
|
||||
retval.autodetect_utf16 = True
|
||||
retval.utf16_bom = string[:2]
|
||||
return retval
|
||||
if string.startswith(b"\x00"):
|
||||
retval = TextStringObject(string.decode("utf-16be"))
|
||||
retval._original_bytes = string
|
||||
retval.autodetect_utf16 = True
|
||||
retval.utf16_bom = codecs.BOM_UTF16_BE
|
||||
return retval
|
||||
if string[1:2] == b"\x00":
|
||||
retval = TextStringObject(string.decode("utf-16le"))
|
||||
retval._original_bytes = string
|
||||
retval.autodetect_utf16 = True
|
||||
retval.utf16_bom = codecs.BOM_UTF16_LE
|
||||
return retval
|
||||
|
||||
# This is probably a big performance hit here, but we need
|
||||
# to convert string objects into the text/unicode-aware
|
||||
# version if possible... and the only way to check if that's
|
||||
# possible is to try.
|
||||
# Some strings are strings, some are just byte arrays.
|
||||
retval = TextStringObject(decode_pdfdocencoding(string))
|
||||
retval._original_bytes = string
|
||||
retval.autodetect_pdfdocencoding = True
|
||||
return retval
|
||||
except UnicodeDecodeError:
|
||||
return ByteStringObject(string)
|
||||
else:
|
||||
raise TypeError("create_string_object should have str or unicode arg")
|
||||
|
||||
|
||||
def decode_pdfdocencoding(byte_array: bytes) -> str:
|
||||
retval = ""
|
||||
for b in byte_array:
|
||||
c = _pdfdoc_encoding[b]
|
||||
if c == "\u0000":
|
||||
raise UnicodeDecodeError(
|
||||
"pdfdocencoding",
|
||||
bytearray(b),
|
||||
-1,
|
||||
-1,
|
||||
"does not exist in translation table",
|
||||
)
|
||||
retval += c
|
||||
return retval
|
||||
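create_string_object above decides between PDFDocEncoding and UTF-16 based on the raw bytes it receives. A minimal sketch of both branches, assuming the public pypdf.generic exports:

import codecs

from pypdf.generic import TextStringObject, create_string_object

# Plain ASCII fits into PDFDocEncoding and comes back as a text string.
simple = create_string_object(b"Hello")
assert isinstance(simple, TextStringObject) and str(simple) == "Hello"

# A UTF-16BE BOM switches to the UTF-16 branch; the original bytes are kept
# so the string can later be rewritten exactly as it was read.
utf16 = create_string_object(codecs.BOM_UTF16_BE + "Héllo".encode("utf-16be"))
assert str(utf16) == "Héllo"
assert utf16.get_original_bytes().startswith(codecs.BOM_UTF16_BE)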
163
venv/lib/python3.12/site-packages/pypdf/generic/_viewerpref.py
Normal file
163
venv/lib/python3.12/site-packages/pypdf/generic/_viewerpref.py
Normal file
@@ -0,0 +1,163 @@
# Copyright (c) 2023, Pubpub-ZZ
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from typing import (
    Any,
    Optional,
)

from ._base import BooleanObject, NameObject, NumberObject, is_null_or_none
from ._data_structures import ArrayObject, DictionaryObject

f_obj = BooleanObject(False)


class ViewerPreferences(DictionaryObject):
    def __init__(self, obj: Optional[DictionaryObject] = None) -> None:
        super().__init__(self)
        if not is_null_or_none(obj):
            self.update(obj.items())  # type: ignore
            try:
                self.indirect_reference = obj.indirect_reference  # type: ignore
            except AttributeError:
                pass

    def _get_bool(self, key: str, default: Optional[BooleanObject]) -> Optional[BooleanObject]:
        return self.get(key, default)

    def _set_bool(self, key: str, v: bool) -> None:
        self[NameObject(key)] = BooleanObject(v is True)

    def _get_name(self, key: str, default: Optional[NameObject]) -> Optional[NameObject]:
        return self.get(key, default)

    def _set_name(self, key: str, lst: list[str], v: NameObject) -> None:
        if v[0] != "/":
            raise ValueError(f"{v} does not start with '/'")
        if lst != [] and v not in lst:
            raise ValueError(f"{v} is an unacceptable value")
        self[NameObject(key)] = NameObject(v)

    def _get_arr(self, key: str, default: Optional[list[Any]]) -> Optional[ArrayObject]:
        return self.get(key, None if default is None else ArrayObject(default))

    def _set_arr(self, key: str, v: Optional[ArrayObject]) -> None:
        if v is None:
            try:
                del self[NameObject(key)]
            except KeyError:
                pass
            return
        if not isinstance(v, ArrayObject):
            raise ValueError("ArrayObject is expected")
        self[NameObject(key)] = v

    def _get_int(self, key: str, default: Optional[NumberObject]) -> Optional[NumberObject]:
        return self.get(key, default)

    def _set_int(self, key: str, v: int) -> None:
        self[NameObject(key)] = NumberObject(v)

    @property
    def PRINT_SCALING(self) -> NameObject:
        return NameObject("/PrintScaling")

    def __new__(cls: Any, value: Any = None) -> "ViewerPreferences":
        def _add_prop_bool(key: str, default: Optional[BooleanObject]) -> property:
            return property(
                lambda self: self._get_bool(key, default),
                lambda self, v: self._set_bool(key, v),
                None,
                f"""
                Return or modify the value of {key}; returns {default} if not defined.
                """,
            )

        def _add_prop_name(
            key: str, lst: list[str], default: Optional[NameObject]
        ) -> property:
            return property(
                lambda self: self._get_name(key, default),
                lambda self, v: self._set_name(key, lst, v),
                None,
                f"""
                Return or modify the value of {key}; returns {default} if not defined.
                Acceptable values: {lst}
                """,
            )

        def _add_prop_arr(key: str, default: Optional[ArrayObject]) -> property:
            return property(
                lambda self: self._get_arr(key, default),
                lambda self, v: self._set_arr(key, v),
                None,
                f"""
                Return or modify the value of {key}; returns {default} if not defined.
                """,
            )

        def _add_prop_int(key: str, default: Optional[int]) -> property:
            return property(
                lambda self: self._get_int(key, default),
                lambda self, v: self._set_int(key, v),
                None,
                f"""
                Return or modify the value of {key}; returns {default} if not defined.
                """,
            )

        cls.hide_toolbar = _add_prop_bool("/HideToolbar", f_obj)
        cls.hide_menubar = _add_prop_bool("/HideMenubar", f_obj)
        cls.hide_windowui = _add_prop_bool("/HideWindowUI", f_obj)
        cls.fit_window = _add_prop_bool("/FitWindow", f_obj)
        cls.center_window = _add_prop_bool("/CenterWindow", f_obj)
        cls.display_doctitle = _add_prop_bool("/DisplayDocTitle", f_obj)

        cls.non_fullscreen_pagemode = _add_prop_name(
            "/NonFullScreenPageMode",
            ["/UseNone", "/UseOutlines", "/UseThumbs", "/UseOC"],
            NameObject("/UseNone"),
        )
        cls.direction = _add_prop_name(
            "/Direction", ["/L2R", "/R2L"], NameObject("/L2R")
        )
        cls.view_area = _add_prop_name("/ViewArea", [], None)
        cls.view_clip = _add_prop_name("/ViewClip", [], None)
        cls.print_area = _add_prop_name("/PrintArea", [], None)
        cls.print_clip = _add_prop_name("/PrintClip", [], None)
        cls.print_scaling = _add_prop_name("/PrintScaling", [], None)
        cls.duplex = _add_prop_name(
            "/Duplex", ["/Simplex", "/DuplexFlipShortEdge", "/DuplexFlipLongEdge"], None
        )
        cls.pick_tray_by_pdfsize = _add_prop_bool("/PickTrayByPDFSize", None)
        cls.print_pagerange = _add_prop_arr("/PrintPageRange", None)
        cls.num_copies = _add_prop_int("/NumCopies", None)

        cls.enforce = _add_prop_arr("/Enforce", ArrayObject())

        return DictionaryObject.__new__(cls)
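

if __name__ == "__main__":  # pragma: no cover
    # Minimal usage sketch (editor's addition, not in upstream pypdf); run
    # with ``python -m pypdf.generic._viewerpref``. The properties installed
    # in __new__ above validate and store the underlying dictionary entries.
    vp = ViewerPreferences()
    vp.hide_toolbar = True             # stored under /HideToolbar as BooleanObject(True)
    vp.direction = NameObject("/R2L")  # checked against the allowed ["/L2R", "/R2L"]
    print(vp.hide_toolbar, vp.direction)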