Update ashboard, dashboard, memory +1 more (+2 ~3)

This commit is contained in:
Echo
2026-02-02 22:27:24 +00:00
parent 4f00131184
commit b0c9b254f1
65 changed files with 42112 additions and 53 deletions

View File

@@ -0,0 +1,115 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
from ..constants import OutlineFontFlag
from ._base import (
BooleanObject,
ByteStringObject,
FloatObject,
IndirectObject,
NameObject,
NullObject,
NumberObject,
PdfObject,
TextStringObject,
encode_pdfdocencoding,
is_null_or_none,
)
from ._data_structures import (
ArrayObject,
ContentStream,
DecodedStreamObject,
Destination,
DictionaryObject,
EncodedStreamObject,
Field,
StreamObject,
TreeObject,
read_object,
)
from ._files import EmbeddedFile
from ._fit import Fit
from ._link import DirectReferenceLink, NamedReferenceLink, ReferenceLink, extract_links
from ._outline import OutlineItem
from ._rectangle import RectangleObject
from ._utils import (
create_string_object,
decode_pdfdocencoding,
hex_to_rgb,
read_hex_string_from_stream,
read_string_from_stream,
)
from ._viewerpref import ViewerPreferences
# Module-level Fit instance, built once via Fit.fit() and re-exported below.
PAGE_FIT = Fit.fit()

# Names re-exported as the public API of this package.
# The list is kept alphabetically sorted within each group.
__all__ = [
    "PAGE_FIT",
    "ArrayObject",
    "BooleanObject",
    "ByteStringObject",
    "ContentStream",
    "DecodedStreamObject",
    "Destination",
    "DictionaryObject",
    "DirectReferenceLink",
    "EmbeddedFile",
    "EncodedStreamObject",
    "Field",
    "Fit",
    "FloatObject",
    "IndirectObject",
    "NameObject",
    "NamedReferenceLink",
    "NullObject",
    "NumberObject",
    "OutlineFontFlag",
    "OutlineItem",
    "PdfObject",
    "RectangleObject",
    "ReferenceLink",
    "StreamObject",
    "TextStringObject",
    "TreeObject",
    "ViewerPreferences",
    # Utility functions
    "create_string_object",
    "decode_pdfdocencoding",
    "encode_pdfdocencoding",
    "extract_links",
    "hex_to_rgb",
    "is_null_or_none",
    "read_hex_string_from_stream",
    # Data structures core functions
    "read_object",
    # Utility function (imported from ._utils); placed here by alphabetical order
    "read_string_from_stream",
]

View File

@@ -0,0 +1,547 @@
import re
from dataclasses import dataclass
from enum import IntEnum
from typing import Any, Optional, Union, cast
from .._codecs import fill_from_encoding
from .._codecs.core_fontmetrics import CORE_FONT_METRICS
from .._font import Font
from .._utils import logger_warning
from ..constants import AnnotationDictionaryAttributes, BorderStyles, FieldDictionaryAttributes
from ..generic import (
DecodedStreamObject,
DictionaryObject,
NameObject,
NumberObject,
RectangleObject,
)
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
# Starting font size for multiline fields whose font size is auto-calculated
# (font_size == 0); the text scaling step may then shrink it to fit the field.
DEFAULT_FONT_SIZE_IN_MULTILINE = 12
@dataclass
class BaseStreamConfig:
    """
    A container representing the basic layout of an appearance stream.

    All fields have defaults, so ``BaseStreamConfig()`` yields an empty
    rectangle with a solid border of width 1.
    """

    # Bounding box of the stream: either a RectangleObject or a 4-tuple of floats.
    rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0)
    border_width: int = 1  # The width of the border in points
    # Border style name; defaults to BorderStyles.SOLID.
    border_style: str = BorderStyles.SOLID
class BaseStreamAppearance(DecodedStreamObject):
    """The most basic appearance stream: a Form XObject defined by a rectangle and a border."""

    def __init__(self, layout: Optional[BaseStreamConfig] = None) -> None:
        """
        Set up the stream dictionary from the appearance stream layout.

        Args:
            layout: The basic layout parameters. When omitted, a default
                ``BaseStreamConfig`` is used.

        """
        super().__init__()
        if not layout:
            layout = BaseStreamConfig()
        self._layout = layout
        # Fixed entries identifying this stream as a Form XObject.
        for entry_key, entry_name in (("/Type", "/XObject"), ("/Subtype", "/Form")):
            self[NameObject(entry_key)] = NameObject(entry_name)
        # The bounding box is taken straight from the layout rectangle.
        self[NameObject("/BBox")] = RectangleObject(self._layout.rectangle)
class TextAlignment(IntEnum):
    """Defines the alignment options for text within a form field's appearance stream."""

    # Being an IntEnum, members compare equal to the plain integers 0, 1 and 2,
    # so an integer alignment value can be compared directly against them.
    LEFT = 0
    CENTER = 1
    RIGHT = 2
class TextStreamAppearance(BaseStreamAppearance):
    """
    A class representing the appearance stream for a text-based form field.

    This class generates the content stream (the `ap_stream_data`) that dictates
    how text is rendered within a form field's bounding box. It handles properties
    like font, font size, color, multiline text, and text selection highlighting.
    """

    def _scale_text(
        self,
        font: Font,
        font_size: float,
        leading_factor: float,
        field_width: float,
        field_height: float,
        text: str,
        min_font_size: float,
        font_size_step: float = 0.2
    ) -> tuple[list[tuple[float, str]], float]:
        """
        Takes a piece of text and scales it to field_width or field_height, given font_name
        and font_size. Wraps text where necessary.

        The text is wrapped at the current font size first. If the wrapped lines
        are estimated to overflow ``field_height``, the method calls itself with
        the font size reduced by ``font_size_step``, as long as the reduced size
        does not fall below ``min_font_size``.

        Args:
            font: The font to be used.
            font_size: The font size in points.
            leading_factor: The line distance.
            field_width: The width of the field in which to fit the text.
            field_height: The height of the field in which to fit the text.
            text: The text to fit with the field.
            min_font_size: The minimum font size at which to scale the text.
            font_size_step: The amount by which to decrement font size per step while scaling.

        Returns:
            The text in the form of list of tuples, each tuple containing the length of a line
            and its contents, and the font_size for these lines and lengths.

        """
        orig_text = text
        # Treat "\n" and "\r" alike as paragraph separators.
        paragraphs = text.replace("\n", "\r").split("\r")
        wrapped_lines = []
        current_line_words: list[str] = []
        current_line_width: float = 0
        space_width = font.space_width * font_size / 1000
        for paragraph in paragraphs:
            if not paragraph.strip():
                # Blank paragraph: keep it as an empty line.
                wrapped_lines.append((0.0, ""))
                continue
            words = paragraph.split(" ")
            for i, word in enumerate(words):
                word_width = font.text_width(word) * font_size / 1000
                test_width = current_line_width + word_width + (space_width if i else 0)
                if test_width > field_width and current_line_words:
                    # The word does not fit: flush the current line and start a new one.
                    wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                    current_line_words = [word]
                    current_line_width = word_width
                elif not current_line_words and word_width > field_width:
                    # A single word wider than the field gets a line of its own.
                    wrapped_lines.append((word_width, word))
                    current_line_words = []
                    current_line_width = 0
                else:
                    if current_line_words:
                        current_line_width += space_width
                    current_line_words.append(word)
                    current_line_width += word_width
            if current_line_words:
                # Flush whatever remains of the paragraph.
                wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                current_line_words = []
                current_line_width = 0
        # Estimate total height.
        estimated_total_height = font_size + (len(wrapped_lines) - 1) * leading_factor * font_size
        if estimated_total_height > field_height:
            # Text overflows height; Retry with smaller font size.
            new_font_size = font_size - font_size_step
            if new_font_size >= min_font_size:
                return self._scale_text(
                    font,
                    new_font_size,
                    leading_factor,
                    field_width,
                    field_height,
                    orig_text,
                    min_font_size,
                    font_size_step
                )
        return wrapped_lines, round(font_size, 1)

    def _generate_appearance_stream_data(
        self,
        text: str,
        selection: Union[list[str], None],
        font: Font,
        font_glyph_byte_map: Optional[dict[str, bytes]] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False,
        alignment: TextAlignment = TextAlignment.LEFT,
        is_comb: bool = False,
        max_length: Optional[int] = None
    ) -> bytes:
        """
        Generates the raw bytes of the PDF appearance stream for a text field.

        This private method assembles the PDF content stream operators to draw
        the provided text within the specified rectangle. It handles text positioning,
        font application, color, and special formatting like selected text.

        Args:
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            font: The font to use.
            font_glyph_byte_map: An optional dictionary mapping characters to their
                byte representation for glyph encoding.
            font_name: The name of the font resource to use (e.g., "/Helv").
            font_size: The font size. If 0, it is automatically calculated
                based on whether the field is multiline or not.
            font_color: The color to apply to the font, represented as a PDF
                graphics state string (e.g., "0 g" for black).
            is_multiline: A boolean indicating if the text field is multiline.
            alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
            is_comb: Boolean that designates fixed-length fields, where every character
                fills one "cell", such as in a postcode.
            max_length: Used if is_comb is set. The maximum number of characters for a fixed-
                length field.

        Returns:
            A byte string containing the PDF content stream data.

        """
        rectangle = self._layout.rectangle
        font_glyph_byte_map = font_glyph_byte_map or {}
        if isinstance(rectangle, tuple):
            rectangle = RectangleObject(rectangle)
        leading_factor = (font.font_descriptor.bbox[3] - font.font_descriptor.bbox[1]) / 1000.0

        # Set margins based on border width and style, but never less than 1 point
        factor = 2 if self._layout.border_style in {"/B", "/I"} else 1
        margin = max(self._layout.border_width * factor, 1)
        field_height = rectangle.height - 2 * margin
        field_width = rectangle.width - 4 * margin

        # If font_size is 0, apply the logic for multiline or large-as-possible font
        if font_size == 0:
            min_font_size = 4.0  # The minimum font size
            if selection:  # Don't wrap text when dealing with a /Ch field, in order to prevent problems
                is_multiline = False  # with matching "selection" with "line" later on.
            if is_multiline:
                font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
                lines, font_size = self._scale_text(
                    font,
                    font_size,
                    leading_factor,
                    field_width,
                    field_height,
                    text,
                    min_font_size
                )
            else:
                max_vertical_size = field_height / leading_factor
                text_width_unscaled = font.text_width(text) / 1000
                # "or 1" avoids a division by zero for text of zero width.
                max_horizontal_size = field_width / (text_width_unscaled or 1)
                font_size = round(max(min(max_vertical_size, max_horizontal_size), min_font_size), 1)
                lines = [(text_width_unscaled * font_size, text)]
        elif is_comb:
            if max_length and len(text) > max_length:
                logger_warning(
                    f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
                    __name__
                )
            # We act as if each character is one line, because we draw it separately later on
            lines = [(
                font.text_width(char) * font_size / 1000,
                char
            ) for index, char in enumerate(text) if index < (max_length or len(text))]
        else:
            lines = [(
                font.text_width(line) * font_size / 1000,
                line
            ) for line in text.replace("\n", "\r").split("\r")]

        # Set the vertical offset
        if is_multiline:
            y_offset = rectangle.height + margin - font.font_descriptor.bbox[3] * font_size / 1000.0
        else:
            y_offset = margin + ((field_height - font.font_descriptor.ascent * font_size / 1000) / 2)
        default_appearance = f"{font_name} {font_size} Tf {font_color}"

        ap_stream = (
            f"q\n/Tx BMC \nq\n{2 * margin} {margin} {field_width} {field_height} "
            f"re\nW\nBT\n{default_appearance}\n"
        ).encode()
        current_x_pos: float = 0  # Initial virtual position within the text object.
        for line_number, (line_width, line) in enumerate(lines):
            if selection and line in selection:
                # Might be improved, but cannot find how to get fill working => replaced with lined box
                ap_stream += (
                    f"1 {y_offset - (line_number * font_size * leading_factor) - 1} "
                    f"{rectangle.width - 2} {font_size + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
                ).encode()

            # Calculate the desired absolute starting X for the current line
            desired_abs_x_start: float = 0
            if is_comb and max_length:
                # Calculate the width of a cell for one character
                cell_width = rectangle.width / max_length
                # Space from the left edge of the cell to the character's baseline start
                # line_width here is the *actual* character width in points for the single character 'line'
                centering_offset_in_cell = (cell_width - line_width) / 2
                # Absolute start X = (Cell Index, i.e., line_number * Cell Width) + Centering Offset
                desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
            elif alignment == TextAlignment.RIGHT:
                desired_abs_x_start = rectangle.width - margin * 2 - line_width
            elif alignment == TextAlignment.CENTER:
                desired_abs_x_start = (rectangle.width - line_width) / 2
            else:  # Left aligned; default
                desired_abs_x_start = margin * 2
            # Calculate x_rel_offset: how much to move from the current_x_pos
            # to reach the desired_abs_x_start.
            x_rel_offset = desired_abs_x_start - current_x_pos

            # Y-offset:
            y_rel_offset: float = 0
            if line_number == 0:
                y_rel_offset = y_offset  # Initial vertical position
            elif is_comb:
                y_rel_offset = 0.0  # DO NOT move vertically for subsequent characters
            else:
                y_rel_offset = - font_size * leading_factor  # Move down by line height

            # Td is a relative translation (Tx and Ty).
            # It updates the current text position.
            ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode()
            # Update current_x_pos based on the Td operation for the next iteration.
            # This is the X position where the *current line* will start.
            current_x_pos = desired_abs_x_start

            # Characters missing from the glyph map fall back to UTF-16-BE bytes.
            encoded_line: list[bytes] = [
                font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
            ]
            if any(len(c) >= 2 for c in encoded_line):
                # At least one multi-byte code: emit the line as a hex string.
                ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n"
            else:
                ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n"
        ap_stream += b"ET\nQ\nEMC\nQ\n"
        return ap_stream

    def __init__(
        self,
        layout: Optional[BaseStreamConfig] = None,
        text: str = "",
        selection: Optional[list[str]] = None,
        font_resource: Optional[DictionaryObject] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False,
        alignment: TextAlignment = TextAlignment.LEFT,
        is_comb: bool = False,
        max_length: Optional[int] = None
    ) -> None:
        """
        Initializes a TextStreamAppearance object.

        This constructor creates a new PDF stream object configured as an XObject
        of subtype Form. It uses the `_generate_appearance_stream_data` method to
        generate the content for the stream.

        Args:
            layout: The basic layout parameters.
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            font_resource: An optional variable that represents a PDF font dictionary.
                When missing, a Helvetica fallback is used and a warning is logged.
            font_name: The name of the font resource, e.g., "/Helv".
            font_size: The font size. If 0, it's auto-calculated.
            font_color: The font color string.
            is_multiline: A boolean indicating if the text field is multiline.
            alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
            is_comb: Boolean that designates fixed-length fields, where every character
                fills one "cell", such as in a postcode.
            max_length: Used if is_comb is set. The maximum number of characters for a fixed-
                length field.

        """
        super().__init__(layout)

        # If a font resource was added, get the font character map
        if font_resource:
            font_resource = cast(DictionaryObject, font_resource.get_object())
            font = Font.from_font_resource(font_resource)
        else:
            logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
            font_name = "/Helv"
            font_resource = DictionaryObject({
                NameObject("/Subtype"): NameObject("/Type1"),
                NameObject("/Name"): NameObject("/Helv"),
                NameObject("/Type"): NameObject("/Font"),
                NameObject("/BaseFont"): NameObject("/Helvetica"),
                NameObject("/Encoding"): NameObject("/WinAnsiEncoding")
            })
            font_descriptor = CORE_FONT_METRICS["Helvetica"]
            # NOTE: this writes into the entry held by CORE_FONT_METRICS itself;
            # the assignment always stores the same value, so repeating it is harmless.
            font_descriptor.character_widths["default"] = 2 * font_descriptor.character_widths[" "]
            font = Font(
                name="Helvetica",
                character_map={},
                encoding=dict(zip(range(256), fill_from_encoding("cp1252"))),  # WinAnsiEncoding
                sub_type="Type1",
                font_descriptor=font_descriptor,
                character_widths=font_descriptor.character_widths
            )

        # Build the character -> encoded bytes map used when writing the text.
        font_glyph_byte_map: dict[str, bytes]
        if isinstance(font.encoding, str):
            font_glyph_byte_map = {
                v: k.encode(font.encoding) for k, v in font.character_map.items()
            }
        else:
            font_glyph_byte_map = {v: bytes((k,)) for k, v in font.encoding.items()}
            font_encoding_rev = {v: bytes((k,)) for k, v in font.encoding.items()}
            for key, value in font.character_map.items():
                font_glyph_byte_map[value] = font_encoding_rev.get(key, key)

        ap_stream_data = self._generate_appearance_stream_data(
            text,
            selection,
            font,
            font_glyph_byte_map,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length
        )
        self.set_data(ByteStringObject(ap_stream_data))
        self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
        # Update Resources with font information
        self[NameObject("/Resources")] = DictionaryObject({
            NameObject("/Font"): DictionaryObject({
                # Prefer the indirect reference when the font resource has one.
                NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
            })
        })

    @classmethod
    def from_text_annotation(
        cls,
        acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        user_font_name: str = "",
        user_font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a text field annotation.

        This class method is a factory for creating a `TextStreamAppearance`
        instance by extracting all necessary information (bounding box, font,
        text content, etc.) from the PDF field and annotation dictionaries.
        It respects inheritance for properties like default appearance (`/DA`).

        Args:
            acro_form: The root AcroForm dictionary from the PDF catalog.
            field: The field dictionary object.
            annotation: The widget annotation dictionary object associated with the field.
            user_font_name: An optional user-provided font name to override the
                default. Defaults to an empty string.
            user_font_size: An optional user-provided font size to override the
                default. A value of -1 indicates no override.

        Returns:
            A new `TextStreamAppearance` instance configured for the given field.

        Raises:
            ValueError: If the default appearance string contains no ``Tf`` operator.

        """
        # Calculate rectangle dimensions
        _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
        rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

        # Get default appearance dictionary from annotation
        default_appearance = annotation.get_inherited(
            AnnotationDictionaryAttributes.DA,
            acro_form.get(AnnotationDictionaryAttributes.DA, None),
        )
        if not default_appearance:
            # Create a default appearance if none was found in the annotation
            default_appearance = TextStringObject("/Helv 0 Tf 0 g")
        else:
            default_appearance = default_appearance.get_object()

        # Derive font name, size and color from the default appearance. Also set
        # user-provided font name and font size in the default appearance, if given.
        # For a font name, this presumes that we can find an associated font resource
        # dictionary. Uses the variable font_properties as an intermediate.
        # As per the PDF spec:
        # "At a minimum, the string [that is, default_appearance] shall include a Tf (text
        # font) operator along with its two operands, font and size" (Section 12.7.4.3
        # "Variable text" of the PDF 2.0 specification).
        font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
        font_name = font_properties.pop(font_properties.index("Tf") - 2)
        font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
        font_properties.remove("Tf")
        # Whatever remains after removing the Tf operator and operands is the color.
        font_color = " ".join(font_properties)

        # Determine the font name to use, prioritizing the user's input
        if user_font_name:
            font_name = user_font_name

        # Determine the font size to use, prioritizing the user's input
        if user_font_size > 0:
            font_size = user_font_size

        # Try to find a resource dictionary for the font
        document_resources: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    acro_form.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
        # CORE_FONT_METRICS is the dict with Standard font metrics
        if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS:
            # ...or AcroForm dictionary
            # The default must be a DictionaryObject: a plain dict has no
            # get_object(), so an AcroForm without /DR would raise AttributeError.
            document_resources = cast(
                DictionaryObject,
                acro_form.get("/DR", DictionaryObject()),
            )
            document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
        font_resource = document_font_resources.get(font_name, None)
        if not is_null_or_none(font_resource):
            font_resource = cast(DictionaryObject, font_resource.get_object())

        # Retrieve field text and selected values
        field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
        if (
            field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
            field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
        ):
            # Choice field without the Combo flag: show every option, one per line.
            text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
            selection = field.get("/V", [])
            if not isinstance(selection, list):
                selection = [selection]
        else:  # /Tx
            text = field.get("/V", "")
            selection = []

        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Retrieve formatting information
        is_comb = False
        max_length = None
        if field_flags & FieldDictionaryAttributes.FfBits.Comb:
            is_comb = True
            max_length = annotation.get("/MaxLen")
        is_multiline = False
        if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
            is_multiline = True
        alignment = field.get("/Q", TextAlignment.LEFT)
        border_width = 1
        border_style = BorderStyles.SOLID
        if "/BS" in field:
            border_width = cast(DictionaryObject, field["/BS"]).get("/W", border_width)
            border_style = cast(DictionaryObject, field["/BS"]).get("/S", border_style)

        # Create the TextStreamAppearance instance
        layout = BaseStreamConfig(rectangle=rectangle, border_width=border_width, border_style=border_style)
        new_appearance_stream = cls(
            layout,
            text,
            selection,
            font_resource,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length
        )
        if AnnotationDictionaryAttributes.AP in annotation:
            # Carry over entries of the existing normal appearance, except the
            # ones this class computes itself.
            for key, value in (
                cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
            ):
                if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[key] = value

        return new_appearance_stream

View File

@@ -0,0 +1,937 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import binascii
import codecs
import hashlib
import re
import sys
from binascii import unhexlify
from collections.abc import Sequence
from math import log10
from struct import iter_unpack
from typing import Any, Callable, ClassVar, Optional, Union, cast
if sys.version_info[:2] >= (3, 10):
from typing import TypeGuard
else:
from typing_extensions import TypeGuard # PEP 647
from .._codecs import _pdfdoc_encoding_rev
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
from .._utils import (
StreamType,
classproperty,
deprecation_no_replacement,
deprecation_with_replacement,
logger_warning,
read_non_whitespace,
read_until_regex,
)
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"
class PdfObject(PdfObjectProtocol):
    """
    Base class of the PDF object types defined in this module.

    It provides hashing helpers, the shared ``_reference_clone`` bookkeeping
    used by the ``clone`` implementations of subclasses, and placeholder
    methods (``hash_bin``, ``clone``, ``write_to_stream``) that raise
    ``NotImplementedError`` until a subclass overrides them.
    """

    # function for calculating a hash value
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    # Declared only; _reference_clone assigns it on the clone it registers.
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        """Return the bytes that ``hash_value`` hashes: the encoded ``str()`` of the object."""
        return f"{self}".encode()

    def hash_value(self) -> bytes:
        """
        Return ``b"<ClassName>:<hexdigest>"``.

        The digest is computed by ``hash_func`` over ``hash_value_data()``.
        """
        return (
            f"{self.__class__.__name__}:"
            f"{self.hash_func(self.hash_value_data()).hexdigest()}"
        ).encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        This base implementation simply delegates to ``clone``.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and referenced,
                the copy will be returned; when ``True``, a new copy will be created.
                (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
                during cloning (applies to children duplication as well). If fields are to be
                considered for a limited number of levels, you have to add it as integer, for
                example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
                level only but ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Reference the object within the _objects of pdf_dest only if
        indirect_reference attribute exists (which means the objects was
        already identified in xref/xobjstm) if object has been already
        referenced do nothing.

        Args:
            clone: The freshly created copy of ``self`` to register.
            pdf_dest: The writer in whose ``_objects`` the clone is stored.
            force_duplicate: When ``True``, an earlier translation of the same
                source object is not reused.

        Returns:
            The clone, or the object previously registered in ``pdf_dest``
            for the same source object.

        """
        # Best effort: if the clone already belongs to pdf_dest there is nothing to do.
        # Any failure while inspecting the clone (e.g. no indirect_reference) is ignored.
        try:
            if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
                return clone
        except Exception:
            pass
        # if hasattr(clone, "indirect_reference"):
        try:
            ind = self.indirect_reference
        except AttributeError:
            # The source object was never referenced indirectly: do not register the clone.
            return clone
        if (
            pdf_dest.incremental
            and ind is not None
            and ind.pdf == pdf_dest._reader
            and ind.idnum <= len(pdf_dest._objects)
        ):
            # Incremental writer over the same reader: keep the original object number.
            i = ind.idnum
        else:
            # Otherwise take the next free (1-based) object number.
            i = len(pdf_dest._objects) + 1
        if ind is not None:
            if id(ind.pdf) not in pdf_dest._id_translated:
                pdf_dest._id_translated[id(ind.pdf)] = {}
                # Stores the source pdf itself; going by the key name, this is meant
                # to keep it alive while its id() is used as a key.
                pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf  # type: ignore[index]
            if (
                not force_duplicate
                and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
            ):
                # Already translated: hand back the object registered earlier.
                obj = pdf_dest.get_object(
                    pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
                )
                assert obj is not None
                return obj
            pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
        try:
            pdf_dest._objects[i - 1] = clone
        except IndexError:
            # Slot does not exist yet: append and use the resulting position as number.
            pdf_dest._objects.append(clone)
            i = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        # This base implementation returns the object itself.
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the object to ``stream``.

        Raises:
            NotImplementedError: Always, in this base implementation.

        """
        raise NotImplementedError
class NullObject(PdfObject):
    """The PDF ``null`` object; all instances compare equal to each other."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(NullObject(), pdf_dest, force_duplicate)
        return cast("NullObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering the type only, as a null object carries no value.

        """
        type_only = (self.__class__,)
        return hash(type_only)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the literal ``null`` to the stream; ``encryption_key`` is deprecated."""
        key_was_passed = encryption_key is not None
        if key_was_passed:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """Read four bytes from the stream and require them to be ``null``."""
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        # Equality depends on the type alone.
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()
class BooleanObject(PdfObject):
    """A PDF boolean (``true`` / ``false``) wrapping an arbitrary truthy or falsy value."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(
            BooleanObject(self.value), pdf_dest, force_duplicate
        )
        return cast("BooleanObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        type_and_value = (self.__class__, self.value)
        return hash(type_and_value)

    def __eq__(self, o: object, /) -> bool:
        # Comparable with other BooleanObjects and with plain bools only.
        if isinstance(o, BooleanObject):
            other_value = o.value
        elif isinstance(o, bool):
            other_value = o
        else:
            return False
        return self.value == other_value

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false`` to the stream; ``encryption_key`` is deprecated."""
        key_was_passed = encryption_key is not None
        if key_was_passed:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """Read a boolean keyword from the stream, based on its first four bytes."""
        prefix = stream.read(4)
        if prefix == b"fals":
            # Consume the fifth byte of the keyword; its value is not checked.
            stream.read(1)
            return BooleanObject(False)
        if prefix == b"true":
            return BooleanObject(True)
        raise PdfReadError("Could not read Boolean object")
class IndirectObject(PdfObject):
    """
    Reference to an object of a PDF, identified by object number, generation
    number and the owning ``pdf``.

    Item access, iteration, numeric conversion and unknown attributes are
    forwarded to the object the reference points to.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers, bound to ``pdf_dest``."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """Clone object into pdf_dest."""
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}
            # The translation table is keyed by id(self.pdf); storing the
            # source object under "PreventGC" keeps a reference to it.
            pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf  # type: ignore[index]

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # This object number has already been translated for pdf_dest.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # case observed : a pointed object can not be found
            if obj is None:
                # Substitute a null object that still carries this reference.
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve the reference through ``self.pdf``."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # ``pdf`` is shared with the copy, not deep-copied.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference and reject chained references.

        Raises:
            PdfStreamError: The resolved object is itself an IndirectObject.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        """
        Look up attributes missing on the reference in the pointed object.

        Raises:
            AttributeError: The attribute exists neither here nor on the
                pointed object.

        """
        if name in ("idnum", "generation", "pdf"):
            # These are set in __init__. Getting here means the instance is
            # not initialised (e.g. created through copy/pickle machinery);
            # resolving the pointed object needs ``self.pdf`` and would
            # re-enter __getattr__ until a RecursionError occurs.
            raise AttributeError(name)
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError as exc:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            ) from exc

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Equal only to references with the same numbers and the very same
        # ``pdf`` object (identity, not equality).
        if not isinstance(other, IndirectObject):
            return False
        return (
            self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the reference as ``<idnum> <generation> R``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Read ``<idnum> <generation> R`` from ``stream``.

        Raises:
            PdfStreamError: The stream ends before both numbers are read.
            PdfReadError: The numbers are not followed by ``R``.

        """
        # Object number: every byte up to the first whitespace.
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        # Generation number: leading whitespace is skipped, then every byte up
        # to the next whitespace is collected.
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)
# Used by FloatObject.myrepr(): the number of decimals written is this value
# minus the integer part of log10(abs(value)), but never less than 1.
FLOAT_WRITE_PRECISION = 8  # module-level so that users can adjust it
class FloatObject(float, PdfObject):
    """PDF real number, stored as a Python float."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        """
        Create the object from anything ``float()`` accepts.

        Values that cannot be converted are replaced by ``0.0`` and a warning
        is logged. ``context`` is accepted but not used.
        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method itself would
        # not reflect the numeric value.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Return the fixed-point text written to PDF files.

        Zero is written as ``0.0``. Other values use
        ``FLOAT_WRITE_PRECISION - int(log10(abs(self)))`` decimals (at least
        one), with trailing zeros and a trailing dot removed.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``myrepr()`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))
class NumberObject(int, PdfObject):
    """PDF integer number, stored as a Python int."""

    # Matches the first byte that cannot belong to a number.
    # NOTE(review): inside the character class, ``+-.`` is a range from "+" to
    # ".", which also covers ","; left unchanged because read_from_stream
    # relies on this exact pattern - confirm whether "," is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        """
        Create the object from anything ``int()`` accepts.

        A ``ValueError`` during conversion is replaced by the value ``0`` and
        a warning is logged.
        """
        try:
            return int.__new__(cls, int(value))
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        return cast(
            "NumberObject",
            self._reference_clone(NumberObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def as_numeric(self) -> int:
        """Return the value as a plain ``int``."""
        # Direct conversion; going through repr() and parsing the text again
        # is unnecessary and subject to the int/str digit limit.
        return int(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the decimal representation to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """
        Read a number from ``stream``.

        Returns:
            A FloatObject if the token contains a ".", otherwise a NumberObject.

        """
        num = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." in num:
            return FloatObject(num)
        return NumberObject(num)
class ByteStringObject(bytes, PdfObject):
    """
    String object whose text encoding could not be determined.

    The PDF format has no separate representation for binary strings, so
    non-textual data (for example the encryption entry /O) is stored in a
    "String" object as well and ends up here.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        registered = self._reference_clone(duplicate, pdf_dest, force_duplicate)
        return cast("ByteStringObject", registered)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, bytes(self))
        return hash(key)

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes as a hexadecimal string enclosed in ``<`` and ``>``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        for part in (b"<", binascii.hexlify(self), b">"):
            stream.write(part)

    def __str__(self) -> str:
        """
        Decode with UTF-16 first, then with each of ``NameObject.CHARSETS``.

        Raises:
            PdfReadError: None of the encodings could decode the bytes.

        """
        for encoding in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise PdfReadError("Cannot decode ByteStringObject.")
class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16 BOM mark to cause UTF-16 decoding
    to occur.
    """

    # Set when the text is representable in PDFDocEncoding.
    autodetect_pdfdocencoding: bool
    # Set when the text has to be written as UTF-16.
    autodetect_utf16: bool
    # BOM to emit when writing UTF-16 (may be empty).
    utf16_bom: bytes
    # Raw bytes the object was created from, if any.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        """
        Create the object from ``str`` or ``bytes``.

        Bytes starting with a UTF-16 BOM are decoded as UTF-16 (truncated at
        the first undecodable position, with a warning); other bytes are
        decoded with "charmap". The autodetect flags record how the text can
        be encoded again.
        """
        original_bytes = None
        if isinstance(value, bytes):
            original_bytes = value
            value = value.decode("charmap")
        text_string_object = str.__new__(cls, value)
        text_string_object._original_bytes = original_bytes
        text_string_object.autodetect_utf16 = False
        text_string_object.autodetect_pdfdocencoding = False
        text_string_object.utf16_bom = b""
        if original_bytes is not None and original_bytes[:2] in {codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE}:
            # The value of `original_bytes` is only set for inputs being `bytes`.
            # If this is UTF-16 data according to the BOM (first two characters),
            # perform special handling. All other cases should not need any special conversion
            # due to already being a string.
            try:
                text_string_object = str.__new__(cls, original_bytes.decode("utf-16"))
            except UnicodeDecodeError as exception:
                logger_warning(
                    f"{exception!s}\ninitial string:{exception.object!r}",
                    __name__,
                )
                text_string_object = str.__new__(cls, exception.object[: exception.start].decode("utf-16"))
            # A new instance was created above, so every flag has to be set
            # again; leaving one out makes later reads (e.g. in clone) fail
            # with AttributeError.
            text_string_object._original_bytes = original_bytes
            text_string_object.autodetect_utf16 = True
            text_string_object.autodetect_pdfdocencoding = False
            text_string_object.utf16_bom = original_bytes[:2]
        else:
            try:
                encode_pdfdocencoding(text_string_object)
                text_string_object.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                text_string_object.autodetect_utf16 = True
                text_string_object.utf16_bom = codecs.BOM_UTF16_BE
        return text_string_object

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest."""
        obj = TextStringObject(self)
        # Carry over the encoding information of the source object.
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """Re-encode the text according to the autodetect flags."""
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes to write for this string.

        The stored original bytes are preferred; otherwise PDFDocEncoding is
        tried and UTF-16 is used as the fallback.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Skip PDFDocEncoding and go straight to the UTF-16 fallback.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the string as a literal string ``( ... )`` to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # Everything but alphanumerics and spaces is written as a
                # three-digit octal escape.
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")
class NameObject(str, PdfObject):  # noqa: SLOT000
    """PDF name object; the string value includes the leading ``/``."""

    # Bytes that end a name when reading from a stream.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters written as "#XX" escapes: the listed special characters and
    # all code points 0..32.
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped name (see ``renumber``) to ``stream``."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Return the name as bytes with ``#XX`` escapes applied.

        The first character is emitted as is (a deprecation is reported if it
        is not ``/``). Characters above ``~`` are escaped per UTF-8 byte,
        characters found in ``renumber_table`` use their table entry and
        everything else is UTF-8 encoded unchanged.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen). Spaces and any other
        non-alphanumeric/non-underscore/non-hyphen character are replaced
        with underscores.

        Returns:
            NameObject with sanitized name.

        """
        name = str(self).removeprefix("/")
        # Spaces are outside the allowed set as well, so one substitution
        # covers them.
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated alias of ``prefix``."""
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Replace every ``#XX`` escape (two hexadecimal digits) by its byte.

        A ``#`` that is not followed by two hexadecimal digits is left
        untouched, including a ``#`` at the very end of the input.
        """
        i = sin.find(b"#", 0)
        while i >= 0:
            hex_pair = sin[i + 1 : i + 3]
            # Fewer than two bytes left means there is no complete escape.
            if len(hex_pair) == 2:
                try:
                    sin = sin[:i] + unhexlify(hex_pair) + sin[i + 3 :]
                except ValueError:
                    # if the 2 characters after # can not be converted to hex
                    # we change nothing and carry on
                    pass
            # Always continue at the next "#": position i now holds either the
            # decoded byte or the untouched "#", neither of which must be
            # looked at again.
            i = sin.find(b"#", i + 1)
        return sin

    # Encodings tried, in order, when decoding names read from a stream.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name object from ``stream``.

        Raises:
            PdfReadError: The stream does not start with ``/``, or the name
                cannot be decoded and ``pdf.strict`` is set.

        """
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    # Best effort: try the next configured encoding.
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e
def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` through the ``_pdfdoc_encoding_rev`` table.

    Raises:
        UnicodeEncodeError: A character has no entry in the table.

    """
    try:
        encoded = bytes(map(_pdfdoc_encoding_rev.__getitem__, unicode_string))
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
    return encoded
def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Check whether ``x`` stands for "no value".

    Returns:
        True if x is None, or a PdfObject that resolves to None or to a
        NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    if x.get_object() is None:
        return True
    return isinstance(x.get_object(), NullObject)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,401 @@
from __future__ import annotations
import bisect
from functools import cached_property
from typing import TYPE_CHECKING, cast
from pypdf._utils import format_iso8824_date, parse_iso8824_date
from pypdf.constants import CatalogAttributes as CA
from pypdf.constants import FileSpecificationDictionaryEntries
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
ArrayObject,
ByteStringObject,
DecodedStreamObject,
DictionaryObject,
NameObject,
NullObject,
NumberObject,
StreamObject,
TextStringObject,
is_null_or_none,
)
if TYPE_CHECKING:
import datetime
from collections.abc import Generator
from pypdf._writer import PdfWriter
class EmbeddedFile:
    """
    Accessor for one embedded file of a PDF document.

    Where possible, values are read from the underlying PDF objects on access
    instead of being copied at construction time.

    See section 7.11 of the PDF 2.0 specification for details on embedded files.
    """

    def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
        """
        Args:
            name: The (primary) name as provided in the name tree.
            pdf_object: The PDF object further data is retrieved from.
            parent: The names list this entry is part of.
        """
        self._parent = parent
        self.pdf_object = pdf_object
        self._name = name

    @property
    def name(self) -> str:
        """The (primary) name of the embedded file as provided in the name tree."""
        return self._name

    @classmethod
    def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
        """
        Build a new embedded file and register it with the PdfWriter.

        Args:
            writer: The PdfWriter instance receiving the embedded file.
            name: The filename to display.
            content: The file data; text is encoded as latin-1.

        Returns:
            EmbeddedFile instance wrapping the new file specification.
        """
        payload = content.encode("latin-1") if isinstance(content, str) else content

        # Stream object carrying the actual file data.
        stream = DecodedStreamObject()
        stream.set_data(payload)
        stream.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})

        # /EF dictionary referencing the stream.
        ef_dictionary = DictionaryObject()
        ef_dictionary.update({NameObject("/F"): writer._add_object(stream)})

        # File specification dictionary; it is registered first and filled afterwards.
        from pypdf.generic import create_string_object  # noqa: PLC0415

        filespec = DictionaryObject()
        filespec_reference = writer._add_object(filespec)
        name_object = cast(TextStringObject, create_string_object(name))
        filespec.update(
            {
                NameObject(PG.TYPE): NameObject("/Filespec"),
                NameObject(FileSpecificationDictionaryEntries.F): name_object,
                NameObject(FileSpecificationDictionaryEntries.EF): ef_dictionary,
            }
        )

        # Insert the reference before the name, both at the same index: the
        # name then ends up directly in front of its reference.
        names_array = cls._get_names_array(writer)
        position = cls._get_insertion_index(names_array, name_object)
        names_array.insert(position, filespec_reference)
        names_array.insert(position, name_object)

        return cls(name=name, pdf_object=filespec, parent=names_array)

    @classmethod
    def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
        """
        Return the flat names array for embedded files.

        Missing catalog entries are created. A /Kids-based tree is replaced by
        a /Names-based one holding the entries of the direct kids.

        Raises:
            PdfReadError: The existing tree has neither /Names nor /Kids.
        """
        if CA.NAMES not in writer.root_object:
            # The catalog has no /Names entry yet.
            writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())
        names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])

        if "/EmbeddedFiles" in names_dict:
            tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
            if "/Names" in tree:
                # A flat list already exists.
                return cast(ArrayObject, tree[NameObject(CA.NAMES)])
            if "/Kids" not in tree:
                # Without /Names and /Kids this is no name tree.
                raise PdfReadError("Got neither Names nor Kids in embedded files tree.")
            # Only flat /Names lists are handled by the insertion logic, so
            # the /Kids-based tree gets flattened below.
            flattened = ArrayObject()
            kids = cast(ArrayObject, tree["/Kids"].get_object())
        else:
            # No embedded files entry yet: start with an empty list.
            flattened = ArrayObject()
            kids = ()

        replacement = DictionaryObject({NameObject(CA.NAMES): flattened})
        names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(replacement)
        for kid in kids:
            # The file objects themselves are not modified, so references to
            # them should remain valid.
            # Kids nested deeper than one level are not followed.
            for entry in kid.get_object().get("/Names", []):
                flattened.append(entry)
        return flattened

    @classmethod
    def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
        """
        Return the index in ``names_array`` at which ``name`` has to be inserted.

        Names sit at the even positions and are compared by their UTF-8
        bytes; the result points behind all entries that are not greater.
        """
        existing = []
        for position in range(0, len(names_array), 2):
            existing.append(names_array[position].encode("utf-8"))
        return bisect.bisect_right(existing, name.encode("utf-8")) * 2

    @property
    def alternative_name(self) -> str | None:
        """Retrieve the alternative name (file specification)."""
        # PDF 2.0 reference, table 43:
        #   > A PDF reader shall use the value of the UF key, when present, instead of the F key.
        for key in (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F):
            if key not in self.pdf_object:
                continue
            value = self.pdf_object[key].get_object()
            if is_null_or_none(value):
                continue
            return cast(str, value)
        return None

    @alternative_name.setter
    def alternative_name(self, value: TextStringObject | None) -> None:
        """Set the alternative name (file specification)."""
        keys = (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F)
        for key in keys:
            if value is not None:
                self.pdf_object[NameObject(key)] = value
            elif key in self.pdf_object:
                # Only entries that exist are reset to null.
                self.pdf_object[NameObject(key)] = NullObject()

    @property
    def description(self) -> str | None:
        """Retrieve the description."""
        value = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
        return None if is_null_or_none(value) else value

    @description.setter
    def description(self, value: TextStringObject | None) -> None:
        """Set the description."""
        replacement = NullObject() if value is None else value
        self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = replacement

    @property
    def associated_file_relationship(self) -> str:
        """Retrieve the relationship of the referring document to this embedded file."""
        relationship = self.pdf_object.get("/AFRelationship", "/Unspecified")
        return relationship

    @associated_file_relationship.setter
    def associated_file_relationship(self, value: NameObject) -> None:
        """Set the relationship of the referring document to this embedded file."""
        self.pdf_object[NameObject("/AFRelationship")] = value

    @property
    def _embedded_file(self) -> StreamObject:
        """
        Retrieve the actual embedded file stream.

        Raises:
            PdfReadError: /EF is missing or holds neither /UF nor /F.
        """
        if "/EF" not in self.pdf_object:
            raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
        ef = cast(DictionaryObject, self.pdf_object["/EF"])
        candidates = (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F)
        for key in candidates:
            if key in ef:
                return cast(StreamObject, ef[key].get_object())
        raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")

    @property
    def _params(self) -> DictionaryObject:
        """Retrieve the file-specific parameters."""
        raw = self._embedded_file.get("/Params", DictionaryObject())
        return raw.get_object()

    @cached_property
    def _ensure_params(self) -> DictionaryObject:
        """Ensure the /Params dictionary exists and return it."""
        stream = self._embedded_file
        if "/Params" not in stream:
            stream[NameObject("/Params")] = DictionaryObject()
        return cast(DictionaryObject, stream["/Params"])

    @property
    def subtype(self) -> str | None:
        """Retrieve the subtype. This is a MIME media type, prefixed by a slash."""
        value = self._embedded_file.get("/Subtype")
        return None if is_null_or_none(value) else value

    @subtype.setter
    def subtype(self, value: NameObject | None) -> None:
        """Set the subtype. This should be a MIME media type, prefixed by a slash."""
        stream = self._embedded_file
        stream[NameObject("/Subtype")] = NullObject() if value is None else value

    @property
    def content(self) -> bytes:
        """Retrieve the actual file content."""
        return self._embedded_file.get_data()

    @content.setter
    def content(self, value: str | bytes) -> None:
        """Set the file content; text is encoded as latin-1."""
        payload = value.encode("latin-1") if isinstance(value, str) else value
        self._embedded_file.set_data(payload)

    @property
    def size(self) -> int | None:
        """Retrieve the size of the uncompressed file in bytes."""
        value = self._params.get("/Size")
        return None if is_null_or_none(value) else value

    @size.setter
    def size(self, value: NumberObject | None) -> None:
        """Set the size of the uncompressed file in bytes."""
        params = self._ensure_params
        params[NameObject("/Size")] = NullObject() if value is None else value

    @property
    def creation_date(self) -> datetime.datetime | None:
        """Retrieve the file creation datetime."""
        raw = self._params.get("/CreationDate")
        return parse_iso8824_date(raw)

    @creation_date.setter
    def creation_date(self, value: datetime.datetime | None) -> None:
        """Set the file creation datetime."""
        params = self._ensure_params
        if value is not None:
            params[NameObject("/CreationDate")] = TextStringObject(format_iso8824_date(value))
            return
        params[NameObject("/CreationDate")] = NullObject()

    @property
    def modification_date(self) -> datetime.datetime | None:
        """Retrieve the datetime of the last file modification."""
        raw = self._params.get("/ModDate")
        return parse_iso8824_date(raw)

    @modification_date.setter
    def modification_date(self, value: datetime.datetime | None) -> None:
        """Set the datetime of the last file modification."""
        params = self._ensure_params
        if value is not None:
            params[NameObject("/ModDate")] = TextStringObject(format_iso8824_date(value))
            return
        params[NameObject("/ModDate")] = NullObject()

    @property
    def checksum(self) -> bytes | None:
        """Retrieve the MD5 checksum of the (uncompressed) file."""
        value = self._params.get("/CheckSum")
        return None if is_null_or_none(value) else value

    @checksum.setter
    def checksum(self, value: ByteStringObject | None) -> None:
        """Set the MD5 checksum of the (uncompressed) file."""
        params = self._ensure_params
        params[NameObject("/CheckSum")] = NullObject() if value is None else value

    def delete(self) -> None:
        """
        Delete the file from the document.

        Raises:
            PyPdfError: No parent list is available, or the file is not part of it.
        """
        if not self._parent:
            raise PyPdfError("Parent required to delete file from document.")

        # The parent list may hold the dictionary itself or a reference to it.
        if self.pdf_object in self._parent:
            target = self.pdf_object
        elif (
            (indirect_reference := getattr(self.pdf_object, "indirect_reference", None)) is not None
            and indirect_reference in self._parent
        ):
            target = indirect_reference
        else:
            raise PyPdfError("File not found in parent object.")

        index = self._parent.index(target)
        self._parent.pop(index)  # Reference.
        self._parent.pop(index - 1)  # Name.
        self.pdf_object = DictionaryObject()  # Invalidate.

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.name!r}>"

    @classmethod
    def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
        """
        Convert the given name tree into class instances.

        Args:
            names: The name tree to load the data from.

        Returns:
            Iterable of class instances for the files found.
        """
        # The list has the format [name_1, reference_1, name_2, reference_2, ...]
        for position, entry in enumerate(names):
            if isinstance(entry, str):
                # Names are picked up by index once their reference is reached.
                continue
            file_dictionary = entry.get_object()
            direct_name = names[position - 1].get_object()
            yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)

    @classmethod
    def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
        """
        Load the embedded files for the given document catalog.

        This method and its signature are considered internal API and thus not exposed publicly for now.

        Args:
            catalog: The document catalog to load from.

        Returns:
            Iterable of class instances for the files found.
        """
        try:
            names_entry = cast(DictionaryObject, catalog["/Names"])
            container = cast(DictionaryObject, names_entry["/EmbeddedFiles"])
        except KeyError:
            return

        if "/Kids" in container:
            # Kids nested deeper than one level are not followed.
            for kid in cast(ArrayObject, container["/Kids"].get_object()):
                resolved = kid.get_object()
                if "/Names" in resolved:
                    yield from cls._load_from_names(cast(ArrayObject, resolved["/Names"]))
        if "/Names" in container:
            yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))

View File

@@ -0,0 +1,174 @@
from typing import Any, Optional, Union
from ._base import is_null_or_none
class Fit:
def __init__(
    self, fit_type: str, fit_args: tuple[Union[None, float, Any], ...] = ()
) -> None:
    """
    Store the fit type as a NameObject and convert the arguments.

    Args:
        fit_type: Name of the fit type, e.g. "/XYZ".
        fit_args: Arguments of the fit type; null-like values become
            NullObject, everything else is wrapped in FloatObject.
    """
    from ._base import FloatObject, NameObject, NullObject, NumberObject  # noqa: PLC0415

    self.fit_type = NameObject(fit_type)
    converted: list[Union[NullObject, FloatObject, NumberObject]] = []
    for argument in fit_args:
        if is_null_or_none(argument):
            converted.append(NullObject())
        else:
            converted.append(FloatObject(argument))
    self.fit_args = converted
@classmethod
def xyz(
    cls,
    left: Optional[float] = None,
    top: Optional[float] = None,
    zoom: Optional[float] = None,
) -> "Fit":
    """
    Show the page with the point (left, top) in the upper-left corner of
    the window and the page contents magnified by the factor zoom.

    Passing null for left, top or zoom keeps the current value of that
    parameter. A zoom of 0 means the same as null.

    Args:
        left:
        top:
        zoom:

    Returns:
        The created fit object.

    """
    arguments = (left, top, zoom)
    return Fit(fit_type="/XYZ", fit_args=arguments)
@classmethod
def fit(cls) -> "Fit":
    """
    Show the page magnified just enough that the whole page fits into the
    window, both horizontally and vertically.

    When the horizontal and vertical magnification factors differ, the
    smaller one is used and the page is centered within the window in the
    other dimension.
    """
    fit_type = "/Fit"
    return Fit(fit_type=fit_type)
@classmethod
def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
    """
    Show the page with the vertical coordinate top at the top edge of the
    window, magnified just enough that the full page width fits into the
    window.

    Passing null for ``top`` keeps the current value of that parameter.

    Args:
        top:

    Returns:
        The created fit object.

    """
    arguments = (top,)
    return Fit(fit_type="/FitH", fit_args=arguments)
@classmethod
def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
return Fit(fit_type="/FitV", fit_args=(left,))
@classmethod
def fit_rectangle(
cls,
left: Optional[float] = None,
bottom: Optional[float] = None,
right: Optional[float] = None,
top: Optional[float] = None,
) -> "Fit":
"""
Display the page designated by page, with its contents magnified
just enough to fit the rectangle specified by the coordinates
left, bottom, right, and top entirely within the window
both horizontally and vertically.
If the required horizontal and vertical magnification factors are
different, use the smaller of the two, centering the rectangle within
the window in the other dimension.
A null value for any of the parameters may result in unpredictable
behavior.
Args:
left:
bottom:
right:
top:
Returns:
The created fit object.
"""
return Fit(fit_type="/FitR", fit_args=(left, bottom, right, top))
@classmethod
def fit_box(cls) -> "Fit":
"""
Display the page designated by page, with its contents magnified just
enough to fit its bounding box entirely within the window both
horizontally and vertically.
If the required horizontal and vertical magnification factors are
different, use the smaller of the two, centering the bounding box
within the window in the other dimension.
"""
return Fit(fit_type="/FitB")
@classmethod
def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
"""
Display the page designated by page, with the vertical coordinate top
positioned at the top edge of the window and the contents of the page
magnified just enough to fit the entire width of its bounding box
within the window.
A null value for top specifies that the current value of that parameter
is to be retained unchanged.
Args:
top:
Returns:
The created fit object.
"""
return Fit(fit_type="/FitBH", fit_args=(top,))
@classmethod
def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
"""
Display the page designated by page, with the horizontal coordinate
left positioned at the left edge of the window and the contents of the
page magnified just enough to fit the entire height of its bounding box
within the window.
A null value for left specifies that the current value of that
parameter is to be retained unchanged.
Args:
left:
Returns:
The created fit object.
"""
return Fit(fit_type="/FitBV", fit_args=(left,))
def __str__(self) -> str:
if not self.fit_args:
return f"Fit({self.fit_type})"
return f"Fit({self.fit_type}, {self.fit_args})"
# Shared module-level default: the argument-less "/Fit" fit created by Fit.fit().
DEFAULT_FIT = Fit.fit()

View File

@@ -0,0 +1,314 @@
# Copyright (c) 2024, pypdf contributors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import logging
from io import BytesIO
from typing import IO
from .._utils import (
WHITESPACES,
WHITESPACES_AS_BYTES,
StreamType,
logger_warning,
read_non_whitespace,
)
from ..errors import PdfReadError
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# An inline image should be used only for small images (4096 bytes or less),
# but allow twice this for cases where this has been exceeded.
# This is the chunk size (in bytes) passed to stream.read() by the
# extract_inline_* functions of this module.
BUFFER_SIZE = 8192
def _check_end_image_marker(stream: StreamType) -> bool:
    """
    Report whether the next token in ``stream`` is the ``EI`` end-of-image marker.

    The token is only peeked at: after the check, the stream is positioned on
    the first byte of the token that was examined.

    Args:
        stream: Seekable binary stream to inspect.

    Returns:
        True if the examined bytes start with ``EI`` and are followed either
        by nothing (end of data) or by a member of ``WHITESPACES``;
        False otherwise.
    """
    # NOTE(review): presumably read_non_whitespace() skips whitespace and
    # returns the first non-whitespace byte (empty at end of data) - confirm.
    ei_tok = read_non_whitespace(stream)
    ei_tok += stream.read(2)
    # Rewind by the number of bytes actually obtained. Near the end of the
    # data fewer than three bytes come back, and a fixed three-byte rewind
    # would land before the token (or on a negative offset).
    stream.seek(-len(ei_tok), 1)
    return ei_tok[:2] == b"EI" and (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES)
def extract_inline__ascii_hex_decode(stream: StreamType) -> bytes:
    """
    Extract HexEncoded stream from inline image.
    The stream will be moved onto the EI.

    The data is read in chunks until the ``>`` delimiter is found. An ``EI``
    sequence inside a chunk that has no ``>`` is accepted as a backup end
    marker.

    Args:
        stream: Seekable binary stream to read from (relative seeks are used).

    Returns:
        The collected bytes. When ``>`` was found it is the last byte of the
        result; when only ``EI`` was found, the result stops before ``EI``
        and before any ``WHITESPACES`` bytes directly preceding it.

    Raises:
        PdfReadError: If the data ends before an end marker is found, or if
            the position reached is not followed by a valid ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter > and EI as backup.
    while True:
        # One byte from read_non_whitespace() plus up to BUFFER_SIZE further bytes.
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b">")
        if pos_tok >= 0:  # found >
            data_out += data_buffered[: pos_tok + 1]
            # Move the stream back to just after the ">".
            stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            break
        pos_ei = data_buffered.find(b"EI")
        if pos_ei >= 0:  # found EI
            # Step to the byte directly before "EI" and walk backwards over
            # whitespace so that it is not included in the returned data.
            stream.seek(-len(data_buffered) + pos_ei - 1, 1)
            c = stream.read(1)
            while c in WHITESPACES:
                stream.seek(-2, 1)
                c = stream.read(1)
                pos_ei -= 1
            data_out += data_buffered[:pos_ei]
            break
        if len(data_buffered) == 2:
            # Only two bytes came back: either the two bytes pushed back by
            # the previous pass or a stream with just two bytes left, so
            # there is no new data to scan.
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        # Neither > nor EI found
        # Keep the last two bytes for the next pass (presumably so that a
        # marker split across two chunks is still found).
        data_out += data_buffered[:-2]
        stream.seek(-2, 1)
    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out
def extract_inline__ascii85_decode(stream: StreamType) -> bytes:
    """
    Extract A85 stream from inline image.
    The stream will be moved onto the EI.

    The data is read in chunks until the ``~>`` delimiter is found.

    Args:
        stream: Seekable binary stream to read from (relative seeks are used).

    Returns:
        The collected bytes, ending with the ``~>`` delimiter.

    Raises:
        PdfReadError: If the data ends before ``~>`` is found, or if the
            delimiter is not followed by a valid ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter ~>
    while True:
        # One byte from read_non_whitespace() plus up to BUFFER_SIZE further bytes.
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"~>")
        if pos_tok >= 0:  # found!
            data_out += data_buffered[: pos_tok + 2]
            # Move the stream back to just after the "~>".
            stream.seek(-len(data_buffered) + pos_tok + 2, 1)
            break
        if len(data_buffered) == 2:  # end of buffer
            # Only two bytes came back, so there is no new data to scan.
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        # Keep the last two bytes for the next pass in case the chunk ends
        # in the middle of the ~> delimiter.
        data_out += data_buffered[:-2]
        stream.seek(-2, 1)
    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out
def extract_inline__run_length_decode(stream: StreamType) -> bytes:
    """
    Extract RL (RunLengthDecode) stream from inline image.
    The stream will be moved onto the EI.

    Chunks are read until one contains the byte ``0x80`` (the EOD marker).
    If that byte is not followed closely by ``EI``, a warning is logged and
    the first ``EI`` in the chunk is used as the end of the data instead.

    Args:
        stream: Seekable binary stream to read from (relative seeks are used).

    Returns:
        The collected bytes: up to and including the ``0x80`` byte in the
        regular case, or up to (excluding) ``EI`` in the fallback case.

    Raises:
        PdfReadError: If the data ends before ``0x80`` is found, or if the
            position reached is not followed by a valid ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter 128
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"\x80")
        if pos_tok >= 0:  # found
            # Ideally, we could just use plain run-length decoding here, where 80_16 = 128_10
            # marks the EOD. But there apparently are cases like in issue #3517, where we have
            # an inline image with up to 51 EOD markers. In these cases, be resilient here and
            # use the default `EI` marker detection instead. Please note that this fallback
            # still omits special `EI` handling within the stream, but for now assume that having
            # both of these cases occur at the same time is very unlikely (and the image stream
            # is broken anyway).
            # For now, do not skip over more than one whitespace character.
            # The (up to) three bytes after the EOD byte: "EI" either follows
            # directly or after exactly one other byte.
            after_token = data_buffered[pos_tok + 1 : pos_tok + 4]
            if after_token.startswith(b"EI") or after_token.endswith(b"EI"):
                data_out += data_buffered[: pos_tok + 1]
                # Move the stream back to just after the EOD byte.
                stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            else:
                logger_warning("Early EOD in RunLengthDecode of inline image, using fallback.", __name__)
                ei_marker = data_buffered.find(b"EI")
                if ei_marker > 0:
                    data_out += data_buffered[: ei_marker]
                    # Move the stream back to the byte directly before "EI".
                    stream.seek(-len(data_buffered) + ei_marker - 1, 1)
                # NOTE(review): when no "EI" is found in this chunk (or it is at
                # index 0), nothing from this chunk is kept and the stream is
                # not rewound before the marker check below - confirm intended.
            break
        # No EOD byte in this chunk: keep all of it and read on.
        data_out += data_buffered
    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out
def extract_inline__dct_decode(stream: StreamType) -> bytes:
    """
    Extract DCT (JPEG) stream from inline image.
    The stream will be moved onto the EI.

    Bytes before the first ``0xFF`` are skipped. From then on every byte is
    copied to the result, and marker segments that carry a two-byte length
    field are copied as a whole, until the end marker ``FF D9`` has been
    copied.

    Args:
        stream: Seekable binary stream to read from (relative seeks are used).

    Returns:
        The bytes from the first ``0xFF`` up to and including ``FF D9``,
        exactly as they appear in the stream.

    Raises:
        PdfReadError: If the data ends before ``FF D9`` is reached, or if the
            end marker is not followed by a valid ``EI`` marker.
    """

    def read(length: int) -> bytes:
        # If 0 bytes are returned, and *size* was not 0, this indicates end of file.
        # If the object is in non-blocking mode and no bytes are available, `None` is returned.
        _result = stream.read(length)
        if _result is None or len(_result) != length:
            raise PdfReadError("Unexpected end of stream")
        return _result

    # Marker codes that are followed by a two-byte big-endian segment length.
    markers_with_length = (
        b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
        b"\xda\xdb\xdc\xdd\xde\xdf"
        b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
    )
    # A bytearray avoids re-copying the whole result on every appended byte.
    data_out = bytearray()
    # Read Blocks of data (ID/Size/data) up to ID=FF/D9
    # https://www.digicamsoft.com/itu/itu-t81-36.html
    not_first = False
    while True:
        c = read(1)
        if not_first or (c == b"\xff"):
            data_out += c
        if c != b"\xff":
            continue
        not_first = True
        c = read(1)
        if c == b"\xff":
            # A second 0xFF: push it back so the next pass handles it as the
            # start of a marker. It is recorded on that pass, so recording
            # it here as well would duplicate the byte in the output.
            stream.seek(-1, 1)  # pragma: no cover
            continue
        data_out += c
        if c == b"\xd9":  # end
            break
        # b"\x00" (stuffing) and markers without a length field need no
        # further handling.
        if c in markers_with_length:
            c = read(2)
            data_out += c
            # The length counts its own two bytes, hence "- 2" for the payload.
            sz = c[0] * 256 + c[1]
            data_out += read(sz - 2)
    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return bytes(data_out)
def extract_inline_default(stream: StreamType) -> bytes:
    """
    Legacy method, used by default

    Copies data from ``stream`` until an ``EI`` sequence is found that passes
    the plausibility checks below. On return, the stream is positioned on the
    ``E`` of that ``EI``.

    Args:
        stream: Seekable binary stream to read from.

    Returns:
        The bytes read before the accepted ``EI``.

    Raises:
        PdfReadError: If the data ends before an accepted ``EI`` is found.
    """
    stream_out = BytesIO()
    # Read the inline image, while checking for EI (End Image) operator.
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_ei = data_buffered.find(
            b"E"
        )  # We can not look straight for "EI" because it may not have been loaded in the buffer
        if pos_ei == -1:
            stream_out.write(data_buffered)
        else:
            # Write out everything including E (the one from EI to be removed)
            stream_out.write(data_buffered[0 : pos_ei + 1])
            # Offset of that E inside the output, used to cut it off again.
            sav_pos_ei = stream_out.tell() - 1
            # Seek back in the stream to read the E next
            stream.seek(pos_ei + 1 - len(data_buffered), 1)
            # Position directly after the E; scanning resumes here whenever
            # this candidate is rejected (the E then stays in the output).
            saved_pos = stream.tell()
            # Check for End Image
            tok2 = stream.read(1)  # I of "EI"
            if tok2 != b"I":
                stream.seek(saved_pos, 0)
                continue
            tok3 = stream.read(1)  # possible space after "EI"
            if tok3 not in WHITESPACES:
                stream.seek(saved_pos, 0)
                continue
            # Skip all further whitespace; tok3 ends up as the first byte after it.
            while tok3 in WHITESPACES:
                tok3 = stream.read(1)
            # Reject only if both hints are missing: no whitespace directly
            # before the E in this chunk and no Q / E as the next byte.
            if data_buffered[pos_ei - 1 : pos_ei] not in WHITESPACES and tok3 not in {
                b"Q",
                b"E",
            }:  # for Q or EMC
                stream.seek(saved_pos, 0)
                continue
            if is_followed_by_binary_data(stream):
                # Inline image contains `EI ` sequence usually marking the end of it, but
                # is followed by binary data which does not make sense for the actual end.
                stream.seek(saved_pos, 0)
                continue
            # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficient
            # remove E(I) wrongly inserted earlier
            # Put the input back onto the E and drop the E from the output.
            stream.seek(saved_pos - 1, 0)
            stream_out.truncate(sav_pos_ei)
            break
    return stream_out.getvalue()
def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
    """
    Check if the next bytes of the stream look like binary image data or regular page content.

    This is just some heuristics due to the PDF specification being too imprecise about
    inline images containing the `EI` marker which would end an image. Starting with PDF 2.0,
    we finally get a mandatory length field, but with (proper) PDF 2.0 support being very limited
    everywhere, we should not expect to be able to remove such hacks in the near future - especially
    considering legacy documents as well.

    The actual implementation draws some inspiration from
    https://github.com/itext/itext-java/blob/9.1.0/kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java

    Args:
        stream: Seekable binary stream. Its position is the same after the
            call as before.
        length: Maximum number of bytes to inspect.

    Returns:
        True if the inspected bytes look like binary data, False if they look
        like regular content or if no data is left.
    """
    position = stream.tell()
    data = stream.read(length)
    stream.seek(position)
    if not data:
        return False
    operator_start = None
    operator_end = None
    for index, byte in enumerate(data):
        if byte < 32 and byte not in WHITESPACES_AS_BYTES:
            # This covers all characters not being displayable directly, although omitting whitespace
            # to allow for operator detection.
            return True
        is_whitespace = byte in WHITESPACES_AS_BYTES
        if operator_start is None and not is_whitespace:
            # Interpret all other non-whitespace characters as the start of an operation.
            operator_start = index
        if operator_start is not None and is_whitespace:
            # A whitespace stops an operation.
            # Assume that having an inline image with tons of whitespace is rather unlikely.
            operator_end = index
            break
    if operator_start is None:
        # Inline images should not have tons of whitespaces, which would lead to no operator start.
        return False
    if operator_end is None:
        # We probably are inside an operation.
        # Use the number of bytes actually read: close to the end of the data
        # fewer than `length` bytes are available, and measuring against
        # `length` would make a short trailing operator look too long.
        operator_end = len(data)
    operator_length = operator_end - operator_start
    operator = data[operator_start:operator_end]
    if operator.startswith(b"/") and operator_length > 1:
        # Name object.
        return False
    if operator.replace(b".", b"").isdigit():
        # Graphics operator, for example a move. A number (integer or float).
        return False
    if operator_length > 3:  # noqa: SIM103
        # Usually, the operators inside a content stream should not have more than three characters,
        # especially after an inline image.
        return True
    return False

View File

@@ -0,0 +1,118 @@
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# This module contains code used by _writer.py to track links in pages
# being added to the writer until the links can be resolved.
from typing import TYPE_CHECKING, Optional, Union, cast
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject
if TYPE_CHECKING:
from .._page import PageObject
from .._reader import PdfReader
from .._writer import PdfWriter
class NamedReferenceLink:
    """Link that targets a named destination, kept until it can be resolved."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """
        Remember the destination name and the document it was read from.

        Args:
            reference: TextStringObject holding the destination name.
            source_pdf: Reader whose named destinations are used for lookup.
        """
        self._source_pdf = source_pdf
        self._reference = reference

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """
        Look the name up in the source document's named destinations.

        Returns:
            The ``page`` of the matching destination, or None when the lookup
            yields nothing (or a falsy destination).
        """
        known_destinations = self._source_pdf.named_destinations
        destination = known_destinations.get(str(self._reference))
        if destination:
            return destination.page
        return None

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """
        Register the name in the target document unless it already exists there.

        Args:
            target_pdf: PdfWriter which the new link went into.
            new_page: Page the named destination is pointed at in the new PDF;
                its ``page_number`` is what gets registered.
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            return
        target_pdf.add_named_destination(name, new_page.page_number)
class DirectReferenceLink:
    """Link that targets a page directly, kept until it can be resolved."""

    def __init__(self, reference: ArrayObject) -> None:
        """
        Remember the destination array.

        Args:
            reference: An ArrayObject whose first element is the Page
                indirect object.
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the destination array."""
        target_page = self._reference[0]
        return target_page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """
        Replace the first element of the destination array with ``new_page``.

        Args:
            target_pdf: PdfWriter which the new link went into (not used here).
            new_page: Page object to point the destination at.
        """
        self._reference[0] = new_page
# Either kind of tracked link; both offer find_referenced_page() and patch_reference().
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """
    Collect the links of two pages that are assumed to be the same page.

    The ``/Annots`` entries of both pages are converted with ``_build_link``
    and paired by position.

    Args:
        new_page: The page copy.
        old_page: The page it was copied from.

    Returns:
        One list of (new link, old link) tuples; pairs in which either side
        did not yield a link are left out.
    """

    def links_of(page: "PageObject") -> list:
        return [_build_link(annotation, page) for annotation in page.get("/Annots", [])]

    candidates = zip(links_of(new_page), links_of(old_page))
    # all() applies the same truthiness test to both members of the pair.
    return [pair for pair in candidates if all(pair)]
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Turn one annotation into a tracked link, if it is a link to a destination.

    Args:
        indirect_object: The annotation; it is resolved with ``get_object()``.
        page: Page the annotation belongs to; ``page.pdf`` is passed on as the
            source document.

    Returns:
        The link created from the ``/D`` entry of a ``/GoTo`` action or from
        the ``/Dest`` entry, or None when the annotation is not a dictionary
        with ``/Subtype`` ``/Link`` or has no such destination.
    """
    src = cast("PdfReader", page.pdf)
    link = cast(DictionaryObject, indirect_object.get_object())
    if not isinstance(link, DictionaryObject):
        return None
    if link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        # An action takes precedence; only /GoTo actions with a /D entry count.
        action = cast(DictionaryObject, link["/A"])
        if action.get("/S") != "/GoTo" or "/D" not in action:
            return None
        target = action["/D"]
    elif "/Dest" in link:
        target = link["/Dest"]
    else:
        return None  # Nothing to do here
    return _create_link(target, src)
def _create_link(reference: PdfObject, source_pdf: "PdfReader")-> Optional[ReferenceLink]:
    """
    Wrap a destination in the matching link class.

    Args:
        reference: The destination taken from the annotation.
        source_pdf: Document the destination was read from.

    Returns:
        A NamedReferenceLink for a TextStringObject, a DirectReferenceLink for
        an ArrayObject, and None for anything else.
    """
    if isinstance(reference, TextStringObject):
        return NamedReferenceLink(reference, source_pdf)
    return DirectReferenceLink(reference) if isinstance(reference, ArrayObject) else None

View File

@@ -0,0 +1,33 @@
from typing import Union
from .._utils import StreamType, deprecation_no_replacement
from ._base import NameObject
from ._data_structures import Destination
class OutlineItem(Destination):
    """Destination that serialises itself as an outline item dictionary."""

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write this item to ``stream`` as a ``<< ... >>`` dictionary.

        Of ``/Title``, ``/Parent``, ``/First``, ``/Last``, ``/Next`` and
        ``/Prev``, those present in this object are written in that order,
        followed by ``/Dest`` with ``self.dest_array``.

        Args:
            stream: Target stream.
            encryption_key: Deprecated; any value other than None triggers the
                deprecation helper.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        candidate_names = ("/Title", "/Parent", "/First", "/Last", "/Next", "/Prev")
        present_keys = [NameObject(name) for name in candidate_names if name in self]
        for entry_key in present_keys:
            entry_key.write_to_stream(stream)
            stream.write(b" ")
            self.raw_get(entry_key).write_to_stream(stream)
            stream.write(b"\n")
        NameObject("/Dest").write_to_stream(stream)
        stream.write(b" ")
        self.dest_array.write_to_stream(stream)
        stream.write(b"\n")
        stream.write(b">>")

View File

@@ -0,0 +1,132 @@
from typing import Any, Union
from ._base import FloatObject, NumberObject
from ._data_structures import ArrayObject
class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in pypdf.

    These boxes include:

    * :attr:`artbox <pypdf._page.PageObject.artbox>`
    * :attr:`bleedbox <pypdf._page.PageObject.bleedbox>`
    * :attr:`cropbox <pypdf._page.PageObject.cropbox>`
    * :attr:`mediabox <pypdf._page.PageObject.mediabox>`
    * :attr:`trimbox <pypdf._page.PageObject.trimbox>`

    The four stored values are, in order: left, bottom, right, top.
    """

    def __init__(
        self, arr: Union["RectangleObject", tuple[float, float, float, float]]
    ) -> None:
        """
        Build the rectangle from exactly four values.

        Args:
            arr: The four values; entries that are not already FloatObject or
                NumberObject are converted to FloatObject.
        """
        # must have four points
        assert len(arr) == 4
        numbers = [self._ensure_is_number(item) for item in arr]
        ArrayObject.__init__(self, numbers)

    def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
        """Return ``value`` unchanged if it is a number object, else as FloatObject."""
        if isinstance(value, (FloatObject, NumberObject)):
            return value
        return FloatObject(value)

    def scale(self, sx: float, sy: float) -> "RectangleObject":
        """
        Return a new rectangle with the coordinates multiplied by the factors.

        Args:
            sx: Factor applied to left and right.
            sy: Factor applied to bottom and top.
        """
        scaled_left = float(self.left) * sx
        scaled_bottom = float(self.bottom) * sy
        scaled_right = float(self.right) * sx
        scaled_top = float(self.top) * sy
        return RectangleObject((scaled_left, scaled_bottom, scaled_right, scaled_top))

    def __repr__(self) -> str:
        return "RectangleObject(" + repr(list(self)) + ")"

    @property
    def left(self) -> FloatObject:
        """First stored value."""
        return self[0]

    @left.setter
    def left(self, f: float) -> None:
        self[0] = FloatObject(f)

    @property
    def bottom(self) -> FloatObject:
        """Second stored value."""
        return self[1]

    @bottom.setter
    def bottom(self, f: float) -> None:
        self[1] = FloatObject(f)

    @property
    def right(self) -> FloatObject:
        """Third stored value."""
        return self[2]

    @right.setter
    def right(self, f: float) -> None:
        self[2] = FloatObject(f)

    @property
    def top(self) -> FloatObject:
        """Fourth stored value."""
        return self[3]

    @top.setter
    def top(self, f: float) -> None:
        self[3] = FloatObject(f)

    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Property to read and modify the lower left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value: tuple[float, float]) -> None:
        x, y = (self._ensure_is_number(part) for part in value)
        self[0] = x
        self[1] = y

    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Property to read and modify the lower right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value: tuple[float, float]) -> None:
        x, y = (self._ensure_is_number(part) for part in value)
        self[2] = x
        self[1] = y

    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Property to read and modify the upper left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value: tuple[float, float]) -> None:
        x, y = (self._ensure_is_number(part) for part in value)
        self[0] = x
        self[3] = y

    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Property to read and modify the upper right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value: tuple[float, float]) -> None:
        x, y = (self._ensure_is_number(part) for part in value)
        self[2] = x
        self[3] = y

    @property
    def width(self) -> float:
        """Difference between right and left."""
        return self.right - self.left

    @property
    def height(self) -> float:
        """Difference between top and bottom."""
        return self.top - self.bottom

View File

@@ -0,0 +1,208 @@
import codecs
from typing import Union
from .._codecs import _pdfdoc_encoding
from .._utils import StreamType, logger_warning, read_non_whitespace
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
from ._base import ByteStringObject, TextStringObject
def hex_to_rgb(value: str) -> tuple[float, float, float]:
    """
    Convert a hexadecimal colour string to red, green and blue fractions.

    Args:
        value: Six hexadecimal digits, optionally preceded by ``#``.

    Returns:
        The three two-digit components, each divided by 255.

    Raises:
        ValueError: If one of the three two-character slices is not a valid
            hexadecimal number (for example because the string is too short).
    """
    digits = value.lstrip("#")
    red, green, blue = (
        int(digits[start : start + 2], 16) / 255.0 for start in (0, 2, 4)
    )
    return (red, green, blue)  # type: ignore
def read_hex_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a hexadecimal string from ``stream`` and turn it into a string object.

    Tokens are combined in pairs into byte values until ``>`` is read. A
    single leftover digit is padded with a trailing ``0``.

    Args:
        stream: Stream to read from; its first byte is discarded.
        forced_encoding: Passed on to ``create_string_object``.

    Returns:
        The result of ``create_string_object`` for the decoded bytes.

    Raises:
        PdfStreamError: If the data ends before ``>`` is read.
    """
    stream.read(1)  # presumably the opening "<" - it is not inspected
    byte_values = []
    pending = b""
    while True:
        digit = read_non_whitespace(stream)
        if digit == b">":
            break
        if not digit:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        pending += digit
        if len(pending) == 2:
            byte_values.append(int(pending, base=16))
            pending = b""
    if len(pending) == 1:
        pending += b"0"
    if pending:
        byte_values.append(int(pending, base=16))
    return create_string_object(bytes(byte_values), forced_encoding)
# Maps the byte that follows a backslash to the character code it stands for.
# The letter escapes (n, r, t, b, f) translate to control characters; every
# other entry maps a character to itself.
__ESCAPE_DICT__ = {
    b"n": ord(b"\n"),
    b"r": ord(b"\r"),
    b"t": ord(b"\t"),
    b"b": ord(b"\b"),
    b"f": ord(b"\f"),
    b"(": ord(b"("),
    b")": ord(b")"),
    b"/": ord(b"/"),
    b"\\": ord(b"\\"),
    b" ": ord(b" "),
    b"%": ord(b"%"),
    b"<": ord(b"<"),
    b">": ord(b">"),
    b"[": ord(b"["),
    b"]": ord(b"]"),
    b"#": ord(b"#"),
    b"_": ord(b"_"),
    b"&": ord(b"&"),
    b"$": ord(b"$"),
}
# Character code of the backslash itself (ord(b"\\") == 92).
__BACKSLASH_CODE__ = 92
def read_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a parenthesised string from ``stream`` and turn it into a string object.

    Nested parentheses are tracked and kept in the result; the string ends at
    the closing parenthesis that balances the first one. Backslash escapes
    are translated while reading.

    Args:
        stream: Seekable stream to read from; its first byte is discarded.
        forced_encoding: Passed on to ``create_string_object``.

    Returns:
        The result of ``create_string_object`` for the collected bytes.

    Raises:
        PdfStreamError: If the data ends before the string is closed.
    """
    # The first byte (presumably the opening parenthesis) is not inspected.
    tok = stream.read(1)
    # Nesting depth; starts at 1 for the parenthesis that opened the string.
    parens = 1
    # Collected character codes of the string content.
    txt = []
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b"(":
            parens += 1
        elif tok == b")":
            parens -= 1
            if parens == 0:
                break
        elif tok == b"\\":
            tok = stream.read(1)
            try:
                # Known single-character escape.
                txt.append(__ESCAPE_DICT__[tok])
                continue
            except KeyError:
                if b"0" <= tok <= b"7":
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    # Position of the first octal digit, to return to on overflow.
                    sav = stream.tell() - 1
                    for _ in range(2):
                        ntok = stream.read(1)
                        if b"0" <= ntok <= b"7":
                            tok += ntok
                        else:
                            stream.seek(-1, 1)  # ntok has to be analyzed
                            break
                    i = int(tok, base=8)
                    if i > 255:
                        # Not a byte value: keep the backslash literally and
                        # re-read the digits as ordinary characters.
                        txt.append(__BACKSLASH_CODE__)
                        stream.seek(sav)
                    else:
                        txt.append(i)
                    continue
                if tok in b"\n\r":
                    # This case is hit when a backslash followed by a line
                    # break occurs. If it's a multi-char EOL, consume the
                    # second character:
                    tok = stream.read(1)
                    if tok not in b"\n\r":
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    continue
                # Unknown escape: warn, keep the backslash, and let the
                # character itself be appended by the statement below.
                msg = f"Unexpected escaped string: {tok.decode('utf-8', 'ignore')}"
                logger_warning(msg, __name__)
                txt.append(__BACKSLASH_CODE__)
        # Reached for ordinary characters, nested parentheses and the
        # character following an unknown escape.
        txt.append(ord(tok))
    return create_string_object(bytes(txt), forced_encoding)
def create_string_object(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:
    """
    Create a ByteStringObject or a TextStringObject from a string to represent the string.

    Args:
        string: The data being used
        forced_encoding: Typically None, or an encoding string. A list or dict
            is used as a per-byte lookup table; the special value ``"bytes"``
            requests a ByteStringObject.

    Returns:
        A TextStringObject when the data is (or can be decoded to) text,
        otherwise a ByteStringObject.

    Raises:
        TypeError: If string is not of type str or bytes.
    """
    if isinstance(string, str):
        return TextStringObject(string)
    if not isinstance(string, bytes):
        raise TypeError("create_string_object should have str or unicode arg")

    if isinstance(forced_encoding, (list, dict)):
        # Per-byte table lookup; bytes without a usable entry fall back to charmap.
        decoded = ""
        for code in string:
            try:
                decoded += forced_encoding[code]
            except Exception:
                decoded += bytes((code,)).decode("charmap")
        text = TextStringObject(decoded)
        text._original_bytes = string
        return text

    if isinstance(forced_encoding, str):
        if forced_encoding == "bytes":
            return ByteStringObject(string)
        text = TextStringObject(string.decode(forced_encoding))
        text._original_bytes = string
        return text

    # No forced encoding: try UTF-16 (by BOM, then by position of a zero
    # byte), then PDFDocEncoding. Data that cannot be decoded stays bytes.
    try:
        if string.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
            codec, bom = "utf-16", string[:2]
        elif string.startswith(b"\x00"):
            codec, bom = "utf-16be", codecs.BOM_UTF16_BE
        elif string[1:2] == b"\x00":
            codec, bom = "utf-16le", codecs.BOM_UTF16_LE
        else:
            # This is probably a big performance hit here, but we need
            # to convert string objects into the text/unicode-aware
            # version if possible... and the only way to check if that's
            # possible is to try.
            # Some strings are strings, some are just byte arrays.
            text = TextStringObject(decode_pdfdocencoding(string))
            text._original_bytes = string
            text.autodetect_pdfdocencoding = True
            return text
        text = TextStringObject(string.decode(codec))
        text._original_bytes = string
        text.autodetect_utf16 = True
        text.utf16_bom = bom
        return text
    except UnicodeDecodeError:
        return ByteStringObject(string)
def decode_pdfdocencoding(byte_array: bytes) -> str:
    """
    Decode bytes with the ``_pdfdoc_encoding`` translation table.

    Args:
        byte_array: The bytes to decode; each byte value is used as an index
            into the table.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If a byte maps to ``"\\u0000"`` in the table. The
            exception carries the input data and the position of that byte.
    """
    chars = []
    for index, b in enumerate(byte_array):
        c = _pdfdoc_encoding[b]
        if c == "\u0000":
            # Report the real data and offset. bytearray(b) with an integer
            # would create b zero bytes rather than the offending byte.
            raise UnicodeDecodeError(
                "pdfdocencoding",
                bytes(byte_array),
                index,
                index + 1,
                "does not exist in translation table",
            )
        chars.append(c)
    return "".join(chars)

View File

@@ -0,0 +1,163 @@
# Copyright (c) 2023, Pubpub-ZZ
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
from typing import (
Any,
Optional,
)
from ._base import BooleanObject, NameObject, NumberObject, is_null_or_none
from ._data_structures import ArrayObject, DictionaryObject
# Shared "false" object used as the default of the boolean viewer preferences below.
f_obj = BooleanObject(False)
class ViewerPreferences(DictionaryObject):
    """
    Dictionary of viewer preference entries with typed accessor properties.

    The accessor properties (``hide_toolbar``, ``direction``, ``num_copies``,
    ...) are not written out as methods; they are created and assigned to the
    class inside ``__new__``.
    """

    def __init__(self, obj: Optional[DictionaryObject] = None) -> None:
        """
        Create the preferences, optionally copying the entries of ``obj``.

        Args:
            obj: Dictionary whose items are copied. Its ``indirect_reference``
                is adopted when it has one.
        """
        # NOTE(review): this passes the still-empty instance itself as the
        # initialiser argument - confirm this is intended.
        super().__init__(self)
        if not is_null_or_none(obj):
            self.update(obj.items())  # type: ignore
        try:
            self.indirect_reference = obj.indirect_reference  # type: ignore
        except AttributeError:
            # obj has no indirect_reference attribute (for example obj is None).
            pass

    def _get_bool(self, key: str, default: Optional[BooleanObject]) -> Optional[BooleanObject]:
        """Return the entry stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_bool(self, key: str, v: bool) -> None:
        """Store a boolean object under ``key``; only the value ``True`` itself yields true."""
        self[NameObject(key)] = BooleanObject(v is True)

    def _get_name(self, key: str, default: Optional[NameObject]) -> Optional[NameObject]:
        """Return the entry stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_name(self, key: str, lst: list[str], v: NameObject) -> None:
        """
        Store ``v`` as a name object under ``key``.

        Raises:
            ValueError: If ``v`` does not start with ``/``, or if ``lst`` is
                not empty and does not contain ``v``.
        """
        if v[0] != "/":
            raise ValueError(f"{v} does not start with '/'")
        # An empty list means that any name is accepted.
        if lst != [] and v not in lst:
            raise ValueError(f"{v} is an unacceptable value")
        self[NameObject(key)] = NameObject(v)

    def _get_arr(self, key: str, default: Optional[list[Any]]) -> Optional[ArrayObject]:
        """Return the entry stored under ``key``, or ``default`` wrapped in an ArrayObject (None stays None)."""
        return self.get(key, None if default is None else ArrayObject(default))

    def _set_arr(self, key: str, v: Optional[ArrayObject]) -> None:
        """
        Store the array ``v`` under ``key``; ``None`` removes the entry.

        Raises:
            ValueError: If ``v`` is neither None nor an ArrayObject.
        """
        if v is None:
            try:
                del self[NameObject(key)]
            except KeyError:
                # Nothing stored under key; removing is a no-op.
                pass
            return
        if not isinstance(v, ArrayObject):
            raise ValueError("ArrayObject is expected")
        self[NameObject(key)] = v

    def _get_int(self, key: str, default: Optional[NumberObject]) -> Optional[NumberObject]:
        """Return the entry stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_int(self, key: str, v: int) -> None:
        """Store ``v`` as a number object under ``key``."""
        self[NameObject(key)] = NumberObject(v)

    @property
    def PRINT_SCALING(self) -> NameObject:
        """The name object ``/PrintScaling``."""
        return NameObject("/PrintScaling")

    def __new__(cls: Any, value: Any = None) -> "ViewerPreferences":
        """
        Assign the accessor properties to ``cls`` and create the instance.

        The properties are assigned each time this method runs. ``value`` is
        not used here.
        """

        # Each factory below returns a property whose getter and setter
        # delegate to the matching _get_* / _set_* method for one fixed key.
        def _add_prop_bool(key: str, default: Optional[BooleanObject]) -> property:
            return property(
                lambda self: self._get_bool(key, default),
                lambda self, v: self._set_bool(key, v),
                None,
                f"""
Returns/Modify the status of {key}, Returns {default} if not defined
""",
            )

        def _add_prop_name(
            key: str, lst: list[str], default: Optional[NameObject]
        ) -> property:
            return property(
                lambda self: self._get_name(key, default),
                lambda self, v: self._set_name(key, lst, v),
                None,
                f"""
Returns/Modify the status of {key}, Returns {default} if not defined.
Acceptable values: {lst}
""",
            )

        def _add_prop_arr(key: str, default: Optional[ArrayObject]) -> property:
            return property(
                lambda self: self._get_arr(key, default),
                lambda self, v: self._set_arr(key, v),
                None,
                f"""
Returns/Modify the status of {key}, Returns {default} if not defined
""",
            )

        def _add_prop_int(key: str, default: Optional[int]) -> property:
            return property(
                lambda self: self._get_int(key, default),
                lambda self, v: self._set_int(key, v),
                None,
                f"""
Returns/Modify the status of {key}, Returns {default} if not defined
""",
            )

        # Boolean preferences defaulting to the shared false object.
        cls.hide_toolbar = _add_prop_bool("/HideToolbar", f_obj)
        cls.hide_menubar = _add_prop_bool("/HideMenubar", f_obj)
        cls.hide_windowui = _add_prop_bool("/HideWindowUI", f_obj)
        cls.fit_window = _add_prop_bool("/FitWindow", f_obj)
        cls.center_window = _add_prop_bool("/CenterWindow", f_obj)
        cls.display_doctitle = _add_prop_bool("/DisplayDocTitle", f_obj)
        # Name preferences; a non-empty list restricts the accepted values.
        cls.non_fullscreen_pagemode = _add_prop_name(
            "/NonFullScreenPageMode",
            ["/UseNone", "/UseOutlines", "/UseThumbs", "/UseOC"],
            NameObject("/UseNone"),
        )
        cls.direction = _add_prop_name(
            "/Direction", ["/L2R", "/R2L"], NameObject("/L2R")
        )
        cls.view_area = _add_prop_name("/ViewArea", [], None)
        cls.view_clip = _add_prop_name("/ViewClip", [], None)
        cls.print_area = _add_prop_name("/PrintArea", [], None)
        cls.print_clip = _add_prop_name("/PrintClip", [], None)
        cls.print_scaling = _add_prop_name("/PrintScaling", [], None)
        cls.duplex = _add_prop_name(
            "/Duplex", ["/Simplex", "/DuplexFlipShortEdge", "/DuplexFlipLongEdge"], None
        )
        # Preferences without a default value (None is returned when absent),
        # plus /Enforce which defaults to an empty array.
        cls.pick_tray_by_pdfsize = _add_prop_bool("/PickTrayByPDFSize", None)
        cls.print_pagerange = _add_prop_arr("/PrintPageRange", None)
        cls.num_copies = _add_prop_int("/NumCopies", None)
        cls.enforce = _add_prop_arr("/Enforce", ArrayObject())
        return DictionaryObject.__new__(cls)