Update ashboard, dashboard, memory +1 more (+2 ~3)
This commit is contained in:
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
@@ -0,0 +1,401 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from pypdf._utils import format_iso8824_date, parse_iso8824_date
|
||||
from pypdf.constants import CatalogAttributes as CA
|
||||
from pypdf.constants import FileSpecificationDictionaryEntries
|
||||
from pypdf.constants import PageAttributes as PG
|
||||
from pypdf.errors import PdfReadError, PyPdfError
|
||||
from pypdf.generic import (
|
||||
ArrayObject,
|
||||
ByteStringObject,
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NameObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
StreamObject,
|
||||
TextStringObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from collections.abc import Generator
|
||||
|
||||
from pypdf._writer import PdfWriter
|
||||
|
||||
|
||||
class EmbeddedFile:
|
||||
"""
|
||||
Container holding the information on an embedded file.
|
||||
|
||||
Attributes are evaluated lazily if possible.
|
||||
|
||||
Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
|
||||
"""
|
||||
def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
|
||||
"""
|
||||
Args:
|
||||
name: The (primary) name as provided in the name tree.
|
||||
pdf_object: The corresponding PDF object to allow retrieving further data.
|
||||
parent: The parent list.
|
||||
"""
|
||||
self._name = name
|
||||
self.pdf_object = pdf_object
|
||||
self._parent = parent
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
"""The (primary) name of the embedded file as provided in the name tree."""
|
||||
return self._name
|
||||
|
||||
@classmethod
|
||||
def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
|
||||
"""
|
||||
Create a new embedded file and add it to the PdfWriter.
|
||||
|
||||
Args:
|
||||
writer: The PdfWriter instance to add the embedded file to.
|
||||
name: The filename to display.
|
||||
content: The data in the file.
|
||||
|
||||
Returns:
|
||||
EmbeddedFile instance for the newly created embedded file.
|
||||
"""
|
||||
# Convert string content to bytes if needed
|
||||
if isinstance(content, str):
|
||||
content = content.encode("latin-1")
|
||||
|
||||
# Create the file entry (the actual embedded file stream)
|
||||
file_entry = DecodedStreamObject()
|
||||
file_entry.set_data(content)
|
||||
file_entry.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})
|
||||
|
||||
# Create the /EF entry
|
||||
ef_entry = DictionaryObject()
|
||||
ef_entry.update({NameObject("/F"): writer._add_object(file_entry)})
|
||||
|
||||
# Create the filespec dictionary
|
||||
from pypdf.generic import create_string_object # noqa: PLC0415
|
||||
filespec = DictionaryObject()
|
||||
filespec_reference = writer._add_object(filespec)
|
||||
name_object = cast(TextStringObject, create_string_object(name))
|
||||
filespec.update(
|
||||
{
|
||||
NameObject(PG.TYPE): NameObject("/Filespec"),
|
||||
NameObject(FileSpecificationDictionaryEntries.F): name_object,
|
||||
NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
|
||||
}
|
||||
)
|
||||
|
||||
# Add the name and filespec to the names array.
|
||||
# We use the inverse order for insertion, as this allows us to re-use the
|
||||
# same index.
|
||||
names_array = cls._get_names_array(writer)
|
||||
insertion_index = cls._get_insertion_index(names_array, name_object)
|
||||
names_array.insert(insertion_index, filespec_reference)
|
||||
names_array.insert(insertion_index, name_object)
|
||||
|
||||
# Return an EmbeddedFile instance
|
||||
return cls(name=name, pdf_object=filespec, parent=names_array)
|
||||
|
||||
@classmethod
|
||||
def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
|
||||
"""Get the names array for embedded files, possibly creating and flattening it."""
|
||||
if CA.NAMES not in writer.root_object:
|
||||
# Add the /Names entry to the catalog.
|
||||
writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())
|
||||
|
||||
names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])
|
||||
if "/EmbeddedFiles" not in names_dict:
|
||||
# We do not yet have an entry for embedded files. Create and return it.
|
||||
names = ArrayObject()
|
||||
embedded_files_names_dictionary = DictionaryObject(
|
||||
{NameObject(CA.NAMES): names}
|
||||
)
|
||||
names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
|
||||
return names
|
||||
|
||||
# We have an existing embedded files entry.
|
||||
embedded_files_names_tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
|
||||
if "/Names" in embedded_files_names_tree:
|
||||
# Simple case: We already have a flat list.
|
||||
return cast(ArrayObject, embedded_files_names_tree[NameObject(CA.NAMES)])
|
||||
if "/Kids" not in embedded_files_names_tree:
|
||||
# Invalid case: This is no name tree.
|
||||
raise PdfReadError("Got neither Names nor Kids in embedded files tree.")
|
||||
|
||||
# Complex case: Convert a /Kids-based name tree to a /Names-based one.
|
||||
# /Name-based ones are much easier to handle and allow us to simplify the
|
||||
# actual insertion logic by only having to consider one case.
|
||||
names = ArrayObject()
|
||||
kids = cast(ArrayObject, embedded_files_names_tree["/Kids"].get_object())
|
||||
embedded_files_names_dictionary = DictionaryObject(
|
||||
{NameObject(CA.NAMES): names}
|
||||
)
|
||||
names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
|
||||
for kid in kids:
|
||||
# Write the flattened file entries. As we do not change the actual files,
|
||||
# this should not have any impact on references to them.
|
||||
# There might be further (nested) kids here.
|
||||
# Wait for an example before evaluating an implementation.
|
||||
for name in kid.get_object().get("/Names", []):
|
||||
names.append(name)
|
||||
return names
|
||||
|
||||
@classmethod
|
||||
def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
|
||||
keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
|
||||
name_bytes = name.encode("utf-8")
|
||||
|
||||
start = bisect.bisect_left(keys, name_bytes)
|
||||
end = bisect.bisect_right(keys, name_bytes)
|
||||
|
||||
if start != end:
|
||||
return end * 2
|
||||
if start == 0:
|
||||
return 0
|
||||
if start == (key_count := len(keys)):
|
||||
return key_count * 2
|
||||
return end * 2
|
||||
|
||||
@property
|
||||
def alternative_name(self) -> str | None:
|
||||
"""Retrieve the alternative name (file specification)."""
|
||||
for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
|
||||
# PDF 2.0 reference, table 43:
|
||||
# > A PDF reader shall use the value of the UF key, when present, instead of the F key.
|
||||
if key in self.pdf_object:
|
||||
value = self.pdf_object[key].get_object()
|
||||
if not is_null_or_none(value):
|
||||
return cast(str, value)
|
||||
return None
|
||||
|
||||
@alternative_name.setter
|
||||
def alternative_name(self, value: TextStringObject | None) -> None:
|
||||
"""Set the alternative name (file specification)."""
|
||||
if value is None:
|
||||
if FileSpecificationDictionaryEntries.UF in self.pdf_object:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = NullObject()
|
||||
if FileSpecificationDictionaryEntries.F in self.pdf_object:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = NullObject()
|
||||
else:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = value
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = value
|
||||
|
||||
@property
|
||||
def description(self) -> str | None:
|
||||
"""Retrieve the description."""
|
||||
value = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@description.setter
|
||||
def description(self, value: TextStringObject | None) -> None:
|
||||
"""Set the description."""
|
||||
if value is None:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = NullObject()
|
||||
else:
|
||||
self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = value
|
||||
|
||||
@property
|
||||
def associated_file_relationship(self) -> str:
|
||||
"""Retrieve the relationship of the referring document to this embedded file."""
|
||||
return self.pdf_object.get("/AFRelationship", "/Unspecified")
|
||||
|
||||
@associated_file_relationship.setter
|
||||
def associated_file_relationship(self, value: NameObject) -> None:
|
||||
"""Set the relationship of the referring document to this embedded file."""
|
||||
self.pdf_object[NameObject("/AFRelationship")] = value
|
||||
|
||||
@property
|
||||
def _embedded_file(self) -> StreamObject:
|
||||
"""Retrieve the actual embedded file stream."""
|
||||
if "/EF" not in self.pdf_object:
|
||||
raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
|
||||
ef = cast(DictionaryObject, self.pdf_object["/EF"])
|
||||
for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
|
||||
if key in ef:
|
||||
return cast(StreamObject, ef[key].get_object())
|
||||
raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")
|
||||
|
||||
@property
|
||||
def _params(self) -> DictionaryObject:
|
||||
"""Retrieve the file-specific parameters."""
|
||||
return self._embedded_file.get("/Params", DictionaryObject()).get_object()
|
||||
|
||||
@cached_property
|
||||
def _ensure_params(self) -> DictionaryObject:
|
||||
"""Ensure the /Params dictionary exists and return it."""
|
||||
embedded_file = self._embedded_file
|
||||
if "/Params" not in embedded_file:
|
||||
embedded_file[NameObject("/Params")] = DictionaryObject()
|
||||
return cast(DictionaryObject, embedded_file["/Params"])
|
||||
|
||||
@property
|
||||
def subtype(self) -> str | None:
|
||||
"""Retrieve the subtype. This is a MIME media type, prefixed by a slash."""
|
||||
value = self._embedded_file.get("/Subtype")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@subtype.setter
|
||||
def subtype(self, value: NameObject | None) -> None:
|
||||
"""Set the subtype. This should be a MIME media type, prefixed by a slash."""
|
||||
embedded_file = self._embedded_file
|
||||
if value is None:
|
||||
embedded_file[NameObject("/Subtype")] = NullObject()
|
||||
else:
|
||||
embedded_file[NameObject("/Subtype")] = value
|
||||
|
||||
@property
|
||||
def content(self) -> bytes:
|
||||
"""Retrieve the actual file content."""
|
||||
return self._embedded_file.get_data()
|
||||
|
||||
@content.setter
|
||||
def content(self, value: str | bytes) -> None:
|
||||
"""Set the file content."""
|
||||
if isinstance(value, str):
|
||||
value = value.encode("latin-1")
|
||||
self._embedded_file.set_data(value)
|
||||
|
||||
@property
|
||||
def size(self) -> int | None:
|
||||
"""Retrieve the size of the uncompressed file in bytes."""
|
||||
value = self._params.get("/Size")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@size.setter
|
||||
def size(self, value: NumberObject | None) -> None:
|
||||
"""Set the size of the uncompressed file in bytes."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/Size")] = NullObject()
|
||||
else:
|
||||
params[NameObject("/Size")] = value
|
||||
|
||||
@property
|
||||
def creation_date(self) -> datetime.datetime | None:
|
||||
"""Retrieve the file creation datetime."""
|
||||
return parse_iso8824_date(self._params.get("/CreationDate"))
|
||||
|
||||
@creation_date.setter
|
||||
def creation_date(self, value: datetime.datetime | None) -> None:
|
||||
"""Set the file creation datetime."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/CreationDate")] = NullObject()
|
||||
else:
|
||||
date_str = format_iso8824_date(value)
|
||||
params[NameObject("/CreationDate")] = TextStringObject(date_str)
|
||||
|
||||
@property
|
||||
def modification_date(self) -> datetime.datetime | None:
|
||||
"""Retrieve the datetime of the last file modification."""
|
||||
return parse_iso8824_date(self._params.get("/ModDate"))
|
||||
|
||||
@modification_date.setter
|
||||
def modification_date(self, value: datetime.datetime | None) -> None:
|
||||
"""Set the datetime of the last file modification."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/ModDate")] = NullObject()
|
||||
else:
|
||||
date_str = format_iso8824_date(value)
|
||||
params[NameObject("/ModDate")] = TextStringObject(date_str)
|
||||
|
||||
@property
|
||||
def checksum(self) -> bytes | None:
|
||||
"""Retrieve the MD5 checksum of the (uncompressed) file."""
|
||||
value = self._params.get("/CheckSum")
|
||||
if is_null_or_none(value):
|
||||
return None
|
||||
return value
|
||||
|
||||
@checksum.setter
|
||||
def checksum(self, value: ByteStringObject | None) -> None:
|
||||
"""Set the MD5 checksum of the (uncompressed) file."""
|
||||
params = self._ensure_params
|
||||
if value is None:
|
||||
params[NameObject("/CheckSum")] = NullObject()
|
||||
else:
|
||||
params[NameObject("/CheckSum")] = value
|
||||
|
||||
def delete(self) -> None:
|
||||
"""Delete the file from the document."""
|
||||
if not self._parent:
|
||||
raise PyPdfError("Parent required to delete file from document.")
|
||||
if self.pdf_object in self._parent:
|
||||
index = self._parent.index(self.pdf_object)
|
||||
elif (
|
||||
(indirect_reference := getattr(self.pdf_object, "indirect_reference", None)) is not None
|
||||
and indirect_reference in self._parent
|
||||
):
|
||||
index = self._parent.index(indirect_reference)
|
||||
else:
|
||||
raise PyPdfError("File not found in parent object.")
|
||||
self._parent.pop(index) # Reference.
|
||||
self._parent.pop(index - 1) # Name.
|
||||
self.pdf_object = DictionaryObject() # Invalidate.
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<{self.__class__.__name__} name={self.name!r}>"
|
||||
|
||||
@classmethod
|
||||
def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
|
||||
"""
|
||||
Convert the given name tree into class instances.
|
||||
|
||||
Args:
|
||||
names: The name tree to load the data from.
|
||||
|
||||
Returns:
|
||||
Iterable of class instances for the files found.
|
||||
"""
|
||||
# This is a name tree of the format [name_1, reference_1, name_2, reference_2, ...]
|
||||
for i, name in enumerate(names):
|
||||
if not isinstance(name, str):
|
||||
# Skip plain strings and retrieve them as `direct_name` by index.
|
||||
file_dictionary = name.get_object()
|
||||
direct_name = names[i - 1].get_object()
|
||||
yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)
|
||||
|
||||
@classmethod
|
||||
def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
|
||||
"""
|
||||
Load the embedded files for the given document catalog.
|
||||
|
||||
This method and its signature are considered internal API and thus not exposed publicly for now.
|
||||
|
||||
Args:
|
||||
catalog: The document catalog to load from.
|
||||
|
||||
Returns:
|
||||
Iterable of class instances for the files found.
|
||||
"""
|
||||
try:
|
||||
container = cast(
|
||||
DictionaryObject,
|
||||
cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
|
||||
)
|
||||
except KeyError:
|
||||
return
|
||||
|
||||
if "/Kids" in container:
|
||||
for kid in cast(ArrayObject, container["/Kids"].get_object()):
|
||||
# There might be further (nested) kids here.
|
||||
# Wait for an example before evaluating an implementation.
|
||||
kid = kid.get_object()
|
||||
if "/Names" in kid:
|
||||
yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
|
||||
if "/Names" in container:
|
||||
yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
|
||||
Reference in New Issue
Block a user