Update ashboard, dashboard, memory +1 more (+2 ~3)
This commit is contained in:
289
venv/lib/python3.12/site-packages/pypdf/_page_labels.py
Normal file
289
venv/lib/python3.12/site-packages/pypdf/_page_labels.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""
Page labels are shown by PDF viewers as "the page number".

A page has a numeric index, starting at 0. Additionally, the page
has a label. In the simplest case:

    label = index + 1

However, the title page and the table of contents might have Roman numerals as
page labels. This makes things more complicated.

Example 1
---------

>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
 8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}

Example 2
---------
The following is a document with pages labeled
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...

1 0 obj
    << /Type /Catalog
       /PageLabels << /Nums [
                        0 << /S /r >>
                        4 << /S /D >>
                        7 << /S /D
                             /P ( A- )
                             /St 8
                        >>
                        % A number tree containing
                        % three page label dictionaries
                        ]
                   >>
    ...
    >>
endobj


§12.4.2 PDF Specification 1.7 and 2.0
=====================================

Entries in a page label dictionary
----------------------------------
The /S key:
D       Decimal Arabic numerals
R       Uppercase Roman numerals
r       Lowercase Roman numerals
A       Uppercase letters (A to Z for the first 26 pages,
                           AA to ZZ for the next 26, and so on)
a       Lowercase letters (a to z for the first 26 pages,
                           aa to zz for the next 26, and so on)
"""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from typing import Optional, cast
|
||||
|
||||
from ._protocols import PdfCommonDocProtocol
|
||||
from ._utils import logger_warning
|
||||
from .generic import (
|
||||
ArrayObject,
|
||||
DictionaryObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
|
||||
def number2uppercase_roman_numeral(num: int) -> str:
    """
    Convert a number to an uppercase Roman numeral.

    Args:
        num: The number to convert.

    Returns:
        The Roman numeral, e.g. "XIV" for 14. Zero and negative numbers
        yield an empty string.

    """
    # Ordered from largest to smallest. The subtractive pairs (CM, CD, ...)
    # are listed explicitly so a single greedy pass yields the usual form.
    roman = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )

    if num <= 0:
        # Floor division of a negative value by 1000 gives -1 with a positive
        # remainder, which would otherwise be rendered as an unrelated numeral.
        return ""

    parts = []
    for decimal, roman_repr in roman:
        count, num = divmod(num, decimal)
        parts.append(roman_repr * count)
        if num == 0:
            break
    return "".join(parts)
|
||||
|
||||
|
||||
def number2lowercase_roman_numeral(number: int) -> str:
    """
    Convert a number to a lowercase Roman numeral.

    Args:
        number: The number to convert.

    Returns:
        The lowercased result of ``number2uppercase_roman_numeral``.

    """
    return number2uppercase_roman_numeral(number).lower()
|
||||
|
||||
|
||||
def number2uppercase_letter(number: int) -> str:
    """
    Convert a positive number to an uppercase letter label.

    The numbering has no zero digit: 1 -> "A", 26 -> "Z", 27 -> "AA",
    28 -> "AB", ..., 702 -> "ZZ", 703 -> "AAA".

    NOTE(review): the specification excerpt quoted in the module docstring
    ("AA to ZZ for the next 26") may be read as repeated letters
    (AA, BB, ...); this function produces AA, AB, ... -- confirm which form
    is intended before changing it.

    Args:
        number: The number to convert; must be greater than zero.

    Returns:
        The letter representation of the number.

    Raises:
        ValueError: If ``number`` is zero or negative.

    """
    if number <= 0:
        raise ValueError("Expecting a positive number")

    letters = []
    while number > 0:
        # Shift to a zero-based value first so that multiples of 26 map to
        # "Z" instead of carrying into the next position.
        number, offset = divmod(number - 1, 26)
        letters.append(chr(ord("A") + offset))
    # Letters were collected least-significant first.
    return "".join(reversed(letters))
|
||||
|
||||
|
||||
def number2lowercase_letter(number: int) -> str:
    """
    Convert a positive number to a lowercase letter label.

    Args:
        number: The number to convert.

    Returns:
        The lowercased result of ``number2uppercase_letter``.

    """
    return number2uppercase_letter(number).lower()
|
||||
|
||||
|
||||
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Compute the label of a page from the "/Nums" array of a number tree node.

    [Nums] shall be an array of the form
      [ key_1 value_1 key_2 value_2 ... key_n value_n ]
    where each key_i is an integer and the corresponding
    value_i shall be the object associated with that key.
    The keys shall be sorted in numerical order,
    analogously to the arrangement of keys in a name tree
    as described in 7.9.6, "Name Trees."

    Args:
        dictionary_object: The node containing the "/Nums" array.
        index: The index of the page.

    Returns:
        The label of the page. ``str(index + 1)`` is returned as a fallback
        when "/Nums" is empty, when the selected entry is not a dictionary,
        or when its "/S" style is not one of the known styles.

    """
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    value = None
    start_index = 0
    # Select the last entry whose key does not exceed ``index``; the first
    # entry is always taken. Only complete (key, value) pairs are visited, so
    # a dangling trailing key in a malformed array is ignored instead of
    # raising an IndexError.
    for i in range(0, len(nums) - 1, 2):
        if i > 0 and nums[i] > index:
            break
        start_index = nums[i]
        value = nums[i + 1].get_object()

    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(value, dict):
        return str(index + 1)  # Fallback

    converters = {
        # Without "/S" the label has no numeric part, only the prefix.
        None: lambda _: "",
        "/D": lambda n: str(n),
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    converter = converters.get(value.get("/S"))
    if converter is None:
        # Unknown numbering style: use the plain fallback rather than
        # failing with a KeyError.
        return str(index + 1)  # Fallback

    start = value.get("/St", 1)
    prefix = value.get("/P", "")
    return prefix + converter(index - start_index + start)
|
||||
|
||||
|
||||
def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Determine the label of a page from the "/PageLabels" number tree.

    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4". ``str(index + 1)`` is
        returned when the document has no "/PageLabels" entry, and (after
        logging a warning) when no label can be determined from the tree.

    Raises:
        NotImplementedError: If the "/Kids" hierarchy is nested 100 levels
            deep.

    """
    root = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in root:
        return str(index + 1)  # Fallback
    number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        return get_label_from_nums(number_tree, index)
    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # Limit maximum depth.
        level = 0
        while level < 100:
            kids = cast(list[DictionaryObject], number_tree["/Kids"])
            for kid in kids:
                # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
                limits = cast(list[int], kid["/Limits"])
                if limits[0] <= index <= limits[1]:
                    if not is_null_or_none(kid.get("/Kids", None)):
                        # Recursive definition.
                        level += 1
                        if level == 100:  # pragma: no cover
                            raise NotImplementedError(
                                "Too deep nesting is not supported."
                            )
                        number_tree = kid
                        # Exit the inner `for` loop and continue at the next level with the
                        # next iteration of the `while` loop.
                        break
                    return get_label_from_nums(kid, index)
            else:
                # Reached when the `for` loop finished without `break`, i.e. no
                # kid covers the index (or there are no kids at all): exit the
                # `while` loop directly and continue with the fallback.
                break

    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree
|
||||
|
||||
|
||||
def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Insert a key, value pair in a Nums array.

    The array is modified in place. If ``key`` is already present, its value
    is replaced; otherwise the pair is inserted so that the keys stay in
    ascending order.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    # Walk backwards over the keys (every second element) until the preceding
    # key is smaller than ``key``; ``position`` is then the slot for ``key``.
    position = len(nums)
    while position != 0 and key <= nums[position - 2]:
        position -= 2

    if position < len(nums) and key == nums[position]:
        # Key already present: only replace its value.
        nums[position + 1] = value
    else:
        nums.insert(position, key)
        nums.insert(position + 1, value)
|
||||
|
||||
|
||||
def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Remove all entries in a number tree in a range after an entry.

    The entry for ``key`` itself is kept; every following entry whose key is
    less than or equal to ``page_index_to`` is removed in place.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements, if
            ``page_index_to`` is smaller than ``key``, or if ``key`` is not
            found in ``nums``.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # First element after the (key, value) pair of ``key``.
    first = nums.index(key) + 2
    last = first
    while last < len(nums) and nums[last] <= page_index_to:
        last += 2
    # Remove the whole run of pairs at once instead of popping one by one.
    del nums[first:last]
|
||||
|
||||
|
||||
def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Return the (key, value) pair of the entry after the given one.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The (key, value) pair following the entry for ``key``, or
        ``(None, None)`` if that entry is the last one.

    Raises:
        ValueError: If ``nums`` has an odd number of elements or if ``key``
            is not found in ``nums``.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    # Position of the key that follows the (key, value) pair of ``key``.
    following = nums.index(key) + 2
    if following >= len(nums):
        return (None, None)
    return (nums[following], nums[following + 1])
|
||||
Reference in New Issue
Block a user