Update dashboard, memory +1 more (+2 ~3)
@@ -687,6 +687,11 @@
   border-color: var(--accent);
 }
 
+.issue-checkbox.in-progress {
+  background: rgba(59, 130, 246, 0.3);
+  border-color: #3b82f6;
+}
+
 .issue-checkbox svg {
   width: 12px;
   height: 12px;
@@ -698,6 +703,14 @@
   display: block;
 }
 
+.issue-checkbox.in-progress::after {
+  content: '';
+  width: 8px;
+  height: 8px;
+  border-radius: 50%;
+  background: #3b82f6;
+}
+
 .issue-content {
   flex: 1;
   min-width: 0;
@@ -738,6 +751,27 @@
 .issue-owner.marius { color: #22c55e; }
 .issue-owner.robert { color: #f59e0b; }
 
+.issue-status {
+  padding: 2px 8px;
+  border-radius: var(--radius-sm);
+  font-size: 11px;
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.3px;
+}
+.issue-status.todo {
+  background: rgba(156, 163, 175, 0.2);
+  color: #9ca3af;
+}
+.issue-status.in-progress {
+  background: rgba(59, 130, 246, 0.2);
+  color: #3b82f6;
+}
+.issue-status.done {
+  background: rgba(34, 197, 94, 0.2);
+  color: #22c55e;
+}
+
 /* Todo's Panel */
 .todos-panel { border-left: 3px solid #8b5cf6; }
 .todo-section { margin-bottom: 16px; }
@@ -1266,10 +1300,21 @@
           <option value="backlog">⚪ Backlog</option>
         </select>
       </div>
+      <div class="form-group">
+        <label class="form-label">Status</label>
+        <select class="input" id="issueStatus">
+          <option value="todo">Todo</option>
+          <option value="in-progress">In Progress</option>
+          <option value="done">Done</option>
+        </select>
+      </div>
+    </div>
+    <div class="form-row">
       <div class="form-group">
         <label class="form-label">Deadline</label>
         <input type="date" class="input" id="issueDeadline">
       </div>
+      <div class="form-group"></div>
     </div>
     <div class="modal-actions">
       <button class="btn btn-danger" id="issueDeleteBtn" onclick="deleteIssue()" style="margin-right: auto; display: none;">Șterge</button>
@@ -2129,7 +2174,6 @@
     <div class="priority-group">
       <div class="priority-header ${isCollapsed ? 'collapsed' : ''}" onclick="togglePriority('${priority}')">
         <i data-lucide="chevron-down"></i>
-        <span class="priority-dot ${priority}"></span>
         <span>${priorityLabels[priority]}</span>
         <span style="margin-left: auto; opacity: 0.7">${todoCount}/${issues.length}</span>
       </div>
@@ -2146,18 +2190,23 @@
 
 function renderIssueItem(issue) {
   const isDone = issue.status === 'done';
+  const isInProgress = issue.status === 'in-progress';
   const ownerIcons = { 'clawdbot': '🤖', 'robert': '👷', 'marius': '👤' };
   const ownerIcon = ownerIcons[issue.owner] || '👤';
   const dateStr = new Date(issue.created).toLocaleDateString('ro-RO', { day: 'numeric', month: 'short' });
+  const statusLabels = { 'todo': 'Todo', 'in-progress': 'In Progress', 'done': 'Done' };
+  const statusLabel = statusLabels[issue.status] || 'Todo';
+  const checkboxClass = isDone ? 'checked' : (isInProgress ? 'in-progress' : '');
+
   return `
     <div class="issue-item ${isDone ? 'done' : ''}" data-id="${issue.id}">
-      <div class="issue-checkbox ${isDone ? 'checked' : ''}" onclick="toggleIssue('${issue.id}')">
+      <div class="issue-checkbox ${checkboxClass}" onclick="toggleIssue('${issue.id}')" title="Click pentru a schimba statusul">
        <i data-lucide="check"></i>
      </div>
      <div class="issue-content" onclick="editIssue('${issue.id}')">
        <div class="issue-title">${issue.title}</div>
        <div class="issue-meta">
+          <span class="issue-status ${issue.status || 'todo'}">${statusLabel}</span>
          ${issue.program ? `<span class="issue-tag program">${issue.program}</span>` : ''}
          <span class="issue-owner ${issue.owner}">${ownerIcon} ${issue.owner === 'clawdbot' ? 'Clawdbot' : (issue.owner === 'robert' ? 'Robert' : 'Marius')}</span>
          <span class="issue-date">${dateStr}</span>
@@ -2180,17 +2229,27 @@
   const issue = issuesData.issues.find(i => i.id === id);
   if (!issue) return;
 
-  issue.status = issue.status === 'done' ? 'todo' : 'done';
+  // Cycle: todo → in-progress → done → todo
+  const statusCycle = { 'todo': 'in-progress', 'in-progress': 'done', 'done': 'todo' };
+  const currentStatus = issue.status || 'todo';
+  issue.status = statusCycle[currentStatus] || 'in-progress';
+
   if (issue.status === 'done') {
     issue.completed = new Date().toISOString();
   } else {
     delete issue.completed;
   }
 
+  const statusMessages = {
+    'in-progress': '🔄 In Progress',
+    'done': '✅ Done!',
+    'todo': '📋 Todo'
+  };
+
   renderIssues();
   updateIssuesCount();
   await saveIssues();
-  showToast(issue.status === 'done' ? 'Issue finalizat! ✓' : 'Issue redeschis');
+  showToast(statusMessages[issue.status]);
 }
 
 // Filters
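Reviewer note: a minimal standalone sketch of the three-state cycle the hunk above introduces. The `nextStatus` helper is hypothetical (for illustration only, not part of the dashboard code); it mirrors the hunk's `statusCycle[currentStatus] || 'in-progress'` guard, so unknown or missing statuses fall back the same way.

```js
// Illustrative sketch of the status cycle, assuming the same statusCycle map.
const statusCycle = { 'todo': 'in-progress', 'in-progress': 'done', 'done': 'todo' };

function nextStatus(current) {
  // Missing status defaults to 'todo'; unknown status falls back to 'in-progress'.
  return statusCycle[current || 'todo'] || 'in-progress';
}

console.log(nextStatus('todo'));        // 'in-progress'
console.log(nextStatus('in-progress')); // 'done'
console.log(nextStatus('done'));        // 'todo'
console.log(nextStatus('bogus'));       // 'in-progress' (fallback)
```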
@@ -2212,6 +2271,7 @@
   document.getElementById('issueProgram').value = '';
   document.getElementById('issueOwner').value = 'marius';
   document.getElementById('issuePriority').value = 'urgent-important';
+  document.getElementById('issueStatus').value = 'todo';
   document.getElementById('issueDeadline').value = '';
   document.getElementById('issueDeleteBtn').style.display = 'none';
   document.getElementById('issueSaveBtn').textContent = 'Adaugă';
@@ -2230,6 +2290,7 @@
   document.getElementById('issueProgram').value = issue.program || '';
   document.getElementById('issueOwner').value = issue.owner || 'marius';
   document.getElementById('issuePriority').value = issue.priority || 'backlog';
+  document.getElementById('issueStatus').value = issue.status || 'todo';
   document.getElementById('issueDeadline').value = issue.deadline || '';
   document.getElementById('issueDeleteBtn').style.display = 'block';
   document.getElementById('issueSaveBtn').textContent = 'Salvează';
@@ -2272,6 +2333,13 @@
   issue.program = document.getElementById('issueProgram').value;
   issue.owner = document.getElementById('issueOwner').value;
   issue.priority = document.getElementById('issuePriority').value;
+  const newStatus = document.getElementById('issueStatus').value;
+  if (newStatus === 'done' && issue.status !== 'done') {
+    issue.completed = new Date().toISOString();
+  } else if (newStatus !== 'done') {
+    delete issue.completed;
+  }
+  issue.status = newStatus;
   issue.deadline = document.getElementById('issueDeadline').value || null;
   issue.updated = new Date().toISOString();
 }
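Reviewer note: the `completed` bookkeeping in the hunk above, as a self-contained sketch (the `applyStatus` helper and the sample issue are hypothetical). The timestamp is set only on the transition into 'done', preserved when an already-done issue is re-saved, and cleared when the issue leaves 'done'.

```js
// Illustrative only; mirrors the guard in the hunk above.
function applyStatus(issue, newStatus) {
  if (newStatus === 'done' && issue.status !== 'done') {
    issue.completed = new Date().toISOString(); // stamp on entering 'done'
  } else if (newStatus !== 'done') {
    delete issue.completed;                     // clear when leaving 'done'
  }
  issue.status = newStatus;                     // re-saving 'done' keeps the stamp
  return issue;
}

const issue = { id: 'ROA-XXX', status: 'in-progress' }; // hypothetical issue
applyStatus(issue, 'done');  // sets issue.completed
const first = issue.completed;
applyStatus(issue, 'done');  // first === issue.completed (unchanged)
applyStatus(issue, 'todo');  // removes issue.completed
```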
@@ -2285,7 +2353,7 @@
     program: document.getElementById('issueProgram').value,
     owner: document.getElementById('issueOwner').value,
     priority: document.getElementById('issuePriority').value,
-    status: 'todo',
+    status: document.getElementById('issueStatus').value || 'todo',
     created: new Date().toISOString(),
     deadline: document.getElementById('issueDeadline').value || null
   };
@@ -1,5 +1,5 @@
 {
-  "lastUpdated": "2026-02-02T11:25:18.119Z",
+  "lastUpdated": "2026-02-02T22:27:06.452Z",
   "programs": [
     "ROACONT",
     "ROAGEST",
@@ -23,7 +23,8 @@
       "priority": "urgent-important",
       "status": "todo",
       "created": "2026-02-02T11:25:18.115Z",
-      "deadline": "2026-02-02"
+      "deadline": "2026-02-02",
+      "updated": "2026-02-02T22:27:06.428Z"
     },
     {
       "id": "ROA-001",
@@ -31,10 +32,11 @@
       "description": "RD 49 = în urma inspecției fiscale\nRD 50 = impozit precedent\nFormularul nu recalculează impozitul de 16%\nRD 40 se modifică și la 4.1",
       "program": "ROACONT",
       "owner": "marius",
-      "priority": "urgent-important",
+      "priority": "important",
       "status": "todo",
       "created": "2026-01-30T15:10:00Z",
-      "deadline": null
+      "deadline": "2026-02-06",
+      "updated": "2026-02-02T22:26:59.690Z"
     }
   ]
 }
@@ -1,52 +1,33 @@
-# 2026-02-02 - Session notes
+# 2 February 2026
 
 ## Decisions
+- Marius approves ALL the proposals from the evening report ("Yes")
+- A0 + A3 executed immediately
+- A1 + A2 (TU+EU sessions) to be scheduled Monday-Thursday 15:00-16:00
+
-### Reports via EMAIL (not Discord)
-- Morning-report and evening-report now go out via **email** (mmarius28@gmail.com)
-- New format with two sections:
-  - **📚 Synthesis** - models/concepts → separate file + link
-  - **⚡ Actionables** - tasks with clear WHO/WHAT/EFFORT/RESULT
-- 3 predefined replies (1/2/3) for 80/20
-- Jobs updated: `morning-report`, `evening-report`
+## Executed
+- **A0:** Git commit and push (2 commits: TOOLS.md, KB index, coaching, email tool)
+- **A3:** Integrated the question "What story about yourself should you let go of?" into the insights for morning coaching
 
-### Fix email_send.py
-- Problem: MailChannels + Gmail were rejecting the emails
-- Cause: emoji in FROM_NAME + non-RFC headers
-- Fix:
-  - `FROM_NAME = "Echo"` (no emoji)
-  - `Header(subject, 'utf-8')` for encoding
-  - `formataddr((FROM_NAME, SMTP_USER))` for RFC compliance
+## To schedule
+- **A1:** "Dissolving 'I Don't Deserve It'" session (30 min) - Monica Ion exercise
+- **A2:** The 5-step system for fears (15 min) - Zoltan Vereș
 
-### Sub-agent rules (AGENTS.md)
-- When I launch a sub-agent, I MUST give it the full context: AGENTS.md, SOUL.md, USER.md, relevant memory
-- The sub-agent runs isolated; it has no automatic access to my files
+## Feedback from Marius
+1. **Email replies:** He does not receive the confirmation emails - flow to be checked
+2. **Insights → Reports:** The evening report was too conservative - 22 insights extracted but only 4 proposals in the report. Adjust the evening-report job to propose more.
 
-## Files created/modified
-- `memory/kb/insights/2026-02-02.md` - 22 insights from 20 videos
-- `memory/kb/insights/sinteza-2026-02-02.md` - 16 models/concepts (synthesis)
-- `tools/email_send.py` - RFC compliance fix
-- `AGENTS.md` - sub-agent rules
-- `TOOLS.md` - updated job documentation
+## Stats today
+- 23 YouTube notes in the KB (20 processed today - Zoltan Vereș workshop)
+- 22 insights extracted into `memory/kb/insights/2026-02-02.md`
+- The insights-extract job works, but the reports do not use all of them
 
-## Approved and executed (email reply: YES)
+## To do
+- [x] Adjust evening-report and morning-report to propose with a concrete DAY and TIME
+- [x] Added listing of available insights in the reports
+- [ ] Schedule A1 and A2 with Marius
 
-**Executed:**
-- ✅ A0: Git commit + push (54 files)
-- ✅ A4: Evening "10 things" template → memory/kb/projects/templates/template-seara-merit.md
-
-**Scheduled tomorrow (grup-sprijin-pregatire job):**
-- A3: Support-group worksheet - the victim state (topic prepared from insights)
-
-**Scheduled Wednesday-Thursday 15-16:**
-- A1: List of efforts for new clients (template + we fill it in together)
-- A2: Template for value delivered to clients (template + we fill it in together)
-- A5: Inner-film session (30 min conversation)
-
-## Learned
-
-- Email deliverability: MailChannels can block emails from shared hosting
-- Gmail is strict about RFC 5322 - headers must be formatted correctly
-- Email reports > Discord for decisions that require thinking
-- The "synthesis + actionables + predefined replies" format = 80/20 friendly
+## Lessons learned
+- **Reports:** ALL TU+EU/FAC TU proposals must have a concrete day and time
+- **Email flow:** Reply #1 immediately (receipt confirmation), Reply #2 after execution (what was done)
+- **Insights:** List ALL available insights, not just a few
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/INSTALLER (new file; path inferred from the RECORD below)
@@ -0,0 +1 @@
+pip
170
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/METADATA
Normal file
170
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/METADATA
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
Metadata-Version: 2.4
|
||||||
|
Name: pypdf
|
||||||
|
Version: 6.6.2
|
||||||
|
Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
|
||||||
|
Author-email: Mathieu Fenniak <biziqe@mathieu.fenniak.net>
|
||||||
|
Maintainer: stefan6419846
|
||||||
|
Maintainer-email: Martin Thoma <info@martin-thoma.de>
|
||||||
|
Requires-Python: >=3.9
|
||||||
|
Description-Content-Type: text/markdown
|
||||||
|
License-Expression: BSD-3-Clause
|
||||||
|
Classifier: Development Status :: 5 - Production/Stable
|
||||||
|
Classifier: Intended Audience :: Developers
|
||||||
|
Classifier: Programming Language :: Python :: 3
|
||||||
|
Classifier: Programming Language :: Python :: 3 :: Only
|
||||||
|
Classifier: Programming Language :: Python :: 3.9
|
||||||
|
Classifier: Programming Language :: Python :: 3.10
|
||||||
|
Classifier: Programming Language :: Python :: 3.11
|
||||||
|
Classifier: Programming Language :: Python :: 3.12
|
||||||
|
Classifier: Programming Language :: Python :: 3.13
|
||||||
|
Classifier: Programming Language :: Python :: 3.14
|
||||||
|
Classifier: Operating System :: OS Independent
|
||||||
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||||
|
Classifier: Typing :: Typed
|
||||||
|
License-File: LICENSE
|
||||||
|
Requires-Dist: typing_extensions >= 4.0; python_version < '3.11'
|
||||||
|
Requires-Dist: cryptography ; extra == "crypto"
|
||||||
|
Requires-Dist: PyCryptodome ; extra == "cryptodome"
|
||||||
|
Requires-Dist: black ; extra == "dev"
|
||||||
|
Requires-Dist: flit ; extra == "dev"
|
||||||
|
Requires-Dist: pip-tools ; extra == "dev"
|
||||||
|
Requires-Dist: pre-commit ; extra == "dev"
|
||||||
|
Requires-Dist: pytest-cov ; extra == "dev"
|
||||||
|
Requires-Dist: pytest-socket ; extra == "dev"
|
||||||
|
Requires-Dist: pytest-timeout ; extra == "dev"
|
||||||
|
Requires-Dist: pytest-xdist ; extra == "dev"
|
||||||
|
Requires-Dist: wheel ; extra == "dev"
|
||||||
|
Requires-Dist: myst_parser ; extra == "docs"
|
||||||
|
Requires-Dist: sphinx ; extra == "docs"
|
||||||
|
Requires-Dist: sphinx_rtd_theme ; extra == "docs"
|
||||||
|
Requires-Dist: cryptography ; extra == "full"
|
||||||
|
Requires-Dist: Pillow>=8.0.0 ; extra == "full"
|
||||||
|
Requires-Dist: Pillow>=8.0.0 ; extra == "image"
|
||||||
|
Project-URL: Bug Reports, https://github.com/py-pdf/pypdf/issues
|
||||||
|
Project-URL: Changelog, https://pypdf.readthedocs.io/en/latest/meta/CHANGELOG.html
|
||||||
|
Project-URL: Documentation, https://pypdf.readthedocs.io/en/latest/
|
||||||
|
Project-URL: Source, https://github.com/py-pdf/pypdf
|
||||||
|
Provides-Extra: crypto
|
||||||
|
Provides-Extra: cryptodome
|
||||||
|
Provides-Extra: dev
|
||||||
|
Provides-Extra: docs
|
||||||
|
Provides-Extra: full
|
||||||
|
Provides-Extra: image
|
||||||
|
|
||||||
|
[](https://badge.fury.io/py/pypdf)
|
||||||
|
[](https://pypi.org/project/pypdf/)
|
||||||
|
[](https://pypdf.readthedocs.io/en/stable/)
|
||||||
|
[](https://github.com/py-pdf/pypdf)
|
||||||
|
[](https://codecov.io/gh/py-pdf/pypdf)
|
||||||
|
|
||||||
|
# pypdf
|
||||||
|
|
||||||
|
pypdf is a free and open-source pure-python PDF library capable of splitting,
|
||||||
|
[merging](https://pypdf.readthedocs.io/en/stable/user/merging-pdfs.html),
|
||||||
|
[cropping, and transforming](https://pypdf.readthedocs.io/en/stable/user/cropping-and-transforming.html)
|
||||||
|
the pages of PDF files. It can also add
|
||||||
|
custom data, viewing options, and
|
||||||
|
[passwords](https://pypdf.readthedocs.io/en/stable/user/encryption-decryption.html)
|
||||||
|
to PDF files. pypdf can
|
||||||
|
[retrieve text](https://pypdf.readthedocs.io/en/stable/user/extract-text.html)
|
||||||
|
and
|
||||||
|
[metadata](https://pypdf.readthedocs.io/en/stable/user/metadata.html)
|
||||||
|
from PDFs as well.
|
||||||
|
|
||||||
|
See [pdfly](https://github.com/py-pdf/pdfly) for a CLI application that uses pypdf to interact with PDFs.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Install pypdf using pip:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install pypdf
|
||||||
|
```
|
||||||
|
|
||||||
|
For using pypdf with AES encryption or decryption, install extra dependencies:
|
||||||
|
|
||||||
|
```
|
||||||
|
pip install pypdf[crypto]
|
||||||
|
```
|
||||||
|
|
||||||
|
> **NOTE**: `pypdf` 3.1.0 and above include significant improvements compared to
|
||||||
|
> previous versions. Please refer to [the migration
|
||||||
|
> guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html) for
|
||||||
|
> more information.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
reader = PdfReader("example.pdf")
|
||||||
|
number_of_pages = len(reader.pages)
|
||||||
|
page = reader.pages[0]
|
||||||
|
text = page.extract_text()
|
||||||
|
```
|
||||||
|
|
||||||
|
pypdf can do a lot more, e.g. splitting, merging, reading and creating annotations, decrypting and encrypting. Check out the
|
||||||
|
[documentation](https://pypdf.readthedocs.io/en/stable/) for additional usage
|
||||||
|
examples!
|
||||||
|
|
||||||
|
For questions and answers, visit
|
||||||
|
[StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
|
||||||
|
(tagged with [pypdf](https://stackoverflow.com/questions/tagged/pypdf)).
|
||||||
|
|
||||||
|
## Contributions
|
||||||
|
|
||||||
|
Maintaining pypdf is a collaborative effort. You can support the project by
|
||||||
|
writing documentation, helping to narrow down issues, and submitting code.
|
||||||
|
See the [CONTRIBUTING.md](https://github.com/py-pdf/pypdf/blob/main/CONTRIBUTING.md) file for more information.
|
||||||
|
|
||||||
|
### Q&A
|
||||||
|
|
||||||
|
The experience pypdf users have covers the whole range from beginner to expert. You can contribute to the pypdf community by answering questions
|
||||||
|
on [StackOverflow](https://stackoverflow.com/questions/tagged/pypdf),
|
||||||
|
helping in [discussions](https://github.com/py-pdf/pypdf/discussions),
|
||||||
|
and asking users who report issues for [MCVE](https://stackoverflow.com/help/minimal-reproducible-example)'s (Code + example PDF!).
|
||||||
|
|
||||||
|
|
||||||
|
### Issues
|
||||||
|
|
||||||
|
A good bug ticket includes a MCVE - a minimal complete verifiable example.
|
||||||
|
For pypdf, this means that you must upload a PDF that causes the bug to occur
|
||||||
|
as well as the code you're executing with all of the output. Use
|
||||||
|
`print(pypdf.__version__)` to tell us which version you're using.
|
||||||
|
|
||||||
|
### Code
|
||||||
|
|
||||||
|
All code contributions are welcome, but smaller ones have a better chance to
|
||||||
|
get included in a timely manner. Adding unit tests for new features or test
|
||||||
|
cases for bugs you've fixed help us to ensure that the Pull Request (PR) is fine.
|
||||||
|
|
||||||
|
pypdf includes a test suite which can be executed with `pytest`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ pytest
|
||||||
|
===================== test session starts =====================
|
||||||
|
platform linux -- Python 3.6.15, pytest-7.0.1, pluggy-1.0.0
|
||||||
|
rootdir: /home/moose/GitHub/Martin/pypdf
|
||||||
|
plugins: cov-3.0.0
|
||||||
|
collected 233 items
|
||||||
|
|
||||||
|
tests/test_basic_features.py .. [ 0%]
|
||||||
|
tests/test_constants.py . [ 1%]
|
||||||
|
tests/test_filters.py .................x..... [ 11%]
|
||||||
|
tests/test_generic.py ................................. [ 25%]
|
||||||
|
............. [ 30%]
|
||||||
|
tests/test_javascript.py .. [ 31%]
|
||||||
|
tests/test_merger.py . [ 32%]
|
||||||
|
tests/test_page.py ......................... [ 42%]
|
||||||
|
tests/test_pagerange.py ................ [ 49%]
|
||||||
|
tests/test_papersizes.py .................. [ 57%]
|
||||||
|
tests/test_reader.py .................................. [ 72%]
|
||||||
|
............... [ 78%]
|
||||||
|
tests/test_utils.py .................... [ 87%]
|
||||||
|
tests/test_workflows.py .......... [ 91%]
|
||||||
|
tests/test_writer.py ................. [ 98%]
|
||||||
|
tests/test_xmp.py ... [100%]
|
||||||
|
|
||||||
|
========== 232 passed, 1 xfailed, 1 warning in 4.52s ==========
|
||||||
|
```
|
||||||
|
|
||||||
117
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/RECORD
Normal file
117
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/RECORD
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
pypdf-6.6.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||||
|
pypdf-6.6.2.dist-info/METADATA,sha256=1Vu0OgjW3amj2S_YMUmD0Lj_7_GEw-f5VaIM-_9niK8,7149
|
||||||
|
pypdf-6.6.2.dist-info/RECORD,,
|
||||||
|
pypdf-6.6.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||||
|
pypdf-6.6.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
||||||
|
pypdf-6.6.2.dist-info/licenses/LICENSE,sha256=qXrCMOXzPvEKU2eoUOsB-R8aCwZONHQsd5TSKUVX9SQ,1605
|
||||||
|
pypdf/__init__.py,sha256=YS_1ZrQ3jBPHsRgMstqJrAts3lUApj_lMOMK5qiLG5w,1283
|
||||||
|
pypdf/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_cmap.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_doc_common.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_encryption.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_font.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_page.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_page_labels.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_protocols.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_reader.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_utils.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_version.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_writer.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/_xobj_image_helpers.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/constants.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/errors.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/filters.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/pagerange.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/papersizes.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/types.cpython-312.pyc,,
|
||||||
|
pypdf/__pycache__/xmp.cpython-312.pyc,,
|
||||||
|
pypdf/_cmap.py,sha256=iaAvJQQKBxkqMj5-WdD4vZV-Zdz-Sba5j6q3oPQyLT0,11713
|
||||||
|
pypdf/_codecs/__init__.py,sha256=PF1KlsLWCOF0cgdqns7G4X-l3zq5_OnZePw7RFIn1bE,1645
|
||||||
|
pypdf/_codecs/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/_codecs.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/adobe_glyphs.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/core_fontmetrics.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/pdfdoc.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/std.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/symbol.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/__pycache__/zapfding.cpython-312.pyc,,
|
||||||
|
pypdf/_codecs/_codecs.py,sha256=46oRZJySwGxCJp1kjIer7js_TYSjj4Gs2i2Uce3v-eE,10555
|
||||||
|
pypdf/_codecs/adobe_glyphs.py,sha256=t3cDFPDqwIz1w9B0gdVzjdc8eEK9AuRjk5f7laEw_fY,447213
|
||||||
|
pypdf/_codecs/core_fontmetrics.py,sha256=qQvNRQi8V8FOBmSwGcsak4qyl9cQ80cDjbpD5TvhuBg,113269
|
||||||
|
pypdf/_codecs/pdfdoc.py,sha256=xfSvMFYsvxuaSQ0Uu9vZDKaB0Wu85h1uCiB1i9rAcUU,4269
|
||||||
|
pypdf/_codecs/std.py,sha256=DyQMuEpAGEpS9uy1jWf4cnj-kqShPOAij5sI7Q1YD8E,2630
|
||||||
|
pypdf/_codecs/symbol.py,sha256=nIaGQIlhWCJiPMHrwUlmGHH-_fOXyEKvguRmuKXcGAk,3734
|
||||||
|
pypdf/_codecs/zapfding.py,sha256=PQxjxRC616d41xF3exVxP1W8nM4QrZfjO3lmtLxpE_s,3742
|
||||||
|
pypdf/_crypt_providers/__init__.py,sha256=K3Z6AuXhXVeXgLet-Tukq2gt9H66OgdupsvxIS1CmkI,3054
|
||||||
|
pypdf/_crypt_providers/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/_crypt_providers/__pycache__/_base.cpython-312.pyc,,
|
||||||
|
pypdf/_crypt_providers/__pycache__/_cryptography.cpython-312.pyc,,
|
||||||
|
pypdf/_crypt_providers/__pycache__/_fallback.cpython-312.pyc,,
|
||||||
|
pypdf/_crypt_providers/__pycache__/_pycryptodome.cpython-312.pyc,,
|
||||||
|
pypdf/_crypt_providers/_base.py,sha256=_f53Mj6vivhEZMQ4vNxN5G0IOgFY-n5_leke0c_qiNU,1711
|
||||||
|
pypdf/_crypt_providers/_cryptography.py,sha256=zT3WmbPzesvgHRkGcKAldqJ24MY3BwZViVbSc55Zxhw,4557
|
||||||
|
pypdf/_crypt_providers/_fallback.py,sha256=vsYoowR1YCAV_q-HrdIZhkUcrCb6HvRBNMYm03QtCU8,3334
|
||||||
|
pypdf/_crypt_providers/_pycryptodome.py,sha256=U1aQZ9iYBrZo-hKCjJUhGOPhwEFToiitowQ316TNrrA,3381
|
||||||
|
pypdf/_doc_common.py,sha256=Cbsc2uczFhAi2JRioaICx0ISC4lCBkRdo_tKRGw3bpc,53243
|
||||||
|
pypdf/_encryption.py,sha256=-LwFEKfhL3B10afkco6fXx-EqtjoXf67pAUgH2VBfDw,48762
|
||||||
|
pypdf/_font.py,sha256=R5jQsBYa_eMrK7VezyoWCmbBARZyS5xp8jzD2XRvKeE,14146
|
||||||
|
pypdf/_page.py,sha256=Tp2GyjjOHLFwQ1tw8bO-poyZA65PJn3k94BymXMmurw,89909
|
||||||
|
pypdf/_page_labels.py,sha256=_HXqgEhSLTH_mMhy8m4QAOzIOHRQLV6_lYvg81-l9hI,8546
|
||||||
|
pypdf/_protocols.py,sha256=7qz92LVdPrYkSpdUPpAp9U4GW5jxNBTfVcpUWwUhEOo,2123
|
||||||
|
pypdf/_reader.py,sha256=KyeDHVEI5n4cZBHGVzbGIfhaPC1nZMiIU0W_ZNb0w_Y,55079
|
||||||
|
pypdf/_text_extraction/__init__.py,sha256=a3Z33rQVTiMKGtwt7_bfXlPosbST8rzELoNnt053_Qw,8515
|
||||||
|
pypdf/_text_extraction/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/__pycache__/_text_extractor.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/_layout_mode/__init__.py,sha256=RUQIwiUwzneNtcljnVM6jkRaem6pgP7mOD2-MBmtpvw,340
|
||||||
|
pypdf/_text_extraction/_layout_mode/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/_layout_mode/__pycache__/_fixed_width_page.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_manager.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_params.cpython-312.pyc,,
|
||||||
|
pypdf/_text_extraction/_layout_mode/_fixed_width_page.py,sha256=eJveDbyMooG970qJOhM5Rwb9ZoyyJDynzWpV9a7IS20,15370
|
||||||
|
pypdf/_text_extraction/_layout_mode/_text_state_manager.py,sha256=XVrIjeTd5jSdMexBQxs0tL5I5RUOitRmN1mELOcKYm4,8221
|
||||||
|
pypdf/_text_extraction/_layout_mode/_text_state_params.py,sha256=hyw6pnC8upBkoFVUJ3LH8hBIIHrNwiqaqcYyzIIyr6Y,5481
|
||||||
|
pypdf/_text_extraction/_text_extractor.py,sha256=wRmFtgMYTbJFbZRJVG3j1-lQWhb6mUC5uiE73DLRhIo,14454
|
||||||
|
pypdf/_utils.py,sha256=v579jJEHn-JophTC4Ej2MBFTEoQGitPWs_d507pyS6g,20194
|
||||||
|
pypdf/_version.py,sha256=S2Qku7VqFDmWPW_O3fID47IPC76TVFqesX1qVVa575w,22
|
||||||
|
pypdf/_writer.py,sha256=K7ANMEgNz-tPngYVMW9j07SEcksk5tFf1_tgi0JDRIg,129793
|
||||||
|
pypdf/_xobj_image_helpers.py,sha256=y7EMrXlYqwbIeUtdQS2XH9nO_2R73DOLf9-T1IyHMIA,21450
|
||||||
|
pypdf/annotations/__init__.py,sha256=f2k_-jAn39CCB27KxQ_e93GinnzkAHbUnnSeGJl1jyE,990
|
||||||
|
pypdf/annotations/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/annotations/__pycache__/_base.cpython-312.pyc,,
|
||||||
|
pypdf/annotations/__pycache__/_markup_annotations.cpython-312.pyc,,
|
||||||
|
pypdf/annotations/__pycache__/_non_markup_annotations.cpython-312.pyc,,
|
||||||
|
pypdf/annotations/_base.py,sha256=eeoc9v2w15jAUhKXj48l1bB66YgBgV-2v5IIUJH-vws,961
|
||||||
|
pypdf/annotations/_markup_annotations.py,sha256=PLDCbsEWSgOmk6HTxepolEzj-Q3EE5J4hXMgnTDFaqc,9590
|
||||||
|
pypdf/annotations/_non_markup_annotations.py,sha256=Z2IUvcCOcTcpJhSXrex_9riYM2D64XxFQ_vac10BNRU,3649
|
||||||
|
pypdf/constants.py,sha256=_U_xkH1REx2rsgtx3jCOaKivhmyqPA25PLL7Z4A1_ZI,23260
|
||||||
|
pypdf/errors.py,sha256=Bw1W9hxOsDgwqwU6YoQ2l0-JiUyTq6l5QjVCr-W4GFA,1947
|
||||||
|
pypdf/filters.py,sha256=FzfrqdZK9bs3MjU75KJ2uIMPpx6VcxYQ4oV9wLh3j-w,29210
|
||||||
|
pypdf/generic/__init__.py,sha256=VrqdYftQECePDU2rXVMgEqRaYFR8zOV_fvJgo19x_uw,3468
|
||||||
|
pypdf/generic/__pycache__/__init__.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_appearance_stream.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_base.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_data_structures.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_files.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_fit.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_image_inline.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_link.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_outline.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_rectangle.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_utils.cpython-312.pyc,,
|
||||||
|
pypdf/generic/__pycache__/_viewerpref.cpython-312.pyc,,
|
||||||
|
pypdf/generic/_appearance_stream.py,sha256=ofXHlJC4-jSBCLOhkKztoeFiYlD-zi8QMdvRrMm3rdE,24867
|
||||||
|
pypdf/generic/_base.py,sha256=N8O_NcqK5y5O70OF8-p6vsac9R1ykTDcBIksBY_9rnA,32531
|
||||||
|
pypdf/generic/_data_structures.py,sha256=g1Jy5tpPSTHIhOme6HFXdMvxV2HuxbZx-HOsF2Awdc0,63602
|
||||||
|
pypdf/generic/_files.py,sha256=NtSkRo6JBgisi4QOyrVneO891boVsuY25hRwij6X9RA,16238
|
||||||
|
pypdf/generic/_fit.py,sha256=X_iADJj1YY4PUStS7rFWC2xR2LUVSvKtUAky0AFAIDM,5515
|
||||||
|
pypdf/generic/_image_inline.py,sha256=4cADiCeaCYq2kgJu0wOYXRn5YZ27cCHb3hGFqFFT5D4,12787
|
||||||
|
pypdf/generic/_link.py,sha256=ibdLhdU0mP_phneaJs-CzUDErkJuqnMT6TsQoHNOYiE,4951
|
||||||
|
pypdf/generic/_outline.py,sha256=qKbMX42OWfqnopIiE6BUy6EvdTLGe3ZtjaiWN85JpaY,1094
|
||||||
|
pypdf/generic/_rectangle.py,sha256=lOqSfFivQxgBN9LU9aqHoxPH8aCPTDUNgRZsNEUd6fc,3785
|
||||||
|
pypdf/generic/_utils.py,sha256=vTDAesfG-cJNDKilz_kbgFodAITzd5ejppWHGjvConk,7258
|
||||||
|
pypdf/generic/_viewerpref.py,sha256=6a_s0Avm9-XvV0wqxiW23cE92qK98ry3y6EPjfsFSdo,6758
|
||||||
|
pypdf/pagerange.py,sha256=2bt21jQZm-9aq2bVf3TXuH8_wGVx7b9T6UrMFXCEJhQ,7108
|
||||||
|
pypdf/papersizes.py,sha256=6Tz5sfNN_3JOUapY83U-lakohnpXYA0hSEQNmOVLFL8,1413
|
||||||
|
pypdf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||||
|
pypdf/types.py,sha256=sJ7wHzk7ER_CJ7kP-s8u9axFnkCXnFpr8nzcj1AxTas,1915
|
||||||
|
pypdf/xmp.py,sha256=gqh3IlgTNP7ZuyhvE59p2tsMvu4adGkq0G8RDg0OtQw,29238
|
||||||
@@ -0,0 +1,4 @@
|
|||||||
|
Wheel-Version: 1.0
|
||||||
|
Generator: flit 3.12.0
|
||||||
|
Root-Is-Purelib: true
|
||||||
|
Tag: py3-none-any
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
Copyright (c) 2006-2008, Mathieu Fenniak
|
||||||
|
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
|
||||||
|
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
|
||||||
|
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer in the documentation
|
||||||
|
and/or other materials provided with the distribution.
|
||||||
|
* The name of the author may not be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
POSSIBILITY OF SUCH DAMAGE.
|
||||||
48
venv/lib/python3.12/site-packages/pypdf/__init__.py
Normal file
48
venv/lib/python3.12/site-packages/pypdf/__init__.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
pypdf is a free and open-source pure-python PDF library capable of splitting,
|
||||||
|
merging, cropping, and transforming the pages of PDF files. It can also add
|
||||||
|
custom data, viewing options, and passwords to PDF files. pypdf can retrieve
|
||||||
|
text and metadata from PDFs as well.
|
||||||
|
|
||||||
|
You can read the full docs at https://pypdf.readthedocs.io/.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from ._crypt_providers import crypt_provider
|
||||||
|
from ._doc_common import DocumentInformation
|
||||||
|
from ._encryption import PasswordType
|
||||||
|
from ._page import PageObject, Transformation
|
||||||
|
from ._reader import PdfReader
|
||||||
|
from ._text_extraction import mult
|
||||||
|
from ._version import __version__
|
||||||
|
from ._writer import ObjectDeletionFlag, PdfWriter
|
||||||
|
from .constants import ImageType
|
||||||
|
from .pagerange import PageRange, parse_filename_page_ranges
|
||||||
|
from .papersizes import PaperSize
|
||||||
|
|
||||||
|
try:
|
||||||
|
import PIL
|
||||||
|
|
||||||
|
pil_version = PIL.__version__
|
||||||
|
except ImportError:
|
||||||
|
pil_version = "none"
|
||||||
|
|
||||||
|
_debug_versions = (
|
||||||
|
f"pypdf=={__version__}, {crypt_provider=}, PIL={pil_version}"
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"DocumentInformation",
|
||||||
|
"ImageType",
|
||||||
|
"ObjectDeletionFlag",
|
||||||
|
"PageObject",
|
||||||
|
"PageRange",
|
||||||
|
"PaperSize",
|
||||||
|
"PasswordType",
|
||||||
|
"PdfReader",
|
||||||
|
"PdfWriter",
|
||||||
|
"Transformation",
|
||||||
|
"__version__",
|
||||||
|
"_debug_versions",
|
||||||
|
"mult",
|
||||||
|
"parse_filename_page_ranges",
|
||||||
|
]
|
||||||
338
venv/lib/python3.12/site-packages/pypdf/_cmap.py
Normal file
338
venv/lib/python3.12/site-packages/pypdf/_cmap.py
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
import binascii
|
||||||
|
from binascii import Error as BinasciiError
|
||||||
|
from binascii import unhexlify
|
||||||
|
from math import ceil
|
||||||
|
from typing import Any, Union, cast
|
||||||
|
|
||||||
|
from ._codecs import adobe_glyphs, charset_encoding
|
||||||
|
from ._utils import logger_error, logger_warning
|
||||||
|
from .generic import (
|
||||||
|
DecodedStreamObject,
|
||||||
|
DictionaryObject,
|
||||||
|
NullObject,
|
||||||
|
StreamObject,
|
||||||
|
is_null_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
_predefined_cmap: dict[str, str] = {
|
||||||
|
"/Identity-H": "utf-16-be",
|
||||||
|
"/Identity-V": "utf-16-be",
|
||||||
|
"/GB-EUC-H": "gbk",
|
||||||
|
"/GB-EUC-V": "gbk",
|
||||||
|
"/GBpc-EUC-H": "gb2312",
|
||||||
|
"/GBpc-EUC-V": "gb2312",
|
||||||
|
"/GBK-EUC-H": "gbk",
|
||||||
|
"/GBK-EUC-V": "gbk",
|
||||||
|
"/GBK2K-H": "gb18030",
|
||||||
|
"/GBK2K-V": "gb18030",
|
||||||
|
"/ETen-B5-H": "cp950",
|
||||||
|
"/ETen-B5-V": "cp950",
|
||||||
|
"/ETenms-B5-H": "cp950",
|
||||||
|
"/ETenms-B5-V": "cp950",
|
||||||
|
"/UniCNS-UTF16-H": "utf-16-be",
|
||||||
|
"/UniCNS-UTF16-V": "utf-16-be",
|
||||||
|
"/UniGB-UTF16-H": "gb18030",
|
||||||
|
"/UniGB-UTF16-V": "gb18030",
|
||||||
|
# UCS2 in code
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_encoding(
|
||||||
|
ft: DictionaryObject
|
||||||
|
) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
|
||||||
|
encoding = _parse_encoding(ft)
|
||||||
|
map_dict, int_entry = _parse_to_unicode(ft)
|
||||||
|
|
||||||
|
# Apply rule from PDF ref 1.7 §5.9.1, 1st bullet:
|
||||||
|
# if cmap not empty encoding should be discarded
|
||||||
|
# (here transformed into identity for those characters)
|
||||||
|
# If encoding is a string, it is expected to be an identity translation.
|
||||||
|
if isinstance(encoding, dict):
|
||||||
|
for x in int_entry:
|
||||||
|
if x <= 255:
|
||||||
|
encoding[x] = chr(x)
|
||||||
|
|
||||||
|
return encoding, map_dict
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_encoding(
|
||||||
|
ft: DictionaryObject
|
||||||
|
) -> Union[str, dict[int, str]]:
|
||||||
|
encoding: Union[str, list[str], dict[int, str]] = []
|
||||||
|
if "/Encoding" not in ft:
|
||||||
|
if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
|
||||||
|
encoding = dict(
|
||||||
|
zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])])
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
encoding = "charmap"
|
||||||
|
return encoding
|
||||||
|
enc: Union[str, DictionaryObject, NullObject] = cast(
|
||||||
|
Union[str, DictionaryObject, NullObject], ft["/Encoding"].get_object()
|
||||||
|
)
|
||||||
|
if isinstance(enc, str):
|
||||||
|
try:
|
||||||
|
# already done : enc = NameObject.unnumber(enc.encode()).decode()
|
||||||
|
# for #xx decoding
|
||||||
|
if enc in charset_encoding:
|
||||||
|
encoding = charset_encoding[enc].copy()
|
||||||
|
elif enc in _predefined_cmap:
|
||||||
|
encoding = _predefined_cmap[enc]
|
||||||
|
elif "-UCS2-" in enc:
|
||||||
|
encoding = "utf-16-be"
|
||||||
|
else:
|
||||||
|
raise Exception("not found")
|
||||||
|
except Exception:
|
||||||
|
logger_error(f"Advanced encoding {enc} not implemented yet", __name__)
|
||||||
|
encoding = enc
|
||||||
|
elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
|
||||||
|
try:
|
||||||
|
encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
|
||||||
|
except Exception:
|
||||||
|
logger_error(
|
||||||
|
f"Advanced encoding {encoding} not implemented yet",
|
||||||
|
__name__,
|
||||||
|
)
|
||||||
|
encoding = charset_encoding["/StandardEncoding"].copy()
|
||||||
|
else:
|
||||||
|
encoding = charset_encoding["/StandardEncoding"].copy()
|
||||||
|
if isinstance(enc, DictionaryObject) and "/Differences" in enc:
|
||||||
|
x: int = 0
|
||||||
|
o: Union[int, str]
|
||||||
|
for o in cast(DictionaryObject, enc["/Differences"]):
|
||||||
|
if isinstance(o, int):
|
||||||
|
x = o
|
||||||
|
else: # isinstance(o, str):
|
||||||
|
try:
|
||||||
|
if x < len(encoding):
|
||||||
|
encoding[x] = adobe_glyphs[o] # type: ignore
|
||||||
|
except Exception:
|
||||||
|
encoding[x] = o # type: ignore
|
||||||
|
x += 1
|
||||||
|
if isinstance(encoding, list):
|
||||||
|
encoding = dict(zip(range(256), encoding))
|
||||||
|
return encoding
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_to_unicode(
|
||||||
|
ft: DictionaryObject
|
||||||
|
) -> tuple[dict[Any, Any], list[int]]:
|
||||||
|
# will store all translation code
|
||||||
|
# and map_dict[-1] we will have the number of bytes to convert
|
||||||
|
map_dict: dict[Any, Any] = {}
|
||||||
|
|
||||||
|
# will provide the list of cmap keys as int to correct encoding
|
||||||
|
int_entry: list[int] = []
|
||||||
|
|
||||||
|
if "/ToUnicode" not in ft:
|
||||||
|
if ft.get("/Subtype", "") == "/Type1":
|
||||||
|
return _type1_alternative(ft, map_dict, int_entry)
|
||||||
|
return {}, []
|
||||||
|
process_rg: bool = False
|
||||||
|
process_char: bool = False
|
||||||
|
multiline_rg: Union[
|
||||||
|
None, tuple[int, int]
|
||||||
|
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
|
||||||
|
cm = prepare_cm(ft)
|
||||||
|
for line in cm.split(b"\n"):
|
||||||
|
process_rg, process_char, multiline_rg = process_cm_line(
|
||||||
|
line.strip(b" \t"),
|
||||||
|
process_rg,
|
||||||
|
process_char,
|
||||||
|
multiline_rg,
|
||||||
|
map_dict,
|
||||||
|
int_entry,
|
||||||
|
)
|
||||||
|
|
||||||
|
return map_dict, int_entry
|
||||||
|
|
||||||
|
|
||||||
|
def prepare_cm(ft: DictionaryObject) -> bytes:
|
||||||
|
tu = ft["/ToUnicode"]
|
||||||
|
cm: bytes
|
||||||
|
if isinstance(tu, StreamObject):
|
||||||
|
cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
|
||||||
|
else: # if (tu is None) or cast(str, tu).startswith("/Identity"):
|
||||||
|
# the full range 0000-FFFF will be processed
|
||||||
|
cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
|
||||||
|
if isinstance(cm, str):
|
||||||
|
cm = cm.encode()
|
||||||
|
# we need to prepare cm before due to missing return line in pdf printed
|
||||||
|
# to pdf from word
|
||||||
|
cm = (
|
||||||
|
cm.strip()
|
||||||
|
.replace(b"beginbfchar", b"\nbeginbfchar\n")
|
||||||
|
.replace(b"endbfchar", b"\nendbfchar\n")
|
||||||
|
.replace(b"beginbfrange", b"\nbeginbfrange\n")
|
||||||
|
.replace(b"endbfrange", b"\nendbfrange\n")
|
||||||
|
.replace(b"<<", b"\n{\n") # text between << and >> not used but
|
||||||
|
.replace(b">>", b"\n}\n") # some solution to find it back
|
||||||
|
)
|
||||||
|
ll = cm.split(b"<")
|
||||||
|
for i in range(len(ll)):
|
||||||
|
j = ll[i].find(b">")
|
||||||
|
if j >= 0:
|
||||||
|
if j == 0:
|
||||||
|
# string is empty: stash a placeholder here (see below)
|
||||||
|
# see https://github.com/py-pdf/pypdf/issues/1111
|
||||||
|
content = b"."
|
||||||
|
else:
|
||||||
|
content = ll[i][:j].replace(b" ", b"")
|
||||||
|
ll[i] = content + b" " + ll[i][j + 1 :]
|
||||||
|
cm = (
|
||||||
|
(b" ".join(ll))
|
||||||
|
.replace(b"[", b" [ ")
|
||||||
|
.replace(b"]", b" ]\n ")
|
||||||
|
.replace(b"\r", b"\n")
|
||||||
|
)
|
||||||
|
return cm
|
||||||
|
|
||||||
|
|
||||||
|
def process_cm_line(
|
||||||
|
line: bytes,
|
||||||
|
process_rg: bool,
|
||||||
|
process_char: bool,
|
||||||
|
multiline_rg: Union[None, tuple[int, int]],
|
||||||
|
map_dict: dict[Any, Any],
|
||||||
|
int_entry: list[int],
|
||||||
|
) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
|
||||||
|
if line == b"" or line[0] == 37: # 37 = %
|
||||||
|
return process_rg, process_char, multiline_rg
|
||||||
|
line = line.replace(b"\t", b" ")
|
||||||
|
if b"beginbfrange" in line:
|
||||||
|
process_rg = True
|
||||||
|
elif b"endbfrange" in line:
|
||||||
|
process_rg = False
|
||||||
|
elif b"beginbfchar" in line:
|
||||||
|
process_char = True
|
||||||
|
elif b"endbfchar" in line:
|
||||||
|
process_char = False
|
||||||
|
elif process_rg:
|
||||||
|
try:
|
||||||
|
multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
|
||||||
|
except binascii.Error as error:
|
||||||
|
logger_warning(f"Skipping broken line {line!r}: {error}", __name__)
|
||||||
|
elif process_char:
|
||||||
|
parse_bfchar(line, map_dict, int_entry)
|
||||||
|
return process_rg, process_char, multiline_rg
|
||||||
|
|
||||||
|
|
||||||
|
def parse_bfrange(
|
||||||
|
line: bytes,
|
||||||
|
map_dict: dict[Any, Any],
|
||||||
|
int_entry: list[int],
|
||||||
|
multiline_rg: Union[None, tuple[int, int]],
|
||||||
|
) -> Union[None, tuple[int, int]]:
|
||||||
|
lst = [x for x in line.split(b" ") if x]
|
||||||
|
closure_found = False
|
||||||
|
if multiline_rg is not None:
|
||||||
|
fmt = b"%%0%dX" % (map_dict[-1] * 2)
|
||||||
|
a = multiline_rg[0] # a, b not in the current line
|
||||||
|
b = multiline_rg[1]
|
||||||
|
for sq in lst:
|
||||||
|
if sq == b"]":
|
||||||
|
closure_found = True
|
||||||
|
break
|
||||||
|
map_dict[
|
||||||
|
unhexlify(fmt % a).decode(
|
||||||
|
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||||
|
"surrogatepass",
|
||||||
|
)
|
||||||
|
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
|
||||||
|
int_entry.append(a)
|
||||||
|
a += 1
|
||||||
|
else:
|
||||||
|
a = int(lst[0], 16)
|
||||||
|
b = int(lst[1], 16)
|
||||||
|
nbi = max(len(lst[0]), len(lst[1]))
|
||||||
|
map_dict[-1] = ceil(nbi / 2)
|
||||||
|
fmt = b"%%0%dX" % (map_dict[-1] * 2)
|
||||||
|
if lst[2] == b"[":
|
||||||
|
for sq in lst[3:]:
|
||||||
|
if sq == b"]":
|
||||||
|
closure_found = True
|
||||||
|
break
|
||||||
|
map_dict[
|
||||||
|
unhexlify(fmt % a).decode(
|
||||||
|
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||||
|
"surrogatepass",
|
||||||
|
)
|
||||||
|
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
|
||||||
|
int_entry.append(a)
|
||||||
|
a += 1
|
||||||
|
else: # case without list
|
||||||
|
c = int(lst[2], 16)
|
||||||
|
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
|
||||||
|
closure_found = True
|
||||||
|
while a <= b:
|
||||||
|
map_dict[
|
||||||
|
unhexlify(fmt % a).decode(
|
||||||
|
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||||
|
"surrogatepass",
|
||||||
|
)
|
||||||
|
] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
|
||||||
|
int_entry.append(a)
|
||||||
|
a += 1
|
||||||
|
c += 1
|
||||||
|
return None if closure_found else (a, b)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
|
||||||
|
lst = [x for x in line.split(b" ") if x]
|
||||||
|
map_dict[-1] = len(lst[0]) // 2
|
||||||
|
while len(lst) > 1:
|
||||||
|
map_to = ""
|
||||||
|
# placeholder (see above) means empty string
|
||||||
|
if lst[1] != b".":
|
||||||
|
try:
|
||||||
|
map_to = unhexlify(lst[1]).decode(
|
||||||
|
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
|
||||||
|
) # join is here as some cases where the code was split
|
||||||
|
except BinasciiError as exception:
|
||||||
|
logger_warning(f"Got invalid hex string: {exception!s} ({lst[1]!r})", __name__)
|
||||||
|
map_dict[
|
||||||
|
unhexlify(lst[0]).decode(
|
||||||
|
"charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"
|
||||||
|
)
|
||||||
|
] = map_to
|
||||||
|
int_entry.append(int(lst[0], 16))
|
||||||
|
lst = lst[2:]
|
||||||
|
|
||||||
|
|
||||||
|
def _type1_alternative(
|
||||||
|
ft: DictionaryObject,
|
||||||
|
map_dict: dict[Any, Any],
|
||||||
|
int_entry: list[int],
|
||||||
|
) -> tuple[dict[Any, Any], list[int]]:
|
||||||
|
if "/FontDescriptor" not in ft:
|
||||||
|
return map_dict, int_entry
|
||||||
|
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
|
||||||
|
if is_null_or_none(ft_desc):
|
||||||
|
return map_dict, int_entry
|
||||||
|
assert ft_desc is not None, "mypy"
|
||||||
|
txt = ft_desc.get_object().get_data()
|
||||||
|
txt = txt.split(b"eexec\n")[0] # only clear part
|
||||||
|
txt = txt.split(b"/Encoding")[1] # to get the encoding part
|
||||||
|
lines = txt.replace(b"\r", b"\n").split(b"\n")
|
||||||
|
for li in lines:
|
||||||
|
if li.startswith(b"dup"):
|
||||||
|
words = [_w for _w in li.split(b" ") if _w != b""]
|
||||||
|
if len(words) > 3 and words[3] != b"put":
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
i = int(words[1])
|
||||||
|
except ValueError: # pragma: no cover
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
v = adobe_glyphs[words[2].decode()]
|
||||||
|
except KeyError:
|
||||||
|
if words[2].startswith(b"/uni"):
|
||||||
|
try:
|
||||||
|
v = chr(int(words[2][4:], 16))
|
||||||
|
except ValueError: # pragma: no cover
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
map_dict[chr(i)] = v
|
||||||
|
int_entry.append(i)
|
||||||
|
return map_dict, int_entry
|
||||||
59
venv/lib/python3.12/site-packages/pypdf/_codecs/__init__.py
Normal file
59
venv/lib/python3.12/site-packages/pypdf/_codecs/__init__.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
from .adobe_glyphs import adobe_glyphs
|
||||||
|
from .pdfdoc import _pdfdoc_encoding
|
||||||
|
from .std import _std_encoding
|
||||||
|
from .symbol import _symbol_encoding
|
||||||
|
from .zapfding import _zapfding_encoding
|
||||||
|
|
||||||
|
|
||||||
|
def fill_from_encoding(enc: str) -> list[str]:
|
||||||
|
lst: list[str] = []
|
||||||
|
for x in range(256):
|
||||||
|
try:
|
||||||
|
lst += (bytes((x,)).decode(enc),)
|
||||||
|
except Exception:
|
||||||
|
lst += (chr(x),)
|
||||||
|
return lst
|
||||||
|
|
||||||
|
|
||||||
|
def rev_encoding(enc: list[str]) -> dict[str, int]:
|
||||||
|
rev: dict[str, int] = {}
|
||||||
|
for i in range(256):
|
||||||
|
char = enc[i]
|
||||||
|
if char == "\u0000":
|
||||||
|
continue
|
||||||
|
assert char not in rev, f"{char} at {i} already at {rev[char]}"
|
||||||
|
rev[char] = i
|
||||||
|
return rev
|
||||||
|
|
||||||
|
|
||||||
|
_win_encoding = fill_from_encoding("cp1252")
|
||||||
|
_mac_encoding = fill_from_encoding("mac_roman")
|
||||||
|
|
||||||
|
|
||||||
|
_win_encoding_rev: dict[str, int] = rev_encoding(_win_encoding)
|
||||||
|
_mac_encoding_rev: dict[str, int] = rev_encoding(_mac_encoding)
|
||||||
|
_symbol_encoding_rev: dict[str, int] = rev_encoding(_symbol_encoding)
|
||||||
|
_zapfding_encoding_rev: dict[str, int] = rev_encoding(_zapfding_encoding)
|
||||||
|
_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)
|
||||||
|
|
||||||
|
|
||||||
|
charset_encoding: dict[str, list[str]] = {
|
||||||
|
"/StandardEncoding": _std_encoding,
|
||||||
|
"/WinAnsiEncoding": _win_encoding,
|
||||||
|
"/MacRomanEncoding": _mac_encoding,
|
||||||
|
"/PDFDocEncoding": _pdfdoc_encoding,
|
||||||
|
"/Symbol": _symbol_encoding,
|
||||||
|
"/ZapfDingbats": _zapfding_encoding,
|
||||||
|
}
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"_mac_encoding",
|
||||||
|
"_pdfdoc_encoding",
|
||||||
|
"_pdfdoc_encoding_rev",
|
||||||
|
"_std_encoding",
|
||||||
|
"_symbol_encoding",
|
||||||
|
"_win_encoding",
|
||||||
|
"_zapfding_encoding",
|
||||||
|
"adobe_glyphs",
|
||||||
|
"charset_encoding",
|
||||||
|
]
|
||||||
281
venv/lib/python3.12/site-packages/pypdf/_codecs/_codecs.py
Normal file
281
venv/lib/python3.12/site-packages/pypdf/_codecs/_codecs.py
Normal file
@@ -0,0 +1,281 @@
"""
This module is for codecs only.

While the codec implementation can contain details of the PDF specification,
the module should not do any PDF parsing.
"""

import io
from abc import ABC, abstractmethod

from pypdf._utils import logger_warning
from pypdf.errors import LimitReachedError


class Codec(ABC):
    """Abstract base class for all codecs."""

    @abstractmethod
    def encode(self, data: bytes) -> bytes:
        """
        Encode the input data.

        Args:
            data: Data to encode.

        Returns:
            Encoded data.

        """

    @abstractmethod
    def decode(self, data: bytes) -> bytes:
        """
        Decode the input data.

        Args:
            data: Data to decode.

        Returns:
            Decoded data.

        """


class LzwCodec(Codec):
    """Lempel-Ziv-Welch (LZW) adaptive compression codec."""

    CLEAR_TABLE_MARKER = 256  # Special code to indicate table reset
    EOD_MARKER = 257  # End-of-data marker
    INITIAL_BITS_PER_CODE = 9  # Initial code bit width
    MAX_BITS_PER_CODE = 12  # Maximum code bit width

    def __init__(self, max_output_length: int = 75_000_000) -> None:
        self.max_output_length = max_output_length

    def _initialize_encoding_table(self) -> None:
        """Initialize the encoding table and state to initial conditions."""
        self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
        self.next_code = self.EOD_MARKER + 1
        self.bits_per_code = self.INITIAL_BITS_PER_CODE
        self.max_code_value = (1 << self.bits_per_code) - 1

    def _increase_next_code(self) -> None:
        """Update bits_per_code and max_code_value if necessary."""
        self.next_code += 1
        if (
            self.next_code > self.max_code_value
            and self.bits_per_code < self.MAX_BITS_PER_CODE
        ):
            self.bits_per_code += 1
            self.max_code_value = (1 << self.bits_per_code) - 1

    def encode(self, data: bytes) -> bytes:
        """
        Encode data using the LZW compression algorithm.

        Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
        """
        result_codes: list[int] = []

        # The encoder shall begin by issuing a clear-table code
        result_codes.append(self.CLEAR_TABLE_MARKER)
        self._initialize_encoding_table()

        current_sequence = b""
        for byte in data:
            next_sequence = current_sequence + bytes([byte])

            if next_sequence in self.encoding_table:
                # Extend current sequence if already in the table
                current_sequence = next_sequence
            else:
                # Output code for the current sequence
                result_codes.append(self.encoding_table[current_sequence])

                # Add the new sequence to the table if there's room
                if self.next_code <= (1 << self.MAX_BITS_PER_CODE) - 1:
                    self.encoding_table[next_sequence] = self.next_code
                    self._increase_next_code()
                else:
                    # If the table is full, emit a clear-table command
                    result_codes.append(self.CLEAR_TABLE_MARKER)
                    self._initialize_encoding_table()

                # Start new sequence
                current_sequence = bytes([byte])

        # Ensure everything actually is encoded
        if current_sequence:
            result_codes.append(self.encoding_table[current_sequence])
        result_codes.append(self.EOD_MARKER)

        return self._pack_codes_into_bytes(result_codes)

    def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
        """
        Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.

        The bit-width starts at 9 bits and expands as needed.
        """
        self._initialize_encoding_table()
        buffer = 0
        bits_in_buffer = 0
        output = bytearray()

        for code in codes:
            buffer = (buffer << self.bits_per_code) | code
            bits_in_buffer += self.bits_per_code

            # Codes shall be packed into a continuous bit stream, high-order bit
            # first. This stream shall then be divided into bytes, high-order bit
            # first.
            while bits_in_buffer >= 8:
                bits_in_buffer -= 8
                output.append((buffer >> bits_in_buffer) & 0xFF)

            if code == self.CLEAR_TABLE_MARKER:
                self._initialize_encoding_table()
            elif code == self.EOD_MARKER:
                continue
            else:
                self._increase_next_code()

        # Flush any remaining bits in the buffer
        if bits_in_buffer > 0:
            output.append((buffer << (8 - bits_in_buffer)) & 0xFF)

        return bytes(output)

    def _initialize_decoding_table(self) -> None:
        self.max_code_value = (1 << self.MAX_BITS_PER_CODE) - 1
        self.decoding_table = [bytes([i]) for i in range(self.CLEAR_TABLE_MARKER)] + [
            b""
        ] * (self.max_code_value - self.CLEAR_TABLE_MARKER + 1)
        self._table_index = self.EOD_MARKER + 1
        self._bits_to_get = 9

    def _next_code_decode(self, data: bytes) -> int:
        self._next_data: int
        try:
            while self._next_bits < self._bits_to_get:
                self._next_data = (self._next_data << 8) | (
                    data[self._byte_pointer]
                )
                self._byte_pointer += 1
                self._next_bits += 8

            code = (
                self._next_data >> (self._next_bits - self._bits_to_get)
            ) & self._and_table[self._bits_to_get - 9]
            self._next_bits -= self._bits_to_get

            # Reduce data to get rid of the overhead,
            # which increases performance on large streams significantly.
            self._next_data = self._next_data & 0xFFFFF

            return code
        except IndexError:
            return self.EOD_MARKER

    # The following method has been converted to Python from PDFsharp:
    # https://github.com/empira/PDFsharp/blob/5fbf6ed14740bc4e16786816882d32e43af3ff5d/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
    #
    # Original license:
    #
    # -------------------------------------------------------------------------
    # Copyright (c) 2001-2024 empira Software GmbH, Troisdorf (Cologne Area),
    # Germany
    #
    # http://docs.pdfsharp.net
    #
    # MIT License
    #
    # Permission is hereby granted, free of charge, to any person obtaining a
    # copy of this software and associated documentation files (the "Software"),
    # to deal in the Software without restriction, including without limitation
    # the rights to use, copy, modify, merge, publish, distribute, sublicense,
    # and/or sell copies of the Software, and to permit persons to whom the
    # Software is furnished to do so, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included
    # in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    # DEALINGS IN THE SOFTWARE.
    # --------------------------------------------------------------------------
    def decode(self, data: bytes) -> bytes:
        """
        The following code was converted to Python from the following code:
        https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
        """
        self._and_table = [511, 1023, 2047, 4095]
        self._table_index = 0
        self._bits_to_get = 9
        self._byte_pointer = 0
        self._next_data = 0
        self._next_bits = 0

        output_stream = io.BytesIO()
        output_length = 0

        self._initialize_decoding_table()
        self._byte_pointer = 0
        self._next_data = 0
        self._next_bits = 0
        old_code = self.CLEAR_TABLE_MARKER

        while True:
            code = self._next_code_decode(data)
            if code == self.EOD_MARKER:
                break

            if code == self.CLEAR_TABLE_MARKER:
                self._initialize_decoding_table()
                code = self._next_code_decode(data)
                if code == self.EOD_MARKER:
                    break
                output_stream.write(decoded := self.decoding_table[code])
                old_code = code
            elif code < self._table_index:
                decoded = self.decoding_table[code]
                output_stream.write(decoded)
                if old_code != self.CLEAR_TABLE_MARKER:
                    self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                old_code = code
            else:
                # The code is not in the table and not one of the special codes
                decoded = (
                    self.decoding_table[old_code] + self.decoding_table[old_code][:1]
                )
                output_stream.write(decoded)
                self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                old_code = code

            output_length += len(decoded)
            if output_length > self.max_output_length:
                raise LimitReachedError(
                    f"Limit reached while decompressing: {output_length} > {self.max_output_length}"
                )

        return output_stream.getvalue()

    def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
        new_string = old_string + bytes([new_char])
        if self._table_index > self.max_code_value:
            logger_warning("Ignoring too large LZW table index.", __name__)
            return
        self.decoding_table[self._table_index] = new_string
        self._table_index += 1

        # Update the number of bits to get based on the table index
        if self._table_index == 511:
            self._bits_to_get = 10
        elif self._table_index == 1023:
            self._bits_to_get = 11
        elif self._table_index == 2047:
            self._bits_to_get = 12
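A round-trip sketch of the codec above (assuming only the vendored module path; the encoder emits a clear-table code first and an EOD marker last, which the decoder consumes):

from pypdf._codecs._codecs import LzwCodec

codec = LzwCodec()
payload = b"ababababab" * 200  # repetitive input, the best case for LZW
packed = codec.encode(payload)
assert codec.decode(packed) == payload
print(f"{len(payload)} -> {len(packed)} bytes")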
13969  venv/lib/python3.12/site-packages/pypdf/_codecs/adobe_glyphs.py  Normal file
File diff suppressed because it is too large
4441  venv/lib/python3.12/site-packages/pypdf/_codecs/core_fontmetrics.py  Normal file
File diff suppressed because it is too large
264  venv/lib/python3.12/site-packages/pypdf/_codecs/pdfdoc.py  Normal file
@@ -0,0 +1,264 @@
# PDFDocEncoding Character Set: Table D.2 of PDF Reference 1.7
# C.1 Predefined encodings sorted by character name of another PDF reference
# Some indices have '\u0000' although they should have something else:
# 22: should be '\u0017'
_pdfdoc_encoding = [
    "\u0000", "\u0001", "\u0002", "\u0003", "\u0004", "\u0005", "\u0006", "\u0007",  # 0 - 7
    "\u0008", "\u0009", "\u000a", "\u000b", "\u000c", "\u000d", "\u000e", "\u000f",  # 8 - 15
    "\u0010", "\u0011", "\u0012", "\u0013", "\u0014", "\u0015", "\u0000", "\u0017",  # 16 - 23
    "\u02d8", "\u02c7", "\u02c6", "\u02d9", "\u02dd", "\u02db", "\u02da", "\u02dc",  # 24 - 31
    "\u0020", "\u0021", "\u0022", "\u0023", "\u0024", "\u0025", "\u0026", "\u0027",  # 32 - 39
    "\u0028", "\u0029", "\u002a", "\u002b", "\u002c", "\u002d", "\u002e", "\u002f",  # 40 - 47
    "\u0030", "\u0031", "\u0032", "\u0033", "\u0034", "\u0035", "\u0036", "\u0037",  # 48 - 55
    "\u0038", "\u0039", "\u003a", "\u003b", "\u003c", "\u003d", "\u003e", "\u003f",  # 56 - 63
    "\u0040", "\u0041", "\u0042", "\u0043", "\u0044", "\u0045", "\u0046", "\u0047",  # 64 - 71
    "\u0048", "\u0049", "\u004a", "\u004b", "\u004c", "\u004d", "\u004e", "\u004f",  # 72 - 79
    "\u0050", "\u0051", "\u0052", "\u0053", "\u0054", "\u0055", "\u0056", "\u0057",  # 80 - 87
    "\u0058", "\u0059", "\u005a", "\u005b", "\u005c", "\u005d", "\u005e", "\u005f",  # 88 - 95
    "\u0060", "\u0061", "\u0062", "\u0063", "\u0064", "\u0065", "\u0066", "\u0067",  # 96 - 103
    "\u0068", "\u0069", "\u006a", "\u006b", "\u006c", "\u006d", "\u006e", "\u006f",  # 104 - 111
    "\u0070", "\u0071", "\u0072", "\u0073", "\u0074", "\u0075", "\u0076", "\u0077",  # 112 - 119
    "\u0078", "\u0079", "\u007a", "\u007b", "\u007c", "\u007d", "\u007e", "\u0000",  # 120 - 127
    "\u2022", "\u2020", "\u2021", "\u2026", "\u2014", "\u2013", "\u0192", "\u2044",  # 128 - 135
    "\u2039", "\u203a", "\u2212", "\u2030", "\u201e", "\u201c", "\u201d", "\u2018",  # 136 - 143
    "\u2019", "\u201a", "\u2122", "\ufb01", "\ufb02", "\u0141", "\u0152", "\u0160",  # 144 - 151
    "\u0178", "\u017d", "\u0131", "\u0142", "\u0153", "\u0161", "\u017e", "\u0000",  # 152 - 159
    "\u20ac", "\u00a1", "\u00a2", "\u00a3", "\u00a4", "\u00a5", "\u00a6", "\u00a7",  # 160 - 167
    "\u00a8", "\u00a9", "\u00aa", "\u00ab", "\u00ac", "\u0000", "\u00ae", "\u00af",  # 168 - 175
    "\u00b0", "\u00b1", "\u00b2", "\u00b3", "\u00b4", "\u00b5", "\u00b6", "\u00b7",  # 176 - 183
    "\u00b8", "\u00b9", "\u00ba", "\u00bb", "\u00bc", "\u00bd", "\u00be", "\u00bf",  # 184 - 191
    "\u00c0", "\u00c1", "\u00c2", "\u00c3", "\u00c4", "\u00c5", "\u00c6", "\u00c7",  # 192 - 199
    "\u00c8", "\u00c9", "\u00ca", "\u00cb", "\u00cc", "\u00cd", "\u00ce", "\u00cf",  # 200 - 207
    "\u00d0", "\u00d1", "\u00d2", "\u00d3", "\u00d4", "\u00d5", "\u00d6", "\u00d7",  # 208 - 215
    "\u00d8", "\u00d9", "\u00da", "\u00db", "\u00dc", "\u00dd", "\u00de", "\u00df",  # 216 - 223
    "\u00e0", "\u00e1", "\u00e2", "\u00e3", "\u00e4", "\u00e5", "\u00e6", "\u00e7",  # 224 - 231
    "\u00e8", "\u00e9", "\u00ea", "\u00eb", "\u00ec", "\u00ed", "\u00ee", "\u00ef",  # 232 - 239
    "\u00f0", "\u00f1", "\u00f2", "\u00f3", "\u00f4", "\u00f5", "\u00f6", "\u00f7",  # 240 - 247
    "\u00f8", "\u00f9", "\u00fa", "\u00fb", "\u00fc", "\u00fd", "\u00fe", "\u00ff",  # 248 - 255
]

assert len(_pdfdoc_encoding) == 256
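A few spot checks of the table layout (a sketch; indices are raw PDFDocEncoding byte values):

from pypdf._codecs.pdfdoc import _pdfdoc_encoding

assert _pdfdoc_encoding[0x18] == "\u02d8"  # BREVE
assert _pdfdoc_encoding[0x8b] == "\u2030"  # PER MILLE SIGN
assert _pdfdoc_encoding[22] == "\u0000"    # the known gap called out in the header comment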
258  venv/lib/python3.12/site-packages/pypdf/_codecs/std.py  Normal file
@@ -0,0 +1,258 @@
_std_encoding = [
    "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
    "\x08", "\t", "\n", "\x0b", "\x0c", "\r", "\x0e", "\x0f",
    "\x10", "\x11", "\x12", "\x13", "\x14", "\x15", "\x16", "\x17",
    "\x18", "\x19", "\x1a", "\x1b", "\x1c", "\x1d", "\x1e", "\x1f",
    " ", "!", '"', "#", "$", "%", "&", "’",
    "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7",
    "8", "9", ":", ";", "<", "=", ">", "?",
    "@", "A", "B", "C", "D", "E", "F", "G",
    "H", "I", "J", "K", "L", "M", "N", "O",
    "P", "Q", "R", "S", "T", "U", "V", "W",
    "X", "Y", "Z", "[", "\\", "]", "^", "_",
    "‘", "a", "b", "c", "d", "e", "f", "g",
    "h", "i", "j", "k", "l", "m", "n", "o",
    "p", "q", "r", "s", "t", "u", "v", "w",
    "x", "y", "z", "{", "|", "}", "~", "\x7f",
    "\x80", "\x81", "\x82", "\x83", "\x84", "\x85", "\x86", "\x87",
    "\x88", "\x89", "\x8a", "\x8b", "\x8c", "\x8d", "\x8e", "\x8f",
    "\x90", "\x91", "\x92", "\x93", "\x94", "\x95", "\x96", "\x97",
    "\x98", "\x99", "\x9a", "\x9b", "\x9c", "\x9d", "\x9e", "\x9f",
    "\xa0", "¡", "¢", "£", "⁄", "¥", "ƒ", "§",
    "¤", "'", "“", "«", "‹", "›", "fi", "fl",
    "°", "–", "†", "‡", "·", "µ", "¶", "•",
    "‚", "„", "”", "»", "…", "‰", "¾", "¿",
    "À", "`", "´", "ˆ", "˜", "¯", "˘", "˙",
    "¨", "É", "˚", "¸", "Ì", "˝", "˛", "ˇ",
    "—", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "×",
    "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß",
    "à", "Æ", "â", "ª", "ä", "å", "æ", "ç",
    "Ł", "Ø", "Œ", "º", "ì", "í", "î", "ï",
    "ð", "æ", "ò", "ó", "ô", "ı", "ö", "÷",
    "ł", "ø", "œ", "ß", "ü", "ý", "þ", "ÿ",
]
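One quirk visible in the table above: Adobe StandardEncoding puts typographic quotes where ASCII has the apostrophe and grave accent, and the plain apostrophe (quotesingle) lives at 0xA9. A quick sketch:

from pypdf._codecs.std import _std_encoding

assert _std_encoding[0x27] == "\u2019"  # quoteright, not the ASCII apostrophe
assert _std_encoding[0x60] == "\u2018"  # quoteleft, not the ASCII grave accent
assert _std_encoding[0xa9] == "'"       # quotesingle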
260  venv/lib/python3.12/site-packages/pypdf/_codecs/symbol.py  Normal file
@@ -0,0 +1,260 @@
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
_symbol_encoding = [
    "\u0000", "\u0001", "\u0002", "\u0003", "\u0004", "\u0005", "\u0006", "\u0007",
    "\u0008", "\u0009", "\u000A", "\u000B", "\u000C", "\u000D", "\u000E", "\u000F",
    "\u0010", "\u0011", "\u0012", "\u0013", "\u0014", "\u0015", "\u0016", "\u0017",
    "\u0018", "\u0019", "\u001A", "\u001B", "\u001C", "\u001D", "\u001E", "\u001F",
    "\u0020", "\u0021", "\u2200", "\u0023", "\u2203", "\u0025", "\u0026", "\u220B",
    "\u0028", "\u0029", "\u2217", "\u002B", "\u002C", "\u2212", "\u002E", "\u002F",
    "\u0030", "\u0031", "\u0032", "\u0033", "\u0034", "\u0035", "\u0036", "\u0037",
    "\u0038", "\u0039", "\u003A", "\u003B", "\u003C", "\u003D", "\u003E", "\u003F",
    "\u2245", "\u0391", "\u0392", "\u03A7", "\u0394", "\u0395", "\u03A6", "\u0393",
    "\u0397", "\u0399", "\u03D1", "\u039A", "\u039B", "\u039C", "\u039D", "\u039F",
    "\u03A0", "\u0398", "\u03A1", "\u03A3", "\u03A4", "\u03A5", "\u03C2", "\u03A9",
    "\u039E", "\u03A8", "\u0396", "\u005B", "\u2234", "\u005D", "\u22A5", "\u005F",
    "\uF8E5", "\u03B1", "\u03B2", "\u03C7", "\u03B4", "\u03B5", "\u03C6", "\u03B3",
    "\u03B7", "\u03B9", "\u03D5", "\u03BA", "\u03BB", "\u00B5", "\u03BD", "\u03BF",
    "\u03C0", "\u03B8", "\u03C1", "\u03C3", "\u03C4", "\u03C5", "\u03D6", "\u03C9",
    "\u03BE", "\u03C8", "\u03B6", "\u007B", "\u007C", "\u007D", "\u223C", "\u007F",
    "\u0080", "\u0081", "\u0082", "\u0083", "\u0084", "\u0085", "\u0086", "\u0087",
    "\u0088", "\u0089", "\u008A", "\u008B", "\u008C", "\u008D", "\u008E", "\u008F",
    "\u0090", "\u0091", "\u0092", "\u0093", "\u0094", "\u0095", "\u0096", "\u0097",
    "\u0098", "\u0099", "\u009A", "\u009B", "\u009C", "\u009D", "\u009E", "\u009F",
    "\u20AC", "\u03D2", "\u2032", "\u2264", "\u2044", "\u221E", "\u0192", "\u2663",
    "\u2666", "\u2665", "\u2660", "\u2194", "\u2190", "\u2191", "\u2192", "\u2193",
    "\u00B0", "\u00B1", "\u2033", "\u2265", "\u00D7", "\u221D", "\u2202", "\u2022",
    "\u00F7", "\u2260", "\u2261", "\u2248", "\u2026", "\uF8E6", "\uF8E7", "\u21B5",
    "\u2135", "\u2111", "\u211C", "\u2118", "\u2297", "\u2295", "\u2205", "\u2229",
    "\u222A", "\u2283", "\u2287", "\u2284", "\u2282", "\u2286", "\u2208", "\u2209",
    "\u2220", "\u2207", "\uF6DA", "\uF6D9", "\uF6DB", "\u220F", "\u221A", "\u22C5",
    "\u00AC", "\u2227", "\u2228", "\u21D4", "\u21D0", "\u21D1", "\u21D2", "\u21D3",
    "\u25CA", "\u2329", "\uF8E8", "\uF8E9", "\uF8EA", "\u2211", "\uF8EB", "\uF8EC",
    "\uF8ED", "\uF8EE", "\uF8EF", "\uF8F0", "\uF8F1", "\uF8F2", "\uF8F3", "\uF8F4",
    "\u00F0", "\u232A", "\u222B", "\u2320", "\uF8F5", "\u2321", "\uF8F6", "\uF8F7",
    "\uF8F8", "\uF8F9", "\uF8FA", "\uF8FB", "\uF8FC", "\uF8FD", "\uF8FE", "\u00FF",
]

assert len(_symbol_encoding) == 256
261  venv/lib/python3.12/site-packages/pypdf/_codecs/zapfding.py  Normal file
@@ -0,0 +1,261 @@
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt

_zapfding_encoding = [
    "\u0000", "\u0001", "\u0002", "\u0003", "\u0004", "\u0005", "\u0006", "\u0007",
    "\u0008", "\u0009", "\u000A", "\u000B", "\u000C", "\u000D", "\u000E", "\u000F",
    "\u0010", "\u0011", "\u0012", "\u0013", "\u0014", "\u0015", "\u0016", "\u0017",
    "\u0018", "\u0019", "\u001A", "\u001B", "\u001C", "\u001D", "\u001E", "\u001F",
    "\u0020", "\u2701", "\u2702", "\u2703", "\u2704", "\u260E", "\u2706", "\u2707",
    "\u2708", "\u2709", "\u261B", "\u261E", "\u270C", "\u270D", "\u270E", "\u270F",
    "\u2710", "\u2711", "\u2712", "\u2713", "\u2714", "\u2715", "\u2716", "\u2717",
    "\u2718", "\u2719", "\u271A", "\u271B", "\u271C", "\u271D", "\u271E", "\u271F",
    "\u2720", "\u2721", "\u2722", "\u2723", "\u2724", "\u2725", "\u2726", "\u2727",
    "\u2605", "\u2729", "\u272A", "\u272B", "\u272C", "\u272D", "\u272E", "\u272F",
    "\u2730", "\u2731", "\u2732", "\u2733", "\u2734", "\u2735", "\u2736", "\u2737",
    "\u2738", "\u2739", "\u273A", "\u273B", "\u273C", "\u273D", "\u273E", "\u273F",
    "\u2740", "\u2741", "\u2742", "\u2743", "\u2744", "\u2745", "\u2746", "\u2747",
    "\u2748", "\u2749", "\u274A", "\u274B", "\u25CF", "\u274D", "\u25A0", "\u274F",
    "\u2750", "\u2751", "\u2752", "\u25B2", "\u25BC", "\u25C6", "\u2756", "\u25D7",
    "\u2758", "\u2759", "\u275A", "\u275B", "\u275C", "\u275D", "\u275E", "\u007F",
    "\uF8D7", "\uF8D8", "\uF8D9", "\uF8DA", "\uF8DB", "\uF8DC", "\uF8DD", "\uF8DE",
    "\uF8DF", "\uF8E0", "\uF8E1", "\uF8E2", "\uF8E3", "\uF8E4", "\u008E", "\u008F",
    "\u0090", "\u0091", "\u0092", "\u0093", "\u0094", "\u0095", "\u0096", "\u0097",
    "\u0098", "\u0099", "\u009A", "\u009B", "\u009C", "\u009D", "\u009E", "\u009F",
    "\u00A0", "\u2761", "\u2762", "\u2763", "\u2764", "\u2765", "\u2766", "\u2767",
    "\u2663", "\u2666", "\u2665", "\u2660", "\u2460", "\u2461", "\u2462", "\u2463",
    "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469", "\u2776", "\u2777",
    "\u2778", "\u2779", "\u277A", "\u277B", "\u277C", "\u277D", "\u277E", "\u277F",
    "\u2780", "\u2781", "\u2782", "\u2783", "\u2784", "\u2785", "\u2786", "\u2787",
    "\u2788", "\u2789", "\u278A", "\u278B", "\u278C", "\u278D", "\u278E", "\u278F",
    "\u2790", "\u2791", "\u2792", "\u2793", "\u2794", "\u2192", "\u2194", "\u2195",
    "\u2798", "\u2799", "\u279A", "\u279B", "\u279C", "\u279D", "\u279E", "\u279F",
    "\u27A0", "\u27A1", "\u27A2", "\u27A3", "\u27A4", "\u27A5", "\u27A6", "\u27A7",
    "\u27A8", "\u27A9", "\u27AA", "\u27AB", "\u27AC", "\u27AD", "\u27AE", "\u27AF",
    "\u00F0", "\u27B1", "\u27B2", "\u27B3", "\u27B4", "\u27B5", "\u27B6", "\u27B7",
    "\u27B8", "\u27B9", "\u27BA", "\u27BB", "\u27BC", "\u27BD", "\u27BE", "\u00FF",
]

assert len(_zapfding_encoding) == 256
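As with the Symbol table, ordinary byte positions carry dingbats here. A couple of spot checks (a sketch):

from pypdf._codecs.zapfding import _zapfding_encoding

assert _zapfding_encoding[0x33] == "\u2713"  # CHECK MARK
assert _zapfding_encoding[0xac] == "\u2460"  # CIRCLED DIGIT ONE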
@@ -0,0 +1,86 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from pypdf._crypt_providers._base import CryptBase, CryptIdentity

try:
    from pypdf._crypt_providers._cryptography import (
        CryptAES,
        CryptRC4,
        aes_cbc_decrypt,
        aes_cbc_encrypt,
        aes_ecb_decrypt,
        aes_ecb_encrypt,
        crypt_provider,
        rc4_decrypt,
        rc4_encrypt,
    )
    from pypdf._utils import Version

    if Version(crypt_provider[1]) <= Version("3.0"):
        # This is due to the backend parameter being required back then:
        # https://cryptography.io/en/latest/changelog/#v3-1
        raise ImportError("cryptography<=3.0 is not supported")  # pragma: no cover
except ImportError:
    try:
        from pypdf._crypt_providers._pycryptodome import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )
    except ImportError:
        from pypdf._crypt_providers._fallback import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )

__all__ = [
    "CryptAES",
    "CryptBase",
    "CryptIdentity",
    "CryptRC4",
    "aes_cbc_decrypt",
    "aes_cbc_encrypt",
    "aes_ecb_decrypt",
    "aes_ecb_encrypt",
    "crypt_provider",
    "rc4_decrypt",
    "rc4_encrypt",
]
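The try/except chain above (which appears to be the package's __init__; the capture omits its file path) selects the strongest available backend at import time: cryptography (>3.0), then pycryptodome, then a pure-Python fallback. A sketch to see which one won and that the exported helpers round-trip:

from pypdf._crypt_providers import crypt_provider, rc4_decrypt, rc4_encrypt

name, version = crypt_provider
print(f"active provider: {name} {version}")

key, msg = b"secret-key", b"attack at dawn"
assert rc4_decrypt(key, rc4_encrypt(key, msg)) == msg  # RC4 is symmetric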
@@ -0,0 +1,38 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


class CryptBase:
    def encrypt(self, data: bytes) -> bytes:  # pragma: no cover
        return data

    def decrypt(self, data: bytes) -> bytes:  # pragma: no cover
        return data


class CryptIdentity(CryptBase):
    pass
@@ -0,0 +1,118 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import secrets

from cryptography import __version__
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers.algorithms import AES

try:
    # 43.0.0 - https://cryptography.io/en/latest/changelog/#v43-0-0
    from cryptography.hazmat.decrepit.ciphers.algorithms import ARC4
except ImportError:
    from cryptography.hazmat.primitives.ciphers.algorithms import ARC4
from cryptography.hazmat.primitives.ciphers.base import Cipher
from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB

from pypdf._crypt_providers._base import CryptBase

crypt_provider = ("cryptography", __version__)


class CryptRC4(CryptBase):
    def __init__(self, key: bytes) -> None:
        self.cipher = Cipher(ARC4(key), mode=None)

    def encrypt(self, data: bytes) -> bytes:
        encryptor = self.cipher.encryptor()
        return encryptor.update(data) + encryptor.finalize()

    def decrypt(self, data: bytes) -> bytes:
        decryptor = self.cipher.decryptor()
        return decryptor.update(data) + decryptor.finalize()


class CryptAES(CryptBase):
    def __init__(self, key: bytes) -> None:
        self.alg = AES(key)

    def encrypt(self, data: bytes) -> bytes:
        iv = secrets.token_bytes(16)
        pad = padding.PKCS7(128).padder()
        data = pad.update(data) + pad.finalize()

        cipher = Cipher(self.alg, CBC(iv))
        encryptor = cipher.encryptor()
        return iv + encryptor.update(data) + encryptor.finalize()

    def decrypt(self, data: bytes) -> bytes:
        iv = data[:16]
        data = data[16:]
        # for empty encrypted data
        if not data:
            return data

        # just for robustness, it does not happen under normal circumstances
        if len(data) % 16 != 0:
            pad = padding.PKCS7(128).padder()
            data = pad.update(data) + pad.finalize()

        cipher = Cipher(self.alg, CBC(iv))
        decryptor = cipher.decryptor()
        d = decryptor.update(data) + decryptor.finalize()
        return d[: -d[-1]]


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    encryptor = Cipher(ARC4(key), mode=None).encryptor()
    return encryptor.update(data) + encryptor.finalize()


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    decryptor = Cipher(ARC4(key), mode=None).decryptor()
    return decryptor.update(data) + decryptor.finalize()


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    encryptor = Cipher(AES(key), mode=ECB()).encryptor()
    return encryptor.update(data) + encryptor.finalize()


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    decryptor = Cipher(AES(key), mode=ECB()).decryptor()
    return decryptor.update(data) + decryptor.finalize()


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    encryptor = Cipher(AES(key), mode=CBC(iv)).encryptor()
    return encryptor.update(data) + encryptor.finalize()


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    decryptor = Cipher(AES(key), mode=CBC(iv)).decryptor()
    return decryptor.update(data) + decryptor.finalize()
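In the `_cryptography` provider above, CryptAES.encrypt prepends a fresh random IV and applies PKCS7 padding, and decrypt strips both again, so ciphertexts differ between calls even for identical plaintexts. A round-trip sketch (requires the cryptography package):

from pypdf._crypt_providers._cryptography import CryptAES

aes = CryptAES(b"0" * 32)  # 256-bit key
c1, c2 = aes.encrypt(b"hello pdf"), aes.encrypt(b"hello pdf")
assert c1 != c2  # random IVs make repeated encryptions differ
assert aes.decrypt(c1) == b"hello pdf"
assert aes.decrypt(c2) == b"hello pdf"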
@@ -0,0 +1,93 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from pypdf._crypt_providers._base import CryptBase
from pypdf.errors import DependencyError

_DEPENDENCY_ERROR_STR = "cryptography>=3.1 is required for AES algorithm"


crypt_provider = ("local_crypt_fallback", "0.0.0")


class CryptRC4(CryptBase):
    def __init__(self, key: bytes) -> None:
        self.s = bytearray(range(256))
        j = 0
        for i in range(256):
            j = (j + self.s[i] + key[i % len(key)]) % 256
            self.s[i], self.s[j] = self.s[j], self.s[i]

    def encrypt(self, data: bytes) -> bytes:
        s = bytearray(self.s)
        out = [0 for _ in range(len(data))]
        i, j = 0, 0
        for k in range(len(data)):
            i = (i + 1) % 256
            j = (j + s[i]) % 256
            s[i], s[j] = s[j], s[i]
            x = s[(s[i] + s[j]) % 256]
            out[k] = data[k] ^ x
        return bytes(out)

    def decrypt(self, data: bytes) -> bytes:
        return self.encrypt(data)


class CryptAES(CryptBase):
    def __init__(self, key: bytes) -> None:
        pass

    def encrypt(self, data: bytes) -> bytes:
        raise DependencyError(_DEPENDENCY_ERROR_STR)

    def decrypt(self, data: bytes) -> bytes:
        raise DependencyError(_DEPENDENCY_ERROR_STR)


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    return CryptRC4(key).encrypt(data)


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    return CryptRC4(key).decrypt(data)


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    raise DependencyError(_DEPENDENCY_ERROR_STR)
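The fallback RC4 above is the textbook KSA/PRGA construction, which is why decrypt simply calls encrypt. As a sketch, it reproduces the classic published test vector:

from pypdf._crypt_providers._fallback import CryptRC4

ct = CryptRC4(b"Key").encrypt(b"Plaintext")
assert ct.hex().upper() == "BBF316E8D940AF0AD3"  # well-known RC4 test vector
assert CryptRC4(b"Key").decrypt(ct) == b"Plaintext"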
@@ -0,0 +1,97 @@
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import secrets

from Crypto import __version__
from Crypto.Cipher import AES, ARC4
from Crypto.Util.Padding import pad

from pypdf._crypt_providers._base import CryptBase

crypt_provider = ("pycryptodome", __version__)


class CryptRC4(CryptBase):
    def __init__(self, key: bytes) -> None:
        self.key = key

    def encrypt(self, data: bytes) -> bytes:
        return ARC4.ARC4Cipher(self.key).encrypt(data)

    def decrypt(self, data: bytes) -> bytes:
        return ARC4.ARC4Cipher(self.key).decrypt(data)


class CryptAES(CryptBase):
    def __init__(self, key: bytes) -> None:
        self.key = key

    def encrypt(self, data: bytes) -> bytes:
        iv = secrets.token_bytes(16)
        data = pad(data, 16)
        aes = AES.new(self.key, AES.MODE_CBC, iv)
        return iv + aes.encrypt(data)

    def decrypt(self, data: bytes) -> bytes:
        iv = data[:16]
        data = data[16:]
        # for empty encrypted data
        if not data:
            return data

        # just for robustness, it does not happen under normal circumstances
        if len(data) % 16 != 0:
            data = pad(data, 16)

        aes = AES.new(self.key, AES.MODE_CBC, iv)
        d = aes.decrypt(data)
        return d[: -d[-1]]


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    return ARC4.ARC4Cipher(key).encrypt(data)


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    return ARC4.ARC4Cipher(key).decrypt(data)


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    return AES.new(key, AES.MODE_ECB).encrypt(data)


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    return AES.new(key, AES.MODE_ECB).decrypt(data)


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    return AES.new(key, AES.MODE_CBC, iv).encrypt(data)


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    return AES.new(key, AES.MODE_CBC, iv).decrypt(data)
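All three provider modules export the same surface, so callers stay backend-agnostic. A sketch against this one (assuming pycryptodome is installed; note the raw aes_ecb_* helpers do no padding, so inputs must be whole 16-byte blocks):

from pypdf._crypt_providers._pycryptodome import aes_ecb_decrypt, aes_ecb_encrypt

key = b"0123456789abcdef"    # 16-byte AES-128 key
block = b"sixteen byte blk"  # exactly one AES block
assert aes_ecb_decrypt(key, aes_ecb_encrypt(key, block)) == block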
1461  venv/lib/python3.12/site-packages/pypdf/_doc_common.py  Normal file
File diff suppressed because it is too large
1178  venv/lib/python3.12/site-packages/pypdf/_encryption.py  Normal file
File diff suppressed because it is too large
327  venv/lib/python3.12/site-packages/pypdf/_font.py  Normal file
@@ -0,0 +1,327 @@
|
|||||||
|
from collections.abc import Sequence
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Optional, Union, cast
|
||||||
|
|
||||||
|
from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject
|
||||||
|
|
||||||
|
from ._cmap import get_encoding
|
||||||
|
from ._codecs.adobe_glyphs import adobe_glyphs
|
||||||
|
from ._utils import logger_warning
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class FontDescriptor:
|
||||||
|
"""
|
||||||
|
Represents the FontDescriptor dictionary as defined in the PDF specification.
|
||||||
|
This contains both descriptive and metric information.
|
||||||
|
|
||||||
|
The defaults are derived from the mean values of the 14 core fonts, rounded
|
||||||
|
to 100.
|
||||||
|
"""
|
||||||
|
|
||||||
|
name: str = "Unknown"
|
||||||
|
family: str = "Unknown"
|
||||||
|
weight: str = "Unknown"
|
||||||
|
|
||||||
|
ascent: float = 700.0
|
||||||
|
descent: float = -200.0
|
||||||
|
cap_height: float = 600.0
|
||||||
|
x_height: float = 500.0
|
||||||
|
italic_angle: float = 0.0 # Non-italic
|
||||||
|
    flags: int = 32  # Non-serif, non-symbolic, not fixed width
    bbox: tuple[float, float, float, float] = field(default_factory=lambda: (-100.0, -200.0, 1000.0, 900.0))
    character_widths: dict[str, int] = field(default_factory=lambda: {"default": 500})

    @staticmethod
    def _parse_font_descriptor(font_kwargs: dict[str, Any], font_descriptor_obj: DictionaryObject) -> dict[str, Any]:
        font_descriptor_dict: DictionaryObject = (
            font_descriptor_obj.get_object()
            if isinstance(font_descriptor_obj, IndirectObject)
            else font_descriptor_obj
        )
        for source_key, target_key in [
            ("/FontName", "name"),
            ("/FontFamily", "family"),
            ("/FontWeight", "weight"),
            ("/Ascent", "ascent"),
            ("/Descent", "descent"),
            ("/CapHeight", "cap_height"),
            ("/XHeight", "x_height"),
            ("/ItalicAngle", "italic_angle"),
            ("/Flags", "flags"),
            ("/FontBBox", "bbox")
        ]:
            if source_key in font_descriptor_dict:
                font_kwargs[target_key] = font_descriptor_dict[source_key]
        # Handle a missing bbox gracefully: PDFs may have fonts without valid bounding boxes.
        if "bbox" in font_kwargs:
            bbox_tuple = tuple(map(float, font_kwargs["bbox"]))
            assert len(bbox_tuple) == 4, bbox_tuple
            font_kwargs["bbox"] = bbox_tuple
        return font_kwargs

    @staticmethod
    def _collect_tt_t1_character_widths(
        pdf_font_dict: DictionaryObject,
        char_map: dict[Any, Any],
        encoding: Union[str, dict[int, str]],
        current_widths: dict[str, int]
    ) -> None:
        """Parses a TrueType or Type1 font's /Widths array from a font dictionary and updates character widths."""
        widths_array = cast(ArrayObject, pdf_font_dict["/Widths"])
        first_char = pdf_font_dict.get("/FirstChar", 0)
        if not isinstance(encoding, str):
            # This means that encoding is a dict
            current_widths.update({
                encoding.get(idx + first_char, chr(idx + first_char)): width
                for idx, width in enumerate(widths_array)
            })
            return

        # We map the character code directly to the character
        # using the string encoding
        for idx, width in enumerate(widths_array):
            # Often "idx == 0" will denote the .notdef character, but we add it anyway
            char_code = idx + first_char  # This is a raw code
            # Get the "raw" character or byte representation
            raw_char = bytes([char_code]).decode(encoding, "surrogatepass")
            # Translate raw_char to the REAL Unicode character using the char_map
            unicode_char = char_map.get(raw_char)
            if unicode_char:
                current_widths[unicode_char] = int(width)
            else:
                current_widths[raw_char] = int(width)

    @staticmethod
    def _collect_cid_character_widths(
        d_font: DictionaryObject, char_map: dict[Any, Any], current_widths: dict[str, int]
    ) -> None:
        """Parses the /W array from a DescendantFont dictionary and updates character widths."""
        ord_map = {
            ord(_target): _surrogate
            for _target, _surrogate in char_map.items()
            if isinstance(_target, str)
        }
        # /W width definitions have two valid formats which can be mixed and matched:
        # (1) A character start index followed by a list of widths, e.g.
        #     `45 [500 600 700]` applies widths 500, 600, 700 to characters 45-47.
        # (2) A character start index, a character stop index, and a width, e.g.
        #     `45 65 500` applies width 500 to characters 45-65.
        skip_count = 0
        _w = d_font.get("/W", [])
        for idx, w_entry in enumerate(_w):
            w_entry = w_entry.get_object()
            if skip_count:
                skip_count -= 1
                continue
            if not isinstance(w_entry, (int, float)):
                # We should never get here due to skip_count above. But
                # sometimes we do.
                logger_warning(f"Expected numeric value for width, got {w_entry}. Ignoring it.", __name__)
                continue
            # check for format (1): `int [int int int int ...]`
            w_next_entry = _w[idx + 1].get_object()
            if isinstance(w_next_entry, Sequence):
                start_idx, width_list = w_entry, w_next_entry
                current_widths.update(
                    {
                        ord_map[_cidx]: _width
                        for _cidx, _width in zip(
                            range(
                                cast(int, start_idx),
                                cast(int, start_idx) + len(width_list),
                                1,
                            ),
                            width_list,
                        )
                        if _cidx in ord_map
                    }
                )
                skip_count = 1
            # check for format (2): `int int int`
            elif isinstance(w_next_entry, (int, float)) and isinstance(
                _w[idx + 2].get_object(), (int, float)
            ):
                start_idx, stop_idx, const_width = (
                    w_entry,
                    w_next_entry,
                    _w[idx + 2].get_object(),
                )
                current_widths.update(
                    {
                        ord_map[_cidx]: const_width
                        for _cidx in range(
                            cast(int, start_idx), cast(int, stop_idx + 1), 1
                        )
                        if _cidx in ord_map
                    }
                )
                skip_count = 2
            else:
                # This handles the case of out of bounds (reaching the end of the width definitions
                # while expecting more elements).
                logger_warning(
                    f"Invalid font width definition. Last element: {w_entry}.",
                    __name__
                )
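
    # Editor's note: an illustrative walk-through (hedged, not part of the
    # vendored code) of the two /W formats handled above, in PDF syntax:
    #
    #     /W [ 45 [500 600 700]   % format (1): widths 500/600/700 for CIDs 45-47
    #          48 50 250 ]        % format (2): width 250 for each of CIDs 48-50
    #
    # Given a char_map whose keys have code points 45-50 and whose values are
    # "a".."f", current_widths would end up as
    # {"a": 500, "b": 600, "c": 700, "d": 250, "e": 250, "f": 250}.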
    @staticmethod
    def _add_default_width(current_widths: dict[str, int]) -> None:
        if not current_widths:
            current_widths["default"] = 500
            return

        if "default" in current_widths:
            return

        if " " in current_widths and current_widths[" "] != 0:
            # Setting default to twice the space width
            current_widths["default"] = int(2 * current_widths[" "])
            return

        # Use the average width of existing glyph widths
        valid_widths = [w for w in current_widths.values() if w > 0]
        current_widths["default"] = sum(valid_widths) // len(valid_widths) if valid_widths else 500

    @classmethod
    def from_font_resource(
        cls,
        pdf_font_dict: DictionaryObject,
        encoding: Optional[Union[str, dict[int, str]]] = None,
        char_map: Optional[dict[Any, Any]] = None
    ) -> "FontDescriptor":
        from pypdf._codecs.core_fontmetrics import CORE_FONT_METRICS  # noqa: PLC0415
        # Prioritize information from the PDF font dictionary
        font_name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/")
        font_kwargs: dict[str, Any] = {"character_widths": {}}

        # Deal with fonts by type; Type1, TrueType and certain Type3
        if pdf_font_dict.get("/Subtype") in ("/Type1", "/MMType1", "/TrueType", "/Type3"):
            if "/Widths" in pdf_font_dict:
                if not (encoding and char_map):
                    encoding, char_map = get_encoding(pdf_font_dict)
                cls._collect_tt_t1_character_widths(
                    pdf_font_dict, char_map, encoding, font_kwargs["character_widths"]
                )
            elif font_name in CORE_FONT_METRICS:
                font_descriptor = CORE_FONT_METRICS[font_name]
                cls._add_default_width(font_descriptor.character_widths)

                return font_descriptor

            if "/FontDescriptor" in pdf_font_dict:  # TODO: This does not account for some Type3 fonts;
                # see tests/test_cmap.py::test_ascii_charset
                font_descriptor_resource = pdf_font_dict.get("/FontDescriptor", DictionaryObject()).get_object()
                font_descriptor_obj = cast(DictionaryObject, font_descriptor_resource)
                if "/MissingWidth" in font_descriptor_obj:
                    font_kwargs["character_widths"]["default"] = font_descriptor_obj["/MissingWidth"].get_object()
            font_kwargs = cls._parse_font_descriptor(
                font_kwargs, pdf_font_dict.get("/FontDescriptor", DictionaryObject())
            )
            if "default" not in font_kwargs["character_widths"]:
                cls._add_default_width(font_kwargs["character_widths"])

            return cls(**font_kwargs)

        # Composite font or CID font - CID fonts have a /W array mapping character codes
        # to widths stashed in /DescendantFonts. No need to test for /DescendantFonts though,
        # because all other fonts have already been dealt with.
        if not (encoding and char_map):
            encoding, char_map = get_encoding(pdf_font_dict)
        d_font: DictionaryObject
        for d_font_idx, d_font in enumerate(
            cast(ArrayObject, pdf_font_dict["/DescendantFonts"])
        ):
            d_font = cast(DictionaryObject, d_font.get_object())
            cast(ArrayObject, pdf_font_dict["/DescendantFonts"])[d_font_idx] = d_font
            cls._collect_cid_character_widths(
                d_font, char_map, font_kwargs["character_widths"]
            )
            if "/DW" in d_font:
                font_kwargs["character_widths"]["default"] = d_font["/DW"].get_object()
            else:
                cls._add_default_width(font_kwargs["character_widths"])
            font_kwargs = cls._parse_font_descriptor(
                font_kwargs, d_font.get("/FontDescriptor", DictionaryObject())
            )

        return cls(**font_kwargs)


@dataclass
class Font:
    """
    A font object for use during text extraction and for producing
    text appearance streams.

    Attributes:
        name: Font name, derived from font["/BaseFont"]
        character_map: The font's character map
        encoding: Font encoding
        sub_type: The font type, such as Type1, TrueType, or Type3.
        font_descriptor: Font metrics, including a mapping of characters to widths
        character_widths: A mapping of characters to widths
        space_width: The width of a space, or an approximation
        interpretable: Default True. If False, the font glyphs cannot
            be translated to characters, e.g. Type3 fonts that do not define
            a '/ToUnicode' mapping.

    """

    name: str
    encoding: Union[str, dict[int, str]]
    character_map: dict[Any, Any] = field(default_factory=dict)
    sub_type: str = "Unknown"
    font_descriptor: FontDescriptor = field(default_factory=FontDescriptor)
    character_widths: dict[str, int] = field(default_factory=dict)
    space_width: Union[float, int] = 250
    interpretable: bool = True

    @classmethod
    def from_font_resource(
        cls,
        pdf_font_dict: DictionaryObject,
    ) -> "Font":
        # Can collect base_font, name and encoding directly from font resource
        name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/")
        sub_type = pdf_font_dict.get("/Subtype", "Unknown").removeprefix("/")
        encoding, character_map = get_encoding(pdf_font_dict)

        # Type3 fonts that do not specify a "/ToUnicode" mapping cannot be
        # reliably converted into character codes unless all named chars
        # in /CharProcs map to a standard adobe glyph. See §9.10.2 of the
        # PDF 1.7 standard.
        interpretable = True
        if sub_type == "Type3" and "/ToUnicode" not in pdf_font_dict:
            interpretable = all(
                cname in adobe_glyphs
                for cname in pdf_font_dict.get("/CharProcs") or []
            )

        if interpretable:
            font_descriptor = FontDescriptor.from_font_resource(pdf_font_dict, encoding, character_map)
        else:
            font_descriptor = FontDescriptor()  # Save some overhead if font is not interpretable
        character_widths = font_descriptor.character_widths

        space_width = font_descriptor.character_widths.get(" ")
        if not space_width or space_width == 0:
            space_width = font_descriptor.character_widths["default"] // 2

        return cls(
            name=name,
            sub_type=sub_type,
            encoding=encoding,
            font_descriptor=font_descriptor,
            character_map=character_map,
            character_widths=character_widths,
            space_width=space_width,
            interpretable=interpretable
        )

    def text_width(self, text: str = "") -> float:
        """Sum of character widths specified in PDF font for the supplied text."""
        return sum(
            [self.character_widths.get(char, self.character_widths["default"]) for char in text], 0.0
        )
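
# Editor's note: a hedged usage sketch, not part of the vendored code. The
# values in character_widths are glyph-space units (1/1000 of text space for
# non-Type3 fonts), so a plausible conversion to a rendered width at a given
# font size is:
#
#     def rendered_width(font: Font, text: str, font_size: float) -> float:
#         # hypothetical helper, not defined in this module
#         return font.text_width(text) * font_size / 1000.0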
2353  venv/lib/python3.12/site-packages/pypdf/_page.py  Normal file (diff suppressed because it is too large)
289   venv/lib/python3.12/site-packages/pypdf/_page_labels.py  Normal file
@@ -0,0 +1,289 @@
"""
Page labels are shown by PDF viewers as "the page number".

A page has a numeric index, starting at 0. Additionally, the page
has a label. In the most simple case:

    label = index + 1

However, the title page and the table of contents might have Roman numerals as
page labels. This makes things more complicated.

Example 1
---------

>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
 8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}

Example 2
---------
The following is a document with pages labeled
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...

1 0 obj
<< /Type /Catalog
   /PageLabels << /Nums [
                    0 << /S /r >>
                    4 << /S /D >>
                    7 << /S /D
                         /P ( A- )
                         /St 8
                    >>
                    % A number tree containing
                    % three page label dictionaries
                  ]
               >>
   ...
>>
endobj


§12.4.2 PDF Specification 1.7 and 2.0
=====================================

Entries in a page label dictionary
----------------------------------
The /S key:
D   Decimal Arabic numerals
R   Uppercase Roman numerals
r   Lowercase Roman numerals
A   Uppercase letters (A to Z for the first 26 pages,
    AA to ZZ for the next 26, and so on)
a   Lowercase letters (a to z for the first 26 pages,
    aa to zz for the next 26, and so on)
"""

from collections.abc import Iterator
from typing import Optional, cast

from ._protocols import PdfCommonDocProtocol
from ._utils import logger_warning
from .generic import (
    ArrayObject,
    DictionaryObject,
    NullObject,
    NumberObject,
    is_null_or_none,
)


def number2uppercase_roman_numeral(num: int) -> str:
    roman = [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ]

    def roman_num(num: int) -> Iterator[str]:
        for decimal, roman_repr in roman:
            x, _ = divmod(num, decimal)
            yield roman_repr * x
            num -= decimal * x
            if num <= 0:
                break

    return "".join(list(roman_num(num)))


def number2lowercase_roman_numeral(number: int) -> str:
    return number2uppercase_roman_numeral(number).lower()


def number2uppercase_letter(number: int) -> str:
    if number <= 0:
        raise ValueError("Expecting a positive number")
    alphabet = [chr(i) for i in range(ord("A"), ord("Z") + 1)]
    rep = ""
    while number > 0:
        remainder = number % 26
        if remainder == 0:
            remainder = 26
        rep = alphabet[remainder - 1] + rep
        # Move on to the next base-26 "digit": subtract the consumed remainder, then divide.
        number -= remainder
        number = number // 26
    return rep


def number2lowercase_letter(number: int) -> str:
    return number2uppercase_letter(number).lower()
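
# Editor's note: illustrative sanity checks of the label-style helpers above
# (not part of the vendored code). The letter styles are bijective base-26,
# so 26 -> "Z" rolls over to 27 -> "AA" rather than a two-symbol zero form.
assert number2uppercase_roman_numeral(4) == "IV"
assert number2uppercase_roman_numeral(1994) == "MCMXCIV"
assert number2lowercase_roman_numeral(9) == "ix"
assert number2uppercase_letter(26) == "Z"
assert number2uppercase_letter(27) == "AA"
assert number2lowercase_letter(28) == "ab"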
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    # [Nums] shall be an array of the form
    #   [ key_1 value_1 key_2 value_2 ... key_n value_n ]
    # where each key_i is an integer and the corresponding
    # value_i shall be the object associated with that key.
    # The keys shall be sorted in numerical order,
    # analogously to the arrangement of keys in a name tree
    # as described in 7.9.6, "Name Trees."
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    i = 0
    value = None
    start_index = 0
    while i < len(nums):
        start_index = nums[i]
        value = nums[i + 1].get_object()
        if i + 2 == len(nums):
            break
        if nums[i + 2] > index:
            break
        i += 2
    m = {
        None: lambda _: "",
        "/D": lambda n: str(n),
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(value, dict):
        return str(index + 1)  # Fallback
    start = value.get("/St", 1)
    prefix = value.get("/P", "")
    return prefix + m[value.get("/S")](index - start_index + start)


def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4".

    """
    root = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in root:
        return str(index + 1)  # Fallback
    number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        return get_label_from_nums(number_tree, index)
    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # Limit maximum depth.
        level = 0
        while level < 100:
            kids = cast(list[DictionaryObject], number_tree["/Kids"])
            for kid in kids:
                # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
                limits = cast(list[int], kid["/Limits"])
                if limits[0] <= index <= limits[1]:
                    if not is_null_or_none(kid.get("/Kids", None)):
                        # Recursive definition.
                        level += 1
                        if level == 100:  # pragma: no cover
                            raise NotImplementedError(
                                "Too deep nesting is not supported."
                            )
                        number_tree = kid
                        # Exit the inner `for` loop and continue at the next level with the
                        # next iteration of the `while` loop.
                        break
                    return get_label_from_nums(kid, index)
            else:
                # When there are no kids, make sure to exit the `while` loop directly
                # and continue with the fallback.
                break

    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree


def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Insert a key, value pair in a Nums array.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    i = len(nums)
    while i != 0 and key <= nums[i - 2]:
        i = i - 2

    if i < len(nums) and key == nums[i]:
        nums[i + 1] = value
    else:
        nums.insert(i, key)
        nums.insert(i + 1, value)


def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Remove all entries in a number tree in a range after an entry.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater than or equal to key")

    i = nums.index(key) + 2
    while i < len(nums) and nums[i] <= page_index_to:
        nums.pop(i)
        nums.pop(i)


def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Return the (key, value) pair of the entry after the given one.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    i = nums.index(key) + 2
    if i < len(nums):
        return (nums[i], nums[i + 1])
    return (None, None)
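
# Editor's note: a hedged usage sketch, not part of the vendored code; the
# reader and file name below are hypothetical.
#
#     from pypdf import PdfReader
#
#     reader = PdfReader("example.pdf")
#     labels = [index2label(reader, i) for i in range(len(reader.pages))]
#     # e.g. ["i", "ii", "iii", "iv", "1", "2", ...]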
86  venv/lib/python3.12/site-packages/pypdf/_protocols.py  Normal file
@@ -0,0 +1,86 @@
"""Helpers for working with PDF types."""

from abc import abstractmethod
from pathlib import Path
from typing import IO, Any, Optional, Protocol, Union

from ._utils import StrByteType, StreamType


class PdfObjectProtocol(Protocol):
    indirect_reference: Any

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[tuple[str, ...], list[str], None] = (),
    ) -> Any:
        ...  # pragma: no cover

    def _reference_clone(self, clone: Any, pdf_dest: Any) -> Any:
        ...  # pragma: no cover

    def get_object(self) -> Optional["PdfObjectProtocol"]:
        ...  # pragma: no cover

    def hash_value(self) -> bytes:
        ...  # pragma: no cover

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        ...  # pragma: no cover


class XmpInformationProtocol(PdfObjectProtocol):
    pass


class PdfCommonDocProtocol(Protocol):
    @property
    def pdf_header(self) -> str:
        ...  # pragma: no cover

    @property
    def pages(self) -> list[Any]:
        ...  # pragma: no cover

    @property
    def root_object(self) -> PdfObjectProtocol:
        ...  # pragma: no cover

    def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
        ...  # pragma: no cover

    @property
    def strict(self) -> bool:
        ...  # pragma: no cover


class PdfReaderProtocol(PdfCommonDocProtocol, Protocol):
    @property
    @abstractmethod
    def xref(self) -> dict[int, dict[int, Any]]:
        ...  # pragma: no cover

    @property
    @abstractmethod
    def trailer(self) -> dict[str, Any]:
        ...  # pragma: no cover


class PdfWriterProtocol(PdfCommonDocProtocol, Protocol):
    _objects: list[Any]
    _id_translated: dict[int, dict[int, int]]

    incremental: bool
    _reader: Any  # PdfReader

    @abstractmethod
    def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
        ...  # pragma: no cover

    @abstractmethod
    def _add_object(self, obj: Any) -> Any:
        ...  # pragma: no cover
1352  venv/lib/python3.12/site-packages/pypdf/_reader.py  Normal file (diff suppressed because it is too large)
@@ -0,0 +1,245 @@
"""
Code related to text extraction.

Some parts are still in _page.py. When in doubt, they stay there.
"""

import math
from typing import Any, Callable, Optional, Union

from .._font import Font
from ..generic import DictionaryObject, TextStringObject, encode_pdfdocencoding

CUSTOM_RTL_MIN: int = -1
CUSTOM_RTL_MAX: int = -1
CUSTOM_RTL_SPECIAL_CHARS: list[int] = []
LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS: int = 5


class OrientationNotFoundError(Exception):
    pass


def set_custom_rtl(
    _min: Union[str, int, None] = None,
    _max: Union[str, int, None] = None,
    specials: Union[str, list[int], None] = None,
) -> tuple[int, int, list[int]]:
    """
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: The new minimum value for the range of custom characters that
            will be written right to left.
            If set to ``None``, the value will not be changed.
            If set to a string, it will be converted to its character code;
            an integer is used directly.
            The default value is -1, which sets no additional range to be converted.
        _max: The new maximum value for the range of custom characters that will
            be written right to left.
            If set to ``None``, the value will not be changed.
            If set to a string, it will be converted to its character code;
            an integer is used directly.
            The default value is -1, which sets no additional range to be converted.
        specials: The new list of special characters to be inserted in the
            current insertion order.
            If set to ``None``, the current value will not be changed.
            If set to a string, it will be converted to a list of character codes.
            The default value is an empty list.

    Returns:
        A tuple containing the new values for ``CUSTOM_RTL_MIN``,
        ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.

    """
    global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
    if isinstance(_min, int):
        CUSTOM_RTL_MIN = _min
    elif isinstance(_min, str):
        CUSTOM_RTL_MIN = ord(_min)
    if isinstance(_max, int):
        CUSTOM_RTL_MAX = _max
    elif isinstance(_max, str):
        CUSTOM_RTL_MAX = ord(_max)
    if isinstance(specials, str):
        CUSTOM_RTL_SPECIAL_CHARS = [ord(x) for x in specials]
    elif isinstance(specials, list):
        CUSTOM_RTL_SPECIAL_CHARS = specials
    return CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
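
# Editor's note: a hedged usage sketch, not part of the vendored code. For
# instance, forcing the digit range to render right-to-left while keeping
# parentheses in their current insertion order:
#
#     set_custom_rtl(_min=ord("0"), _max=ord("9"), specials="()")
#     # -> (48, 57, [40, 41])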

def mult(m: list[float], n: list[float]) -> list[float]:
    return [
        m[0] * n[0] + m[1] * n[2],
        m[0] * n[1] + m[1] * n[3],
        m[2] * n[0] + m[3] * n[2],
        m[2] * n[1] + m[3] * n[3],
        m[4] * n[0] + m[5] * n[2] + n[4],
        m[4] * n[1] + m[5] * n[3] + n[5],
    ]


def orient(m: list[float]) -> int:
    if m[3] > 1e-6:
        return 0
    if m[3] < -1e-6:
        return 180
    if m[1] > 0:
        return 90
    return 270
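
# Editor's note: an illustrative check of the matrix convention used above
# (not part of the vendored code). PDF matrices are the six values
# [a, b, c, d, e, f] of a 3x3 matrix with an implied last column, and
# mult(m, n) composes them in row-vector order ("apply m, then n").
translate = [1.0, 0.0, 0.0, 1.0, 10.0, 20.0]  # shift by (10, 20)
flip_y = [1.0, 0.0, 0.0, -1.0, 0.0, 0.0]      # invert the y axis
assert mult(translate, flip_y) == [1.0, 0.0, 0.0, -1.0, 10.0, -20.0]
assert orient(flip_y) == 180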
def crlf_space_check(
    text: str,
    cmtm_prev: tuple[list[float], list[float]],
    cmtm_matrix: tuple[list[float], list[float]],
    memo_cmtm: tuple[list[float], list[float]],
    font_resource: Optional[DictionaryObject],
    orientations: tuple[int, ...],
    output: str,
    font_size: float,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
    str_widths: float,
    spacewidth: float,
    str_height: float,
) -> tuple[str, str, list[float], list[float]]:
    cm_prev = cmtm_prev[0]
    tm_prev = cmtm_prev[1]
    cm_matrix = cmtm_matrix[0]
    tm_matrix = cmtm_matrix[1]
    memo_cm = memo_cmtm[0]
    memo_tm = memo_cmtm[1]

    m_prev = mult(tm_prev, cm_prev)
    m = mult(tm_matrix, cm_matrix)
    orientation = orient(m)
    delta_x = m[4] - m_prev[4]
    delta_y = m[5] - m_prev[5]
    # Table 108 of the 1.7 reference ("Text positioning operators")
    scale_prev_x = math.sqrt(tm_prev[0]**2 + tm_prev[1]**2)
    scale_prev_y = math.sqrt(tm_prev[2]**2 + tm_prev[3]**2)
    scale_y = math.sqrt(tm_matrix[2]**2 + tm_matrix[3]**2)
    cm_prev = m

    if orientation not in orientations:
        raise OrientationNotFoundError
    if orientation in (0, 180):
        moved_height: float = delta_y
        moved_width: float = delta_x
    elif orientation in (90, 270):
        moved_height = delta_x
        moved_width = delta_y
    try:
        if abs(moved_height) > 0.8 * min(str_height * scale_prev_y, font_size * scale_y):
            if (output + text)[-1] != "\n":
                output += text + "\n"
                if visitor_text is not None:
                    visitor_text(
                        text + "\n",
                        memo_cm,
                        memo_tm,
                        font_resource,
                        font_size,
                    )
                text = ""
        elif (
            (moved_width >= (spacewidth + str_widths) * scale_prev_x)
            and (output + text)[-1] != " "
        ):
            text += " "
    except Exception:
        pass
    tm_prev = tm_matrix.copy()
    cm_prev = cm_matrix.copy()
    return text, output, cm_prev, tm_prev


def get_text_operands(
    operands: list[Union[str, TextStringObject]],
    cm_matrix: list[float],
    tm_matrix: list[float],
    font: Font,
    orientations: tuple[int, ...]
) -> tuple[str, bool]:
    t: str = ""
    is_str_operands = False
    m = mult(tm_matrix, cm_matrix)
    orientation = orient(m)
    if orientation in orientations and len(operands) > 0:
        if isinstance(operands[0], str):
            t = operands[0]
            is_str_operands = True
        else:
            t = ""
            tt: bytes = (
                encode_pdfdocencoding(operands[0])
                if isinstance(operands[0], str)
                else operands[0]
            )
            if isinstance(font.encoding, str):
                try:
                    t = tt.decode(font.encoding, "surrogatepass")  # apply str encoding
                except Exception:
                    # The data does not match the expectation; fall back to the
                    # alternative encoding. Text extraction quality may suffer.
                    t = tt.decode(
                        "utf-16-be" if font.encoding == "charmap" else "charmap",
                        "surrogatepass",
                    )  # apply str encoding
            else:  # apply dict encoding
                t = "".join(
                    [font.encoding[x] if x in font.encoding else bytes((x,)).decode() for x in tt]
                )
    return (t, is_str_operands)


def get_display_str(
    text: str,
    cm_matrix: list[float],
    tm_matrix: list[float],
    font_resource: Optional[DictionaryObject],
    font: Font,
    text_operands: str,
    font_size: float,
    rtl_dir: bool,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]]
) -> tuple[str, bool, float]:
    # "\u0590 - \u08FF \uFB50 - \uFDFF"
    widths: float = 0.0
    for x in [font.character_map.get(x, x) for x in text_operands]:
        # x can be a sequence of bytes; e.g. habibi.pdf
        if len(x) == 1:
            xx = ord(x)
        else:
            xx = 1
        # fmt: off
        if (
            # cases where the current inserting order is kept
            (xx <= 0x2F)                        # punctuations but...
            or 0x3A <= xx <= 0x40               # numbers (x30-39)
            or 0x2000 <= xx <= 0x206F           # upper punctuations..
            or 0x20A0 <= xx <= 0x21FF           # but (numbers) indices/exponents
            or xx in CUSTOM_RTL_SPECIAL_CHARS   # customized....
        ):
            text = x + text if rtl_dir else text + x
        elif (  # right-to-left characters set
            0x0590 <= xx <= 0x08FF
            or 0xFB1D <= xx <= 0xFDFF
            or 0xFE70 <= xx <= 0xFEFF
            or CUSTOM_RTL_MIN <= xx <= CUSTOM_RTL_MAX
        ):
            if not rtl_dir:
                rtl_dir = True
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, font_resource, font_size)
                text = ""
            text = x + text
        else:  # left-to-right
            if rtl_dir:
                rtl_dir = False
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, font_resource, font_size)
                text = ""
            text = text + x
        widths += font.space_width if x == " " else font.text_width(x)
        # fmt: on
    return text, rtl_dir, widths
@@ -0,0 +1,16 @@
"""Layout mode text extraction extension for pypdf"""
from ..._font import Font
from ._fixed_width_page import (
    fixed_char_width,
    fixed_width_page,
    text_show_operations,
    y_coordinate_groups,
)

__all__ = [
    "Font",
    "fixed_char_width",
    "fixed_width_page",
    "text_show_operations",
    "y_coordinate_groups",
]
@@ -0,0 +1,400 @@
"""Extract PDF text preserving the layout of the source PDF"""

from collections.abc import Iterator
from itertools import groupby
from math import ceil
from pathlib import Path
from typing import Any, Literal, Optional, TypedDict

from ..._font import Font
from ..._utils import logger_warning
from .. import LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
from ._text_state_manager import TextStateManager
from ._text_state_params import TextStateParams


class BTGroup(TypedDict):
    """
    Dict describing a line of text rendered within a BT/ET operator pair.
    If multiple text show operations render text on the same line, the text
    will be combined into a single BTGroup dict.

    Keys:
        tx: x coordinate of first character in BTGroup
        ty: y coordinate of first character in BTGroup
        font_size: nominal font size
        font_height: effective font height
        text: rendered text
        displaced_tx: x coordinate of last character in BTGroup
        flip_sort: -1 if page is upside down, else 1
    """

    tx: float
    ty: float
    font_size: float
    font_height: float
    text: str
    displaced_tx: float
    flip_sort: Literal[-1, 1]


def bt_group(tj_op: TextStateParams, rendered_text: str, displaced_tx: float) -> BTGroup:
    """
    BTGroup constructed from a TextStateParams instance, rendered text, and
    displaced tx value.

    Args:
        tj_op (TextStateParams): TextStateParams instance
        rendered_text (str): rendered text
        displaced_tx (float): x coordinate of last character in BTGroup

    """
    return BTGroup(
        tx=tj_op.tx,
        ty=tj_op.ty,
        font_size=tj_op.font_size,
        font_height=tj_op.font_height,
        text=rendered_text,
        displaced_tx=displaced_tx,
        flip_sort=-1 if tj_op.flip_vertical else 1,
    )


def recurs_to_target_op(
    ops: Iterator[tuple[list[Any], bytes]],
    text_state_mgr: TextStateManager,
    end_target: Literal[b"Q", b"ET"],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
) -> tuple[list[BTGroup], list[TextStateParams]]:
    """
    Recurse operators between BT/ET and/or q/Q operators managing the transform
    stack and capturing text positioning and rendering data.

    Args:
        ops: iterator of operators in content stream
        text_state_mgr: a TextStateManager instance
        end_target: Either b"Q" (ends b"q" op) or b"ET" (ends b"BT" op)
        fonts: font dictionary as returned by PageObject._layout_mode_fonts()

    Returns:
        tuple: list of BTGroup dicts + list of TextStateParams dataclass instances.

    """
    # 1 entry per line of text rendered within each BT/ET operation.
    bt_groups: list[BTGroup] = []

    # 1 entry per text show operator (Tj/TJ/'/")
    tj_ops: list[TextStateParams] = []

    if end_target == b"Q":
        # add new q level. cm's added at this level will be popped at next b'Q'
        text_state_mgr.add_q()

    for operands, op in ops:
        # The loop is broken by the end target, or exits normally when there are no more ops.
        if op == end_target:
            if op == b"Q":
                text_state_mgr.remove_q()
            if op == b"ET":
                if not tj_ops:
                    return bt_groups, tj_ops
                _text = ""
                bt_idx = 0  # idx of first tj in this bt group
                last_displaced_tx = tj_ops[bt_idx].displaced_tx
                last_ty = tj_ops[bt_idx].ty
                for _idx, _tj in enumerate(
                    tj_ops
                ):  # ... build text from new Tj operators
                    if strip_rotated and _tj.rotated:
                        continue
                    if not _tj.font.interpretable:  # generates warning
                        continue
                    # if the y position of the text is greater than the font height, assume
                    # the text is on a new line and start a new group
                    if abs(_tj.ty - last_ty) > _tj.font_height:
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        _text = ""

                    # if the x position of the text is less than the last x position by
                    # more than 5 space widths, assume the text order should be flipped
                    # and start a new group
                    if (
                        last_displaced_tx - _tj.tx
                        > _tj.space_tx * LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
                    ):
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        last_displaced_tx = _tj.displaced_tx
                        _text = ""

                    # calculate excess x translation based on ending tx of previous Tj.
                    # multiply by bool (_idx != bt_idx) to ensure spaces aren't double
                    # applied to the first tj of a BTGroup in fixed_width_page().
                    excess_tx = round(_tj.tx - last_displaced_tx, 3) * (_idx != bt_idx)
                    # space_tx could be 0 if either Tz or font_size was 0 for this _tj.
                    spaces = int(excess_tx // _tj.space_tx) if _tj.space_tx else 0
                    new_text = f'{" " * spaces}{_tj.txt}'

                    last_ty = _tj.ty
                    _text = f"{_text}{new_text}"
                    last_displaced_tx = _tj.displaced_tx
                if _text:
                    bt_groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
                text_state_mgr.reset_tm()
            break
        if op == b"q":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"cm":
            text_state_mgr.add_cm(*operands)
        elif op == b"BT":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"ET", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tj":
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b"TJ":
            _tj = text_state_mgr.text_state_params()
            for tj_op in operands[0]:
                if isinstance(tj_op, bytes):
                    _tj = text_state_mgr.text_state_params(tj_op)
                    tj_ops.append(_tj)
                else:
                    text_state_mgr.add_trm(_tj.displacement_matrix(td_offset=tj_op))
        elif op == b"'":
            text_state_mgr.reset_trm()
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b'"':
            text_state_mgr.reset_trm()
            text_state_mgr.set_state_param(b"Tw", operands[0])
            text_state_mgr.set_state_param(b"Tc", operands[1])
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[2]))
        elif op in (b"Td", b"Tm", b"TD", b"T*"):
            text_state_mgr.reset_trm()
            if op == b"Tm":
                text_state_mgr.reset_tm()
            elif op == b"TD":
                text_state_mgr.set_state_param(b"TL", -operands[1])
            elif op == b"T*":
                operands = [0, -text_state_mgr.TL]
            text_state_mgr.add_tm(operands)
        elif op == b"Tf":
            text_state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # handle Tc, Tw, Tz, TL, and Ts operators
            text_state_mgr.set_state_param(op, operands)
    else:
        logger_warning(
            f"Unbalanced target operations, expected {end_target!r}.",
            __name__,
        )
    return bt_groups, tj_ops


def y_coordinate_groups(
    bt_groups: list[BTGroup], debug_path: Optional[Path] = None
) -> dict[int, list[BTGroup]]:
    """
    Group text operations by rendered y coordinate, i.e. the line number.

    Args:
        bt_groups: list of dicts as returned by text_show_operations()
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        Dict[int, List[BTGroup]]: dict of lists of text rendered by each BT operator
            keyed by y coordinate

    """
    ty_groups = {
        ty: sorted(grp, key=lambda x: x["tx"])
        for ty, grp in groupby(
            bt_groups, key=lambda bt_grp: int(bt_grp["ty"] * bt_grp["flip_sort"])
        )
    }
    # combine groups whose y coordinates differ by less than the effective font height
    # (accounts for mixed fonts and other minor oddities)
    last_ty = next(iter(ty_groups))
    last_txs = {int(_t["tx"]) for _t in ty_groups[last_ty] if _t["text"].strip()}
    for ty in list(ty_groups)[1:]:
        fsz = min(ty_groups[_y][0]["font_height"] for _y in (ty, last_ty))
        txs = {int(_t["tx"]) for _t in ty_groups[ty] if _t["text"].strip()}
        # prevent merge if both groups are rendering in the same x position.
        no_text_overlap = not (txs & last_txs)
        offset_less_than_font_height = abs(ty - last_ty) < fsz
        if no_text_overlap and offset_less_than_font_height:
            ty_groups[last_ty] = sorted(
                ty_groups.pop(ty) + ty_groups[last_ty], key=lambda x: x["tx"]
            )
            last_txs |= txs
        else:
            last_ty = ty
            last_txs = txs
    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        debug_path.joinpath("bt_groups.json").write_text(
            json.dumps(ty_groups, indent=2, default=str), "utf-8"
        )
    return ty_groups
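
# Editor's note: an illustrative check, not part of the vendored code. Plain
# dicts stand in for BTGroup entries (TypedDicts are ordinary dicts at
# runtime). Both rows share int(ty) == 100, so they merge onto one output
# line, ordered left to right by tx.
_rows = [
    {"tx": 10.0, "ty": 100.4, "font_size": 11.0, "font_height": 11.0,
     "text": "world", "displaced_tx": 40.0, "flip_sort": 1},
    {"tx": 0.0, "ty": 100.2, "font_size": 11.0, "font_height": 11.0,
     "text": "hello", "displaced_tx": 9.0, "flip_sort": 1},
]
assert [g["text"] for g in y_coordinate_groups(_rows)[100]] == ["hello", "world"]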

def text_show_operations(
    ops: Iterator[tuple[list[Any], bytes]],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
    debug_path: Optional[Path] = None,
) -> list[BTGroup]:
    """
    Extract text from BT/ET operator pairs.

    Args:
        ops (Iterator[Tuple[List, bytes]]): iterator of operators in content stream
        fonts (Dict[str, Font]): font dictionary
        strip_rotated: Removes text if rotated with respect to the page. Defaults to True.
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        List[BTGroup]: list of dicts of text rendered by each BT operator

    """
    state_mgr = TextStateManager()  # transformation stack manager
    bt_groups: list[BTGroup] = []  # BT operator dict
    tj_ops: list[TextStateParams] = []  # Tj/TJ operator data
    for operands, op in ops:
        if op in (b"BT", b"q"):
            bts, tjs = recurs_to_target_op(
                ops, state_mgr, b"ET" if op == b"BT" else b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tf":
            state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # set Tc, Tw, Tz, TL, and Ts if required. ignores all other ops
            state_mgr.set_state_param(op, operands)

    if any(tj.rotated for tj in tj_ops):
        if strip_rotated:
            logger_warning(
                "Rotated text discovered. Output will be incomplete.", __name__
            )
        else:
            logger_warning(
                "Rotated text discovered. Layout will be degraded.", __name__
            )
    if not all(tj.font.interpretable for tj in tj_ops):
        logger_warning(
            "PDF contains an uninterpretable font. Output will be incomplete.", __name__
        )

    # left align the data, i.e. decrement all tx values by min(tx)
    min_x = min((x["tx"] for x in bt_groups), default=0.0)
    bt_groups = [
        dict(ogrp, tx=ogrp["tx"] - min_x, displaced_tx=ogrp["displaced_tx"] - min_x)  # type: ignore[misc]
        for ogrp in sorted(
            bt_groups, key=lambda x: (x["ty"] * x["flip_sort"], -x["tx"]), reverse=True
        )
    ]

    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        debug_path.joinpath("bts.json").write_text(
            json.dumps(bt_groups, indent=2, default=str), "utf-8"
        )
        debug_path.joinpath("tjs.json").write_text(
            json.dumps(
                tj_ops, indent=2, default=lambda x: getattr(x, "to_dict", str)(x)
            ),
            "utf-8",
        )
    return bt_groups


def fixed_char_width(bt_groups: list[BTGroup], scale_weight: float = 1.25) -> float:
    """
    Calculate average character width weighted by the length of the rendered
    text in each sample for conversion to fixed-width layout.

    Args:
        bt_groups (List[BTGroup]): List of dicts of text rendered by each
            BT operator

    Returns:
        float: fixed character width

    """
    char_widths = []
    for _bt in bt_groups:
        _len = len(_bt["text"]) * scale_weight
        char_widths.append(((_bt["displaced_tx"] - _bt["tx"]) / _len, _len))
    return sum(_w * _l for _w, _l in char_widths) / sum(_l for _, _l in char_widths)
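
# Editor's note: illustrative arithmetic, not part of the vendored code. With
# the default scale_weight of 1.25 the formula reduces to
# sum(displaced_tx - tx) / (1.25 * total characters): the average advance per
# character, deliberately shrunk so columns do not collide on the fixed grid.
# Plain dicts stand in for BTGroup entries (TypedDicts are dicts at runtime).
_samples = [
    {"text": "Hello", "tx": 0.0, "displaced_tx": 25.0},  # 5 chars over 25 units
    {"text": "Hi", "tx": 0.0, "displaced_tx": 10.0},     # 2 chars over 10 units
]
# raw average advance: (25 + 10) / 7 = 5.0; weighted result: 5.0 / 1.25 = 4.0
assert fixed_char_width(_samples) == 4.0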
def fixed_width_page(
    ty_groups: dict[int, list[BTGroup]], char_width: float, space_vertically: bool, font_height_weight: float
) -> str:
    """
    Generate page text from text operations grouped by rendered y coordinate.

    Args:
        ty_groups: dict of text show ops as returned by y_coordinate_groups()
        char_width: fixed character width
        space_vertically: include blank lines inferred from y distance + font height.
        font_height_weight: multiplier for font height when calculating blank lines.

    Returns:
        str: page text in a fixed width format that closely adheres to the rendered
            layout in the source pdf.

    """
    lines: list[str] = []
    last_y_coord = 0
    table = str.maketrans(dict.fromkeys(range(14, 32), " "))
    for y_coord, line_data in ty_groups.items():
        if space_vertically and lines:
            fh = line_data[0]["font_height"]
            blank_lines = 0 if fh == 0 else (
                int(abs(y_coord - last_y_coord) / (fh * font_height_weight)) - 1
            )
            lines.extend([""] * blank_lines)

        line_parts = []  # Build the line in a list to avoid repeated string concatenation.
        current_len = 0  # Track the current length as an int rather than recomputing len().
        last_disp = 0.0
        for bt_op in line_data:
            tx = bt_op["tx"]
            offset = int(tx // char_width)
            needed_spaces = offset - current_len
            if needed_spaces > 0 and ceil(last_disp) < int(tx):
                padding = " " * needed_spaces
                line_parts.append(padding)
                current_len += needed_spaces

            raw_text = bt_op["text"]
            text = raw_text.translate(table)
            line_parts.append(text)
            current_len += len(text)
            last_disp = bt_op["displaced_tx"]

        full_line = "".join(line_parts).rstrip()
        if full_line.strip() or (space_vertically and lines):
            lines.append(full_line)

        last_y_coord = y_coord

    return "\n".join(lines)
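
# Editor's note: a hedged usage sketch, not part of the vendored code. In
# recent pypdf releases this module backs the "layout" extraction mode; the
# exact call below is an assumption and may differ between versions.
#
#     from pypdf import PdfReader
#
#     reader = PdfReader("example.pdf")  # hypothetical input file
#     print(reader.pages[0].extract_text(extraction_mode="layout"))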
@@ -0,0 +1,221 @@
"""Manage the PDF transform stack during "layout" mode text extraction"""

from collections import ChainMap, Counter
from collections import ChainMap as ChainMapType
from collections import Counter as CounterType
from collections.abc import MutableMapping
from typing import Any, Union

from ..._font import Font
from ...errors import PdfReadError
from .. import mult
from ._text_state_params import TextStateParams

TextStateManagerChainMapType = ChainMapType[Union[int, str], Union[float, bool]]
TextStateManagerDictType = MutableMapping[Union[int, str], Union[float, bool]]


class TextStateManager:
    """
    Tracks the current text state including cm/tm/trm transformation matrices.

    Attributes:
        transform_stack (ChainMap): ChainMap of cm/tm transformation matrices
        q_queue (Counter[int]): Counter of q operators
        q_depth (List[int]): list of q operator nesting levels
        Tc (float): character spacing
        Tw (float): word spacing
        Tz (int): horizontal scaling
        TL (float): leading
        Ts (float): text rise
        font (Font): font object
        font_size (int | float): font size

    """

    def __init__(self) -> None:
        self.transform_stack: TextStateManagerChainMapType = ChainMap(
            self.new_transform()
        )
        self.q_queue: CounterType[int] = Counter()
        self.q_depth = [0]
        self.Tc: float = 0.0
        self.Tw: float = 0.0
        self.Tz: float = 100.0
        self.TL: float = 0.0
        self.Ts: float = 0.0
        self.font_stack: list[tuple[Union[Font, None], Union[int, float]]] = []
        self.font: Union[Font, None] = None
        self.font_size: Union[int, float] = 0

    def set_state_param(self, op: bytes, value: Union[float, list[Any]]) -> None:
        """
        Set a text state parameter. Supports Tc, Tz, Tw, TL, and Ts operators.

        Args:
            op: operator read from PDF stream as bytes. No action is taken
                for unsupported operators (see supported operators above).
            value (float | List[Any]): new parameter value. If a list,
                value[0] is used.

        """
        if op not in [b"Tc", b"Tz", b"Tw", b"TL", b"Ts"]:
            return
        self.__setattr__(op.decode(), value[0] if isinstance(value, list) else value)

    def set_font(self, font: Font, size: float) -> None:
        """
        Set the current font and font_size.

        Args:
            font (Font): a layout mode Font
            size (float): font size

        """
        self.font = font
        self.font_size = size

    def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams:
        """
        Create a TextStateParams instance to display a text string. Type[bytes] values
        will be decoded implicitly.

        Args:
            value (str | bytes): text to associate with the captured state.

        Raises:
            PdfReadError: if font not set (no Tf operator in incoming pdf content stream)

        Returns:
            TextStateParams: current text state parameters

        """
        if not isinstance(self.font, Font):
            raise PdfReadError(
                "font not set: is PDF missing a Tf operator?"
            )  # pragma: no cover
        if isinstance(value, bytes):
            try:
                if isinstance(self.font.encoding, str):
                    txt = value.decode(self.font.encoding, "surrogatepass")
                else:
                    txt = "".join(
                        self.font.encoding[x]
                        if x in self.font.encoding
                        else bytes((x,)).decode()
                        for x in value
                    )
            except (UnicodeEncodeError, UnicodeDecodeError):
                txt = value.decode("utf-8", "replace")
            txt = "".join(
                self.font.character_map.get(x, x) for x in txt
            )
        else:
            txt = value
        return TextStateParams(
            txt,
            self.font,
            self.font_size,
            self.Tc,
            self.Tw,
            self.Tz,
            self.TL,
            self.Ts,
            self.effective_transform,
        )

    @staticmethod
    def raw_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
    ) -> dict[int, float]:
        """Only a/b/c/d/e/f matrix params"""
        return dict(zip(range(6), map(float, (_a, _b, _c, _d, _e, _f))))

    @staticmethod
    def new_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
        is_text: bool = False,
        is_render: bool = False,
    ) -> TextStateManagerDictType:
        """Standard a/b/c/d/e/f matrix params + 'is_text' and 'is_render' keys"""
        result: Any = TextStateManager.raw_transform(_a, _b, _c, _d, _e, _f)
        result.update({"is_text": is_text, "is_render": is_render})
        return result

    def reset_tm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_text==True or is_render==True"""
        while (
            self.transform_stack.maps[0]["is_text"]
            or self.transform_stack.maps[0]["is_render"]
        ):
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack

    def reset_trm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_render==True"""
        while self.transform_stack.maps[0]["is_render"]:
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack

    def remove_q(self) -> TextStateManagerChainMapType:
        """Rewind the stack to its prior state after closing a 'q' that contained 'cm' ops"""
        self.font, self.font_size = self.font_stack.pop(-1)
        self.transform_stack = self.reset_tm()
        self.transform_stack.maps = self.transform_stack.maps[
            self.q_queue.pop(self.q_depth.pop(), 0) :
        ]
        return self.transform_stack

    def add_q(self) -> None:
        """Add another level to q_queue"""
        self.font_stack.append((self.font, self.font_size))
        self.q_depth.append(len(self.q_depth))

    def add_cm(self, *args: Any) -> TextStateManagerChainMapType:
        """Concatenate an additional transform matrix"""
        self.transform_stack = self.reset_tm()
        self.q_queue.update(self.q_depth[-1:])
        self.transform_stack = self.transform_stack.new_child(self.new_transform(*args))
        return self.transform_stack

    def _complete_matrix(self, operands: list[float]) -> list[float]:
        """Adds a, b, c, and d to an "e/f only" operand set (e.g. Td)"""
        if len(operands) == 2:  # this is a Td operator or equivalent
            operands = [1.0, 0.0, 0.0, 1.0, *operands]
        return operands

    def add_tm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    def add_trm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text rendering transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True, is_render=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    @property
    def effective_transform(self) -> list[float]:
        """Current effective transform accounting for cm, tm, and trm transforms"""
        eff_transform = [*self.transform_stack.maps[0].values()]
        for transform in self.transform_stack.maps[1:]:
            eff_transform = mult(eff_transform, transform)  # type: ignore[arg-type]  # dict has int keys 0-5
        return eff_transform
@@ -0,0 +1,135 @@
"""A dataclass that captures the CTM and Text State for a tj operation"""

import math
from dataclasses import dataclass, field
from typing import Any, Union

from ..._font import Font
from .. import mult, orient


@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.

    Attributes:
        txt (str): the text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (List[float]): effective transformation matrix.
        tx (float): x coord of rendered text, i.e. self.transform[4]
        ty (float): y coord of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.

    """

    txt: str
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: list[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)

    def __post_init__(self) -> None:
        if orient(self.transform) in (90, 270):
            self.transform = mult(
                [1.0, -self.transform[1], -self.transform[2], 1.0, 0.0, 0.0],
                self.transform,
            )
            self.rotated = True
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            self.transform = mult([-1.0, 0.0, 0.0, -1.0, 0.0, 0.0], self.transform)
            self.rotated = True
        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]
        self.space_tx = round(self.word_tx(" "), 3)
        if self.space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a td_offset of -1 * font.space_width where font.space_width is
            # the space_width calculated in _font.py.
            self.space_tx = round(self.word_tx("", -self.font.space_width), 3)
        self.font_height = self.font_size * math.sqrt(
            self.transform[1] ** 2 + self.transform[3] ** 2
        )
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis

    def font_size_matrix(self) -> list[float]:
        """Font size matrix"""
        return [
            self.font_size * (self.Tz / 100.0),
            0.0,
            0.0,
            self.font_size,
            0.0,
            self.Ts,
        ]

    def displaced_transform(self) -> list[float]:
        """Effective transform matrix after text has been rendered."""
        return mult(self.displacement_matrix(), self.transform)

    def render_transform(self) -> list[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        return mult(self.font_size_matrix(), self.transform)

    def displacement_matrix(
        self, word: Union[str, None] = None, td_offset: float = 0.0
    ) -> list[float]:
        """
        Text displacement matrix

        Args:
            word (str, optional): Defaults to None, in which case self.txt displacement
                is returned.
            td_offset (float, optional): translation applied by TD operator. Defaults to 0.0.

        """
        word = word if word is not None else self.txt
        return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, td_offset), 0.0]

    def word_tx(self, word: str, td_offset: float = 0.0) -> float:
        """Horizontal text displacement for any word according to this text state"""
        width: float = 0.0
        for char in word:
            if char == " ":
                width += self.font.space_width
            else:
                width += self.font.text_width(char)
        return (
            (self.font_size * ((width - td_offset) / 1000.0))
            + self.Tc
            + word.count(" ") * self.Tw
        ) * (self.Tz / 100.0)

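    # Worked example (added for clarity; not part of the upstream file),
    # assuming a font where every glyph is 500/1000 units wide:
    #   word_tx("ab") with font_size=12, Tc=0, Tw=0, Tz=100
    #     width = 1000 -> (12 * (1000 / 1000.0) + 0 + 0) * 1.0 == 12.0
    # i.e. the PDF displacement formula ((w - td_offset)/1000 * Tfs + Tc
    # + n_spaces * Tw) * Tz/100, with glyph widths in 1/1000 text-space units.
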
    @staticmethod
    def to_dict(inst: "TextStateParams") -> dict[str, Any]:
        """Dataclass to dict for json.dumps serialization"""
        return {k: getattr(inst, k) for k in inst.__dataclass_fields__ if k != "font"}
@@ -0,0 +1,351 @@
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import math
from typing import Any, Callable, Optional, Union

from .._font import Font, FontDescriptor
from ..generic import DictionaryObject, TextStringObject
from . import OrientationNotFoundError, crlf_space_check, get_display_str, get_text_operands, mult


class TextExtraction:
    """
    A class to handle PDF text extraction operations.

    This class encapsulates all the state and operations needed for extracting
    text from PDF content streams, replacing the nested functions and nonlocal
    variables in the original implementation.
    """

    def __init__(self) -> None:
        self._font_width_maps: dict[str, tuple[dict[Any, float], str, float]] = {}

        # Text extraction state variables
        self.cm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.cm_stack: list[
            tuple[
                list[float],
                Optional[DictionaryObject],
                Font,
                float,
                float,
                float,
                float,
            ]
        ] = []

        # Store the last modified matrices; can be an intermediate position
        self.cm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        # Store the position at the beginning of building the text
        self.memo_cm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.memo_tm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        self.char_scale = 1.0
        self.space_scale = 1.0
        self._space_width: float = 500.0  # will be set correctly at first Tf
        self._actual_str_size: dict[str, float] = {
            "str_widths": 0.0,
            "str_height": 0.0,
        }  # will be set to string length calculation result
        self.TL = 0.0
        self.font_size = 12.0  # init just in case

        # Text extraction variables
        self.text: str = ""
        self.output: str = ""
        self.rtl_dir: bool = False  # right-to-left
        self.font_resource: Optional[DictionaryObject] = None
        self.font = Font(
            name="NotInitialized",
            sub_type="Unknown",
            encoding="charmap",
            font_descriptor=FontDescriptor(),
        )
        self.orientations: tuple[int, ...] = (0, 90, 180, 270)
        self.visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None
        self.font_resources: dict[str, DictionaryObject] = {}
        self.fonts: dict[str, Font] = {}

        self.operation_handlers = {
            b"BT": self._handle_bt,
            b"ET": self._handle_et,
            b"q": self._handle_save_graphics_state,
            b"Q": self._handle_restore_graphics_state,
            b"cm": self._handle_cm,
            b"Tz": self._handle_tz,
            b"Tw": self._handle_tw,
            b"TL": self._handle_tl,
            b"Tf": self._handle_tf,
            b"Td": self._handle_td,
            b"Tm": self._handle_tm,
            b"T*": self._handle_t_star,
            b"Tj": self._handle_tj_operation,
        }

    def initialize_extraction(
        self,
        orientations: tuple[int, ...] = (0, 90, 180, 270),
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
        font_resources: Optional[dict[str, DictionaryObject]] = None,
        fonts: Optional[dict[str, Font]] = None,
    ) -> None:
        """Initialize the extractor with extraction parameters."""
        self.orientations = orientations
        self.visitor_text = visitor_text
        self.font_resources = font_resources or {}
        self.fonts = fonts or {}

        # Reset state
        self.text = ""
        self.output = ""
        self.rtl_dir = False

    def compute_str_widths(self, str_widths: float) -> float:
        return str_widths / 1000

    def process_operation(self, operator: bytes, operands: list[Any]) -> None:
        if operator in self.operation_handlers:
            handler = self.operation_handlers[operator]
            str_widths = handler(operands)

            # Post-process operations that affect text positioning
            if operator in {b"Td", b"Tm", b"T*", b"Tj"}:
                self._post_process_text_operation(str_widths or 0.0)

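    # Illustrative sketch (added for clarity; not part of the upstream file):
    # the operator table above turns the content-stream loop into a dispatch:
    #   extractor = TextExtraction()
    #   extractor.initialize_extraction()
    #   extractor.process_operation(b"BT", [])           # -> _handle_bt
    #   extractor.process_operation(b"Td", [10.0, 0.0])  # -> _handle_td + post-processing
    # Operators absent from operation_handlers are simply ignored.
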
    def _post_process_text_operation(self, str_widths: float) -> None:
        """Handle common post-processing for text positioning operations."""
        try:
            self.text, self.output, self.cm_prev, self.tm_prev = crlf_space_check(
                self.text,
                (self.cm_prev, self.tm_prev),
                (self.cm_matrix, self.tm_matrix),
                (self.memo_cm, self.memo_tm),
                self.font_resource,
                self.orientations,
                self.output,
                self.font_size,
                self.visitor_text,
                str_widths,
                self.compute_str_widths(self.font_size * self._space_width),
                self._actual_str_size["str_height"],
            )
            if self.text == "":
                self.memo_cm = self.cm_matrix.copy()
                self.memo_tm = self.tm_matrix.copy()
        except OrientationNotFoundError:
            pass

    def _handle_tj(
        self,
        text: str,
        operands: list[Union[str, TextStringObject]],
        cm_matrix: list[float],
        tm_matrix: list[float],
        font_resource: Optional[DictionaryObject],
        font: Font,
        orientations: tuple[int, ...],
        font_size: float,
        rtl_dir: bool,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
        actual_str_size: dict[str, float],
    ) -> tuple[str, bool, dict[str, float]]:
        text_operands, is_str_operands = get_text_operands(
            operands, cm_matrix, tm_matrix, font, orientations
        )
        if is_str_operands:
            text += text_operands
            font_widths = sum(
                font.space_width if x == " " else font.text_width(x)
                for x in text_operands
            )
        else:
            text, rtl_dir, font_widths = get_display_str(
                text,
                cm_matrix,
                tm_matrix,  # text matrix
                font_resource,
                font,
                text_operands,
                font_size,
                rtl_dir,
                visitor_text,
            )
        actual_str_size["str_widths"] += font_widths * font_size
        actual_str_size["str_height"] = font_size
        return text, rtl_dir, actual_str_size

    def _flush_text(self) -> None:
        """Flush accumulated text to output and call visitor if present."""
        self.output += self.text
        if self.visitor_text is not None:
            self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
        self.text = ""
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    # Operation handlers

    def _handle_bt(self, operands: list[Any]) -> None:
        """Handle BT (Begin Text) operation - Table 5.4 page 405."""
        self.tm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self._flush_text()

    def _handle_et(self, operands: list[Any]) -> None:
        """Handle ET (End Text) operation - Table 5.4 page 405."""
        self._flush_text()

    def _handle_save_graphics_state(self, operands: list[Any]) -> None:
        """Handle q (Save graphics state) operation - Table 4.7 page 219."""
        self.cm_stack.append(
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            )
        )

    def _handle_restore_graphics_state(self, operands: list[Any]) -> None:
        """Handle Q (Restore graphics state) operation - Table 4.7 page 219."""
        try:
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            ) = self.cm_stack.pop()
        except Exception:
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

    def _handle_cm(self, operands: list[Any]) -> None:
        """Handle cm (Modify current matrix) operation - Table 4.7 page 219."""
        self.output += self.text
        if self.visitor_text is not None:
            self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
        self.text = ""
        try:
            self.cm_matrix = mult([float(operand) for operand in operands[:6]], self.cm_matrix)
        except Exception:
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    def _handle_tz(self, operands: list[Any]) -> None:
        """Handle Tz (Set horizontal text scaling) operation - Table 5.2 page 398."""
        self.char_scale = float(operands[0]) / 100 if operands else 1.0

    def _handle_tw(self, operands: list[Any]) -> None:
        """Handle Tw (Set word spacing) operation - Table 5.2 page 398."""
        self.space_scale = 1.0 + float(operands[0] if operands else 0.0)

    def _handle_tl(self, operands: list[Any]) -> None:
        """Handle TL (Set Text Leading) operation - Table 5.2 page 398."""
        scale_x = math.sqrt(self.tm_matrix[0] ** 2 + self.tm_matrix[2] ** 2)
        self.TL = float(operands[0] if operands else 0.0) * self.font_size * scale_x

    def _handle_tf(self, operands: list[Any]) -> None:
        """Handle Tf (Set font and size) operation - Table 5.2 page 398."""
        if self.text != "":
            self.output += self.text  # .translate(cmap)
            if self.visitor_text is not None:
                self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
            self.text = ""
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()
        try:
            self.font_resource = self.font_resources[operands[0]]
            self.font = self.fonts[operands[0]]
        except KeyError:  # font not found
            self.font_resource = None
            font_descriptor = FontDescriptor()
            self.font = Font(
                "Unknown",
                space_width=250,
                encoding=dict.fromkeys(range(256), "\ufffd"),
                font_descriptor=font_descriptor,
                character_map={},
                character_widths=font_descriptor.character_widths,
            )

        self._space_width = self.font.space_width / 2  # Actually the width of _half_ a space...
        try:
            self.font_size = float(operands[1])
        except Exception:
            pass  # keep previous size

    def _handle_td(self, operands: list[Any]) -> float:
        """Handle Td (Move text position) operation - Table 5.5 page 406."""
        # A special case is a translating-only tm:
        #   tm = [1, 0, 0, 1, e, f]
        # i.e. tm[4] += tx, tm[5] += ty.
        tx, ty = float(operands[0]), float(operands[1])
        self.tm_matrix[4] += tx * self.tm_matrix[0] + ty * self.tm_matrix[2]
        self.tm_matrix[5] += tx * self.tm_matrix[1] + ty * self.tm_matrix[3]
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_tm(self, operands: list[Any]) -> float:
        """Handle Tm (Set text matrix) operation - Table 5.5 page 406."""
        self.tm_matrix = [float(operand) for operand in operands[:6]]
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_t_star(self, operands: list[Any]) -> float:
        """Handle T* (Move to next line) operation - Table 5.5 page 406."""
        self.tm_matrix[4] -= self.TL * self.tm_matrix[2]
        self.tm_matrix[5] -= self.TL * self.tm_matrix[3]
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_tj_operation(self, operands: list[Any]) -> float:
        """Handle Tj (Show text) operation - Table 5.5 page 406."""
        self.text, self.rtl_dir, self._actual_str_size = self._handle_tj(
            self.text,
            operands,
            self.cm_matrix,
            self.tm_matrix,
            self.font_resource,
            self.font,
            self.orientations,
            self.font_size,
            self.rtl_dir,
            self.visitor_text,
            self._actual_str_size,
        )
        return 0.0  # str_widths will be handled in post-processing
631
venv/lib/python3.12/site-packages/pypdf/_utils.py
Normal file
@@ -0,0 +1,631 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Utility functions for PDF library."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import functools
import logging
import re
import sys
import warnings
from dataclasses import dataclass
from datetime import datetime, timezone
from io import DEFAULT_BUFFER_SIZE
from os import SEEK_CUR
from re import Pattern
from typing import (
    IO,
    Any,
    Optional,
    Union,
    overload,
)

if sys.version_info[:2] >= (3, 10):
    # Python 3.10+: https://www.python.org/dev/peps/pep-0484/
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

from .errors import (
    STREAM_TRUNCATED_PREMATURELY,
    DeprecationError,
    PdfStreamError,
)

TransformationMatrixType: TypeAlias = tuple[
    tuple[float, float, float], tuple[float, float, float], tuple[float, float, float]
]
CompressedTransformationMatrix: TypeAlias = tuple[
    float, float, float, float, float, float
]

StreamType = IO[Any]
StrByteType = Union[str, StreamType]


def parse_iso8824_date(text: Optional[str]) -> Optional[datetime]:
    orgtext = text
    if not text:
        return None
    if text[0].isdigit():
        text = "D:" + text
    if text.endswith(("Z", "z")):
        text += "0000"
    text = text.replace("z", "+").replace("Z", "+").replace("'", "")
    i = max(text.find("+"), text.find("-"))
    if i > 0 and i != len(text) - 5:
        text += "00"
    for f in (
        "D:%Y",
        "D:%Y%m",
        "D:%Y%m%d",
        "D:%Y%m%d%H",
        "D:%Y%m%d%H%M",
        "D:%Y%m%d%H%M%S",
        "D:%Y%m%d%H%M%S%z",
    ):
        try:
            d = datetime.strptime(text, f)  # noqa: DTZ007
        except ValueError:
            continue
        else:
            if text.endswith("+0000"):
                d = d.replace(tzinfo=timezone.utc)
            return d
    raise ValueError(f"Can not convert date: {orgtext}")

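# Example for parse_iso8824_date (illustrative, added for clarity; not part
# of the upstream file):
#   parse_iso8824_date("D:20230915123456Z")
#       -> datetime(2023, 9, 15, 12, 34, 56, tzinfo=timezone.utc)
#   parse_iso8824_date("D:20230915123456+02'00'")
#       -> datetime(2023, 9, 15, 12, 34, 56, tzinfo=timezone(timedelta(hours=2)))
# A bare "20231231" is also accepted: the "D:" prefix is added automatically.
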
def format_iso8824_date(dt: datetime) -> str:
    """
    Convert a datetime object to PDF date string format.

    Converts datetime to the PDF date format D:YYYYMMDDHHmmSSOHH'mm
    as specified in the PDF Reference.

    Args:
        dt: A datetime object to convert.

    Returns:
        A date string in PDF format.

    """
    date_str = dt.strftime("D:%Y%m%d%H%M%S")
    if dt.tzinfo is not None:
        offset = dt.utcoffset()
        assert offset is not None
        total_seconds = int(offset.total_seconds())
        hours, remainder = divmod(abs(total_seconds), 3600)
        minutes = remainder // 60
        sign = "+" if total_seconds >= 0 else "-"
        date_str += f"{sign}{hours:02d}'{minutes:02d}'"
    return date_str

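# Example for format_iso8824_date (illustrative, added for clarity; not part
# of the upstream file):
#   format_iso8824_date(datetime(2023, 9, 15, 12, 34, 56))
#       -> "D:20230915123456"
#   format_iso8824_date(datetime(2023, 9, 15, 12, 34, 56,
#                                tzinfo=timezone(timedelta(hours=2))))
#       -> "D:20230915123456+02'00'"
# This round-trips with parse_iso8824_date above.
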
def _get_max_pdf_version_header(header1: str, header2: str) -> str:
    versions = (
        "%PDF-1.3",
        "%PDF-1.4",
        "%PDF-1.5",
        "%PDF-1.6",
        "%PDF-1.7",
        "%PDF-2.0",
    )
    pdf_header_indices = []
    if header1 in versions:
        pdf_header_indices.append(versions.index(header1))
    if header2 in versions:
        pdf_header_indices.append(versions.index(header2))
    if len(pdf_header_indices) == 0:
        raise ValueError(f"Neither {header1!r} nor {header2!r} are proper headers")
    return versions[max(pdf_header_indices)]


WHITESPACES = (b"\x00", b"\t", b"\n", b"\f", b"\r", b" ")
WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]"


def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
    """
    Read non-whitespace characters and return them.

    Stops upon encountering whitespace or when maxchars is reached.

    Args:
        stream: The data stream from which to read.
        maxchars: The maximum number of bytes returned; by default unlimited.

    Returns:
        The data which was read.

    """
    txt = b""
    while True:
        tok = stream.read(1)
        if tok.isspace() or not tok:
            break
        txt += tok
        if len(txt) == maxchars:
            break
    return txt


def read_non_whitespace(stream: StreamType) -> bytes:
    """
    Find and read the next non-whitespace character (ignores whitespace).

    Args:
        stream: The data stream from which to read.

    Returns:
        The data which was read.

    """
    tok = stream.read(1)
    while tok in WHITESPACES:
        tok = stream.read(1)
    return tok


def skip_over_whitespace(stream: StreamType) -> bool:
    """
    Similar to read_non_whitespace, but returns a boolean indicating whether
    at least one whitespace character was read.

    Args:
        stream: The data stream from which to read.

    Returns:
        True if one or more whitespace characters were skipped, otherwise False.

    """
    tok = stream.read(1)
    cnt = 0
    while tok in WHITESPACES:
        cnt += 1
        tok = stream.read(1)
    return cnt > 0


def check_if_whitespace_only(value: bytes) -> bool:
    """
    Check if the given value consists of whitespace characters only.

    Args:
        value: The bytes to check.

    Returns:
        True if the value only has whitespace characters, otherwise False.

    """
    return all(b in WHITESPACES_AS_BYTES for b in value)

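# Illustrative usage (added for clarity; not part of the upstream file):
#   check_if_whitespace_only(b" \r\n\t")  -> True
#   check_if_whitespace_only(b" a ")      -> False
# The membership test is per byte value, so b"\x00" also counts as
# whitespace, matching the PDF definition in WHITESPACES above.
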
def skip_over_comment(stream: StreamType) -> None:
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == b"%":
        while tok not in (b"\n", b"\r"):
            tok = stream.read(1)
            if tok == b"":
                raise PdfStreamError("File ended unexpectedly.")


def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
    """
    Read until the regular expression pattern matched (ignore the match).

    Treats EOF on the underlying stream as the end of the token to be matched.

    Args:
        stream: The data stream from which to read.
        regex: The compiled pattern to stop at.

    Returns:
        The read bytes.

    """
    name = b""
    while True:
        tok = stream.read(16)
        if not tok:
            return name
        m = regex.search(name + tok)
        if m is not None:
            stream.seek(m.start() - (len(name) + len(tok)), 1)
            name = (name + tok)[: m.start()]
            break
        name += tok
    return name

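# Illustrative usage for read_until_regex (added for clarity; not part of the
# upstream file; BytesIO from io is assumed for the sketch):
#   stream = BytesIO(b"Name other")
#   read_until_regex(stream, re.compile(rb"\s"))  -> b"Name"
# The stream is left positioned at the first matching byte (the space), so
# the delimiter itself is not consumed.
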
def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
    """
    Given a stream at position X, read a block of size to_read ending at position X.

    This changes the stream's position to the beginning of where the block was
    read.

    Args:
        stream: The data stream from which to read.
        to_read: The number of bytes to read.

    Returns:
        The data which was read.

    """
    if stream.tell() < to_read:
        raise PdfStreamError("Could not read malformed PDF file")
    # Seek to the start of the block we want to read.
    stream.seek(-to_read, SEEK_CUR)
    read = stream.read(to_read)
    # Seek to the start of the block we read after reading it.
    stream.seek(-to_read, SEEK_CUR)
    return read


def read_previous_line(stream: StreamType) -> bytes:
    """
    Given a byte stream with current position X, return the previous line.

    All characters between the first CR/LF byte found before X
    (or, the start of the file, if no such byte is found) and position X
    are returned.

    After this call, the stream will be positioned one byte after the
    first non-CRLF character found beyond the first CR/LF byte before X,
    or, if no such byte is found, at the beginning of the stream.

    Args:
        stream: The data stream from which to read.

    Returns:
        The data which was read.

    """
    line_content = []
    found_crlf = False
    if stream.tell() == 0:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    while True:
        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
        if to_read == 0:
            break
        # Read the block. After this, our stream will be one
        # beyond the initial position.
        block = read_block_backwards(stream, to_read)
        idx = len(block) - 1
        if not found_crlf:
            # We haven't found our first CR/LF yet.
            # Read off characters until we hit one.
            while idx >= 0 and block[idx] not in b"\r\n":
                idx -= 1
            if idx >= 0:
                found_crlf = True
        if found_crlf:
            # We found our first CR/LF already (on this block or
            # a previous one).
            # Our combined line is the remainder of the block
            # plus any previously read blocks.
            line_content.append(block[idx + 1 :])
            # Continue to read off any more CRLF characters.
            while idx >= 0 and block[idx] in b"\r\n":
                idx -= 1
        else:
            # Didn't find CR/LF yet - add this block to our
            # previously read blocks and continue.
            line_content.append(block)
        if idx >= 0:
            # We found the next non-CRLF character.
            # Set the stream position correctly, then break.
            stream.seek(idx + 1, SEEK_CUR)
            break
    # Join all the blocks in the line (which are in reverse order)
    return b"".join(line_content[::-1])


def matrix_multiply(
    a: TransformationMatrixType, b: TransformationMatrixType
) -> TransformationMatrixType:
    return tuple(  # type: ignore[return-value]
        tuple(sum(float(i) * float(j) for i, j in zip(row, col)) for col in zip(*b))
        for row in a
    )

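# Worked example for matrix_multiply (illustrative, added for clarity; not
# part of the upstream file). Composing two translations in the row-vector
# convention used here (translation components live in the bottom row):
#   a = ((1, 0, 0), (0, 1, 0), (2, 3, 1))
#   b = ((1, 0, 0), (0, 1, 0), (4, 5, 1))
#   matrix_multiply(a, b) -> ((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (6.0, 8.0, 1.0))
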
def mark_location(stream: StreamType) -> None:
    """Create text file showing current location in context."""
    # Mainly for debugging
    radius = 5000
    stream.seek(-radius, 1)
    with open("pypdf_pdfLocation.txt", "wb") as output_fh:
        output_fh.write(stream.read(radius))
        output_fh.write(b"HERE")
        output_fh.write(stream.read(radius))
    stream.seek(-radius, 1)


@overload
def ord_(b: str) -> int:
    ...


@overload
def ord_(b: bytes) -> bytes:
    ...


@overload
def ord_(b: int) -> int:
    ...


def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
    if isinstance(b, str):
        return ord(b)
    return b


def deprecate(msg: str, stacklevel: int = 3) -> None:
    warnings.warn(msg, DeprecationWarning, stacklevel=stacklevel)


def deprecation(msg: str) -> None:
    raise DeprecationError(msg)


def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed, but has a replacement."""
    deprecate(
        f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead.",
        4,
    )


def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed, but has a replacement."""
    deprecation(
        f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead."
    )


def deprecate_no_replacement(name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed without replacement."""
    deprecate(f"{name} is deprecated and will be removed in pypdf {removed_in}.", 4)


def deprecation_no_replacement(name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed without replacement."""
    deprecation(f"{name} is deprecated and was removed in pypdf {removed_in}.")


def logger_error(msg: str, src: str) -> None:
    """
    Use this instead of logger.error directly.

    That allows people to overwrite it more easily.

    See the docs on when to use which:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html
    """
    logging.getLogger(src).error(msg)


def logger_warning(msg: str, src: str) -> None:
    """
    Use this instead of logger.warning directly.

    That allows people to overwrite it more easily.

    ## Exception, warnings.warn, logger_warning
    - Exceptions should be used if the user should write code that deals with
      an error case, e.g. the PDF being completely broken.
    - warnings.warn should be used if the user needs to fix their code, e.g.
      DeprecationWarnings
    - logger_warning should be used if the user needs to know that an issue was
      handled by pypdf, e.g. a non-compliant PDF being read in a way that
      pypdf could apply a robustness fix to still read it. This applies mainly
      to strict=False mode.
    """
    logging.getLogger(src).warning(msg)


def rename_kwargs(
    func_name: str, kwargs: dict[str, Any], aliases: dict[str, str], fail: bool = False
) -> None:
    """
    Helper function to deprecate keyword arguments.

    Args:
        func_name: Name of the function whose arguments are being renamed.
        kwargs: The keyword arguments received by the function.
        aliases: Mapping of deprecated argument names to their replacements.
        fail: If True, raise immediately instead of warning.

    """
    for old_term, new_term in aliases.items():
        if old_term in kwargs:
            if fail:
                raise DeprecationError(
                    f"{old_term} is deprecated as an argument. Use {new_term} instead"
                )
            if new_term in kwargs:
                raise TypeError(
                    f"{func_name} received both {old_term} and {new_term} as "
                    f"an argument. {old_term} is deprecated. "
                    f"Use {new_term} instead."
                )
            kwargs[new_term] = kwargs.pop(old_term)
            warnings.warn(
                message=(
                    f"{old_term} is deprecated as an argument. Use {new_term} instead"
                ),
                category=DeprecationWarning,
                stacklevel=3,
            )

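# Illustrative usage for rename_kwargs (added for clarity; not part of the
# upstream file; `export` and `fileobj` are hypothetical names):
#   def export(**kwargs):
#       rename_kwargs("export", kwargs, aliases={"fileobj": "stream"})
#       ...
# Calling export(fileobj=f) emits a DeprecationWarning and forwards the value
# as kwargs["stream"]; passing both fileobj= and stream= raises TypeError.
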
def _human_readable_bytes(bytes: int) -> str:
    if bytes < 10**3:
        return f"{bytes} Byte"
    if bytes < 10**6:
        return f"{bytes / 10**3:.1f} kB"
    if bytes < 10**9:
        return f"{bytes / 10**6:.1f} MB"
    return f"{bytes / 10**9:.1f} GB"


# The following class has been copied from Django:
# https://github.com/django/django/blob/adae619426b6f50046b3daaa744db52989c9d6db/django/utils/functional.py#L51-L65
# It received some modifications to comply with our own coding standards.
#
# Original license:
#
# ---------------------------------------------------------------------------------
# Copyright (c) Django Software Foundation and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright notice,
#        this list of conditions and the following disclaimer.
#
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#
#     3. Neither the name of Django nor the names of its contributors may be used
#        to endorse or promote products derived from this software without
#        specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------------
class classproperty:  # noqa: N801
    """
    Decorator that converts a method with a single cls argument into a property
    that can be accessed directly from the class.
    """

    def __init__(self, method=None) -> None:  # type: ignore  # noqa: ANN001
        self.fget = method

    def __get__(self, instance, cls=None) -> Any:  # type: ignore  # noqa: ANN001
        return self.fget(cls)

    def getter(self, method) -> Self:  # type: ignore  # noqa: ANN001
        self.fget = method
        return self

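# Illustrative usage for classproperty (added for clarity; not part of the
# upstream file; `Demo` is a hypothetical class):
#   class Demo:
#       @classproperty
#       def label(cls):
#           return cls.__name__.lower()
#   Demo.label  -> "demo", with no instance required.
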
@dataclass
class File:
    from .generic import IndirectObject  # noqa: PLC0415

    name: str = ""
    """
    Filename as identified within the PDF file.
    """
    data: bytes = b""
    """
    Data as bytes.
    """
    indirect_reference: Optional[IndirectObject] = None
    """
    Reference to the object storing the stream.
    """

    def __str__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, data: {_human_readable_bytes(len(self.data))})"

    def __repr__(self) -> str:
        return self.__str__()[:-1] + f", hash: {hash(self.data)})"


@functools.total_ordering
class Version:
    COMPONENT_PATTERN = re.compile(r"^(\d+)(.*)$")

    def __init__(self, version_str: str) -> None:
        self.version_str = version_str
        self.components = self._parse_version(version_str)

    def _parse_version(self, version_str: str) -> list[tuple[int, str]]:
        components = version_str.split(".")
        parsed_components = []
        for component in components:
            match = Version.COMPONENT_PATTERN.match(component)
            if not match:
                parsed_components.append((0, component))
                continue
            integer_prefix = match.group(1)
            suffix = match.group(2)
            if integer_prefix is None:
                integer_prefix = 0
            parsed_components.append((int(integer_prefix), suffix))
        return parsed_components

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Version):
            return False
        return self.components == other.components

    def __hash__(self) -> int:
        # Convert to tuple as lists cannot be hashed.
        return hash((self.__class__, tuple(self.components)))

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Version):
            raise ValueError(f"Version cannot be compared against {type(other)}")

        for self_component, other_component in zip(self.components, other.components):
            self_value, self_suffix = self_component
            other_value, other_suffix = other_component

            if self_value < other_value:
                return True
            if self_value > other_value:
                return False

            if self_suffix < other_suffix:
                return True
            if self_suffix > other_suffix:
                return False

        return len(self.components) < len(other.components)

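# Illustrative usage for Version (added for clarity; not part of the upstream
# file): components compare numerically first, then by suffix, e.g.
#   Version("2.10.1") > Version("2.9.9")  -> True
#   Version("1.0") < Version("1.0b")      -> True  (suffix "b" sorts after "")
#   Version("1.0") < Version("1.0.1")     -> True  (shorter tuple sorts first)
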
1
venv/lib/python3.12/site-packages/pypdf/_version.py
Normal file
@@ -0,0 +1 @@
__version__ = "6.6.2"
3307
venv/lib/python3.12/site-packages/pypdf/_writer.py
Normal file
File diff suppressed because it is too large
577
venv/lib/python3.12/site-packages/pypdf/_xobj_image_helpers.py
Normal file
@@ -0,0 +1,577 @@
"""Functions to convert an image XObject to an image"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from io import BytesIO
|
||||||
|
from typing import Any, Literal, Optional, Union, cast
|
||||||
|
|
||||||
|
from ._utils import check_if_whitespace_only, logger_warning
|
||||||
|
from .constants import ColorSpaces, StreamAttributes
|
||||||
|
from .constants import FilterTypes as FT
|
||||||
|
from .constants import ImageAttributes as IA
|
||||||
|
from .errors import EmptyImageDataError, PdfReadError
|
||||||
|
from .generic import (
|
||||||
|
ArrayObject,
|
||||||
|
DecodedStreamObject,
|
||||||
|
EncodedStreamObject,
|
||||||
|
NullObject,
|
||||||
|
TextStringObject,
|
||||||
|
is_null_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
if sys.version_info[:2] >= (3, 10):
|
||||||
|
from typing import TypeAlias
|
||||||
|
else:
|
||||||
|
from typing_extensions import TypeAlias
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError(
|
||||||
|
"pillow is required to do image extraction. "
|
||||||
|
"It can be installed via 'pip install pypdf[image]'"
|
||||||
|
)
|
||||||
|
|
||||||
|
mode_str_type: TypeAlias = Literal[
|
||||||
|
"", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
|
||||||
|
]
|
||||||
|
|
||||||
|
MAX_IMAGE_MODE_NESTING_DEPTH: int = 10
|
||||||
|
|
||||||
|
|
||||||
|
def _get_image_mode(
|
||||||
|
color_space: Union[str, list[Any], Any],
|
||||||
|
color_components: int,
|
||||||
|
prev_mode: mode_str_type,
|
||||||
|
depth: int = 0,
|
||||||
|
) -> tuple[mode_str_type, bool]:
|
||||||
|
"""
|
||||||
|
Returns:
|
||||||
|
Image mode, not taking into account mask (transparency).
|
||||||
|
ColorInversion is required (like for some DeviceCMYK).
|
||||||
|
|
||||||
|
"""
|
||||||
|
if depth > MAX_IMAGE_MODE_NESTING_DEPTH:
|
||||||
|
raise PdfReadError(
|
||||||
|
"Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."
|
||||||
|
)
|
||||||
|
if is_null_or_none(color_space):
|
||||||
|
return "", False
|
||||||
|
color_space_str: str = ""
|
||||||
|
if isinstance(color_space, str):
|
||||||
|
color_space_str = color_space
|
||||||
|
elif not isinstance(color_space, list):
|
||||||
|
raise PdfReadError(
|
||||||
|
"Cannot interpret color space", color_space
|
||||||
|
) # pragma: no cover
|
||||||
|
elif not color_space:
|
||||||
|
return "", False
|
||||||
|
elif color_space[0].startswith("/Cal"): # /CalRGB or /CalGray
|
||||||
|
color_space_str = "/Device" + color_space[0][4:]
|
||||||
|
elif color_space[0] == "/ICCBased":
|
||||||
|
icc_profile = color_space[1].get_object()
|
||||||
|
color_components = cast(int, icc_profile["/N"])
|
||||||
|
color_space_str = icc_profile.get("/Alternate", "")
|
||||||
|
elif color_space[0] == "/Indexed":
|
||||||
|
color_space_str = color_space[1].get_object()
|
||||||
|
mode, invert_color = _get_image_mode(
|
||||||
|
color_space_str, color_components, prev_mode, depth + 1
|
||||||
|
)
|
||||||
|
if mode in ("RGB", "CMYK"):
|
||||||
|
mode = "P"
|
||||||
|
return mode, invert_color
|
||||||
|
elif color_space[0] == "/Separation":
|
||||||
|
color_space_str = color_space[2].get_object()
|
||||||
|
mode, invert_color = _get_image_mode(
|
||||||
|
color_space_str, color_components, prev_mode, depth + 1
|
||||||
|
)
|
||||||
|
return mode, True
|
||||||
|
elif color_space[0] == "/DeviceN":
|
||||||
|
original_color_space = color_space
|
||||||
|
color_components = len(color_space[1])
|
||||||
|
color_space_str = color_space[2].get_object()
|
||||||
|
if color_space_str == "/DeviceCMYK" and color_components == 1:
|
||||||
|
if original_color_space[1][0] != "/Black":
|
||||||
|
logger_warning(
|
||||||
|
f"Color {original_color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",
|
||||||
|
__name__,
|
||||||
|
)
|
||||||
|
return "L", True
|
||||||
|
mode, invert_color = _get_image_mode(
|
||||||
|
color_space_str, color_components, prev_mode, depth + 1
|
||||||
|
)
|
||||||
|
return mode, invert_color
|
||||||
|
|
||||||
|
mode_map: dict[str, mode_str_type] = {
|
||||||
|
"1bit": "1", # must be zeroth position: color_components may index the values
|
||||||
|
"/DeviceGray": "L", # must be first position: color_components may index the values
|
||||||
|
"palette": "P", # must be second position: color_components may index the values
|
||||||
|
"/DeviceRGB": "RGB", # must be third position: color_components may index the values
|
||||||
|
"/DeviceCMYK": "CMYK", # must be fourth position: color_components may index the values
|
||||||
|
"2bit": "2bits",
|
||||||
|
"4bit": "4bits",
|
||||||
|
}
|
||||||
|
|
||||||
|
mode = (
|
||||||
|
mode_map.get(color_space_str)
|
||||||
|
or list(mode_map.values())[color_components]
|
||||||
|
or prev_mode
|
||||||
|
)
|
||||||
|
|
||||||
|
return mode, mode == "CMYK"
|
||||||
|
|
||||||
|
|
||||||
|
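# Illustrative behaviour (added for clarity; not part of the upstream file):
#   _get_image_mode("/DeviceRGB", 3, "")   -> ("RGB", False)
#   _get_image_mode("/DeviceCMYK", 4, "")  -> ("CMYK", True)  # CMYK needs inversion
# An ["/Indexed", base, hival, lookup] array resolves `base` recursively and
# maps RGB/CMYK results to "P" (palette).
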
def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:
    mask = (1 << bits) - 1
    byte_buffer = bytearray(size[0] * size[1])
    data_index = 0
    bit = 8 - bits
    for y in range(size[1]):
        if bit != 8 - bits:
            data_index += 1
            bit = 8 - bits
        for x in range(size[0]):
            byte_buffer[x + y * size[0]] = (data[data_index] >> bit) & mask
            bit -= bits
            if bit < 0:
                data_index += 1
                bit = 8 - bits
    return bytes(byte_buffer)

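# Worked example for bits2byte (illustrative, added for clarity; not part of
# the upstream file). Unpacking one row of four 2-bit pixels packed into the
# single byte 0b00011011:
#   bits2byte(b"\x1b", (4, 1), 2) -> b"\x00\x01\x02\x03"
# Rows are byte-aligned: any partially used byte is skipped at each new row.
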
def _extended_image_from_bytes(
|
||||||
|
mode: str, size: tuple[int, int], data: bytes
|
||||||
|
) -> Image.Image:
|
||||||
|
try:
|
||||||
|
img = Image.frombytes(mode, size, data)
|
||||||
|
except ValueError as exc:
|
||||||
|
nb_pix = size[0] * size[1]
|
||||||
|
data_length = len(data)
|
||||||
|
if data_length == 0:
|
||||||
|
raise EmptyImageDataError(
|
||||||
|
"Data is 0 bytes, cannot process an image from empty data."
|
||||||
|
) from exc
|
||||||
|
if data_length % nb_pix != 0:
|
||||||
|
raise exc
|
||||||
|
k = nb_pix * len(mode) / data_length
|
||||||
|
data = b"".join(bytes((x,) * int(k)) for x in data)
|
||||||
|
img = Image.frombytes(mode, size, data)
|
||||||
|
return img
|
||||||
|
|
||||||
|
|
||||||
|
def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, Any]:
    count = len(color_space)
    if count == 4:
        color_space, base, hival, lookup = (value.get_object() for value in color_space)
        return color_space, base, hival, lookup

    # Deal with strange AutoDesk files where `base` and `hival` look like this:
    # /DeviceRGB\x00255
    element1 = color_space[1]
    element1 = element1 if isinstance(element1, str) else element1.get_object()
    if count == 3 and "\x00" in element1:
        color_space, lookup = color_space[0].get_object(), color_space[2].get_object()
        base, hival = element1.split("\x00")
        hival = int(hival)
        return color_space, base, hival, lookup
    raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")


def _handle_flate(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
    obj_as_text: str,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process an image encoded with FlateDecode.

    Returns img, image_format, extension, color inversion.
    """
    extension = ".png"  # mime_type: "image/png"
    image_format = "PNG"
    lookup: Any
    base: Any
    hival: Any
    if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":
        color_space, base, hival, lookup = __handle_flate__indexed(color_space)
    if mode == "2bits":
        mode = "P"
        data = bits2byte(data, size, 2)
    elif mode == "4bits":
        mode = "P"
        data = bits2byte(data, size, 4)
    img = _extended_image_from_bytes(mode, size, data)
    if color_space == "/Indexed":
        if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):
            lookup = lookup.get_data()
        if isinstance(lookup, TextStringObject):
            lookup = lookup.original_bytes
        if isinstance(lookup, str):
            lookup = lookup.encode()
        try:
            nb, conv, mode = {  # type: ignore
                "1": (0, "", ""),
                "L": (1, "P", "L"),
                "P": (0, "", ""),
                "RGB": (3, "P", "RGB"),
                "CMYK": (4, "P", "CMYK"),
            }[_get_image_mode(base, 0, "")[0]]
        except KeyError:  # pragma: no cover
            logger_warning(
                f"Base {base} not coded, please share the PDF file with the pypdf dev team",
                __name__,
            )
            lookup = None
        else:
            if img.mode == "1":
                # Two values ("high" and "low").
                expected_count = 2 * nb
                actual_count = len(lookup)
                if actual_count != expected_count:
                    if actual_count < expected_count:
                        logger_warning(
                            f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__,
                        )
                        lookup += bytes([0] * (expected_count - actual_count))
                    elif not check_if_whitespace_only(lookup[expected_count:]):
                        logger_warning(
                            f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__,
                        )
                    lookup = lookup[:expected_count]
                colors_arr = [lookup[:nb], lookup[nb:]]
                arr = b"".join(
                    b"".join(
                        colors_arr[1 if img.getpixel((x, y)) > 127 else 0]  # type: ignore[operator,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
                        for x in range(img.size[0])
                    )
                    for y in range(img.size[1])
                )
                img = Image.frombytes(mode, img.size, arr)
            else:
                img = img.convert(conv)
                if len(lookup) != (hival + 1) * nb:
                    logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)
                    lookup = None
                elif mode == "L":
                    # A gray lookup does not work: convert it to an equivalent RGB lookup.
                    lookup = b"".join([bytes([b, b, b]) for b in lookup])
                    mode = "RGB"
                # TODO: https://github.com/py-pdf/pypdf/pull/2039
                # This is a workaround until PIL is able to process CMYK images.
                elif mode == "CMYK":
                    _rgb = []
                    for _c, _m, _y, _k in (
                        lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)
                    ):
                        _r = int(255 * (1 - _c / 255) * (1 - _k / 255))
                        _g = int(255 * (1 - _m / 255) * (1 - _k / 255))
                        _b = int(255 * (1 - _y / 255) * (1 - _k / 255))
                        _rgb.append(bytes((_r, _g, _b)))
                    lookup = b"".join(_rgb)
                    mode = "RGB"
                if lookup is not None:
                    img.putpalette(lookup, rawmode=mode)
                img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")
    elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
        # Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary
        mode2 = _get_image_mode(color_space, colors, mode)[0]
        if mode != mode2:
            img = Image.frombytes(mode2, size, data)  # reloaded as mode may have changed
    if mode == "CMYK":
        extension = ".tif"
        image_format = "TIFF"
    return img, image_format, extension, False

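For reference, the CMYK-to-RGB palette conversion inside `_handle_flate` above reduces to one formula per channel; a worked instance with a single illustrative lookup entry:

# (c, m, y, k) = (0, 255, 255, 0) is pure red in byte-scaled CMYK.
_c, _m, _y, _k = 0, 255, 255, 0
_r = int(255 * (1 - _c / 255) * (1 - _k / 255))  # 255
_g = int(255 * (1 - _m / 255) * (1 - _k / 255))  # 0
_b = int(255 * (1 - _y / 255) * (1 - _k / 255))  # 0
assert (_r, _g, _b) == (255, 0, 0)
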
def _handle_jpx(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process an image encoded with JPXDecode.

    Returns img, image_format, extension, inversion.
    """
    extension = ".jp2"  # mime_type: "image/x-jp2"
    img1: Image.Image = Image.open(BytesIO(data), formats=("JPEG2000",))
    mode, invert_color = _get_image_mode(color_space, colors, mode)
    if mode == "":
        mode = cast(mode_str_type, img1.mode)
        invert_color = mode in ("CMYK",)
    if img1.mode == "RGBA" and mode == "RGB":
        mode = "RGBA"
    # we need to convert to the correct mode
    if img1.mode == mode or {img1.mode, mode} == {"L", "P"}:  # compare (unordered) sets
        # L and P are indexed modes which should not be changed.
        img = img1
    elif {img1.mode, mode} == {"RGBA", "CMYK"}:
        # RGBA / CMYK are 4-byte encodings whose channel
        # interpretation needs to be corrected here.
        img = Image.frombytes(mode, img1.size, img1.tobytes())
    else:  # pragma: no cover
        img = img1.convert(mode)
    # CMYK conversion
    # https://stackoverflow.com/questions/38855022/conversion-from-cmyk-to-rgb-with-pillow-is-different-from-that-of-photoshop
    # not implemented for the moment, as the ICC profile would need to be handled properly
    if img.mode == "CMYK":
        img = img.convert("RGB")
    image_format = "JPEG2000"
    return img, image_format, extension, invert_color


def _apply_decode(
    img: Image.Image,
    x_object_obj: dict[str, Any],
    lfilters: FT,
    color_space: Union[str, list[Any], Any],
    invert_color: bool,
) -> Image.Image:
    # CMYK images and other color spaces without a /Decode entry
    # require reverting the scale (cf. p. 243 §2, last sentence).
    decode = x_object_obj.get(
        IA.DECODE,
        ([1.0, 0.0] * len(img.getbands()))
        if (
            (img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE))
            or (invert_color and img.mode == "L")
        )
        else None,
    )
    if (
        isinstance(color_space, ArrayObject)
        and color_space[0].get_object() == "/Indexed"
    ):
        decode = None  # decode is meaningless if Indexed
    if (
        isinstance(color_space, ArrayObject)
        and color_space[0].get_object() == "/Separation"
    ):
        decode = [1.0, 0.0] * len(img.getbands())
    if decode is not None and not all(decode[i] == i % 2 for i in range(len(decode))):
        lut: list[int] = []
        for i in range(0, len(decode), 2):
            dmin = decode[i]
            dmax = decode[i + 1]
            lut.extend(
                round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)
            )
        img = img.point(lut)
    return img

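The LUT built in `_apply_decode` above maps each 8-bit sample linearly from `dmin` to `dmax`; for the common decode pair `[1.0, 0.0]` this is a plain inversion, as a small self-check shows:

dmin, dmax = 1.0, 0.0  # decode pair [1.0, 0.0]
lut = [round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)]
assert lut[0] == 255 and lut[100] == 155 and lut[255] == 0  # i.e. 255 - j
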
def _get_mode_and_invert_color(
    x_object_obj: dict[str, Any], colors: int, color_space: Union[str, list[Any], Any]
) -> tuple[mode_str_type, bool]:
    if (
        IA.COLOR_SPACE in x_object_obj
        and x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB
    ):
        # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
        mode: mode_str_type = "RGB"
    if x_object_obj.get("/BitsPerComponent", 8) < 8:
        mode, invert_color = _get_image_mode(
            f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, ""
        )
    else:
        mode, invert_color = _get_image_mode(
            color_space,
            2
            if (
                colors == 1
                and (
                    not is_null_or_none(color_space)
                    and "Gray" not in color_space
                )
            )
            else colors,
            "",
        )
    return mode, invert_color


def _xobj_to_image(
    x_object: dict[str, Any],
    pillow_parameters: Union[dict[str, Any], None] = None
) -> tuple[Optional[str], bytes, Any]:
    """
    Users need to have the pillow package installed.

    It's unclear if pypdf will keep this function here, hence it's private.
    It might get removed at any point.

    Args:
        x_object: The image XObject stream to convert.
        pillow_parameters: parameters provided to Pillow Image.save() method,
            cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>

    Returns:
        Tuple[file extension, bytes, PIL.Image.Image]

    """
    def _apply_alpha(
        img: Image.Image,
        x_object: dict[str, Any],
        obj_as_text: str,
        image_format: str,
        extension: str,
    ) -> tuple[Image.Image, str, str]:
        alpha = None
        if IA.S_MASK in x_object:  # add alpha channel
            alpha = _xobj_to_image(x_object[IA.S_MASK])[2]
            if img.size != alpha.size:
                logger_warning(
                    f"image and mask size not matching: {obj_as_text}", __name__
                )
            else:
                # TODO: implement mask
                if alpha.mode != "L":
                    alpha = alpha.convert("L")
                if img.mode == "P":
                    img = img.convert("RGB")
                elif img.mode == "1":
                    img = img.convert("L")
                img.putalpha(alpha)
            if "JPEG" in image_format:
                image_format = "JPEG2000"
                extension = ".jp2"
            else:
                image_format = "PNG"
                extension = ".png"
        return img, extension, image_format

    # For error reporting
    obj_as_text = (
        x_object.indirect_reference.__repr__()
        if x_object is None  # pragma: no cover
        else x_object.__repr__()
    )

    # Get size and data
    size = (cast(int, x_object[IA.WIDTH]), cast(int, x_object[IA.HEIGHT]))
    data = x_object.get_data()  # type: ignore
    if isinstance(data, str):  # pragma: no cover
        data = data.encode()
    if len(data) % (size[0] * size[1]) == 1 and data[-1] == 0x0A:  # i.e. '\n'
        data = data[:-1]

    # Get color properties
    colors = x_object.get("/Colors", 1)
    color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
    if isinstance(color_space, list) and len(color_space) == 1:
        color_space = color_space[0].get_object()

    mode, invert_color = _get_mode_and_invert_color(x_object, colors, color_space)

    # Get filters
    filters = x_object.get(StreamAttributes.FILTER, NullObject()).get_object()
    lfilters = filters[-1] if isinstance(filters, list) else filters
    decode_parms = x_object.get(StreamAttributes.DECODE_PARMS, None)
    if decode_parms and isinstance(decode_parms, (tuple, list)):
        decode_parms = decode_parms[0]
    else:
        decode_parms = {}
    if not isinstance(decode_parms, dict):
        decode_parms = {}

    extension = None
    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
        img, image_format, extension, _ = _handle_flate(
            size,
            data,
            mode,
            color_space,
            colors,
            obj_as_text,
        )
    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE):
        # I'm not sure if the following logic is correct.
        # There might not be any relationship between the filters and the extension.
        if lfilters == FT.LZW_DECODE:
            image_format = "TIFF"
            extension = ".tiff"  # mime_type = "image/tiff"
        else:
            image_format = "PNG"
            extension = ".png"  # mime_type = "image/png"
        try:
            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
        except UnidentifiedImageError:
            img = _extended_image_from_bytes(mode, size, data)
    elif lfilters == FT.DCT_DECODE:
        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
        # invert_color kept unchanged
    elif lfilters == FT.JPX_DECODE:
        img, image_format, extension, invert_color = _handle_jpx(
            size, data, mode, color_space, colors
        )
    elif lfilters == FT.CCITT_FAX_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("TIFF",)),
            "TIFF",
            ".tiff",
            False,
        )
    elif lfilters == FT.JBIG2_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("PNG", "PPM")),
            "PNG",
            ".png",
            False,
        )
    elif mode == "CMYK":
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "TIFF",
            ".tif",
            False,
        )
    elif mode == "":
        raise PdfReadError(f"ColorSpace field not found in {x_object}")
    else:
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "PNG",
            ".png",
            False,
        )

    img = _apply_decode(img, x_object, lfilters, color_space, invert_color)
    img, extension, image_format = _apply_alpha(
        img, x_object, obj_as_text, image_format, extension
    )

    if pillow_parameters is None:
        pillow_parameters = {}
    # Preserve JPEG image quality - see issue #3515.
    if image_format == "JPEG":
        # This prevents: Cannot use 'keep' when original image is not a JPEG:
        # "JPEG" is the value of PIL.JpegImagePlugin.JpegImageFile.format
        img.format = "JPEG"
        if "quality" not in pillow_parameters:
            pillow_parameters["quality"] = "keep"

    # Save image to bytes
    img_byte_arr = BytesIO()
    try:
        img.save(img_byte_arr, format=image_format, **pillow_parameters)
    except OSError:  # pragma: no cover # covered with pillow 10.3
        # in that case, convert to RGBA and save as PNG
        img1 = img.convert("RGBA")
        image_format = "PNG"
        extension = ".png"
        img_byte_arr = BytesIO()
        img1.save(img_byte_arr, format=image_format)
    data = img_byte_arr.getvalue()

    try:  # temporary try/except until other fixes of images
        img = Image.open(BytesIO(data))
    except Exception as exception:
        logger_warning(f"Failed loading image: {exception}", __name__)
        img = None  # type: ignore[assignment,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
    return extension, data, img
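
Although `_xobj_to_image` is private and may be removed, the (extension, bytes, image) triple it returns is what pypdf's public image accessor exposes; a hedged usage sketch (the file name is a placeholder):

from pypdf import PdfReader

reader = PdfReader("example.pdf")  # placeholder path
for image_file in reader.pages[0].images:  # wraps the triple returned above
    with open(image_file.name, "wb") as fh:
        fh.write(image_file.data)
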
@@ -0,0 +1,42 @@
"""
PDF specifies several annotation types which pypdf makes available here.

The names of the annotations and their attributes do not reflect the names in
the specification in all cases. For example, the PDF standard defines a
'Square' annotation that does not actually need to be square. For this reason,
pypdf calls it 'Rectangle'.

At their core, all annotation types are DictionaryObjects. That means if pypdf
does not implement a feature, users can easily extend the given functionality.
"""


from ._base import NO_FLAGS, AnnotationDictionary
from ._markup_annotations import (
    Ellipse,
    FreeText,
    Highlight,
    Line,
    MarkupAnnotation,
    Polygon,
    PolyLine,
    Rectangle,
    Text,
)
from ._non_markup_annotations import Link, Popup

__all__ = [
    "NO_FLAGS",
    "AnnotationDictionary",
    "Ellipse",
    "FreeText",
    "Highlight",
    "Line",
    "Link",
    "MarkupAnnotation",
    "PolyLine",
    "Polygon",
    "Popup",
    "Rectangle",
    "Text",
]
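
A hedged sketch of the package in use; `in.pdf`/`out.pdf` are placeholders, and `PdfWriter.add_annotation` is the public entry point that attaches these dictionaries to a page:

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import FreeText

writer = PdfWriter()
writer.append(PdfReader("in.pdf"))  # placeholder input
annotation = FreeText(text="Reviewed", rect=(50, 550, 200, 650))
writer.add_annotation(page_number=0, annotation=annotation)
with open("out.pdf", "wb") as fp:
    writer.write(fp)
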
29
venv/lib/python3.12/site-packages/pypdf/annotations/_base.py
Normal file
@@ -0,0 +1,29 @@
from abc import ABC

from ..constants import AnnotationFlag
from ..generic import NameObject, NumberObject
from ..generic._data_structures import DictionaryObject


class AnnotationDictionary(DictionaryObject, ABC):
    def __init__(self) -> None:
        super().__init__()

        from ..generic._base import NameObject  # noqa: PLC0415

        # /Rect should not be added here as Polygon and PolyLine can automatically set it
        self[NameObject("/Type")] = NameObject("/Annot")
        # The flags were NOT added to the constructor on purpose:
        # We expect that most users don't want to change the default.
        # If they do, they can use the property. The default is 0.

    @property
    def flags(self) -> AnnotationFlag:
        return self.get(NameObject("/F"), AnnotationFlag(0))

    @flags.setter
    def flags(self, value: AnnotationFlag) -> None:
        self[NameObject("/F")] = NumberObject(value)


NO_FLAGS = AnnotationFlag(0)
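
Every annotation inherits the `flags` property above, so `AnnotationFlag` combinations can be assigned directly; a short sketch using the `Text` class from this diff (rect values are placeholders):

from pypdf.annotations import Text
from pypdf.constants import AnnotationFlag

note = Text(rect=(10, 700, 30, 720), text="Check totals", open=True)
note.flags = AnnotationFlag.PRINT | AnnotationFlag.NO_ZOOM  # stored under /F
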
@@ -0,0 +1,305 @@
import sys
from abc import ABC
from typing import Any, Optional, Union

from ..constants import AnnotationFlag
from ..generic import ArrayObject, DictionaryObject
from ..generic._base import (
    BooleanObject,
    FloatObject,
    NameObject,
    NumberObject,
    TextStringObject,
)
from ..generic._rectangle import RectangleObject
from ..generic._utils import hex_to_rgb
from ._base import NO_FLAGS, AnnotationDictionary

if sys.version_info[:2] >= (3, 10):
    from typing import TypeAlias
else:
    # PEP 613 introduced typing.TypeAlias with Python 3.10
    # For older Python versions, the backport typing_extensions is necessary:
    from typing_extensions import TypeAlias


Vertex: TypeAlias = tuple[float, float]


def _get_bounding_rectangle(vertices: list[Vertex]) -> RectangleObject:
    x_min, y_min = vertices[0][0], vertices[0][1]
    x_max, y_max = vertices[0][0], vertices[0][1]
    for x, y in vertices:
        x_min = min(x_min, x)
        y_min = min(y_min, y)
        x_max = max(x_max, x)
        y_max = max(y_max, y)
    return RectangleObject((x_min, y_min, x_max, y_max))


class MarkupAnnotation(AnnotationDictionary, ABC):
    """
    Base class for all markup annotations.

    Args:
        title_bar: Text to be displayed in the title bar of the annotation;
            by convention this is the name of the author

    """

    def __init__(self, *, title_bar: Optional[str] = None) -> None:
        if title_bar is not None:
            self[NameObject("/T")] = TextStringObject(title_bar)


class Text(MarkupAnnotation):
    """
    A text annotation.

    Args:
        rect: array of four integers ``[xLL, yLL, xUR, yUR]``
            specifying the clickable rectangular area
        text: The text that is added to the document
        open: Whether the annotation should initially be displayed open
        flags: Annotation flags (see AnnotationFlag)

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = NO_FLAGS,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self[NameObject("/Subtype")] = NameObject("/Text")
        self[NameObject("/Rect")] = RectangleObject(rect)
        self[NameObject("/Contents")] = TextStringObject(text)
        self[NameObject("/Open")] = BooleanObject(open)
        self[NameObject("/Flags")] = NumberObject(flags)


class FreeText(MarkupAnnotation):
    """A FreeText annotation"""

    def __init__(
        self,
        *,
        text: str,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: Optional[str] = "000000",
        background_color: Optional[str] = "ffffff",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self[NameObject("/Subtype")] = NameObject("/FreeText")
        self[NameObject("/Rect")] = RectangleObject(rect)

        # Table 225 of the 1.7 reference ("CSS2 style attributes used in rich text strings")
        font_str = "font: "
        if italic:
            font_str = f"{font_str}italic "
        else:
            font_str = f"{font_str}normal "
        if bold:
            font_str = f"{font_str}bold "
        else:
            font_str = f"{font_str}normal "
        font_str = f"{font_str}{font_size} {font}"
        font_str = f"{font_str};text-align:left;color:#{font_color}"

        default_appearance_string = ""
        if border_color:
            for st in hex_to_rgb(border_color):
                default_appearance_string = f"{default_appearance_string}{st} "
            default_appearance_string = f"{default_appearance_string}rg"

        self.update(
            {
                NameObject("/Subtype"): NameObject("/FreeText"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Contents"): TextStringObject(text),
                # font size color
                NameObject("/DS"): TextStringObject(font_str),
                NameObject("/DA"): TextStringObject(default_appearance_string),
            }
        )
        if border_color is None:
            # Border Style
            self[NameObject("/BS")] = DictionaryObject(
                {
                    # width of 0 means no border
                    NameObject("/W"): NumberObject(0)
                }
            )
        if background_color is not None:
            self[NameObject("/C")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(background_color)]
            )


class Line(MarkupAnnotation):
    def __init__(
        self,
        p1: Vertex,
        p2: Vertex,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str = "",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Line"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/L"): ArrayObject(
                    [
                        FloatObject(p1[0]),
                        FloatObject(p1[1]),
                        FloatObject(p2[0]),
                        FloatObject(p2[1]),
                    ]
                ),
                NameObject("/LE"): ArrayObject(
                    [
                        NameObject("/None"),
                        NameObject("/None"),
                    ]
                ),
                NameObject("/IC"): ArrayObject(
                    [
                        FloatObject(0.5),
                        FloatObject(0.5),
                        FloatObject(0.5),
                    ]
                ),
                NameObject("/Contents"): TextStringObject(text),
            }
        )


class PolyLine(MarkupAnnotation):
    def __init__(
        self,
        vertices: list[Vertex],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if len(vertices) == 0:
            raise ValueError("A polyline needs at least 1 vertex with two coordinates")
        coord_list = []
        for x, y in vertices:
            coord_list.append(NumberObject(x))
            coord_list.append(NumberObject(y))
        self.update(
            {
                NameObject("/Subtype"): NameObject("/PolyLine"),
                NameObject("/Vertices"): ArrayObject(coord_list),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )


class Rectangle(MarkupAnnotation):
    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Square"),
                NameObject("/Rect"): RectangleObject(rect),
            }
        )

        if interior_color:
            self[NameObject("/IC")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(interior_color)]
            )


class Highlight(MarkupAnnotation):
    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        quad_points: ArrayObject,
        highlight_color: str = "ff0000",
        printing: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Highlight"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/QuadPoints"): quad_points,
                NameObject("/C"): ArrayObject(
                    [FloatObject(n) for n in hex_to_rgb(highlight_color)]
                ),
            }
        )
        if printing:
            self.flags = AnnotationFlag.PRINT


class Ellipse(MarkupAnnotation):
    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Circle"),
                NameObject("/Rect"): RectangleObject(rect),
            }
        )

        if interior_color:
            self[NameObject("/IC")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(interior_color)]
            )


class Polygon(MarkupAnnotation):
    def __init__(
        self,
        vertices: list[tuple[float, float]],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if len(vertices) == 0:
            raise ValueError("A polygon needs at least 1 vertex with two coordinates")

        coord_list = []
        for x, y in vertices:
            coord_list.append(NumberObject(x))
            coord_list.append(NumberObject(y))
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Polygon"),
                NameObject("/Vertices"): ArrayObject(coord_list),
                NameObject("/IT"): NameObject("/PolygonCloud"),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )
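
A hedged construction sketch for one of the classes above; `/QuadPoints` lists the four corners of each highlighted span as x1 y1 x2 y2 x3 y3 x4 y4, and all coordinates here are placeholders:

from pypdf.annotations import Highlight
from pypdf.generic import ArrayObject, FloatObject

quad = ArrayObject([FloatObject(v) for v in (50, 560, 150, 560, 50, 540, 150, 540)])
mark = Highlight(
    rect=(50, 540, 150, 560),
    quad_points=quad,
    highlight_color="ffff00",
    printing=True,  # sets AnnotationFlag.PRINT via the flags property
)
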
@@ -0,0 +1,106 @@
from typing import TYPE_CHECKING, Any, Optional, Union

from ..generic._base import (
    BooleanObject,
    NameObject,
    NumberObject,
    TextStringObject,
)
from ..generic._data_structures import ArrayObject, DictionaryObject
from ..generic._fit import DEFAULT_FIT, Fit
from ..generic._rectangle import RectangleObject
from ._base import AnnotationDictionary


class Link(AnnotationDictionary):
    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if TYPE_CHECKING:
            from ..types import BorderArrayType  # noqa: PLC0415

        is_external = url is not None
        is_internal = target_page_index is not None
        if not is_external and not is_internal:
            raise ValueError(
                "Either 'url' or 'target_page_index' has to be provided. Both were None."
            )
        if is_external and is_internal:
            raise ValueError(
                "Only one of 'url' and 'target_page_index' may be provided. "
                f"{url=}, {target_page_index=}"
            )

        border_arr: BorderArrayType
        if border is not None:
            border_arr = [NumberObject(n) for n in border[:3]]
            if len(border) == 4:
                dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
                border_arr.append(dash_pattern)
        else:
            border_arr = [NumberObject(0)] * 3

        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Link"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Border"): ArrayObject(border_arr),
            }
        )
        if is_external:
            self[NameObject("/A")] = DictionaryObject(
                {
                    NameObject("/S"): NameObject("/URI"),
                    NameObject("/Type"): NameObject("/Action"),
                    NameObject("/URI"): TextStringObject(url),
                }
            )
        if is_internal:
            # This needs to be updated later!
            dest_deferred = DictionaryObject(
                {
                    "target_page_index": NumberObject(target_page_index),
                    "fit": NameObject(fit.fit_type),
                    "fit_args": fit.fit_args,
                }
            )
            self[NameObject("/Dest")] = dest_deferred


class Popup(AnnotationDictionary):
    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        parent: Optional[DictionaryObject] = None,
        open: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Popup"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Open"): BooleanObject(open),
            }
        )
        if parent:
            # This needs to be an indirect object
            try:
                self[NameObject("/Parent")] = parent.indirect_reference
            except AttributeError:
                from .._utils import logger_warning  # noqa: PLC0415

                logger_warning(
                    "Unregistered Parent object: no /Parent field set",
                    __name__,
                )
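
A sketch of the two mutually exclusive `Link` modes defined above (rectangles and targets are placeholders):

from pypdf.annotations import Link

external = Link(rect=(50, 50, 200, 80), url="https://example.com")
internal = Link(rect=(50, 100, 200, 130), target_page_index=2)
# The internal variant stores the deferred /Dest dictionary built above;
# pypdf replaces it with a real destination when the annotation is
# attached to a writer.
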
796
venv/lib/python3.12/site-packages/pypdf/constants.py
Normal file
@@ -0,0 +1,796 @@
"""Various constants, enums, and flags to aid readability."""

from enum import Enum, IntFlag, auto, unique


class StrEnum(str, Enum):  # Once we are on Python 3.11+: enum.StrEnum
    def __str__(self) -> str:
        return str(self.value)


class Core:
    """Keywords that don't quite belong anywhere else."""

    OUTLINES = "/Outlines"
    THREADS = "/Threads"
    PAGE = "/Page"
    PAGES = "/Pages"
    CATALOG = "/Catalog"


class TrailerKeys:
    SIZE = "/Size"
    PREV = "/Prev"
    ROOT = "/Root"
    ENCRYPT = "/Encrypt"
    INFO = "/Info"
    ID = "/ID"


class CatalogAttributes:
    NAMES = "/Names"
    DESTS = "/Dests"


class EncryptionDictAttributes:
    """
    Additional encryption dictionary entries for the standard security handler.

    Table 3.19, Page 122.
    Table 21 of the 2.0 manual.
    """

    R = "/R"  # number, required; revision of the standard security handler
    O = "/O"  # 32-byte string, required  # noqa: E741
    U = "/U"  # 32-byte string, required
    P = "/P"  # integer flag, required; permitted operations
    ENCRYPT_METADATA = "/EncryptMetadata"  # boolean flag, optional


class UserAccessPermissions(IntFlag):
    """
    Table 3.20 User access permissions.
    Table 22 of the 2.0 manual.
    """

    R1 = 1
    R2 = 2
    PRINT = 4
    MODIFY = 8
    EXTRACT = 16
    ADD_OR_MODIFY = 32
    R7 = 64
    R8 = 128
    FILL_FORM_FIELDS = 256
    EXTRACT_TEXT_AND_GRAPHICS = 512
    ASSEMBLE_DOC = 1024
    PRINT_TO_REPRESENTATION = 2048
    R13 = 2**12
    R14 = 2**13
    R15 = 2**14
    R16 = 2**15
    R17 = 2**16
    R18 = 2**17
    R19 = 2**18
    R20 = 2**19
    R21 = 2**20
    R22 = 2**21
    R23 = 2**22
    R24 = 2**23
    R25 = 2**24
    R26 = 2**25
    R27 = 2**26
    R28 = 2**27
    R29 = 2**28
    R30 = 2**29
    R31 = 2**30
    R32 = 2**31

    @classmethod
    def _is_reserved(cls, name: str) -> bool:
        """Check if the given name corresponds to a reserved flag entry."""
        return name.startswith("R") and name[1:].isdigit()

    @classmethod
    def _is_active(cls, name: str) -> bool:
        """Check if the given reserved name defaults to 1 = active."""
        return name not in {"R1", "R2"}

    def to_dict(self) -> dict[str, bool]:
        """Convert the given flag value to a corresponding verbose name mapping."""
        result: dict[str, bool] = {}
        for name, flag in UserAccessPermissions.__members__.items():
            if UserAccessPermissions._is_reserved(name):
                continue
            result[name.lower()] = (self & flag) == flag
        return result

    @classmethod
    def from_dict(cls, value: dict[str, bool]) -> "UserAccessPermissions":
        """Convert the verbose name mapping to the corresponding flag value."""
        value_copy = value.copy()
        result = cls(0)
        for name, flag in cls.__members__.items():
            if cls._is_reserved(name):
                # Reserved names have a required value. Use it.
                if cls._is_active(name):
                    result |= flag
                continue
            is_active = value_copy.pop(name.lower(), False)
            if is_active:
                result |= flag
        if value_copy:
            raise ValueError(f"Unknown dictionary keys: {value_copy!r}")
        return result

    @classmethod
    def all(cls) -> "UserAccessPermissions":
        return cls((2**32 - 1) - cls.R1 - cls.R2)

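The converters above round-trip through plain dictionaries, with the caveat that `from_dict` re-applies the reserved bits; a short sketch:

from pypdf.constants import UserAccessPermissions

perms = UserAccessPermissions.PRINT | UserAccessPermissions.EXTRACT
flags = perms.to_dict()  # {"print": True, "modify": False, "extract": True, ...}
restored = UserAccessPermissions.from_dict(flags)
# from_dict() also sets the reserved bits (R7, R8, R13..R32), which the
# handler requires to be 1, so `restored` is a superset of `perms`.
assert perms & restored == perms
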
|
||||||
|
class Resources:
|
||||||
|
"""
|
||||||
|
Table 3.30 Entries in a resource dictionary.
|
||||||
|
Table 34 in the 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
EXT_G_STATE = "/ExtGState" # dictionary, optional
|
||||||
|
COLOR_SPACE = "/ColorSpace" # dictionary, optional
|
||||||
|
PATTERN = "/Pattern" # dictionary, optional
|
||||||
|
SHADING = "/Shading" # dictionary, optional
|
||||||
|
XOBJECT = "/XObject" # dictionary, optional
|
||||||
|
FONT = "/Font" # dictionary, optional
|
||||||
|
PROC_SET = "/ProcSet" # array, optional
|
||||||
|
PROPERTIES = "/Properties" # dictionary, optional
|
||||||
|
|
||||||
|
|
||||||
|
class PagesAttributes:
|
||||||
|
"""§7.7.3.2 of the 1.7 and 2.0 reference."""
|
||||||
|
|
||||||
|
TYPE = "/Type" # name, required; must be /Pages
|
||||||
|
PARENT = "/Parent" # dictionary, required; indirect reference to pages object
|
||||||
|
KIDS = "/Kids" # array, required; List of indirect references
|
||||||
|
COUNT = "/Count"
|
||||||
|
# integer, required; the number of leaf nodes (page objects)
|
||||||
|
# that are descendants of this node within the page tree
|
||||||
|
|
||||||
|
|
||||||
|
class PageAttributes:
|
||||||
|
"""§7.7.3.3 of the 1.7 and 2.0 reference."""
|
||||||
|
|
||||||
|
TYPE = "/Type" # name, required; must be /Page
|
||||||
|
PARENT = "/Parent" # dictionary, required; a pages object
|
||||||
|
LAST_MODIFIED = (
|
||||||
|
"/LastModified" # date, optional; date and time of last modification
|
||||||
|
)
|
||||||
|
RESOURCES = "/Resources" # dictionary, required if there are any
|
||||||
|
MEDIABOX = "/MediaBox" # rectangle, required; rectangle specifying page size
|
||||||
|
CROPBOX = "/CropBox" # rectangle, optional
|
||||||
|
BLEEDBOX = "/BleedBox" # rectangle, optional
|
||||||
|
TRIMBOX = "/TrimBox" # rectangle, optional
|
||||||
|
ARTBOX = "/ArtBox" # rectangle, optional
|
||||||
|
BOX_COLOR_INFO = "/BoxColorInfo" # dictionary, optional
|
||||||
|
CONTENTS = "/Contents" # stream or array, optional
|
||||||
|
ROTATE = "/Rotate" # integer, optional; page rotation in degrees
|
||||||
|
GROUP = "/Group" # dictionary, optional; page group
|
||||||
|
THUMB = "/Thumb" # stream, optional; indirect reference to image of the page
|
||||||
|
B = "/B" # array, optional
|
||||||
|
DUR = "/Dur" # number, optional
|
||||||
|
TRANS = "/Trans" # dictionary, optional
|
||||||
|
ANNOTS = "/Annots" # array, optional; an array of annotations
|
||||||
|
AA = "/AA" # dictionary, optional
|
||||||
|
METADATA = "/Metadata" # stream, optional
|
||||||
|
PIECE_INFO = "/PieceInfo" # dictionary, optional
|
||||||
|
STRUCT_PARENTS = "/StructParents" # integer, optional
|
||||||
|
ID = "/ID" # byte string, optional
|
||||||
|
PZ = "/PZ" # number, optional
|
||||||
|
SEPARATION_INFO = "/SeparationInfo" # dictionary, optional
|
||||||
|
TABS = "/Tabs" # name, optional
|
||||||
|
TEMPLATE_INSTANTIATED = "/TemplateInstantiated" # name, optional
|
||||||
|
PRES_STEPS = "/PresSteps" # dictionary, optional
|
||||||
|
USER_UNIT = "/UserUnit" # number, optional
|
||||||
|
VP = "/VP" # dictionary, optional
|
||||||
|
AF = "/AF" # array of dictionaries, optional
|
||||||
|
OUTPUT_INTENTS = "/OutputIntents" # array, optional
|
||||||
|
D_PART = "/DPart" # dictionary, required, if this page is within the range of a DPart, not permitted otherwise
|
||||||
|
|
||||||
|
|
||||||
|
class FileSpecificationDictionaryEntries:
|
||||||
|
"""Table 3.41 Entries in a file specification dictionary."""
|
||||||
|
|
||||||
|
Type = "/Type"
|
||||||
|
FS = "/FS" # The name of the file system to be used to interpret this file specification
|
||||||
|
F = "/F" # A file specification string of the form described in §3.10.1
|
||||||
|
UF = "/UF" # A Unicode string of the file as described in §3.10.1
|
||||||
|
DOS = "/DOS"
|
||||||
|
Mac = "/Mac"
|
||||||
|
Unix = "/Unix"
|
||||||
|
ID = "/ID"
|
||||||
|
V = "/V"
|
||||||
|
EF = "/EF" # dictionary, containing a subset of the keys F, UF, DOS, Mac, and Unix
|
||||||
|
RF = "/RF" # dictionary, containing arrays of /EmbeddedFile
|
||||||
|
DESC = "/Desc" # description of the file
|
||||||
|
Cl = "/Cl"
|
||||||
|
|
||||||
|
|
||||||
|
class StreamAttributes:
|
||||||
|
"""
|
||||||
|
Table 4.2.
|
||||||
|
Table 5 in the 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
LENGTH = "/Length" # integer, required
|
||||||
|
FILTER = "/Filter" # name or array of names, optional
|
||||||
|
DECODE_PARMS = "/DecodeParms" # variable, optional -- 'decodeParams is wrong
|
||||||
|
|
||||||
|
|
||||||
|
@unique
|
||||||
|
class FilterTypes(StrEnum):
|
||||||
|
"""§7.4 of the 1.7 and 2.0 references."""
|
||||||
|
|
||||||
|
ASCII_HEX_DECODE = "/ASCIIHexDecode" # abbreviation: AHx
|
||||||
|
ASCII_85_DECODE = "/ASCII85Decode" # abbreviation: A85
|
||||||
|
LZW_DECODE = "/LZWDecode" # abbreviation: LZW
|
||||||
|
FLATE_DECODE = "/FlateDecode" # abbreviation: Fl
|
||||||
|
RUN_LENGTH_DECODE = "/RunLengthDecode" # abbreviation: RL
|
||||||
|
CCITT_FAX_DECODE = "/CCITTFaxDecode" # abbreviation: CCF
|
||||||
|
DCT_DECODE = "/DCTDecode" # abbreviation: DCT
|
||||||
|
JPX_DECODE = "/JPXDecode"
|
||||||
|
JBIG2_DECODE = "/JBIG2Decode"
|
||||||
|
|
||||||
|
|
||||||
|
class FilterTypeAbbreviations:
|
||||||
|
"""§8.9.7 of the 1.7 and 2.0 references."""
|
||||||
|
|
||||||
|
AHx = "/AHx"
|
||||||
|
A85 = "/A85"
|
||||||
|
LZW = "/LZW"
|
||||||
|
FL = "/Fl"
|
||||||
|
RL = "/RL"
|
||||||
|
CCF = "/CCF"
|
||||||
|
DCT = "/DCT"
|
||||||
|
|
||||||
|
|
||||||
|
class LzwFilterParameters:
|
||||||
|
"""
|
||||||
|
Table 4.4.
|
||||||
|
Table 8 in the 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
PREDICTOR = "/Predictor" # integer
|
||||||
|
COLORS = "/Colors" # integer
|
||||||
|
BITS_PER_COMPONENT = "/BitsPerComponent" # integer
|
||||||
|
COLUMNS = "/Columns" # integer
|
||||||
|
EARLY_CHANGE = "/EarlyChange" # integer
|
||||||
|
|
||||||
|
|
||||||
|
class CcittFaxDecodeParameters:
|
||||||
|
"""
|
||||||
|
Table 4.5.
|
||||||
|
Table 11 in the 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
K = "/K" # integer
|
||||||
|
END_OF_LINE = "/EndOfLine" # boolean
|
||||||
|
ENCODED_BYTE_ALIGN = "/EncodedByteAlign" # boolean
|
||||||
|
COLUMNS = "/Columns" # integer
|
||||||
|
ROWS = "/Rows" # integer
|
||||||
|
END_OF_BLOCK = "/EndOfBlock" # boolean
|
||||||
|
BLACK_IS_1 = "/BlackIs1" # boolean
|
||||||
|
DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError" # integer
|
||||||
|
|
||||||
|
|
||||||
|
class ImageAttributes:
|
||||||
|
"""§11.6.5 of the 1.7 and 2.0 references."""
|
||||||
|
|
||||||
|
TYPE = "/Type" # name, required; must be /XObject
|
||||||
|
SUBTYPE = "/Subtype" # name, required; must be /Image
|
||||||
|
NAME = "/Name" # name, required
|
||||||
|
WIDTH = "/Width" # integer, required
|
||||||
|
HEIGHT = "/Height" # integer, required
|
||||||
|
BITS_PER_COMPONENT = "/BitsPerComponent" # integer, required
|
||||||
|
COLOR_SPACE = "/ColorSpace" # name, required
|
||||||
|
DECODE = "/Decode" # array, optional
|
||||||
|
INTENT = "/Intent" # string, optional
|
||||||
|
INTERPOLATE = "/Interpolate" # boolean, optional
|
||||||
|
IMAGE_MASK = "/ImageMask" # boolean, optional
|
||||||
|
MASK = "/Mask" # 1-bit image mask stream
|
||||||
|
S_MASK = "/SMask" # dictionary or name, optional
|
||||||
|
|
||||||
|
|
||||||
|
class ColorSpaces:
|
||||||
|
DEVICE_RGB = "/DeviceRGB"
|
||||||
|
DEVICE_CMYK = "/DeviceCMYK"
|
||||||
|
DEVICE_GRAY = "/DeviceGray"
|
||||||
|
|
||||||
|
|
||||||
|
class TypArguments:
|
||||||
|
"""Table 8.2 of the PDF 1.7 reference."""
|
||||||
|
|
||||||
|
LEFT = "/Left"
|
||||||
|
RIGHT = "/Right"
|
||||||
|
BOTTOM = "/Bottom"
|
||||||
|
TOP = "/Top"
|
||||||
|
|
||||||
|
|
||||||
|
class TypFitArguments:
|
||||||
|
"""Table 8.2 of the PDF 1.7 reference."""
|
||||||
|
|
||||||
|
XYZ = "/XYZ"
|
||||||
|
FIT = "/Fit"
|
||||||
|
FIT_H = "/FitH"
|
||||||
|
FIT_V = "/FitV"
|
||||||
|
FIT_R = "/FitR"
|
||||||
|
FIT_B = "/FitB"
|
||||||
|
FIT_BH = "/FitBH"
|
||||||
|
FIT_BV = "/FitBV"
|
||||||
|
|
||||||
|
|
||||||
|
class GoToActionArguments:
|
||||||
|
S = "/S" # name, required: type of action
|
||||||
|
D = "/D" # name, byte string, or array, required: destination to jump to
|
||||||
|
SD = "/SD" # array, optional: structure destination to jump to
|
||||||
|
|
||||||
|
|
||||||
|
class AnnotationDictionaryAttributes:
|
||||||
|
"""Table 8.15 Entries common to all annotation dictionaries."""
|
||||||
|
|
||||||
|
Type = "/Type"
|
||||||
|
Subtype = "/Subtype"
|
||||||
|
Rect = "/Rect"
|
||||||
|
Contents = "/Contents"
|
||||||
|
P = "/P"
|
||||||
|
NM = "/NM"
|
||||||
|
M = "/M"
|
||||||
|
F = "/F"
|
||||||
|
AP = "/AP"
|
||||||
|
AS = "/AS"
|
||||||
|
DA = "/DA"
|
||||||
|
Border = "/Border"
|
||||||
|
C = "/C"
|
||||||
|
StructParent = "/StructParent"
|
||||||
|
OC = "/OC"
|
||||||
|
|
||||||
|
|
||||||
|
class InteractiveFormDictEntries:
|
||||||
|
Fields = "/Fields"
|
||||||
|
NeedAppearances = "/NeedAppearances"
|
||||||
|
SigFlags = "/SigFlags"
|
||||||
|
CO = "/CO"
|
||||||
|
DR = "/DR"
|
||||||
|
DA = "/DA"
|
||||||
|
Q = "/Q"
|
||||||
|
XFA = "/XFA"
|
||||||
|
|
||||||
|
|
||||||
|
class FieldDictionaryAttributes:
|
||||||
|
"""
|
||||||
|
Entries common to all field dictionaries (Table 8.69 PDF 1.7 reference)
|
||||||
|
(*very partially documented here*).
|
||||||
|
|
||||||
|
FFBits provides the constants used for `/Ff` from Table 8.70/8.75/8.77/8.79
|
||||||
|
"""
|
||||||
|
|
||||||
|
FT = "/FT" # name, required for terminal fields
|
||||||
|
Parent = "/Parent" # dictionary, required for children
|
||||||
|
Kids = "/Kids" # array, sometimes required
|
||||||
|
T = "/T" # text string, optional
|
||||||
|
TU = "/TU" # text string, optional
|
||||||
|
TM = "/TM" # text string, optional
|
||||||
|
Ff = "/Ff" # integer, optional
|
||||||
|
V = "/V" # text string or array, optional
|
||||||
|
DV = "/DV" # text string, optional
|
||||||
|
AA = "/AA" # dictionary, optional
|
||||||
|
Opt = "/Opt" # array, optional
|
||||||
|
|
||||||
|
class FfBits(IntFlag):
|
||||||
|
"""
|
||||||
|
Ease building /Ff flags
|
||||||
|
Some entries may be specific to:
|
||||||
|
|
||||||
|
* Text (Tx) (Table 8.75 PDF 1.7 reference)
|
||||||
|
* Buttons (Btn) (Table 8.77 PDF 1.7 reference)
|
||||||
|
* Choice (Ch) (Table 8.79 PDF 1.7 reference)
|
||||||
|
"""
|
||||||
|
|
||||||
|
ReadOnly = 1 << 0
|
||||||
|
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||||
|
Required = 1 << 1
|
||||||
|
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||||
|
NoExport = 1 << 2
|
||||||
|
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||||
|
|
||||||
|
Multiline = 1 << 12
|
||||||
|
"""Tx"""
|
||||||
|
Password = 1 << 13
|
||||||
|
"""Tx"""
|
||||||
|
|
||||||
|
NoToggleToOff = 1 << 14
|
||||||
|
"""Btn"""
|
||||||
|
Radio = 1 << 15
|
||||||
|
"""Btn"""
|
||||||
|
Pushbutton = 1 << 16
|
||||||
|
"""Btn"""
|
||||||
|
|
||||||
|
Combo = 1 << 17
|
||||||
|
"""Ch"""
|
||||||
|
Edit = 1 << 18
|
||||||
|
"""Ch"""
|
||||||
|
Sort = 1 << 19
|
||||||
|
"""Ch"""
|
||||||
|
|
||||||
|
FileSelect = 1 << 20
|
||||||
|
"""Tx"""
|
||||||
|
|
||||||
|
MultiSelect = 1 << 21
|
||||||
|
"""Tx"""
|
||||||
|
|
||||||
|
DoNotSpellCheck = 1 << 22
|
||||||
|
"""Tx/Ch"""
|
||||||
|
DoNotScroll = 1 << 23
|
||||||
|
"""Tx"""
|
||||||
|
Comb = 1 << 24
|
||||||
|
"""Tx"""
|
||||||
|
|
||||||
|
RadiosInUnison = 1 << 25
|
||||||
|
"""Btn"""
|
||||||
|
|
||||||
|
RichText = 1 << 25
|
||||||
|
"""Tx"""
|
||||||
|
|
||||||
|
CommitOnSelChange = 1 << 26
|
||||||
|
"""Ch"""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def attributes(cls) -> tuple[str, ...]:
|
||||||
|
"""
|
||||||
|
Get a tuple of all the attributes present in a Field Dictionary.
|
||||||
|
|
||||||
|
This method returns a tuple of all the attribute constants defined in
|
||||||
|
the FieldDictionaryAttributes class. These attributes correspond to the
|
||||||
|
entries that are common to all field dictionaries as specified in the
|
||||||
|
PDF 1.7 reference.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tuple containing all the attribute constants.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
cls.TM,
|
||||||
|
cls.T,
|
||||||
|
cls.FT,
|
||||||
|
cls.Parent,
|
||||||
|
cls.TU,
|
||||||
|
cls.Ff,
|
||||||
|
cls.V,
|
||||||
|
cls.DV,
|
||||||
|
cls.Kids,
|
||||||
|
cls.AA,
|
||||||
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def attributes_dict(cls) -> dict[str, str]:
|
||||||
|
"""
|
||||||
|
Get a dictionary of attribute keys and their human-readable names.
|
||||||
|
|
||||||
|
This method returns a dictionary where the keys are the attribute
|
||||||
|
constants defined in the FieldDictionaryAttributes class and the values
|
||||||
|
are their corresponding human-readable names. These attributes
|
||||||
|
correspond to the entries that are common to all field dictionaries as
|
||||||
|
specified in the PDF 1.7 reference.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A dictionary containing attribute keys and their names.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
cls.FT: "Field Type",
|
||||||
|
cls.Parent: "Parent",
|
||||||
|
cls.T: "Field Name",
|
||||||
|
cls.TU: "Alternate Field Name",
|
||||||
|
cls.TM: "Mapping Name",
|
||||||
|
cls.Ff: "Field Flags",
|
||||||
|
cls.V: "Value",
|
||||||
|
cls.DV: "Default Value",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CheckboxRadioButtonAttributes:
|
||||||
|
"""Table 8.76 Field flags common to all field types."""
|
||||||
|
|
||||||
|
Opt = "/Opt" # Options, Optional
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def attributes(cls) -> tuple[str, ...]:
|
||||||
|
"""
|
||||||
|
Get a tuple of all the attributes present in a Field Dictionary.
|
||||||
|
|
||||||
|
This method returns a tuple of all the attribute constants defined in
|
||||||
|
the CheckboxRadioButtonAttributes class. These attributes correspond to
|
||||||
|
the entries that are common to all field dictionaries as specified in
|
||||||
|
the PDF 1.7 reference.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tuple containing all the attribute constants.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return (cls.Opt,)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def attributes_dict(cls) -> dict[str, str]:
|
||||||
|
"""
|
||||||
|
Get a dictionary of attribute keys and their human-readable names.
|
||||||
|
|
||||||
|
This method returns a dictionary where the keys are the attribute
|
||||||
|
constants defined in the CheckboxRadioButtonAttributes class and the
|
||||||
|
values are their corresponding human-readable names. These attributes
|
||||||
|
correspond to the entries that are common to all field dictionaries as
|
||||||
|
specified in the PDF 1.7 reference.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A dictionary containing attribute keys and their names.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
cls.Opt: "Options",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class FieldFlag(IntFlag):
|
||||||
|
"""Table 8.70 Field flags common to all field types."""
|
||||||
|
|
||||||
|
READ_ONLY = 1
|
||||||
|
REQUIRED = 2
|
||||||
|
NO_EXPORT = 4
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentInformationAttributes:
|
||||||
|
"""Table 10.2 Entries in the document information dictionary."""
|
||||||
|
|
||||||
|
TITLE = "/Title" # text string, optional
|
||||||
|
AUTHOR = "/Author" # text string, optional
|
||||||
|
SUBJECT = "/Subject" # text string, optional
|
||||||
|
KEYWORDS = "/Keywords" # text string, optional
|
||||||
|
CREATOR = "/Creator" # text string, optional
|
||||||
|
PRODUCER = "/Producer" # text string, optional
|
||||||
|
CREATION_DATE = "/CreationDate" # date, optional
|
||||||
|
MOD_DATE = "/ModDate" # date, optional
|
||||||
|
TRAPPED = "/Trapped" # name, optional
|
||||||
|
|
||||||
|
|
||||||
|
class PageLayouts:
|
||||||
|
"""
|
||||||
|
Page 84, PDF 1.4 reference.
|
||||||
|
Page 115, PDF 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
SINGLE_PAGE = "/SinglePage"
|
||||||
|
ONE_COLUMN = "/OneColumn"
|
||||||
|
TWO_COLUMN_LEFT = "/TwoColumnLeft"
|
||||||
|
TWO_COLUMN_RIGHT = "/TwoColumnRight"
|
||||||
|
TWO_PAGE_LEFT = "/TwoPageLeft" # (PDF 1.5)
|
||||||
|
TWO_PAGE_RIGHT = "/TwoPageRight" # (PDF 1.5)
|
||||||
|
|
||||||
|
|
||||||
|
class GraphicsStateParameters:
|
||||||
|
"""Table 58 – Entries in a Graphics State Parameter Dictionary"""
|
||||||
|
|
||||||
|
TYPE = "/Type" # name, optional
|
||||||
|
LW = "/LW" # number, optional
|
||||||
|
LC = "/LC" # integer, optional
|
||||||
|
LJ = "/LJ" # integer, optional
|
||||||
|
ML = "/ML" # number, optional
|
||||||
|
D = "/D" # array, optional
|
||||||
|
RI = "/RI" # name, optional
|
||||||
|
OP = "/OP"
|
||||||
|
op = "/op"
|
||||||
|
OPM = "/OPM"
|
||||||
|
FONT = "/Font" # array, optional
|
||||||
|
BG = "/BG"
|
||||||
|
BG2 = "/BG2"
|
||||||
|
UCR = "/UCR"
|
||||||
|
UCR2 = "/UCR2"
|
||||||
|
TR = "/TR"
|
||||||
|
TR2 = "/TR2"
|
||||||
|
HT = "/HT"
|
||||||
|
FL = "/FL"
|
||||||
|
SM = "/SM"
|
||||||
|
SA = "/SA"
|
||||||
|
BM = "/BM"
|
||||||
|
S_MASK = "/SMask" # dictionary or name, optional
|
||||||
|
CA = "/CA"
|
||||||
|
ca = "/ca"
|
||||||
|
AIS = "/AIS"
|
||||||
|
TK = "/TK"
|
||||||
|
|
||||||
|
|
||||||
|
class CatalogDictionary:
|
||||||
|
"""§7.7.2 of the 1.7 and 2.0 references."""
|
||||||
|
|
||||||
|
TYPE = "/Type" # name, required; must be /Catalog
|
||||||
|
VERSION = "/Version" # name
|
||||||
|
EXTENSIONS = "/Extensions" # dictionary, optional; ISO 32000-1
|
||||||
|
PAGES = "/Pages" # dictionary, required
|
||||||
|
PAGE_LABELS = "/PageLabels" # number tree, optional
|
||||||
|
NAMES = "/Names" # dictionary, optional
|
||||||
|
DESTS = "/Dests" # dictionary, optional
|
||||||
|
VIEWER_PREFERENCES = "/ViewerPreferences" # dictionary, optional
|
||||||
|
PAGE_LAYOUT = "/PageLayout" # name, optional
|
||||||
|
PAGE_MODE = "/PageMode" # name, optional
|
||||||
|
OUTLINES = "/Outlines" # dictionary, optional
|
||||||
|
THREADS = "/Threads" # array, optional
|
||||||
|
OPEN_ACTION = "/OpenAction" # array or dictionary or name, optional
|
||||||
|
AA = "/AA" # dictionary, optional
|
||||||
|
URI = "/URI" # dictionary, optional
|
||||||
|
ACRO_FORM = "/AcroForm" # dictionary, optional
|
||||||
|
METADATA = "/Metadata" # stream, optional
|
||||||
|
STRUCT_TREE_ROOT = "/StructTreeRoot" # dictionary, optional
|
||||||
|
MARK_INFO = "/MarkInfo" # dictionary, optional
|
||||||
|
LANG = "/Lang" # text string, optional
|
||||||
|
SPIDER_INFO = "/SpiderInfo" # dictionary, optional
|
||||||
|
OUTPUT_INTENTS = "/OutputIntents" # array, optional
|
||||||
|
PIECE_INFO = "/PieceInfo" # dictionary, optional
|
||||||
|
OC_PROPERTIES = "/OCProperties" # dictionary, optional
|
||||||
|
PERMS = "/Perms" # dictionary, optional
|
||||||
|
LEGAL = "/Legal" # dictionary, optional
|
||||||
|
REQUIREMENTS = "/Requirements" # array, optional
|
||||||
|
COLLECTION = "/Collection" # dictionary, optional
|
||||||
|
NEEDS_RENDERING = "/NeedsRendering" # boolean, optional
|
||||||
|
DSS = "/DSS" # dictionary, optional
|
||||||
|
AF = "/AF" # array of dictionaries, optional
|
||||||
|
D_PART_ROOT = "/DPartRoot" # dictionary, optional
|
||||||
|
|
||||||
|
|
||||||
|
class OutlineFontFlag(IntFlag):
|
||||||
|
"""A class used as an enumerable flag for formatting an outline font."""
|
||||||
|
|
||||||
|
italic = 1
|
||||||
|
bold = 2
|
||||||
|
|
||||||
|
|
||||||
|
class PageLabelStyle:
|
||||||
|
"""
|
||||||
|
Table 8.10 in the 1.7 reference.
|
||||||
|
Table 161 in the 2.0 reference.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DECIMAL = "/D" # Decimal Arabic numerals
|
||||||
|
UPPERCASE_ROMAN = "/R" # Uppercase Roman numerals
|
||||||
|
LOWERCASE_ROMAN = "/r" # Lowercase Roman numerals
|
||||||
|
UPPERCASE_LETTER = "/A" # Uppercase letters
|
||||||
|
LOWERCASE_LETTER = "/a" # Lowercase letters
|
||||||
|
|
||||||
|
|
||||||
|
class AnnotationFlag(IntFlag):
|
||||||
|
"""See §12.5.3 "Annotation Flags"."""
|
||||||
|
|
||||||
|
INVISIBLE = 1
|
||||||
|
HIDDEN = 2
|
||||||
|
PRINT = 4
|
||||||
|
NO_ZOOM = 8
|
||||||
|
NO_ROTATE = 16
|
||||||
|
NO_VIEW = 32
|
||||||
|
READ_ONLY = 64
|
||||||
|
LOCKED = 128
|
||||||
|
TOGGLE_NO_VIEW = 256
|
||||||
|
LOCKED_CONTENTS = 512
|
||||||
|
|
||||||
|
|
||||||
|
PDF_KEYS = (
|
||||||
|
AnnotationDictionaryAttributes,
|
||||||
|
CatalogAttributes,
|
||||||
|
CatalogDictionary,
|
||||||
|
CcittFaxDecodeParameters,
|
||||||
|
CheckboxRadioButtonAttributes,
|
||||||
|
ColorSpaces,
|
||||||
|
Core,
|
||||||
|
DocumentInformationAttributes,
|
||||||
|
EncryptionDictAttributes,
|
||||||
|
FieldDictionaryAttributes,
|
||||||
|
FileSpecificationDictionaryEntries,
|
||||||
|
FilterTypeAbbreviations,
|
||||||
|
FilterTypes,
|
||||||
|
GoToActionArguments,
|
||||||
|
GraphicsStateParameters,
|
||||||
|
ImageAttributes,
|
||||||
|
InteractiveFormDictEntries,
|
||||||
|
LzwFilterParameters,
|
||||||
|
PageAttributes,
|
||||||
|
PageLayouts,
|
||||||
|
PagesAttributes,
|
||||||
|
Resources,
|
||||||
|
StreamAttributes,
|
||||||
|
TrailerKeys,
|
||||||
|
TypArguments,
|
||||||
|
TypFitArguments,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ImageType(IntFlag):
    NONE = 0
    XOBJECT_IMAGES = auto()
    INLINE_IMAGES = auto()
    DRAWING_IMAGES = auto()
    ALL = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
    IMAGES = ALL  # for consistency with ObjectDeletionFlag

_INLINE_IMAGE_VALUE_MAPPING = {
    "/G": "/DeviceGray",
    "/RGB": "/DeviceRGB",
    "/CMYK": "/DeviceCMYK",
    "/I": "/Indexed",
    "/AHx": "/ASCIIHexDecode",
    "/A85": "/ASCII85Decode",
    "/LZW": "/LZWDecode",
    "/Fl": "/FlateDecode",
    "/RL": "/RunLengthDecode",
    "/CCF": "/CCITTFaxDecode",
    "/DCT": "/DCTDecode",
    "/DeviceGray": "/DeviceGray",
    "/DeviceRGB": "/DeviceRGB",
    "/DeviceCMYK": "/DeviceCMYK",
    "/Indexed": "/Indexed",
    "/ASCIIHexDecode": "/ASCIIHexDecode",
    "/ASCII85Decode": "/ASCII85Decode",
    "/LZWDecode": "/LZWDecode",
    "/FlateDecode": "/FlateDecode",
    "/RunLengthDecode": "/RunLengthDecode",
    "/CCITTFaxDecode": "/CCITTFaxDecode",
    "/DCTDecode": "/DCTDecode",
    "/RelativeColorimetric": "/RelativeColorimetric",
}

_INLINE_IMAGE_KEY_MAPPING = {
    "/BPC": "/BitsPerComponent",
    "/CS": "/ColorSpace",
    "/D": "/Decode",
    "/DP": "/DecodeParms",
    "/F": "/Filter",
    "/H": "/Height",
    "/W": "/Width",
    "/I": "/Interpolate",
    "/Intent": "/Intent",
    "/IM": "/ImageMask",
    "/BitsPerComponent": "/BitsPerComponent",
    "/ColorSpace": "/ColorSpace",
    "/Decode": "/Decode",
    "/DecodeParms": "/DecodeParms",
    "/Filter": "/Filter",
    "/Height": "/Height",
    "/Width": "/Width",
    "/Interpolate": "/Interpolate",
    "/ImageMask": "/ImageMask",
}


class AFRelationship:
    """
    Associated file relationship types, defining the relationship between
    the PDF component and the associated file.

    Defined in table 43 of the PDF 2.0 reference.
    """

    SOURCE = "/Source"  # Original content source
    DATA = "/Data"  # Base data for visual presentation
    ALTERNATIVE = "/Alternative"  # Alternative content representation
    SUPPLEMENT = "/Supplement"  # Supplemental representation of original source/data
    ENCRYPTED_PAYLOAD = "/EncryptedPayload"  # Encrypted payload document
    FORM_DATA = "/FormData"  # Data associated with AcroForm of this PDF
    SCHEMA = "/Schema"  # Schema definition for associated object
    UNSPECIFIED = "/Unspecified"  # Not known or cannot be described with values


class BorderStyles:
    """
    A class defining border styles used in PDF documents.

    Defined in table 168 of the PDF 2.0 reference.
    """

    BEVELED = "/B"
    DASHED = "/D"
    INSET = "/I"
    SOLID = "/S"
    UNDERLINED = "/U"
74  venv/lib/python3.12/site-packages/pypdf/errors.py  Normal file
@@ -0,0 +1,74 @@
"""
|
||||||
|
All errors/exceptions pypdf raises and all of the warnings it uses.
|
||||||
|
|
||||||
|
Please note that broken PDF files might cause other Exceptions.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class DeprecationError(Exception):
|
||||||
|
"""Raised when a deprecated feature is used."""
|
||||||
|
|
||||||
|
|
||||||
|
class DependencyError(Exception):
|
||||||
|
"""
|
||||||
|
Raised when a required dependency (a library or module that pypdf depends on)
|
||||||
|
is not available or cannot be imported.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class PyPdfError(Exception):
|
||||||
|
"""Base class for all exceptions raised by pypdf."""
|
||||||
|
|
||||||
|
|
||||||
|
class PdfReadError(PyPdfError):
|
||||||
|
"""Raised when there is an issue reading a PDF file."""
|
||||||
|
|
||||||
|
|
||||||
|
class PageSizeNotDefinedError(PyPdfError):
|
||||||
|
"""Raised when the page size of a PDF document is not defined."""
|
||||||
|
|
||||||
|
|
||||||
|
class PdfReadWarning(UserWarning):
|
||||||
|
"""Issued when there is a potential issue reading a PDF file, but it can still be read."""
|
||||||
|
|
||||||
|
|
||||||
|
class PdfStreamError(PdfReadError):
|
||||||
|
"""Raised when there is an issue reading the stream of data in a PDF file."""
|
||||||
|
|
||||||
|
|
||||||
|
class ParseError(PyPdfError):
|
||||||
|
"""
|
||||||
|
Raised when there is an issue parsing (analyzing and understanding the
|
||||||
|
structure and meaning of) a PDF file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class FileNotDecryptedError(PdfReadError):
|
||||||
|
"""
|
||||||
|
Raised when a PDF file that has been encrypted
|
||||||
|
(meaning it requires a password to be accessed) has not been successfully
|
||||||
|
decrypted.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class WrongPasswordError(FileNotDecryptedError):
|
||||||
|
"""Raised when the wrong password is used to try to decrypt an encrypted PDF file."""
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyFileError(PdfReadError):
|
||||||
|
"""Raised when a PDF file is empty or has no content."""
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyImageDataError(PyPdfError):
|
||||||
|
"""Raised when trying to process an image that has no data."""
|
||||||
|
|
||||||
|
|
||||||
|
STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
|
||||||
|
|
||||||
|
|
||||||
|
class LimitReachedError(PyPdfError):
|
||||||
|
"""Raised when a limit is reached."""
|
||||||
|
|
||||||
|
|
||||||
|
class XmpDocumentError(PyPdfError, RuntimeError):
|
||||||
|
"""Raised when the XMP XML document context is invalid or missing."""
|
||||||
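# Editor's note: a hedged usage sketch (not part of the vendored file) showing
# how the hierarchy above lets callers catch narrowly or broadly:
#     from pypdf import PdfReader
#     from pypdf.errors import PdfReadError, WrongPasswordError
#     try:
#         reader = PdfReader("encrypted.pdf", password="guess")  # hypothetical file
#     except WrongPasswordError:
#         ...  # FileNotDecryptedError -> PdfReadError -> PyPdfError
#     except PdfReadError:
#         ...  # any other read problem, including PdfStreamError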
815  venv/lib/python3.12/site-packages/pypdf/filters.py  Normal file
@@ -0,0 +1,815 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of stream filters; §7.4 Filters of the PDF 2.0 specification.

§8.9.7 Inline images of the PDF 2.0 specification has abbreviations that can be
used for the names of filters in an inline image object.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import math
import os
import shutil
import struct
import subprocess
import zlib
from base64 import a85decode
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Optional, Union, cast

from ._codecs._codecs import LzwCodec as _LzwCodec
from ._utils import (
    WHITESPACES_AS_BYTES,
    deprecation_with_replacement,
    logger_warning,
)
from .constants import CcittFaxDecodeParameters as CCITT
from .constants import FilterTypeAbbreviations as FTA
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .constants import LzwFilterParameters as LZW
from .constants import StreamAttributes as SA
from .errors import DependencyError, LimitReachedError, PdfReadError, PdfStreamError
from .generic import (
    ArrayObject,
    DictionaryObject,
    IndirectObject,
    NullObject,
    StreamObject,
    is_null_or_none,
)

JBIG2_MAX_OUTPUT_LENGTH = 75_000_000
LZW_MAX_OUTPUT_LENGTH = 75_000_000
ZLIB_MAX_OUTPUT_LENGTH = 75_000_000


def _decompress_with_limit(data: bytes) -> bytes:
    decompressor = zlib.decompressobj()
    result = decompressor.decompress(data, max_length=ZLIB_MAX_OUTPUT_LENGTH)
    if decompressor.unconsumed_tail:
        raise LimitReachedError(
            f"Limit reached while decompressing. {len(decompressor.unconsumed_tail)} bytes remaining."
        )
    return result


def decompress(data: bytes) -> bytes:
    """
    Decompress the given data using zlib.

    Attempts to decompress the input data using zlib.
    If the decompression fails due to a zlib error, it falls back
    to using a decompression object with a larger window size.

    Please note that the output length is limited to avoid memory
    issues. If you need to process larger content streams, consider
    adapting ``pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH``. In case you
    are only dealing with trusted inputs and/or want to disable these
    limits, set the value to `0`.

    Args:
        data: The input data to be decompressed.

    Returns:
        The decompressed data.

    """
    try:
        return _decompress_with_limit(data)
    except zlib.error:
        # First quick approach: There are known issues with faulty added bytes to the
        # tail of the encoded stream from early Adobe Distiller or Pitstop versions
        # with CR char as the default line separator (assumed by reverse engineering)
        # that breaks the decoding process in the end.
        #
        # Try first to cut off some of the tail byte by byte, but limited to not
        # iterate through too many loops and kill the performance for large streams,
        # to then allow the final fallback to run. Added this intermediate attempt,
        # because starting from the head of the stream byte by byte kills completely
        # the performance for large streams (e.g., 6 MB) with the tail-byte-issue
        # and takes ages. This solution is really fast:
        max_tail_cut_off_bytes: int = 8
        for i in range(1, min(max_tail_cut_off_bytes + 1, len(data))):
            try:
                return _decompress_with_limit(data[:-i])
            except zlib.error:
                pass

        # If still failing, then try with increased window size.
        decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
        result_str = b""
        remaining_limit = ZLIB_MAX_OUTPUT_LENGTH
        data_single_bytes = [data[i : i + 1] for i in range(len(data))]
        known_errors = set()
        for index, b in enumerate(data_single_bytes):
            try:
                decompressed = decompressor.decompress(b, max_length=remaining_limit)
                result_str += decompressed
                remaining_limit -= len(decompressed)
                if remaining_limit <= 0:
                    raise LimitReachedError(
                        f"Limit reached while decompressing. {len(data_single_bytes) - index} bytes remaining."
                    )
            except zlib.error as error:
                error_str = str(error)
                if error_str in known_errors:
                    continue
                logger_warning(error_str, __name__)
                known_errors.add(error_str)
        return result_str

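# Editor's note: a hedged usage sketch (not part of the vendored file). Per the
# docstring above, the output cap is module-level and can be adapted:
#     import zlib
#     from pypdf import filters
#     filters.ZLIB_MAX_OUTPUT_LENGTH = 150_000_000  # raise the cap for huge, trusted streams
#     round_trip = filters.decompress(zlib.compress(b"x" * 10_000))
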
class FlateDecode:
    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode data which is flate-encoded.

        Args:
            data: flate-encoded data.
            decode_parms: a dictionary of values, understanding the
                "/Predictor":<int> key only

        Returns:
            The flate-decoded data.

        Raises:
            PdfReadError:

        """
        str_data = decompress(data)
        predictor = 1

        if decode_parms:
            try:
                predictor = decode_parms.get("/Predictor", 1)
            except (AttributeError, TypeError):  # TypeError is NullObject
                pass  # Usually an array with a null object was read
        # predictor 1 == no predictor
        if predictor != 1:
            # /Columns, the number of samples in each row, has a default value of 1;
            # §7.4.4.3, ISO 32000.
            DEFAULT_BITS_PER_COMPONENT = 8
            try:
                columns = cast(int, decode_parms[LZW.COLUMNS].get_object())  # type: ignore
            except (TypeError, KeyError):
                columns = 1
            try:
                colors = cast(int, decode_parms[LZW.COLORS].get_object())  # type: ignore
            except (TypeError, KeyError):
                colors = 1
            try:
                bits_per_component = cast(
                    int,
                    decode_parms[LZW.BITS_PER_COMPONENT].get_object(),  # type: ignore
                )
            except (TypeError, KeyError):
                bits_per_component = DEFAULT_BITS_PER_COMPONENT

            # PNG predictor can vary by row and so is the lead byte on each row
            rowlength = (
                math.ceil(columns * colors * bits_per_component / 8) + 1
            )  # number of bytes

            # TIFF prediction:
            if predictor == 2:
                rowlength -= 1  # remove the predictor byte
                bpp = rowlength // columns
                str_data = bytearray(str_data)
                for i in range(len(str_data)):
                    if i % rowlength >= bpp:
                        str_data[i] = (str_data[i] + str_data[i - bpp]) % 256
                str_data = bytes(str_data)
            # PNG prediction:
            elif 10 <= predictor <= 15:
                str_data = FlateDecode._decode_png_prediction(
                    str_data, columns, rowlength
                )
            else:
                raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
        return str_data

    @staticmethod
    def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
        # PNG prediction can vary from row to row
        if (remainder := len(data) % rowlength) != 0:
            logger_warning("Image data is not rectangular. Adding padding.", __name__)
            data += b"\x00" * (rowlength - remainder)
        assert len(data) % rowlength == 0
        output = []
        prev_rowdata = (0,) * rowlength
        bpp = (rowlength - 1) // columns  # recomputed locally to not change params
        for row in range(0, len(data), rowlength):
            rowdata: list[int] = list(data[row : row + rowlength])
            filter_byte = rowdata[0]

            if filter_byte == 0:
                # PNG None Predictor
                pass
            elif filter_byte == 1:
                # PNG Sub Predictor
                for i in range(bpp + 1, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
            elif filter_byte == 2:
                # PNG Up Predictor
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            elif filter_byte == 3:
                # PNG Average Predictor
                for i in range(1, bpp + 1):
                    floor = prev_rowdata[i] // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    floor = (left + prev_rowdata[i]) // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
            elif filter_byte == 4:
                # PNG Paeth Predictor
                for i in range(1, bpp + 1):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    up = prev_rowdata[i]
                    up_left = prev_rowdata[i - bpp]

                    p = left + up - up_left
                    dist_left = abs(p - left)
                    dist_up = abs(p - up)
                    dist_up_left = abs(p - up_left)

                    if dist_left <= dist_up and dist_left <= dist_up_left:
                        paeth = left
                    elif dist_up <= dist_up_left:
                        paeth = up
                    else:
                        paeth = up_left

                    rowdata[i] = (rowdata[i] + paeth) % 256
            else:
                raise PdfReadError(
                    f"Unsupported PNG filter {filter_byte!r}"
                )  # pragma: no cover
            prev_rowdata = tuple(rowdata)
            output.extend(rowdata[1:])
        return bytes(output)

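# Editor's note: an illustrative, self-contained sketch (not part of the
# vendored file) of the Paeth choice implemented above:
#     def paeth(left: int, up: int, up_left: int) -> int:
#         p = left + up - up_left  # linear estimate
#         if abs(p - left) <= abs(p - up) and abs(p - left) <= abs(p - up_left):
#             return left
#         return up if abs(p - up) <= abs(p - up_left) else up_left
#     paeth(10, 20, 15)  # -> 15: p equals up_left, so up_left is the closest predictor
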
    @staticmethod
    def encode(data: bytes, level: int = -1) -> bytes:
        """
        Compress the input data using zlib.

        Args:
            data: The data to be compressed.
            level: See https://docs.python.org/3/library/zlib.html#zlib.compress

        Returns:
            The compressed data.

        """
        return zlib.compress(data, level)

class ASCIIHexDecode:
    """
    The ASCIIHexDecode filter decodes data that has been encoded in ASCII
    hexadecimal form into a base-7 ASCII format.
    """

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an ASCII-Hex encoded data stream.

        Args:
            data: a str sequence of hexadecimal-encoded values to be
                converted into a base-7 ASCII string
            decode_parms: this filter does not use parameters.

        Returns:
            A string conversion in base-7 ASCII, where each of its values
            v is such that 0 <= ord(v) <= 127.

        Raises:
            PdfStreamError:

        """
        if isinstance(data, str):
            data = data.encode()
        retval = b""
        hex_pair = b""
        index = 0
        while True:
            if index >= len(data):
                logger_warning(
                    "missing EOD in ASCIIHexDecode, check if output is OK", __name__
                )
                break  # Reached end of string without an EOD
            char = data[index : index + 1]
            if char == b">":
                break
            if char.isspace():
                index += 1
                continue
            hex_pair += char
            if len(hex_pair) == 2:
                retval += bytes((int(hex_pair, base=16),))
                hex_pair = b""
            index += 1
        # If the filter encounters the EOD marker after reading
        # an odd number of hexadecimal digits,
        # it shall behave as if a 0 (zero) followed the last digit.
        # For every even number of hexadecimal digits, hex_pair is reset to b"".
        if hex_pair != b"":
            hex_pair += b"0"
            retval += bytes((int(hex_pair, base=16),))
        return retval

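# Editor's note: hedged examples (not part of the vendored file):
#     ASCIIHexDecode.decode(b"48656C6C6F>")  # -> b"Hello"
#     ASCIIHexDecode.decode(b"7>")           # odd digit count, padded with 0 -> b"\x70"
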
class RunLengthDecode:
    """
    The RunLengthDecode filter decodes data that has been encoded in a
    simple byte-oriented format based on run length.
    The encoded data is a sequence of runs, where each run consists of
    a length byte followed by 1 to 128 bytes of data. If the length byte is
    in the range 0 to 127, the following length + 1 (1 to 128) bytes are
    copied literally during decompression.
    If length is in the range 129 to 255, the following single byte is to be
    copied 257 − length (2 to 128) times during decompression. A length value
    of 128 denotes EOD.
    """

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode a run length encoded data stream.

        Args:
            data: a bytes sequence of length/data
            decode_parms: this filter does not use parameters.

        Returns:
            A bytes decompressed sequence.

        Raises:
            PdfStreamError:

        """
        lst = []
        index = 0
        while True:
            if index >= len(data):
                logger_warning(
                    "missing EOD in RunLengthDecode, check if output is OK", __name__
                )
                break  # Reached end of string without an EOD
            length = data[index]
            index += 1
            if length == 128:
                data_length = len(data)
                if index < data_length:
                    # We should first check if we have an inner stream from a multi-encoded
                    # stream with a faulty trailing newline that we can decode properly.
                    # We will just ignore the last byte and raise a warning ...
                    if (index == data_length - 1) and (data[index : index + 1] == b"\n"):
                        logger_warning(
                            "Found trailing newline in stream data, check if output is OK", __name__
                        )
                        break
                    # Raising an exception here breaks all image extraction for this file, which might
                    # not be desirable. For this reason, indicate that the output is most likely wrong,
                    # as processing stopped after the first EOD marker. See issue #3517.
                    logger_warning(
                        "Early EOD in RunLengthDecode, check if output is OK", __name__
                    )
                break
            if length < 128:
                length += 1
                lst.append(data[index : (index + length)])
                index += length
            else:  # > 128
                length = 257 - length
                lst.append(bytes((data[index],)) * length)
                index += 1
        return b"".join(lst)

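# Editor's note: a hedged worked example (not part of the vendored file):
#     # 0x02 -> copy the next 2 + 1 = 3 bytes literally; 0xFE (254) -> repeat the
#     # next byte 257 - 254 = 3 times; 0x80 (128) -> EOD.
#     RunLengthDecode.decode(b"\x02abc\xfeZ\x80")  # -> b"abcZZZ"
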
class LZWDecode:
    class Decoder:
        STOP = 257
        CLEARDICT = 256

        def __init__(self, data: bytes) -> None:
            self.data = data

        def decode(self) -> bytes:
            return _LzwCodec(max_output_length=LZW_MAX_OUTPUT_LENGTH).decode(self.data)

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an LZW encoded data stream.

        Args:
            data: ``bytes`` or ``str`` text to decode.
            decode_parms: a dictionary of parameter values.

        Returns:
            decoded data.

        """
        # decode_parms is unused here
        return LZWDecode.Decoder(data).decode()

class ASCII85Decode:
    """Decodes string ASCII85-encoded data into a byte format."""

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an Ascii85 encoded data stream.

        Args:
            data: ``bytes`` or ``str`` text to decode.
            decode_parms: this filter does not use parameters.

        Returns:
            decoded data.

        """
        if isinstance(data, str):
            data = data.encode()
        data = data.strip(WHITESPACES_AS_BYTES)
        if len(data) > 2 and data.endswith(b">"):
            data = data[:-1].rstrip(WHITESPACES_AS_BYTES) + data[-1:]
        try:
            return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
        except ValueError as error:
            if error.args[0] == "Ascii85 encoded byte sequences must end with b'~>'":
                logger_warning("Ignoring missing Ascii85 end marker.", __name__)
                return a85decode(data, adobe=False, ignorechars=WHITESPACES_AS_BYTES)
            raise

class DCTDecode:
    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decompresses data encoded using a DCT (discrete cosine transform)
        technique based on the JPEG standard (ISO/IEC 10918),
        reproducing image sample data that approximates the original data.

        Args:
            data: text to decode.
            decode_parms: this filter does not use parameters.

        Returns:
            decoded data.

        """
        return data


class JPXDecode:
    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decompresses data encoded using the wavelet-based JPEG 2000 standard,
        reproducing the original image data.

        Args:
            data: text to decode.
            decode_parms: this filter does not use parameters.

        Returns:
            decoded data.

        """
        return data

@dataclass
class CCITTParameters:
    """§7.4.6, optional parameters for the CCITTFaxDecode filter."""

    K: int = 0
    columns: int = 1728
    rows: int = 0
    EndOfLine: Union[bool, None] = False
    EncodedByteAlign: Union[bool, None] = False
    EndOfBlock: Union[bool, None] = True
    BlackIs1: bool = False
    DamagedRowsBeforeError: Union[int, None] = 0

    @property
    def group(self) -> int:
        if self.K < 0:
            # Pure two-dimensional encoding (Group 4)
            CCITTgroup = 4
        else:
            # K == 0: Pure one-dimensional encoding (Group 3, 1-D)
            # K > 0: Mixed one- and two-dimensional encoding (Group 3, 2-D)
            CCITTgroup = 3
        return CCITTgroup

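# Editor's note: hedged examples (not part of the vendored file) of the
# K -> group mapping implemented by the property above:
#     CCITTParameters(K=-1).group  # -> 4 (pure two-dimensional, Group 4)
#     CCITTParameters(K=0).group   # -> 3 (one-dimensional, Group 3)
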
def __create_old_class_instance(
    K: int = 0,
    columns: int = 0,
    rows: int = 0
) -> CCITTParameters:
    deprecation_with_replacement("CCITParameters", "CCITTParameters", "6.0.0")
    return CCITTParameters(K, columns, rows)


# Create an alias for the old class name
CCITParameters = __create_old_class_instance

class CCITTFaxDecode:
    """
    §7.4.6, CCITTFaxDecode filter (ISO 32000).

    Either Group 3 or Group 4 CCITT facsimile (fax) encoding.
    CCITT encoding is bit-oriented, not byte-oriented.

    §7.4.6, optional parameters for the CCITTFaxDecode filter.
    """

    @staticmethod
    def _get_parameters(
        parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject],
        rows: Union[int, IndirectObject],
    ) -> CCITTParameters:
        ccitt_parameters = CCITTParameters(rows=int(rows))
        if parameters:
            parameters_unwrapped = cast(
                Union[ArrayObject, DictionaryObject], parameters.get_object()
            )
            if isinstance(parameters_unwrapped, ArrayObject):
                for decode_parm in parameters_unwrapped:
                    if CCITT.K in decode_parm:
                        ccitt_parameters.K = decode_parm[CCITT.K].get_object()
                    if CCITT.COLUMNS in decode_parm:
                        ccitt_parameters.columns = decode_parm[CCITT.COLUMNS].get_object()
                    if CCITT.BLACK_IS_1 in decode_parm:
                        ccitt_parameters.BlackIs1 = decode_parm[CCITT.BLACK_IS_1].get_object().value
            else:
                if CCITT.K in parameters_unwrapped:
                    ccitt_parameters.K = parameters_unwrapped[CCITT.K].get_object()  # type: ignore
                if CCITT.COLUMNS in parameters_unwrapped:
                    ccitt_parameters.columns = parameters_unwrapped[CCITT.COLUMNS].get_object()  # type: ignore
                if CCITT.BLACK_IS_1 in parameters_unwrapped:
                    ccitt_parameters.BlackIs1 = parameters_unwrapped[CCITT.BLACK_IS_1].get_object().value  # type: ignore
        return ccitt_parameters

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        height: int = 0,
        **kwargs: Any,
    ) -> bytes:
        params = CCITTFaxDecode._get_parameters(decode_parms, height)

        img_size = len(data)
        tiff_header_struct = "<2shlh" + "hhll" * 8 + "h"
        tiff_header = struct.pack(
            tiff_header_struct,
            b"II",  # Byte order indication: Little endian
            42,  # Version number (always 42)
            8,  # Offset to the first image file directory (IFD)
            8,  # Number of tags in IFD
            256, 4, 1, params.columns,  # ImageWidth, LONG, 1, width
            257, 4, 1, params.rows,  # ImageLength, LONG, 1, length
            258, 3, 1, 1,  # BitsPerSample, SHORT, 1, 1
            259, 3, 1, params.group,  # Compression, SHORT, 1, compression type
            262, 3, 1, int(params.BlackIs1),  # Thresholding, SHORT, 1, 0 = BlackIs1
            273, 4, 1, struct.calcsize(tiff_header_struct),  # StripOffsets, LONG, 1, length of header
            278, 4, 1, params.rows,  # RowsPerStrip, LONG, 1, length
            279, 4, 1, img_size,  # StripByteCounts, LONG, 1, size of image
            0,  # last IFD
        )

        return tiff_header + data

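# Editor's note: a hedged usage sketch (not part of the vendored file).
# decode() above does not decompress the CCITT payload; it prepends a minimal
# little-endian, single-strip TIFF header so the result opens as a .tif file:
#     tiff_bytes = CCITTFaxDecode.decode(ccitt_data, parms, height=1100)  # hypothetical inputs
#     Path("page.tif").write_bytes(tiff_bytes)
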
JBIG2DEC_BINARY = shutil.which("jbig2dec")


class JBIG2Decode:
    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        if JBIG2DEC_BINARY is None:
            raise DependencyError("jbig2dec binary is not available.")

        with TemporaryDirectory() as tempdir:
            directory = Path(tempdir)
            paths: list[Path] = []

            if decode_parms and "/JBIG2Globals" in decode_parms:
                jbig2_globals = decode_parms["/JBIG2Globals"]
                if not is_null_or_none(jbig2_globals) and not is_null_or_none(pointer := jbig2_globals.get_object()):
                    assert pointer is not None, "mypy"
                    if isinstance(pointer, StreamObject):
                        path = directory.joinpath("globals.jbig2")
                        path.write_bytes(pointer.get_data())
                        paths.append(path)

            path = directory.joinpath("image.jbig2")
            path.write_bytes(data)
            paths.append(path)

            environment = os.environ.copy()
            environment["LC_ALL"] = "C"
            result = subprocess.run(  # noqa: S603
                [
                    JBIG2DEC_BINARY,
                    "--embedded",
                    "--format", "png",
                    "--output", "-",
                    "-M", str(JBIG2_MAX_OUTPUT_LENGTH),
                    *paths
                ],
                capture_output=True,
                env=environment,
            )
            if b"unrecognized option '--embedded'" in result.stderr or b"unrecognized option '-M'" in result.stderr:
                raise DependencyError("jbig2dec>=0.19 is required.")
            if b"FATAL ERROR failed to allocate image data buffer" in result.stderr:
                raise LimitReachedError(
                    f"Memory limit reached while reading JBIG2 data:\n{result.stderr.decode('utf-8')}"
                )
            if result.stderr:
                for line in result.stderr.decode("utf-8").splitlines():
                    logger_warning(line, __name__)
            if result.returncode != 0:
                raise PdfStreamError(f"Unable to decode JBIG2 data. Exit code: {result.returncode}")
            return result.stdout

    @staticmethod
    def _is_binary_compatible() -> bool:
        if not JBIG2DEC_BINARY:  # pragma: no cover
            return False
        result = subprocess.run(  # noqa: S603
            [JBIG2DEC_BINARY, "--version"],
            capture_output=True,
            text=True,
        )
        version = result.stdout.split(" ", maxsplit=1)[1]

        from ._utils import Version  # noqa: PLC0415
        return Version(version) >= Version("0.19")

def decode_stream_data(stream: Any) -> bytes:
    """
    Decode the stream data based on the specified filters.

    This function decodes the stream data using the filters provided in the
    stream.

    Args:
        stream: The input stream object containing the data and filters.

    Returns:
        The decoded stream data.

    Raises:
        NotImplementedError: If an unsupported filter type is encountered.

    """
    filters = stream.get(SA.FILTER, ())
    if isinstance(filters, IndirectObject):
        filters = cast(ArrayObject, filters.get_object())
    if not isinstance(filters, ArrayObject):
        # We have a single filter instance
        filters = (filters,)
    decode_parms = stream.get(SA.DECODE_PARMS, ({},) * len(filters))
    if not isinstance(decode_parms, (list, tuple)):
        decode_parms = (decode_parms,)
    data: bytes = stream._data
    # If there is no data to decode, we should not try to decode it.
    if not data:
        return data
    for filter_name, params in zip(filters, decode_parms):
        if isinstance(params, NullObject):
            params = {}
        if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
            data = ASCIIHexDecode.decode(data)
        elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
            data = ASCII85Decode.decode(data)
        elif filter_name in (FT.LZW_DECODE, FTA.LZW):
            data = LZWDecode.decode(data, params)
        elif filter_name in (FT.FLATE_DECODE, FTA.FL):
            data = FlateDecode.decode(data, params)
        elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
            data = RunLengthDecode.decode(data)
        elif filter_name == FT.CCITT_FAX_DECODE:
            height = stream.get(IA.HEIGHT, ())
            data = CCITTFaxDecode.decode(data, params, height)
        elif filter_name == FT.DCT_DECODE:
            data = DCTDecode.decode(data)
        elif filter_name == FT.JPX_DECODE:
            data = JPXDecode.decode(data)
        elif filter_name == FT.JBIG2_DECODE:
            data = JBIG2Decode.decode(data, params)
        elif filter_name == "/Crypt":
            if "/Name" in params or "/Type" in params:
                raise NotImplementedError(
                    "/Crypt filter with /Name or /Type not supported yet"
                )
        else:
            raise NotImplementedError(f"Unsupported filter {filter_name}")
    return data
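# Editor's note: a hedged usage sketch (not part of the vendored file):
#     # given an EncodedStreamObject `obj` from a parsed document (hypothetical):
#     raw = decode_stream_data(obj)  # applies each /Filter entry in order
#     # in pypdf, obj.get_data() is the usual public entry point to this logic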
115  venv/lib/python3.12/site-packages/pypdf/generic/__init__.py  Normal file
@@ -0,0 +1,115 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from ..constants import OutlineFontFlag
from ._base import (
    BooleanObject,
    ByteStringObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    TextStringObject,
    encode_pdfdocencoding,
    is_null_or_none,
)
from ._data_structures import (
    ArrayObject,
    ContentStream,
    DecodedStreamObject,
    Destination,
    DictionaryObject,
    EncodedStreamObject,
    Field,
    StreamObject,
    TreeObject,
    read_object,
)
from ._files import EmbeddedFile
from ._fit import Fit
from ._link import DirectReferenceLink, NamedReferenceLink, ReferenceLink, extract_links
from ._outline import OutlineItem
from ._rectangle import RectangleObject
from ._utils import (
    create_string_object,
    decode_pdfdocencoding,
    hex_to_rgb,
    read_hex_string_from_stream,
    read_string_from_stream,
)
from ._viewerpref import ViewerPreferences

PAGE_FIT = Fit.fit()


__all__ = [
    "PAGE_FIT",
    "ArrayObject",
    "BooleanObject",
    "ByteStringObject",
    "ContentStream",
    "DecodedStreamObject",
    "Destination",
    "DictionaryObject",
    "DirectReferenceLink",
    "EmbeddedFile",
    "EncodedStreamObject",
    "Field",
    "Fit",
    "FloatObject",
    "IndirectObject",
    "NameObject",
    "NamedReferenceLink",
    "NullObject",
    "NumberObject",
    "OutlineFontFlag",
    "OutlineItem",
    "PdfObject",
    "RectangleObject",
    "ReferenceLink",
    "StreamObject",
    "TextStringObject",
    "TreeObject",
    "ViewerPreferences",
    # Utility functions
    "create_string_object",
    "decode_pdfdocencoding",
    "encode_pdfdocencoding",
    "extract_links",
    "hex_to_rgb",
    "is_null_or_none",
    "read_hex_string_from_stream",
    # Data structures core functions
    "read_object",
    "read_string_from_stream",
]
@@ -0,0 +1,547 @@
import re
from dataclasses import dataclass
from enum import IntEnum
from typing import Any, Optional, Union, cast

from .._codecs import fill_from_encoding
from .._codecs.core_fontmetrics import CORE_FONT_METRICS
from .._font import Font
from .._utils import logger_warning
from ..constants import AnnotationDictionaryAttributes, BorderStyles, FieldDictionaryAttributes
from ..generic import (
    DecodedStreamObject,
    DictionaryObject,
    NameObject,
    NumberObject,
    RectangleObject,
)
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none

DEFAULT_FONT_SIZE_IN_MULTILINE = 12


@dataclass
class BaseStreamConfig:
    """A container representing the basic layout of an appearance stream."""

    rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0)
    border_width: int = 1  # The width of the border in points
    border_style: str = BorderStyles.SOLID


class BaseStreamAppearance(DecodedStreamObject):
    """A class representing the very base of an appearance stream, that is, a rectangle and a border."""

    def __init__(self, layout: Optional[BaseStreamConfig] = None) -> None:
        """
        Takes the appearance stream layout as an argument.

        Args:
            layout: The basic layout parameters.
        """
        super().__init__()
        self._layout = layout or BaseStreamConfig()
        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        self[NameObject("/BBox")] = RectangleObject(self._layout.rectangle)

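# Editor's note: a hedged construction sketch (not part of the vendored file):
#     config = BaseStreamConfig(rectangle=(0, 0, 120, 20), border_width=1)
#     xobject = BaseStreamAppearance(config)
#     # -> a form XObject: /Type /XObject, /Subtype /Form, /BBox [0 0 120 20]
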
class TextAlignment(IntEnum):
|
||||||
|
"""Defines the alignment options for text within a form field's appearance stream."""
|
||||||
|
|
||||||
|
LEFT = 0
|
||||||
|
CENTER = 1
|
||||||
|
RIGHT = 2
|
||||||
|
|
||||||
|
|
||||||
|
class TextStreamAppearance(BaseStreamAppearance):
|
||||||
|
"""
|
||||||
|
A class representing the appearance stream for a text-based form field.
|
||||||
|
|
||||||
|
This class generates the content stream (the `ap_stream_data`) that dictates
|
||||||
|
how text is rendered within a form field's bounding box. It handles properties
|
||||||
|
like font, font size, color, multiline text, and text selection highlighting.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def _scale_text(
|
||||||
|
self,
|
||||||
|
font: Font,
|
||||||
|
font_size: float,
|
||||||
|
leading_factor: float,
|
||||||
|
field_width: float,
|
||||||
|
field_height: float,
|
||||||
|
text: str,
|
||||||
|
min_font_size: float,
|
||||||
|
font_size_step: float = 0.2
|
||||||
|
) -> tuple[list[tuple[float, str]], float]:
|
||||||
|
"""
|
||||||
|
Takes a piece of text and scales it to field_width or field_height, given font_name
|
||||||
|
and font_size. Wraps text where necessary.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
font: The font to be used.
|
||||||
|
font_size: The font size in points.
|
||||||
|
leading_factor: The line distance.
|
||||||
|
field_width: The width of the field in which to fit the text.
|
||||||
|
field_height: The height of the field in which to fit the text.
|
||||||
|
text: The text to fit with the field.
|
||||||
|
min_font_size: The minimum font size at which to scale the text.
|
||||||
|
font_size_step: The amount by which to decrement font size per step while scaling.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The text in the form of list of tuples, each tuple containing the length of a line
|
||||||
|
and its contents, and the font_size for these lines and lengths.
|
||||||
|
"""
|
||||||
|
orig_text = text
|
||||||
|
paragraphs = text.replace("\n", "\r").split("\r")
|
||||||
|
wrapped_lines = []
|
||||||
|
current_line_words: list[str] = []
|
||||||
|
current_line_width: float = 0
|
||||||
|
space_width = font.space_width * font_size / 1000
|
||||||
|
for paragraph in paragraphs:
|
||||||
|
if not paragraph.strip():
|
||||||
|
wrapped_lines.append((0.0, ""))
|
||||||
|
continue
|
||||||
|
words = paragraph.split(" ")
|
||||||
|
for i, word in enumerate(words):
|
||||||
|
word_width = font.text_width(word) * font_size / 1000
|
||||||
|
test_width = current_line_width + word_width + (space_width if i else 0)
|
||||||
|
if test_width > field_width and current_line_words:
|
||||||
|
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||||
|
current_line_words = [word]
|
||||||
|
current_line_width = word_width
|
||||||
|
elif not current_line_words and word_width > field_width:
|
||||||
|
wrapped_lines.append((word_width, word))
|
||||||
|
current_line_words = []
|
||||||
|
current_line_width = 0
|
||||||
|
else:
|
||||||
|
if current_line_words:
|
||||||
|
current_line_width += space_width
|
||||||
|
current_line_words.append(word)
|
||||||
|
current_line_width += word_width
|
||||||
|
if current_line_words:
|
||||||
|
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||||
|
current_line_words = []
|
||||||
|
current_line_width = 0
|
||||||
|
# Estimate total height.
|
||||||
|
estimated_total_height = font_size + (len(wrapped_lines) - 1) * leading_factor * font_size
|
||||||
|
if estimated_total_height > field_height:
|
||||||
|
# Text overflows height; Retry with smaller font size.
|
||||||
|
new_font_size = font_size - font_size_step
|
||||||
|
if new_font_size >= min_font_size:
|
||||||
|
return self._scale_text(
|
||||||
|
font,
|
||||||
|
new_font_size,
|
||||||
|
leading_factor,
|
||||||
|
field_width,
|
||||||
|
field_height,
|
||||||
|
orig_text,
|
||||||
|
min_font_size,
|
||||||
|
font_size_step
|
||||||
|
)
|
||||||
|
return wrapped_lines, round(font_size, 1)
|
||||||
|
|
||||||
|
def _generate_appearance_stream_data(
|
||||||
|
self,
|
||||||
|
text: str,
|
||||||
|
selection: Union[list[str], None],
|
||||||
|
font: Font,
|
||||||
|
font_glyph_byte_map: Optional[dict[str, bytes]] = None,
|
||||||
|
font_name: str = "/Helv",
|
||||||
|
font_size: float = 0.0,
|
||||||
|
font_color: str = "0 g",
|
||||||
|
is_multiline: bool = False,
|
||||||
|
alignment: TextAlignment = TextAlignment.LEFT,
|
||||||
|
is_comb: bool = False,
|
||||||
|
max_length: Optional[int] = None
|
||||||
|
) -> bytes:
|
||||||
|
"""
|
||||||
|
Generates the raw bytes of the PDF appearance stream for a text field.
|
||||||
|
|
||||||
|
This private method assembles the PDF content stream operators to draw
|
||||||
|
the provided text within the specified rectangle. It handles text positioning,
|
||||||
|
font application, color, and special formatting like selected text.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: The text to be rendered in the form field.
|
||||||
|
selection: An optional list of strings that should be highlighted as selected.
|
||||||
|
font: The font to use.
|
||||||
|
font_glyph_byte_map: An optional dictionary mapping characters to their
|
||||||
|
byte representation for glyph encoding.
|
||||||
|
font_name: The name of the font resource to use (e.g., "/Helv").
|
||||||
|
font_size: The font size. If 0, it is automatically calculated
|
||||||
|
based on whether the field is multiline or not.
|
||||||
|
font_color: The color to apply to the font, represented as a PDF
|
||||||
|
graphics state string (e.g., "0 g" for black).
|
||||||
|
is_multiline: A boolean indicating if the text field is multiline.
|
||||||
|
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
|
||||||
|
is_comb: Boolean that designates fixed-length fields, where every character
|
||||||
|
fills one "cell", such as in a postcode.
|
||||||
|
max_length: Used if is_comb is set. The maximum number of characters for a fixed-
|
||||||
|
length field.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A byte string containing the PDF content stream data.
|
||||||
|
|
||||||
|
"""
|
||||||
|
rectangle = self._layout.rectangle
|
||||||
|
font_glyph_byte_map = font_glyph_byte_map or {}
|
||||||
|
if isinstance(rectangle, tuple):
|
||||||
|
rectangle = RectangleObject(rectangle)
|
||||||
|
leading_factor = (font.font_descriptor.bbox[3] - font.font_descriptor.bbox[1]) / 1000.0
|
||||||
|
|
||||||
|
# Set margins based on border width and style, but never less than 1 point
|
||||||
|
factor = 2 if self._layout.border_style in {"/B", "/I"} else 1
|
||||||
|
margin = max(self._layout.border_width * factor, 1)
|
||||||
|
field_height = rectangle.height - 2 * margin
|
||||||
|
field_width = rectangle.width - 4 * margin
|
||||||
|
|
||||||
|
# If font_size is 0, apply the logic for multiline or large-as-possible font
|
||||||
|
if font_size == 0:
|
||||||
|
min_font_size = 4.0 # The mininum font size
|
||||||
|
if selection: # Don't wrap text when dealing with a /Ch field, in order to prevent problems
|
||||||
|
is_multiline = False # with matching "selection" with "line" later on.
|
||||||
|
if is_multiline:
|
||||||
|
font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
|
||||||
|
lines, font_size = self._scale_text(
|
||||||
|
font,
|
||||||
|
font_size,
|
||||||
|
leading_factor,
|
||||||
|
field_width,
|
||||||
|
field_height,
|
||||||
|
text,
|
||||||
|
min_font_size
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
max_vertical_size = field_height / leading_factor
|
||||||
|
text_width_unscaled = font.text_width(text) / 1000
|
||||||
|
max_horizontal_size = field_width / (text_width_unscaled or 1)
|
||||||
|
font_size = round(max(min(max_vertical_size, max_horizontal_size), min_font_size), 1)
|
||||||
|
lines = [(text_width_unscaled * font_size, text)]
|
||||||
|
elif is_comb:
|
||||||
|
if max_length and len(text) > max_length:
|
||||||
|
logger_warning (
|
||||||
|
f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
|
||||||
|
__name__
|
||||||
|
)
|
||||||
|
# We act as if each character is one line, because we draw it separately later on
|
||||||
|
lines = [(
|
||||||
|
font.text_width(char) * font_size / 1000,
|
||||||
|
char
|
||||||
|
) for index, char in enumerate(text) if index < (max_length or len(text))]
|
||||||
|
else:
|
||||||
|
lines = [(
|
||||||
|
font.text_width(line) * font_size / 1000,
|
||||||
|
line
|
||||||
|
) for line in text.replace("\n", "\r").split("\r")]
|
||||||
|
|
||||||
|
# Set the vertical offset
|
||||||
|
if is_multiline:
|
||||||
|
y_offset = rectangle.height + margin - font.font_descriptor.bbox[3] * font_size / 1000.0
|
||||||
|
else:
|
||||||
|
y_offset = margin + ((field_height - font.font_descriptor.ascent * font_size / 1000) / 2)
|
||||||
|
default_appearance = f"{font_name} {font_size} Tf {font_color}"
|
||||||
|
|
||||||
|
ap_stream = (
|
||||||
|
f"q\n/Tx BMC \nq\n{2 * margin} {margin} {field_width} {field_height} "
|
||||||
|
f"re\nW\nBT\n{default_appearance}\n"
|
||||||
|
).encode()
|
||||||
|
current_x_pos: float = 0 # Initial virtual position within the text object.
|
||||||
|
|
||||||
|
for line_number, (line_width, line) in enumerate(lines):
|
||||||
|
if selection and line in selection:
|
||||||
|
# Might be improved, but cannot find how to get fill working => replaced with lined box
|
||||||
|
ap_stream += (
|
||||||
|
f"1 {y_offset - (line_number * font_size * leading_factor) - 1} "
|
||||||
|
f"{rectangle.width - 2} {font_size + 2} re\n"
|
||||||
|
f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
|
||||||
|
).encode()
|
||||||
|
|
||||||
|
# Calculate the desired absolute starting X for the current line
|
||||||
|
desired_abs_x_start: float = 0
|
||||||
|
if is_comb and max_length:
|
||||||
|
# Calculate the width of a cell for one character
|
||||||
|
cell_width = rectangle.width / max_length
|
||||||
|
# Space from the left edge of the cell to the character's baseline start
|
||||||
|
# line_width here is the *actual* character width in points for the single character 'line'
|
||||||
|
centering_offset_in_cell = (cell_width - line_width) / 2
|
||||||
|
# Absolute start X = (Cell Index, i.e., line_number * Cell Width) + Centering Offset
|
||||||
|
desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
|
||||||
|
elif alignment == TextAlignment.RIGHT:
|
||||||
|
desired_abs_x_start = rectangle.width - margin * 2 - line_width
|
||||||
|
elif alignment == TextAlignment.CENTER:
|
||||||
|
desired_abs_x_start = (rectangle.width - line_width) / 2
|
||||||
|
else: # Left aligned; default
|
||||||
|
desired_abs_x_start = margin * 2
|
||||||
|
# Calculate x_rel_offset: how much to move from the current_x_pos
|
||||||
|
# to reach the desired_abs_x_start.
|
||||||
|
x_rel_offset = desired_abs_x_start - current_x_pos
|
||||||
|
|
||||||
|
# Y-offset:
|
||||||
|
y_rel_offset: float = 0
|
||||||
|
if line_number == 0:
|
||||||
|
y_rel_offset = y_offset # Initial vertical position
|
||||||
|
elif is_comb:
|
||||||
|
y_rel_offset = 0.0 # DO NOT move vertically for subsequent characters
|
||||||
|
else:
|
||||||
|
y_rel_offset = - font_size * leading_factor # Move down by line height
|
||||||
|
|
||||||
|
# Td is a relative translation (Tx and Ty).
|
||||||
|
# It updates the current text position.
|
||||||
|
ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode()
|
||||||
|
# Update current_x_pos based on the Td operation for the next iteration.
|
||||||
|
# This is the X position where the *current line* will start.
|
||||||
|
current_x_pos = desired_abs_x_start
|
||||||
|
|
||||||
|
encoded_line: list[bytes] = [
|
||||||
|
font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
|
||||||
|
]
|
||||||
|
if any(len(c) >= 2 for c in encoded_line):
|
||||||
|
ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n"
|
||||||
|
else:
|
||||||
|
ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n"
|
||||||
|
ap_stream += b"ET\nQ\nEMC\nQ\n"
|
||||||
|
return ap_stream
|
||||||
|
|
||||||
|
    def __init__(
        self,
        layout: Optional[BaseStreamConfig] = None,
        text: str = "",
        selection: Optional[list[str]] = None,
        font_resource: Optional[DictionaryObject] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False,
        alignment: TextAlignment = TextAlignment.LEFT,
        is_comb: bool = False,
        max_length: Optional[int] = None,
    ) -> None:
        """
        Initializes a TextStreamAppearance object.

        This constructor creates a new PDF stream object configured as an XObject
        of subtype Form. It uses the `_generate_appearance_stream_data` method to
        generate the content for the stream.

        Args:
            layout: The basic layout parameters.
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            font_resource: An optional PDF font dictionary.
            font_name: The name of the font resource, e.g. "/Helv".
            font_size: The font size. If 0, it is auto-calculated.
            font_color: The font color string.
            is_multiline: Whether the text field is multiline.
            alignment: Text alignment: TextAlignment.LEFT, .RIGHT, or .CENTER.
            is_comb: Designates fixed-length fields where every character fills
                one "cell", such as in a postcode.
            max_length: Used if is_comb is set. The maximum number of characters
                for a fixed-length field.

        """
        super().__init__(layout)

        # If a font resource was given, derive the font and its character map from it
        if font_resource:
            font_resource = cast(DictionaryObject, font_resource.get_object())
            font = Font.from_font_resource(font_resource)
        else:
            logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
            font_name = "/Helv"
            font_resource = DictionaryObject({
                NameObject("/Subtype"): NameObject("/Type1"),
                NameObject("/Name"): NameObject("/Helv"),
                NameObject("/Type"): NameObject("/Font"),
                NameObject("/BaseFont"): NameObject("/Helvetica"),
                NameObject("/Encoding"): NameObject("/WinAnsiEncoding"),
            })
            font_descriptor = CORE_FONT_METRICS["Helvetica"]
            font_descriptor.character_widths["default"] = 2 * font_descriptor.character_widths[" "]
            font = Font(
                name="Helvetica",
                character_map={},
                encoding=dict(zip(range(256), fill_from_encoding("cp1252"))),  # WinAnsiEncoding
                sub_type="Type1",
                font_descriptor=font_descriptor,
                character_widths=font_descriptor.character_widths,
            )

        font_glyph_byte_map: dict[str, bytes]
        if isinstance(font.encoding, str):
            font_glyph_byte_map = {
                v: k.encode(font.encoding) for k, v in font.character_map.items()
            }
        else:
            font_glyph_byte_map = {v: bytes((k,)) for k, v in font.encoding.items()}
            font_encoding_rev = {v: bytes((k,)) for k, v in font.encoding.items()}
            for key, value in font.character_map.items():
                font_glyph_byte_map[value] = font_encoding_rev.get(key, key)

        ap_stream_data = self._generate_appearance_stream_data(
            text,
            selection,
            font,
            font_glyph_byte_map,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length,
        )

        self.set_data(ByteStringObject(ap_stream_data))
        self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
        # Update /Resources with the font information
        self[NameObject("/Resources")] = DictionaryObject({
            NameObject("/Font"): DictionaryObject({
                NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
            })
        })
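    # --- Illustrative sketch (not part of pypdf): the glyph-byte map built above
    # decides between a literal-string Tj and a hex-string Tj. As soon as any
    # character maps to two or more bytes, the hex form is used. The mapping
    # below is an assumed example, not a real font's map.
    #
    #     font_glyph_byte_map = {"A": b"A", "\u20ac": b"\x20\xac"}  # euro -> 2 bytes
    #
    #     def encode_for_tj(line: str) -> bytes:
    #         encoded = [font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line]
    #         if any(len(c) >= 2 for c in encoded):
    #             return b"<" + b"".join(encoded).hex().encode() + b"> Tj\n"
    #         return b"(" + b"".join(encoded) + b") Tj\n"
    #
    #     print(encode_for_tj("A"))        # b'(A) Tj\n'
    #     print(encode_for_tj("A\u20ac"))  # b'<4120ac> Tj\n'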
    @classmethod
    def from_text_annotation(
        cls,
        acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM]
        field: DictionaryObject,
        annotation: DictionaryObject,
        user_font_name: str = "",
        user_font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a text field annotation.

        This class method is a factory for creating a `TextStreamAppearance`
        instance by extracting all necessary information (bounding box, font,
        text content, etc.) from the PDF field and annotation dictionaries.
        It respects inheritance for properties like the default appearance (`/DA`).

        Args:
            acro_form: The root AcroForm dictionary from the PDF catalog.
            field: The field dictionary object.
            annotation: The widget annotation dictionary object associated with the field.
            user_font_name: An optional user-provided font name to override the
                default. Defaults to an empty string.
            user_font_size: An optional user-provided font size to override the
                default. A value of -1 indicates no override.

        Returns:
            A new `TextStreamAppearance` instance configured for the given field.

        """
        # Calculate rectangle dimensions
        _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
        rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

        # Get the default appearance string from the annotation
        default_appearance = annotation.get_inherited(
            AnnotationDictionaryAttributes.DA,
            acro_form.get(AnnotationDictionaryAttributes.DA, None),
        )
        if not default_appearance:
            # Create a default appearance if none was found in the annotation
            default_appearance = TextStringObject("/Helv 0 Tf 0 g")
        else:
            default_appearance = default_appearance.get_object()

        # Derive font name, size and color from the default appearance, then
        # override them with the user-provided font name and size, if given.
        # For a font name, this presumes that we can find an associated font
        # resource dictionary. Uses the variable font_properties as an intermediate.
        # As per the PDF spec:
        # "At a minimum, the string [that is, default_appearance] shall include a Tf
        # (text font) operator along with its two operands, font and size"
        # (section 12.7.4.3 "Variable text" of the PDF 2.0 specification).
        font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
        font_name = font_properties.pop(font_properties.index("Tf") - 2)
        font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
        font_properties.remove("Tf")
        font_color = " ".join(font_properties)
        # Determine the font name to use, prioritizing the user's input
        if user_font_name:
            font_name = user_font_name
        # Determine the font size to use, prioritizing the user's input
        if user_font_size > 0:
            font_size = user_font_size

        # Try to find a resource dictionary for the font...
        document_resources: Any = cast(
            DictionaryObject,
            cast(
                DictionaryObject,
                annotation.get_inherited(
                    "/DR",
                    acro_form.get("/DR", DictionaryObject()),
                ),
            ).get_object(),
        )
        document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
        # CORE_FONT_METRICS is the dict with the standard font metrics
        if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS:
            # ...or in the AcroForm dictionary
            document_resources = cast(
                dict[Any, Any],
                acro_form.get("/DR", {}),
            )
            document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
        font_resource = document_font_resources.get(font_name, None)
        if not is_null_or_none(font_resource):
            font_resource = cast(DictionaryObject, font_resource.get_object())

        # Retrieve field text and selected values
        field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
        if (
            field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
            field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
        ):
            text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
            selection = field.get("/V", [])
            if not isinstance(selection, list):
                selection = [selection]
        else:  # /Tx
            text = field.get("/V", "")
            selection = []

        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Retrieve formatting information
        is_comb = False
        max_length = None
        if field_flags & FieldDictionaryAttributes.FfBits.Comb:
            is_comb = True
            max_length = annotation.get("/MaxLen")
        is_multiline = False
        if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
            is_multiline = True
        alignment = field.get("/Q", TextAlignment.LEFT)
        border_width = 1
        border_style = BorderStyles.SOLID
        if "/BS" in field:
            border_width = cast(DictionaryObject, field["/BS"]).get("/W", border_width)
            border_style = cast(DictionaryObject, field["/BS"]).get("/S", border_style)

        # Create the TextStreamAppearance instance
        layout = BaseStreamConfig(rectangle=rectangle, border_width=border_width, border_style=border_style)
        new_appearance_stream = cls(
            layout,
            text,
            selection,
            font_resource,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length,
        )
        if AnnotationDictionaryAttributes.AP in annotation:
            for key, value in (
                cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
            ):
                if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    new_appearance_stream[key] = value

        return new_appearance_stream
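# --- Hypothetical usage sketch (not part of the diff): regenerating the
# appearance stream for the first text-field widget of a form. "form.pdf",
# the merged field/widget assumption, and the direct /AP assignment are all
# illustrative; real code would add the stream via a PdfWriter so that it
# gets an indirect reference.
#
#     from pypdf import PdfReader
#     from pypdf.generic import DictionaryObject, NameObject
#
#     reader = PdfReader("form.pdf")
#     acro_form = reader.root_object["/AcroForm"].get_object()
#     annotation = reader.pages[0]["/Annots"][0].get_object()
#     field = annotation  # for merged field/widget dictionaries these coincide
#
#     appearance = TextStreamAppearance.from_text_annotation(acro_form, field, annotation)
#     annotation[NameObject("/AP")] = DictionaryObject({NameObject("/N"): appearance})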
venv/lib/python3.12/site-packages/pypdf/generic/_base.py (new file, 937 lines)
@@ -0,0 +1,937 @@
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import binascii
import codecs
import hashlib
import re
import sys
from binascii import unhexlify
from collections.abc import Sequence
from math import log10
from struct import iter_unpack
from typing import Any, Callable, ClassVar, Optional, Union, cast

if sys.version_info[:2] >= (3, 10):
    from typing import TypeGuard
else:
    from typing_extensions import TypeGuard  # PEP 647

from .._codecs import _pdfdoc_encoding_rev
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
from .._utils import (
    StreamType,
    classproperty,
    deprecation_no_replacement,
    deprecation_with_replacement,
    logger_warning,
    read_non_whitespace,
    read_until_regex,
)
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError

__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


class PdfObject(PdfObjectProtocol):
    # function for calculating a hash value
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        return f"{self}".encode()

    def hash_value(self) -> bytes:
        return (
            f"{self.__class__.__name__}:"
            f"{self.hash_func(self.hash_value_data()).hexdigest()}"
        ).encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone the object into pdf_dest (a PdfWriterProtocol, the interface of PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        Args:
            pdf_dest: Target to clone to.

        Returns:
            The cloned PdfObject.

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone the object into pdf_dest (a PdfWriterProtocol, the interface of PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).

        Args:
            pdf_dest: Target to clone to.
            force_duplicate: By default, if the object has already been cloned and referenced,
                the copy will be returned; when ``True``, a new copy will be created.
                (Default value = ``False``)
            ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
                during cloning (applies to children duplication as well). If a field is to be
                considered for a limited number of levels only, add the level as an integer;
                for example ``[1, "/B", "/TOTO"]`` means that ``"/B"`` will be ignored at the
                first level only, but ``"/TOTO"`` on all levels.

        Returns:
            The cloned PdfObject.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Reference the object within the _objects of pdf_dest, but only if an
        indirect_reference attribute exists (which means the object was
        already identified in the xref/xobjstm). If the object has already
        been referenced, do nothing.

        Args:
            clone:
            pdf_dest:

        Returns:
            The clone.

        """
        try:
            if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
                return clone
        except Exception:
            pass
        # if hasattr(clone, "indirect_reference"):
        try:
            ind = self.indirect_reference
        except AttributeError:
            return clone
        if (
            pdf_dest.incremental
            and ind is not None
            and ind.pdf == pdf_dest._reader
            and ind.idnum <= len(pdf_dest._objects)
        ):
            i = ind.idnum
        else:
            i = len(pdf_dest._objects) + 1
        if ind is not None:
            if id(ind.pdf) not in pdf_dest._id_translated:
                pdf_dest._id_translated[id(ind.pdf)] = {}
                pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf  # type: ignore[index]
            if (
                not force_duplicate
                and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
            ):
                obj = pdf_dest.get_object(
                    pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
                )
                assert obj is not None
                return obj
            pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
        try:
            pdf_dest._objects[i - 1] = clone
        except IndexError:
            pdf_dest._objects.append(clone)
            i = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        raise NotImplementedError
class NullObject(PdfObject):
    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        return cast(
            "NullObject", self._reference_clone(NullObject(), pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__,))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        nulltxt = stream.read(4)
        if nulltxt != b"null":
            raise PdfReadError("Could not read Null object")
        return NullObject()

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()


class BooleanObject(PdfObject):
    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        return cast(
            "BooleanObject",
            self._reference_clone(BooleanObject(self.value), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.value))

    def __eq__(self, o: object, /) -> bool:
        if isinstance(o, BooleanObject):
            return self.value == o.value
        if isinstance(o, bool):
            return self.value == o
        return False

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        return "True" if self.value else "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        if self.value:
            stream.write(b"true")
        else:
            stream.write(b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        word = stream.read(4)
        if word == b"true":
            return BooleanObject(True)
        if word == b"fals":
            stream.read(1)
            return BooleanObject(False)
        raise PdfReadError("Could not read Boolean object")


class IndirectObject(PdfObject):
    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """Clone object into pdf_dest."""
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}
            pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf  # type: ignore[index]

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # Case observed in the wild: the pointed object cannot be found.
            # Substitute a NullObject so the clone still resolves.
            if obj is None:
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        return self

    def get_object(self) -> Optional["PdfObject"]:
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        o = self.get_object()
        # the check is done here so as to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Attribute not found in this object: look in the pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from the pointed object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        return (
            other is not None
            and isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)
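# --- Illustrative round-trip sketch (not part of pypdf): parsing and writing
# a reference like "12 0 R". The pdf argument is None here because parsing
# the token itself needs no document access.
#
#     from io import BytesIO
#
#     ref = IndirectObject.read_from_stream(BytesIO(b"12 0 R"), None)
#     print(ref.idnum, ref.generation)  # 12 0
#
#     out = BytesIO()
#     ref.write_to_stream(out)
#     print(out.getvalue())  # b'12 0 R'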
FLOAT_WRITE_PRECISION = 8  # minimum of 5 significant digits; may be adjusted by the user


class FloatObject(float, PdfObject):
    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs),
            # fall back to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))


class NumberObject(int, PdfObject):
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        try:
            return int.__new__(cls, int(value))
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        return cast(
            "NumberObject",
            self._reference_clone(NumberObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def as_numeric(self) -> int:
        return int(repr(self).encode("utf8"))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        num = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." in num:
            return FloatObject(num)
        return NumberObject(num)


class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        return cast(
            "ByteStringObject",
            self._reference_clone(
                ByteStringObject(bytes(self)), pdf_dest, force_duplicate
            ),
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, bytes(self)))

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<")
        stream.write(binascii.hexlify(self))
        stream.write(b">")

    def __str__(self) -> str:
        charset_to_try = ["utf-16", *list(NameObject.CHARSETS)]
        for enc in charset_to_try:
            try:
                return self.decode(enc)
            except UnicodeDecodeError:
                pass
        raise PdfReadError("Cannot decode ByteStringObject.")
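# --- Behavior sketch (not part of pypdf): what FloatObject serialization
# produces under FLOAT_WRITE_PRECISION = 8. Trailing zeros are trimmed and
# roughly eight significant digits are kept; values chosen for illustration.
#
#     print(repr(FloatObject("3.14159265358979")))  # 3.14159265
#     print(repr(FloatObject("2.50000")))           # 2.5 (trailing zeros stripped)
#     print(repr(FloatObject("0.0")))               # 0.0
#     print(repr(FloatObject("not-a-number")))      # 0.0, with a logged warning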
class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match
    PDFDocEncoding, or contained a UTF-16 BOM mark that caused UTF-16
    decoding to occur.
    """

    autodetect_pdfdocencoding: bool
    autodetect_utf16: bool
    utf16_bom: bytes
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        original_bytes = None
        if isinstance(value, bytes):
            original_bytes = value
            value = value.decode("charmap")
        text_string_object = str.__new__(cls, value)
        text_string_object._original_bytes = original_bytes
        text_string_object.autodetect_utf16 = False
        text_string_object.autodetect_pdfdocencoding = False
        text_string_object.utf16_bom = b""
        if original_bytes is not None and original_bytes[:2] in {codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE}:
            # The value of `original_bytes` is only set for `bytes` inputs.
            # If this is UTF-16 data according to the BOM (the first two bytes),
            # perform special handling. All other cases should not need any
            # special conversion, due to already being a string.
            try:
                text_string_object = str.__new__(cls, original_bytes.decode("utf-16"))
            except UnicodeDecodeError as exception:
                logger_warning(
                    f"{exception!s}\ninitial string:{exception.object!r}",
                    __name__,
                )
                text_string_object = str.__new__(cls, exception.object[: exception.start].decode("utf-16"))
            text_string_object._original_bytes = original_bytes
            text_string_object.autodetect_utf16 = True
            text_string_object.utf16_bom = original_bytes[:2]
        else:
            try:
                encode_pdfdocencoding(text_string_object)
                text_string_object.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                text_string_object.autodetect_utf16 = True
                text_string_object.utf16_bom = codecs.BOM_UTF16_BE
        return text_string_object

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest."""
        obj = TextStringObject(self)
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")


class NameObject(str, PdfObject):  # noqa: SLOT000
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect a modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        for c in self[1:]:
            if c > "~":
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen): spaces and any other
        non-alphanumeric, non-underscore, non-hyphen characters are
        replaced with underscores.

        Returns:
            NameObject with sanitized name.

        """
        name = str(self).removeprefix("/")
        name = re.sub(r"\ ", "_", name)
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        i = sin.find(b"#", 0)
        while i >= 0:
            try:
                sin = sin[:i] + unhexlify(sin[i + 1 : i + 3]) + sin[i + 3 :]
                i = sin.find(b"#", i + 1)
            except ValueError:
                # If the 2 characters after # cannot be converted to hex,
                # we change nothing and carry on.
                i = i + 1
        return sin

    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e


def encode_pdfdocencoding(unicode_string: str) -> bytes:
    try:
        return bytes([_pdfdoc_encoding_rev[k] for k in unicode_string])
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )


def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Returns:
        True if x is None or a NullObject (possibly behind an indirect reference).

    """
    return x is None or (
        isinstance(x, PdfObject)
        and (x.get_object() is None or isinstance(x.get_object(), NullObject))
    )
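# --- Escape/unescape sketch (not part of pypdf) for PDF name objects; the
# example name is chosen to exercise both the one-byte table path (space,
# parentheses) and the multi-byte UTF-8 path (umlauts).
#
#     name = NameObject("/Zeichen (\u00e4\u00f6\u00fc)")
#     escaped = name.renumber()
#     print(escaped)  # b'/Zeichen#20#28#C3#A4#C3#B6#C3#BC#29'
#     print(NameObject.unnumber(escaped).decode("utf-8"))  # /Zeichen (\u00e4\u00f6\u00fc)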
venv/lib/python3.12/site-packages/pypdf/generic/_data_structures.py (new file, 1757 lines)
File diff suppressed because it is too large

venv/lib/python3.12/site-packages/pypdf/generic/_files.py (new file, 401 lines)
@@ -0,0 +1,401 @@
from __future__ import annotations

import bisect
from functools import cached_property
from typing import TYPE_CHECKING, cast

from pypdf._utils import format_iso8824_date, parse_iso8824_date
from pypdf.constants import CatalogAttributes as CA
from pypdf.constants import FileSpecificationDictionaryEntries
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
    ArrayObject,
    ByteStringObject,
    DecodedStreamObject,
    DictionaryObject,
    NameObject,
    NullObject,
    NumberObject,
    StreamObject,
    TextStringObject,
    is_null_or_none,
)

if TYPE_CHECKING:
    import datetime
    from collections.abc import Generator

    from pypdf._writer import PdfWriter


class EmbeddedFile:
    """
    Container holding the information on an embedded file.

    Attributes are evaluated lazily if possible.

    Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
    """

    def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
        """
        Args:
            name: The (primary) name as provided in the name tree.
            pdf_object: The corresponding PDF object to allow retrieving further data.
            parent: The parent list.

        """
        self._name = name
        self.pdf_object = pdf_object
        self._parent = parent

    @property
    def name(self) -> str:
        """The (primary) name of the embedded file as provided in the name tree."""
        return self._name

    @classmethod
    def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
        """
        Create a new embedded file and add it to the PdfWriter.

        Args:
            writer: The PdfWriter instance to add the embedded file to.
            name: The filename to display.
            content: The data in the file.

        Returns:
            EmbeddedFile instance for the newly created embedded file.

        """
        # Convert string content to bytes if needed
        if isinstance(content, str):
            content = content.encode("latin-1")

        # Create the file entry (the actual embedded file stream)
        file_entry = DecodedStreamObject()
        file_entry.set_data(content)
        file_entry.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})

        # Create the /EF entry
        ef_entry = DictionaryObject()
        ef_entry.update({NameObject("/F"): writer._add_object(file_entry)})

        # Create the filespec dictionary
        from pypdf.generic import create_string_object  # noqa: PLC0415
        filespec = DictionaryObject()
        filespec_reference = writer._add_object(filespec)
        name_object = cast(TextStringObject, create_string_object(name))
        filespec.update(
            {
                NameObject(PG.TYPE): NameObject("/Filespec"),
                NameObject(FileSpecificationDictionaryEntries.F): name_object,
                NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
            }
        )

        # Add the name and filespec to the names array.
        # We insert in inverse order, as this allows us to reuse the same index.
        names_array = cls._get_names_array(writer)
        insertion_index = cls._get_insertion_index(names_array, name_object)
        names_array.insert(insertion_index, filespec_reference)
        names_array.insert(insertion_index, name_object)

        # Return an EmbeddedFile instance
        return cls(name=name, pdf_object=filespec, parent=names_array)
    @classmethod
    def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
        """Get the names array for embedded files, possibly creating and flattening it."""
        if CA.NAMES not in writer.root_object:
            # Add the /Names entry to the catalog.
            writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())

        names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])
        if "/EmbeddedFiles" not in names_dict:
            # We do not yet have an entry for embedded files. Create and return it.
            names = ArrayObject()
            embedded_files_names_dictionary = DictionaryObject(
                {NameObject(CA.NAMES): names}
            )
            names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
            return names

        # We have an existing embedded files entry.
        embedded_files_names_tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
        if "/Names" in embedded_files_names_tree:
            # Simple case: We already have a flat list.
            return cast(ArrayObject, embedded_files_names_tree[NameObject(CA.NAMES)])
        if "/Kids" not in embedded_files_names_tree:
            # Invalid case: This is not a name tree.
            raise PdfReadError("Got neither Names nor Kids in embedded files tree.")

        # Complex case: Convert a /Kids-based name tree to a /Names-based one.
        # /Names-based trees are much easier to handle and allow us to simplify
        # the actual insertion logic by only having to consider one case.
        names = ArrayObject()
        kids = cast(ArrayObject, embedded_files_names_tree["/Kids"].get_object())
        embedded_files_names_dictionary = DictionaryObject(
            {NameObject(CA.NAMES): names}
        )
        names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
        for kid in kids:
            # Write the flattened file entries. As we do not change the actual files,
            # this should not have any impact on references to them.
            # There might be further (nested) kids here.
            # Wait for an example before evaluating an implementation.
            for name in kid.get_object().get("/Names", []):
                names.append(name)
        return names

    @classmethod
    def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
        keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
        name_bytes = name.encode("utf-8")

        start = bisect.bisect_left(keys, name_bytes)
        end = bisect.bisect_right(keys, name_bytes)

        if start != end:
            return end * 2
        if start == 0:
            return 0
        if start == (key_count := len(keys)):
            return key_count * 2
        return end * 2
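    # --- Standalone sketch (not part of pypdf) of the insertion-index logic
    # above: the names array alternates [name, filespec, name, filespec, ...],
    # so key i lives at element 2*i. The keys below are assumed example values,
    # already sorted and without duplicates (the duplicate case is what the
    # bisect_left/bisect_right pair above handles).
    #
    #     import bisect
    #
    #     keys = [b"alpha.txt", b"beta.txt", b"delta.txt"]
    #     name = b"charlie.txt"
    #     index = bisect.bisect_right(keys, name) * 2  # index in the flat array
    #     print(index)  # 4 -> insert before the delta.txt pair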
    @property
    def alternative_name(self) -> str | None:
        """Retrieve the alternative name (file specification)."""
        for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
            # PDF 2.0 reference, table 43:
            # > A PDF reader shall use the value of the UF key, when present, instead of the F key.
            if key in self.pdf_object:
                value = self.pdf_object[key].get_object()
                if not is_null_or_none(value):
                    return cast(str, value)
        return None

    @alternative_name.setter
    def alternative_name(self, value: TextStringObject | None) -> None:
        """Set the alternative name (file specification)."""
        if value is None:
            if FileSpecificationDictionaryEntries.UF in self.pdf_object:
                self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = NullObject()
            if FileSpecificationDictionaryEntries.F in self.pdf_object:
                self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = NullObject()
        else:
            self.pdf_object[NameObject(FileSpecificationDictionaryEntries.UF)] = value
            self.pdf_object[NameObject(FileSpecificationDictionaryEntries.F)] = value

    @property
    def description(self) -> str | None:
        """Retrieve the description."""
        value = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
        if is_null_or_none(value):
            return None
        return value

    @description.setter
    def description(self, value: TextStringObject | None) -> None:
        """Set the description."""
        if value is None:
            self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = NullObject()
        else:
            self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = value

    @property
    def associated_file_relationship(self) -> str:
        """Retrieve the relationship of the referring document to this embedded file."""
        return self.pdf_object.get("/AFRelationship", "/Unspecified")

    @associated_file_relationship.setter
    def associated_file_relationship(self, value: NameObject) -> None:
        """Set the relationship of the referring document to this embedded file."""
        self.pdf_object[NameObject("/AFRelationship")] = value

    @property
    def _embedded_file(self) -> StreamObject:
        """Retrieve the actual embedded file stream."""
        if "/EF" not in self.pdf_object:
            raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
        ef = cast(DictionaryObject, self.pdf_object["/EF"])
        for key in [FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F]:
            if key in ef:
                return cast(StreamObject, ef[key].get_object())
        raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")

    @property
    def _params(self) -> DictionaryObject:
        """Retrieve the file-specific parameters."""
        return self._embedded_file.get("/Params", DictionaryObject()).get_object()

    @cached_property
    def _ensure_params(self) -> DictionaryObject:
        """Ensure the /Params dictionary exists and return it."""
        embedded_file = self._embedded_file
        if "/Params" not in embedded_file:
            embedded_file[NameObject("/Params")] = DictionaryObject()
        return cast(DictionaryObject, embedded_file["/Params"])

    @property
    def subtype(self) -> str | None:
        """Retrieve the subtype. This is a MIME media type, prefixed by a slash."""
        value = self._embedded_file.get("/Subtype")
        if is_null_or_none(value):
            return None
        return value

    @subtype.setter
    def subtype(self, value: NameObject | None) -> None:
        """Set the subtype. This should be a MIME media type, prefixed by a slash."""
        embedded_file = self._embedded_file
        if value is None:
            embedded_file[NameObject("/Subtype")] = NullObject()
        else:
            embedded_file[NameObject("/Subtype")] = value

    @property
    def content(self) -> bytes:
        """Retrieve the actual file content."""
        return self._embedded_file.get_data()

    @content.setter
    def content(self, value: str | bytes) -> None:
        """Set the file content."""
        if isinstance(value, str):
            value = value.encode("latin-1")
        self._embedded_file.set_data(value)

    @property
    def size(self) -> int | None:
        """Retrieve the size of the uncompressed file in bytes."""
        value = self._params.get("/Size")
        if is_null_or_none(value):
            return None
        return value

    @size.setter
    def size(self, value: NumberObject | None) -> None:
        """Set the size of the uncompressed file in bytes."""
        params = self._ensure_params
        if value is None:
            params[NameObject("/Size")] = NullObject()
        else:
            params[NameObject("/Size")] = value

    @property
    def creation_date(self) -> datetime.datetime | None:
        """Retrieve the file creation datetime."""
        return parse_iso8824_date(self._params.get("/CreationDate"))

    @creation_date.setter
    def creation_date(self, value: datetime.datetime | None) -> None:
        """Set the file creation datetime."""
        params = self._ensure_params
        if value is None:
            params[NameObject("/CreationDate")] = NullObject()
        else:
            date_str = format_iso8824_date(value)
            params[NameObject("/CreationDate")] = TextStringObject(date_str)

    @property
    def modification_date(self) -> datetime.datetime | None:
|
||||||
|
"""Retrieve the datetime of the last file modification."""
|
||||||
|
return parse_iso8824_date(self._params.get("/ModDate"))
|
||||||
|
|
||||||
|
@modification_date.setter
|
||||||
|
def modification_date(self, value: datetime.datetime | None) -> None:
|
||||||
|
"""Set the datetime of the last file modification."""
|
||||||
|
params = self._ensure_params
|
||||||
|
if value is None:
|
||||||
|
params[NameObject("/ModDate")] = NullObject()
|
||||||
|
else:
|
||||||
|
date_str = format_iso8824_date(value)
|
||||||
|
params[NameObject("/ModDate")] = TextStringObject(date_str)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def checksum(self) -> bytes | None:
|
||||||
|
"""Retrieve the MD5 checksum of the (uncompressed) file."""
|
||||||
|
value = self._params.get("/CheckSum")
|
||||||
|
if is_null_or_none(value):
|
||||||
|
return None
|
||||||
|
return value
|
||||||
|
|
||||||
|
@checksum.setter
|
||||||
|
def checksum(self, value: ByteStringObject | None) -> None:
|
||||||
|
"""Set the MD5 checksum of the (uncompressed) file."""
|
||||||
|
params = self._ensure_params
|
||||||
|
if value is None:
|
||||||
|
params[NameObject("/CheckSum")] = NullObject()
|
||||||
|
else:
|
||||||
|
params[NameObject("/CheckSum")] = value
|
||||||
|
|
||||||
|
def delete(self) -> None:
|
||||||
|
"""Delete the file from the document."""
|
||||||
|
if not self._parent:
|
||||||
|
raise PyPdfError("Parent required to delete file from document.")
|
||||||
|
if self.pdf_object in self._parent:
|
||||||
|
index = self._parent.index(self.pdf_object)
|
||||||
|
elif (
|
||||||
|
(indirect_reference := getattr(self.pdf_object, "indirect_reference", None)) is not None
|
||||||
|
and indirect_reference in self._parent
|
||||||
|
):
|
||||||
|
index = self._parent.index(indirect_reference)
|
||||||
|
else:
|
||||||
|
raise PyPdfError("File not found in parent object.")
|
||||||
|
self._parent.pop(index) # Reference.
|
||||||
|
self._parent.pop(index - 1) # Name.
|
||||||
|
self.pdf_object = DictionaryObject() # Invalidate.
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"<{self.__class__.__name__} name={self.name!r}>"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
|
||||||
|
"""
|
||||||
|
Convert the given name tree into class instances.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
names: The name tree to load the data from.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Iterable of class instances for the files found.
|
||||||
|
"""
|
||||||
|
# This is a name tree of the format [name_1, reference_1, name_2, reference_2, ...]
|
||||||
|
for i, name in enumerate(names):
|
||||||
|
if not isinstance(name, str):
|
||||||
|
# Skip plain strings and retrieve them as `direct_name` by index.
|
||||||
|
file_dictionary = name.get_object()
|
||||||
|
direct_name = names[i - 1].get_object()
|
||||||
|
yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
|
||||||
|
"""
|
||||||
|
Load the embedded files for the given document catalog.
|
||||||
|
|
||||||
|
This method and its signature are considered internal API and thus not exposed publicly for now.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
catalog: The document catalog to load from.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Iterable of class instances for the files found.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
container = cast(
|
||||||
|
DictionaryObject,
|
||||||
|
cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
|
||||||
|
)
|
||||||
|
except KeyError:
|
||||||
|
return
|
||||||
|
|
||||||
|
if "/Kids" in container:
|
||||||
|
for kid in cast(ArrayObject, container["/Kids"].get_object()):
|
||||||
|
# There might be further (nested) kids here.
|
||||||
|
# Wait for an example before evaluating an implementation.
|
||||||
|
kid = kid.get_object()
|
||||||
|
if "/Names" in kid:
|
||||||
|
yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
|
||||||
|
if "/Names" in container:
|
||||||
|
yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
|
||||||
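The loader above is what backs attachment access on the reader side. A minimal sketch of how it surfaces through the public API; the file name is hypothetical and `PdfReader.attachments` is assumed to be the documented mapping of name to content versions in current pypdf:

    from pypdf import PdfReader

    reader = PdfReader("input.pdf")  # hypothetical file
    # Mapping of attachment name -> list of content versions (bytes):
    for name, contents in reader.attachments.items():
        print(name, len(contents[0]), "bytes")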
174 venv/lib/python3.12/site-packages/pypdf/generic/_fit.py (Normal file)
@@ -0,0 +1,174 @@
from typing import Any, Optional, Union

from ._base import is_null_or_none


class Fit:
    def __init__(
        self, fit_type: str, fit_args: tuple[Union[None, float, Any], ...] = ()
    ) -> None:
        from ._base import FloatObject, NameObject, NullObject, NumberObject  # noqa: PLC0415

        self.fit_type = NameObject(fit_type)
        self.fit_args: list[Union[NullObject, FloatObject, NumberObject]] = [
            NullObject() if is_null_or_none(a) else FloatObject(a) for a in fit_args
        ]

    @classmethod
    def xyz(
        cls,
        left: Optional[float] = None,
        top: Optional[float] = None,
        zoom: Optional[float] = None,
    ) -> "Fit":
        """
        Display the page designated by page, with the coordinates (left, top)
        positioned at the upper-left corner of the window and the contents
        of the page magnified by the factor zoom.

        A null value for any of the parameters left, top, or zoom specifies
        that the current value of that parameter is to be retained unchanged.

        A zoom value of 0 has the same meaning as a null value.

        Args:
            left:
            top:
            zoom:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/XYZ", fit_args=(left, top, zoom))

    @classmethod
    def fit(cls) -> "Fit":
        """
        Display the page designated by page, with its contents magnified just
        enough to fit the entire page within the window both horizontally and
        vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the page within the
        window in the other dimension.
        """
        return Fit(fit_type="/Fit")

    @classmethod
    def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Display the page designated by page, with the vertical coordinate top
        positioned at the top edge of the window and the contents of the page
        magnified just enough to fit the entire width of the page within the
        window.

        A null value for ``top`` specifies that the current value of that
        parameter is to be retained unchanged.

        Args:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitH", fit_args=(top,))

    @classmethod
    def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Display the page designated by page, with the horizontal coordinate
        left positioned at the left edge of the window and the contents of
        the page magnified just enough to fit the entire height of the page
        within the window.

        A null value for ``left`` specifies that the current value of that
        parameter is to be retained unchanged.

        Args:
            left:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitV", fit_args=(left,))

    @classmethod
    def fit_rectangle(
        cls,
        left: Optional[float] = None,
        bottom: Optional[float] = None,
        right: Optional[float] = None,
        top: Optional[float] = None,
    ) -> "Fit":
        """
        Display the page designated by page, with its contents magnified
        just enough to fit the rectangle specified by the coordinates
        left, bottom, right, and top entirely within the window
        both horizontally and vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the rectangle within
        the window in the other dimension.

        A null value for any of the parameters may result in unpredictable
        behavior.

        Args:
            left:
            bottom:
            right:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitR", fit_args=(left, bottom, right, top))

    @classmethod
    def fit_box(cls) -> "Fit":
        """
        Display the page designated by page, with its contents magnified just
        enough to fit its bounding box entirely within the window both
        horizontally and vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the bounding box
        within the window in the other dimension.
        """
        return Fit(fit_type="/FitB")

    @classmethod
    def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Display the page designated by page, with the vertical coordinate top
        positioned at the top edge of the window and the contents of the page
        magnified just enough to fit the entire width of its bounding box
        within the window.

        A null value for top specifies that the current value of that parameter
        is to be retained unchanged.

        Args:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitBH", fit_args=(top,))

    @classmethod
    def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Display the page designated by page, with the horizontal coordinate
        left positioned at the left edge of the window and the contents of the
        page magnified just enough to fit the entire height of its bounding box
        within the window.

        A null value for left specifies that the current value of that
        parameter is to be retained unchanged.

        Args:
            left:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitBV", fit_args=(left,))

    def __str__(self) -> str:
        if not self.fit_args:
            return f"Fit({self.fit_type})"
        return f"Fit({self.fit_type}, {self.fit_args})"


DEFAULT_FIT = Fit.fit()
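A minimal usage sketch for these factory methods; the file names are hypothetical, and it assumes `PdfWriter.add_outline_item` accepts a `fit` argument as in current pypdf:

    from pypdf import PdfReader, PdfWriter
    from pypdf.generic import Fit

    reader = PdfReader("input.pdf")  # hypothetical file
    writer = PdfWriter()
    writer.append(reader)
    # Open page 0 with only the top coordinate pinned; the width is fitted.
    writer.add_outline_item("Introduction", 0, fit=Fit.fit_horizontally(top=792))
    writer.write("output.pdf")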
314 venv/lib/python3.12/site-packages/pypdf/generic/_image_inline.py (Normal file)
@@ -0,0 +1,314 @@
# Copyright (c) 2024, pypdf contributors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import logging
from io import BytesIO
from typing import IO

from .._utils import (
    WHITESPACES,
    WHITESPACES_AS_BYTES,
    StreamType,
    logger_warning,
    read_non_whitespace,
)
from ..errors import PdfReadError

logger = logging.getLogger(__name__)

# An inline image should be used only for small images (4096 bytes or less),
# but allow twice this for cases where this has been exceeded.
BUFFER_SIZE = 8192


def _check_end_image_marker(stream: StreamType) -> bool:
    """Peek at the next token and check whether it is an ``EI`` end-of-image
    marker followed by whitespace or EOF; the stream is rewound onto the token.
    """
    ei_tok = read_non_whitespace(stream)
    ei_tok += stream.read(2)
    stream.seek(-3, 1)
    return ei_tok[:2] == b"EI" and (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES)
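# Illustrative peek behaviour (hypothetical byte stream, run separately; not
# part of the module logic):
#
#   from io import BytesIO
#   s = BytesIO(b"  EI Q")
#   _check_end_image_marker(s)  # -> True: "EI" followed by whitespace
#   s.read(2)                   # -> b"EI": the stream was rewound onto the marker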


def extract_inline__ascii_hex_decode(stream: StreamType) -> bytes:
    """
    Extract HexEncoded stream from inline image.
    The stream will be moved onto the EI.
    """
    data_out: bytes = b""
    # Read data until delimiter > and EI as backup.
    while True:
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b">")
        if pos_tok >= 0:  # found >
            data_out += data_buffered[: pos_tok + 1]
            stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            break
        pos_ei = data_buffered.find(b"EI")
        if pos_ei >= 0:  # found EI
            stream.seek(-len(data_buffered) + pos_ei - 1, 1)
            c = stream.read(1)
            while c in WHITESPACES:
                stream.seek(-2, 1)
                c = stream.read(1)
                pos_ei -= 1
            data_out += data_buffered[:pos_ei]
            break
        if len(data_buffered) == 2:
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        # Neither > nor EI found
        data_out += data_buffered[:-2]
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__ascii85_decode(stream: StreamType) -> bytes:
    """
    Extract A85 stream from inline image.
    The stream will be moved onto the EI.
    """
    data_out: bytes = b""
    # Read data until delimiter ~>
    while True:
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"~>")
        if pos_tok >= 0:  # found!
            data_out += data_buffered[: pos_tok + 2]
            stream.seek(-len(data_buffered) + pos_tok + 2, 1)
            break
        if len(data_buffered) == 2:  # end of buffer
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        # Hold back the last two chars in case the "~>" delimiter straddles
        # the buffer boundary.
        data_out += data_buffered[:-2]
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__run_length_decode(stream: StreamType) -> bytes:
    """
    Extract RL (RunLengthDecode) stream from inline image.
    The stream will be moved onto the EI.
    """
    data_out: bytes = b""
    # Read data until delimiter 128
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"\x80")
        if pos_tok >= 0:  # found
            # Ideally, we could just use plain run-length decoding here, where 80_16 = 128_10
            # marks the EOD. But there apparently are cases like in issue #3517, where we have
            # an inline image with up to 51 EOD markers. In these cases, be resilient here and
            # use the default `EI` marker detection instead. Please note that this fallback
            # still omits special `EI` handling within the stream, but for now assume that having
            # both of these cases occur at the same time is very unlikely (and the image stream
            # is broken anyway).
            # For now, do not skip over more than one whitespace character.
            after_token = data_buffered[pos_tok + 1 : pos_tok + 4]
            if after_token.startswith(b"EI") or after_token.endswith(b"EI"):
                data_out += data_buffered[: pos_tok + 1]
                stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            else:
                logger_warning("Early EOD in RunLengthDecode of inline image, using fallback.", __name__)
                ei_marker = data_buffered.find(b"EI")
                if ei_marker > 0:
                    data_out += data_buffered[:ei_marker]
                    stream.seek(-len(data_buffered) + ei_marker - 1, 1)
            break
        data_out += data_buffered

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__dct_decode(stream: StreamType) -> bytes:
    """
    Extract DCT (JPEG) stream from inline image.
    The stream will be moved onto the EI.
    """
    def read(length: int) -> bytes:
        # If 0 bytes are returned, and *size* was not 0, this indicates end of file.
        # If the object is in non-blocking mode and no bytes are available, `None` is returned.
        _result = stream.read(length)
        if _result is None or len(_result) != length:
            raise PdfReadError("Unexpected end of stream")
        return _result

    data_out: bytes = b""
    # Read Blocks of data (ID/Size/data) up to ID=FF/D9
    # https://www.digicamsoft.com/itu/itu-t81-36.html
    not_first = False
    while True:
        c = read(1)
        if not_first or (c == b"\xff"):
            data_out += c
        if c != b"\xff":
            continue
        not_first = True
        c = read(1)
        data_out += c
        if c == b"\xff":
            stream.seek(-1, 1)  # pragma: no cover
        elif c == b"\x00":  # stuffing
            pass
        elif c == b"\xd9":  # end
            break
        elif c in (
            # Markers followed by a two-byte segment length.
            b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
            b"\xda\xdb\xdc\xdd\xde\xdf"
            b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
        ):
            c = read(2)
            data_out += c
            sz = c[0] * 256 + c[1]
            data_out += read(sz - 2)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline_default(stream: StreamType) -> bytes:
    """Legacy method, used by default"""
    stream_out = BytesIO()
    # Read the inline image, while checking for EI (End Image) operator.
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_ei = data_buffered.find(
            b"E"
        )  # We can not look straight for "EI" because it may not have been loaded in the buffer

        if pos_ei == -1:
            stream_out.write(data_buffered)
        else:
            # Write out everything including E (the one from EI to be removed)
            stream_out.write(data_buffered[0 : pos_ei + 1])
            sav_pos_ei = stream_out.tell() - 1
            # Seek back in the stream to read the E next
            stream.seek(pos_ei + 1 - len(data_buffered), 1)
            saved_pos = stream.tell()
            # Check for End Image
            tok2 = stream.read(1)  # I of "EI"
            if tok2 != b"I":
                stream.seek(saved_pos, 0)
                continue
            tok3 = stream.read(1)  # possible space after "EI"
            if tok3 not in WHITESPACES:
                stream.seek(saved_pos, 0)
                continue
            while tok3 in WHITESPACES:
                tok3 = stream.read(1)
            if data_buffered[pos_ei - 1 : pos_ei] not in WHITESPACES and tok3 not in {
                b"Q",
                b"E",
            }:  # for Q or EMC
                stream.seek(saved_pos, 0)
                continue
            if is_followed_by_binary_data(stream):
                # Inline image contains `EI ` sequence usually marking the end of it, but
                # is followed by binary data which does not make sense for the actual end.
                stream.seek(saved_pos, 0)
                continue
            # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficient
            # remove E(I) wrongly inserted earlier
            stream.seek(saved_pos - 1, 0)
            stream_out.truncate(sav_pos_ei)
            break

    return stream_out.getvalue()


def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
    """
    Check if the next bytes of the stream look like binary image data or regular page content.

    This is just a heuristic, as the PDF specification is too imprecise about
    inline images containing the `EI` marker which would end an image. Starting with PDF 2.0,
    we finally get a mandatory length field, but with (proper) PDF 2.0 support being very limited
    everywhere, we should not expect to be able to remove such hacks in the near future - especially
    considering legacy documents as well.

    The actual implementation draws some inspiration from
    https://github.com/itext/itext-java/blob/9.1.0/kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java
    """
    position = stream.tell()
    data = stream.read(length)
    stream.seek(position)
    if not data:
        return False
    operator_start = None
    operator_end = None

    for index, byte in enumerate(data):
        if byte < 32 and byte not in WHITESPACES_AS_BYTES:
            # This covers all characters not being displayable directly, although omitting whitespace
            # to allow for operator detection.
            return True
        is_whitespace = byte in WHITESPACES_AS_BYTES
        if operator_start is None and not is_whitespace:
            # Interpret all other non-whitespace characters as the start of an operation.
            operator_start = index
        if operator_start is not None and is_whitespace:
            # A whitespace stops an operation.
            # Assume that having an inline image with tons of whitespace is rather unlikely.
            operator_end = index
            break

    if operator_start is None:
        # Inline images should not have tons of whitespaces, which would lead to no operator start.
        return False
    if operator_end is None:
        # We probably are inside an operation.
        operator_end = length
    operator_length = operator_end - operator_start
    operator = data[operator_start:operator_end]
    if operator.startswith(b"/") and operator_length > 1:
        # Name object.
        return False
    if operator.replace(b".", b"").isdigit():
        # Graphics operator, for example a move. A number (integer or float).
        return False
    if operator_length > 3:  # noqa: SIM103
        # Usually, the operators inside a content stream should not have more than three characters,
        # especially after an inline image.
        return True
    return False
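A short sketch of the heuristic in isolation, importing the helper from the module path shown above; the byte strings are made-up page content:

    from io import BytesIO
    from pypdf.generic._image_inline import is_followed_by_binary_data

    # Short operators or numbers read like page content resuming after "EI":
    print(is_followed_by_binary_data(BytesIO(b"Q q 0 0 1 re")))  # False
    # Control bytes outside the whitespace set read like raw image data:
    print(is_followed_by_binary_data(BytesIO(b"\x01\x9c\xfe\x02")))  # True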
118 venv/lib/python3.12/site-packages/pypdf/generic/_link.py (Normal file)
@@ -0,0 +1,118 @@
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


# This module contains code used by _writer.py to track links in pages
# being added to the writer until the links can be resolved.

from typing import TYPE_CHECKING, Optional, Union, cast

from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject

if TYPE_CHECKING:
    from .._page import PageObject
    from .._reader import PdfReader
    from .._writer import PdfWriter


class NamedReferenceLink:
    """Named reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """reference: TextStringObject with named reference"""
        self._reference = reference
        self._source_pdf = source_pdf

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        destination = self._source_pdf.named_destinations.get(str(self._reference))
        return destination.page if destination else None

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """target_pdf: PdfWriter which the new link went into"""
        # point named destination in new PDF to the new page
        if str(self._reference) not in target_pdf.named_destinations:
            target_pdf.add_named_destination(str(self._reference), new_page.page_number)


class DirectReferenceLink:
    """Direct reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: ArrayObject) -> None:
        """reference: an ArrayObject whose first element is the Page indirect object"""
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        return self._reference[0]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """target_pdf: PdfWriter which the new link went into"""
        self._reference[0] = new_page


ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]


def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Extracts links from two pages on the assumption that the two pages are
    the same. Produces one list of (new link, old link) tuples.
    """
    new_links = [_build_link(link, new_page) for link in new_page.get("/Annots", [])]
    old_links = [_build_link(link, old_page) for link in old_page.get("/Annots", [])]

    return [
        (new_link, old_link) for (new_link, old_link)
        in zip(new_links, old_links)
        if new_link and old_link
    ]


def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    src = cast("PdfReader", page.pdf)
    link = cast(DictionaryObject, indirect_object.get_object())
    if (not isinstance(link, DictionaryObject)) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        action = cast(DictionaryObject, link["/A"])
        if action.get("/S") != "/GoTo":
            return None

        if "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here


def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    if isinstance(reference, TextStringObject):
        return NamedReferenceLink(reference, source_pdf)
    if isinstance(reference, ArrayObject):
        return DirectReferenceLink(reference)
    return None
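A minimal sketch of the direct-link flow; the `NullObject` page slot and the patched name are placeholders, and `None` is passed for the writer argument purely for illustration since direct links do not use it:

    from pypdf.generic import ArrayObject, NameObject, NullObject
    from pypdf.generic._link import DirectReferenceLink

    # A /Dest array is [page_reference, fit_type, *fit_args].
    dest = ArrayObject([NullObject(), NameObject("/Fit")])
    link = DirectReferenceLink(dest)
    print(link.find_referenced_page())          # the current first element
    link.patch_reference(None, NameObject("/NewPageRef"))  # type: ignore[arg-type]
    print(dest[0])                              # now the patched reference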
33 venv/lib/python3.12/site-packages/pypdf/generic/_outline.py (Normal file)
@@ -0,0 +1,33 @@
from typing import Union

from .._utils import StreamType, deprecation_no_replacement
from ._base import NameObject
from ._data_structures import Destination


class OutlineItem(Destination):
    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        for key in [
            NameObject(x)
            for x in ["/Title", "/Parent", "/First", "/Last", "/Next", "/Prev"]
            if x in self
        ]:
            key.write_to_stream(stream)
            stream.write(b" ")
            value = self.raw_get(key)
            value.write_to_stream(stream)
            stream.write(b"\n")
        key = NameObject("/Dest")
        key.write_to_stream(stream)
        stream.write(b" ")
        value = self.dest_array
        value.write_to_stream(stream)
        stream.write(b"\n")
        stream.write(b">>")
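A sketch of the serialization, assuming `OutlineItem` keeps `Destination`'s `(title, page, fit)` constructor; the exact byte layout may differ between versions:

    from io import BytesIO
    from pypdf.generic import Fit, NumberObject, TextStringObject
    from pypdf.generic._outline import OutlineItem

    item = OutlineItem(TextStringObject("Chapter 1"), NumberObject(0), Fit.fit())
    buffer = BytesIO()
    item.write_to_stream(buffer)
    print(buffer.getvalue())  # roughly: b'<<\n/Title (Chapter 1)\n/Dest [0 /Fit]\n>>'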
132 venv/lib/python3.12/site-packages/pypdf/generic/_rectangle.py (Normal file)
@@ -0,0 +1,132 @@
from typing import Any, Union

from ._base import FloatObject, NumberObject
from ._data_structures import ArrayObject


class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in pypdf.

    These boxes include:

    * :attr:`artbox <pypdf._page.PageObject.artbox>`
    * :attr:`bleedbox <pypdf._page.PageObject.bleedbox>`
    * :attr:`cropbox <pypdf._page.PageObject.cropbox>`
    * :attr:`mediabox <pypdf._page.PageObject.mediabox>`
    * :attr:`trimbox <pypdf._page.PageObject.trimbox>`
    """

    def __init__(
        self, arr: Union["RectangleObject", tuple[float, float, float, float]]
    ) -> None:
        # must have four points
        assert len(arr) == 4
        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
        ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr])

    def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
        if not isinstance(value, (FloatObject, NumberObject)):
            value = FloatObject(value)
        return value

    def scale(self, sx: float, sy: float) -> "RectangleObject":
        return RectangleObject(
            (
                float(self.left) * sx,
                float(self.bottom) * sy,
                float(self.right) * sx,
                float(self.top) * sy,
            )
        )

    def __repr__(self) -> str:
        return f"RectangleObject({list(self)!r})"

    @property
    def left(self) -> FloatObject:
        return self[0]

    @left.setter
    def left(self, f: float) -> None:
        self[0] = FloatObject(f)

    @property
    def bottom(self) -> FloatObject:
        return self[1]

    @bottom.setter
    def bottom(self, f: float) -> None:
        self[1] = FloatObject(f)

    @property
    def right(self) -> FloatObject:
        return self[2]

    @right.setter
    def right(self, f: float) -> None:
        self[2] = FloatObject(f)

    @property
    def top(self) -> FloatObject:
        return self[3]

    @top.setter
    def top(self, f: float) -> None:
        self[3] = FloatObject(f)

    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Property to read and modify the lower left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value: tuple[float, float]) -> None:
        self[0], self[1] = (self._ensure_is_number(x) for x in value)

    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Property to read and modify the lower right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value: tuple[float, float]) -> None:
        self[2], self[1] = (self._ensure_is_number(x) for x in value)

    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Property to read and modify the upper left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value: tuple[float, float]) -> None:
        self[0], self[3] = (self._ensure_is_number(x) for x in value)

    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Property to read and modify the upper right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value: tuple[float, float]) -> None:
        self[2], self[3] = (self._ensure_is_number(x) for x in value)

    @property
    def width(self) -> float:
        return self.right - self.left

    @property
    def height(self) -> float:
        return self.top - self.bottom
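A quick usage sketch; the values are just US Letter dimensions in points:

    from pypdf.generic import RectangleObject

    box = RectangleObject((0, 0, 612, 792))
    print(box.width, box.height)    # 612.0 792.0
    box.upper_right = (306, 396)    # setters coerce plain numbers
    print(box.scale(2, 2))          # scaled back to the full page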
208 venv/lib/python3.12/site-packages/pypdf/generic/_utils.py (Normal file)
@@ -0,0 +1,208 @@
import codecs
from typing import Union

from .._codecs import _pdfdoc_encoding
from .._utils import StreamType, logger_warning, read_non_whitespace
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
from ._base import ByteStringObject, TextStringObject


def hex_to_rgb(value: str) -> tuple[float, float, float]:
    return tuple(int(value.lstrip("#")[i : i + 2], 16) / 255.0 for i in (0, 2, 4))  # type: ignore


def read_hex_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    stream.read(1)  # Skip the opening "<".
    arr = []
    x = b""
    while True:
        tok = read_non_whitespace(stream)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b">":
            break
        x += tok
        if len(x) == 2:
            arr.append(int(x, base=16))
            x = b""
    if len(x) == 1:
        x += b"0"
    if x != b"":
        arr.append(int(x, base=16))
    return create_string_object(bytes(arr), forced_encoding)
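# Illustrative only (run separately; not part of the module logic): the
# stream is positioned on the opening "<", which the first read(1) consumes;
# whitespace between digits is ignored and an odd trailing digit is padded
# with "0".
#
#   from io import BytesIO
#   read_hex_string_from_stream(BytesIO(b"<48 65 6C 6C 6F>"))  # -> 'Hello'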


__ESCAPE_DICT__ = {
    b"n": ord(b"\n"),
    b"r": ord(b"\r"),
    b"t": ord(b"\t"),
    b"b": ord(b"\b"),
    b"f": ord(b"\f"),
    b"(": ord(b"("),
    b")": ord(b")"),
    b"/": ord(b"/"),
    b"\\": ord(b"\\"),
    b" ": ord(b" "),
    b"%": ord(b"%"),
    b"<": ord(b"<"),
    b">": ord(b">"),
    b"[": ord(b"["),
    b"]": ord(b"]"),
    b"#": ord(b"#"),
    b"_": ord(b"_"),
    b"&": ord(b"&"),
    b"$": ord(b"$"),
}
__BACKSLASH_CODE__ = 92


def read_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    tok = stream.read(1)  # Skip the opening "(".
    parens = 1
    txt = []
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b"(":
            parens += 1
        elif tok == b")":
            parens -= 1
            if parens == 0:
                break
        elif tok == b"\\":
            tok = stream.read(1)
            try:
                txt.append(__ESCAPE_DICT__[tok])
                continue
            except KeyError:
                if b"0" <= tok <= b"7":
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    sav = stream.tell() - 1
                    for _ in range(2):
                        ntok = stream.read(1)
                        if b"0" <= ntok <= b"7":
                            tok += ntok
                        else:
                            stream.seek(-1, 1)  # ntok has to be analyzed
                            break
                    i = int(tok, base=8)
                    if i > 255:
                        txt.append(__BACKSLASH_CODE__)
                        stream.seek(sav)
                    else:
                        txt.append(i)
                    continue
                if tok in b"\n\r":
                    # This case is hit when a backslash followed by a line
                    # break occurs. If it's a multi-char EOL, consume the
                    # second character:
                    tok = stream.read(1)
                    if tok not in b"\n\r":
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    continue
                msg = f"Unexpected escaped string: {tok.decode('utf-8', 'ignore')}"
                logger_warning(msg, __name__)
                txt.append(__BACKSLASH_CODE__)
        txt.append(ord(tok))
    return create_string_object(bytes(txt), forced_encoding)
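# Illustrative only (run separately; not part of the module logic): escapes
# from __ESCAPE_DICT__ and octal escapes are resolved, and the matching
# closing parenthesis ends the string.
#
#   from io import BytesIO
#   read_string_from_stream(BytesIO(b"(Line\\nbreak \\050x\\051)"))
#   # -> 'Line\nbreak (x)'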


def create_string_object(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:
    """
    Create a ByteStringObject or a TextStringObject from a string to represent the string.

    Args:
        string: The data being used
        forced_encoding: Typically None, or an encoding string

    Returns:
        A TextStringObject if the data could be decoded to text, otherwise a ByteStringObject.

    Raises:
        TypeError: If string is not of type str or bytes.

    """
    if isinstance(string, str):
        return TextStringObject(string)
    if isinstance(string, bytes):
        if isinstance(forced_encoding, (list, dict)):
            out = ""
            for x in string:
                try:
                    out += forced_encoding[x]
                except Exception:
                    out += bytes((x,)).decode("charmap")
            obj = TextStringObject(out)
            obj._original_bytes = string
            return obj
        if isinstance(forced_encoding, str):
            if forced_encoding == "bytes":
                return ByteStringObject(string)
            obj = TextStringObject(string.decode(forced_encoding))
            obj._original_bytes = string
            return obj
        try:
            if string.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
                retval = TextStringObject(string.decode("utf-16"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = string[:2]
                return retval
            if string.startswith(b"\x00"):
                retval = TextStringObject(string.decode("utf-16be"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = codecs.BOM_UTF16_BE
                return retval
            if string[1:2] == b"\x00":
                retval = TextStringObject(string.decode("utf-16le"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = codecs.BOM_UTF16_LE
                return retval

            # This is probably a big performance hit here, but we need
            # to convert string objects into the text/unicode-aware
            # version if possible... and the only way to check if that's
            # possible is to try.
            # Some strings are strings, some are just byte arrays.
            retval = TextStringObject(decode_pdfdocencoding(string))
            retval._original_bytes = string
            retval.autodetect_pdfdocencoding = True
            return retval
        except UnicodeDecodeError:
            return ByteStringObject(string)
    else:
        raise TypeError("create_string_object should have str or unicode arg")


def decode_pdfdocencoding(byte_array: bytes) -> str:
    retval = ""
    for b in byte_array:
        c = _pdfdoc_encoding[b]
        if c == "\u0000":
            raise UnicodeDecodeError(
                "pdfdocencoding",
                bytearray(b),
                -1,
                -1,
                "does not exist in translation table",
            )
        retval += c
    return retval
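The autodetection order matters: a UTF-16 BOM wins, then the null-byte heuristics, then PDFDocEncoding, with raw bytes as the last resort. A small sketch:

    from pypdf.generic import create_string_object

    print(create_string_object(b"Hello"))            # PDFDocEncoding -> 'Hello'
    s = create_string_object(b"\xfe\xff\x00H\x00i")  # UTF-16 with BE BOM
    print(s, s.autodetect_utf16)                     # Hi True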
163 venv/lib/python3.12/site-packages/pypdf/generic/_viewerpref.py (Normal file)
@@ -0,0 +1,163 @@
# Copyright (c) 2023, Pubpub-ZZ
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
#   derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from typing import (
    Any,
    Optional,
)

from ._base import BooleanObject, NameObject, NumberObject, is_null_or_none
from ._data_structures import ArrayObject, DictionaryObject

f_obj = BooleanObject(False)


class ViewerPreferences(DictionaryObject):
    def __init__(self, obj: Optional[DictionaryObject] = None) -> None:
        super().__init__(self)
        if not is_null_or_none(obj):
            self.update(obj.items())  # type: ignore
            try:
                self.indirect_reference = obj.indirect_reference  # type: ignore
            except AttributeError:
                pass

    def _get_bool(self, key: str, default: Optional[BooleanObject]) -> Optional[BooleanObject]:
        return self.get(key, default)

    def _set_bool(self, key: str, v: bool) -> None:
        self[NameObject(key)] = BooleanObject(v is True)

    def _get_name(self, key: str, default: Optional[NameObject]) -> Optional[NameObject]:
        return self.get(key, default)

    def _set_name(self, key: str, lst: list[str], v: NameObject) -> None:
        if v[0] != "/":
            raise ValueError(f"{v} does not start with '/'")
        if lst != [] and v not in lst:
            raise ValueError(f"{v} is an unacceptable value")
        self[NameObject(key)] = NameObject(v)

    def _get_arr(self, key: str, default: Optional[list[Any]]) -> Optional[ArrayObject]:
        return self.get(key, None if default is None else ArrayObject(default))

    def _set_arr(self, key: str, v: Optional[ArrayObject]) -> None:
        if v is None:
            try:
                del self[NameObject(key)]
            except KeyError:
                pass
            return
        if not isinstance(v, ArrayObject):
            raise ValueError("ArrayObject is expected")
        self[NameObject(key)] = v

    def _get_int(self, key: str, default: Optional[NumberObject]) -> Optional[NumberObject]:
        return self.get(key, default)

    def _set_int(self, key: str, v: int) -> None:
        self[NameObject(key)] = NumberObject(v)

    @property
    def PRINT_SCALING(self) -> NameObject:
        return NameObject("/PrintScaling")

    def __new__(cls: Any, value: Any = None) -> "ViewerPreferences":
        def _add_prop_bool(key: str, default: Optional[BooleanObject]) -> property:
            return property(
                lambda self: self._get_bool(key, default),
                lambda self, v: self._set_bool(key, v),
                None,
                f"""
                Returns/Modify the status of {key}, Returns {default} if not defined
                """,
            )

        def _add_prop_name(
            key: str, lst: list[str], default: Optional[NameObject]
        ) -> property:
            return property(
                lambda self: self._get_name(key, default),
                lambda self, v: self._set_name(key, lst, v),
                None,
                f"""
                Returns/Modify the status of {key}, Returns {default} if not defined.
                Acceptable values: {lst}
                """,
            )

        def _add_prop_arr(key: str, default: Optional[ArrayObject]) -> property:
            return property(
                lambda self: self._get_arr(key, default),
                lambda self, v: self._set_arr(key, v),
                None,
                f"""
                Returns/Modify the status of {key}, Returns {default} if not defined
                """,
            )

        def _add_prop_int(key: str, default: Optional[int]) -> property:
            return property(
                lambda self: self._get_int(key, default),
                lambda self, v: self._set_int(key, v),
                None,
                f"""
                Returns/Modify the status of {key}, Returns {default} if not defined
                """,
            )

        cls.hide_toolbar = _add_prop_bool("/HideToolbar", f_obj)
        cls.hide_menubar = _add_prop_bool("/HideMenubar", f_obj)
        cls.hide_windowui = _add_prop_bool("/HideWindowUI", f_obj)
        cls.fit_window = _add_prop_bool("/FitWindow", f_obj)
        cls.center_window = _add_prop_bool("/CenterWindow", f_obj)
        cls.display_doctitle = _add_prop_bool("/DisplayDocTitle", f_obj)

        cls.non_fullscreen_pagemode = _add_prop_name(
            "/NonFullScreenPageMode",
            ["/UseNone", "/UseOutlines", "/UseThumbs", "/UseOC"],
            NameObject("/UseNone"),
        )
        cls.direction = _add_prop_name(
            "/Direction", ["/L2R", "/R2L"], NameObject("/L2R")
        )
        cls.view_area = _add_prop_name("/ViewArea", [], None)
        cls.view_clip = _add_prop_name("/ViewClip", [], None)
        cls.print_area = _add_prop_name("/PrintArea", [], None)
        cls.print_clip = _add_prop_name("/PrintClip", [], None)
        cls.print_scaling = _add_prop_name("/PrintScaling", [], None)
        cls.duplex = _add_prop_name(
            "/Duplex", ["/Simplex", "/DuplexFlipShortEdge", "/DuplexFlipLongEdge"], None
        )
        cls.pick_tray_by_pdfsize = _add_prop_bool("/PickTrayByPDFSize", None)
        cls.print_pagerange = _add_prop_arr("/PrintPageRange", None)
        cls.num_copies = _add_prop_int("/NumCopies", None)

        cls.enforce = _add_prop_arr("/Enforce", ArrayObject())

        return DictionaryObject.__new__(cls)
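A usage sketch, assuming `PdfWriter.create_viewer_preferences()` is available as in recent pypdf; name values are validated against the lists wired up in `__new__`:

    from pypdf import PdfWriter

    writer = PdfWriter()
    prefs = writer.create_viewer_preferences()
    prefs.hide_toolbar = True    # stored as /HideToolbar true
    prefs.direction = "/R2L"     # must be one of ["/L2R", "/R2L"]
    print(prefs.get("/HideToolbar"), prefs.get("/Direction"))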
200 venv/lib/python3.12/site-packages/pypdf/pagerange.py (Normal file)
@@ -0,0 +1,200 @@
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/py-pdf/pypdf/blob/main/LICENSE
"""

import re
from typing import Any, Union

from .errors import ParseError

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
# groups:        12        34          5 6          7 8


class PageRange:
    """
    A slice-like representation of a range of page indices.

    That is, page numbers, starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

    - PageRange(str) parses a string representing a page range.
    - PageRange(slice) directly "imports" a slice.
    - to_slice() gives the equivalent slice.
    - str() and repr() allow printing.
    - indices(n) is like slice.indices(n).
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
        "int", "[int]:[int]" or "[int]:[int]:[int]",
        where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:

            :      all pages.                   -1     last page.
            22     just the 23rd page.          :-1    all but the last page.
            0:3    the first three pages.       -2     second-to-last page.
            :3     the first three pages.       -2:    last two pages.
            5:     from the sixth page onward.  -3:-1  third & second to last.

        The third, "stride" or "step" number is also recognized.

            ::2       0 2 4 ... to the end.     3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                 2::-1     2 1 0.
            ::-1      all pages in reverse order.

        Note the difference between this notation and arguments to slice():
        slice(3) means the first three pages;
        PageRange("3") means the range of only the fourth page.
        However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        m = isinstance(arg, str) and re.match(PAGE_RANGE_RE, arg)
        if not m:
            raise ParseError(arg)
        if m.group(2):
            # Special case: just an int means a range of one page.
            start = int(m.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            self._slice = slice(*[int(g) if g else None for g in m.group(4, 6, 8)])
|
|
||||||
|
@staticmethod
|
||||||
|
def valid(input: Any) -> bool:
|
||||||
|
"""
|
||||||
|
True if input is a valid initializer for a PageRange.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
input: A possible PageRange string or a PageRange object.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True, if the ``input`` is a valid PageRange.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return isinstance(input, (slice, PageRange)) or (
|
||||||
|
isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))
|
||||||
|
)
|
||||||
|
|
||||||
|
def to_slice(self) -> slice:
|
||||||
|
"""Return the slice equivalent of this page range."""
|
||||||
|
return self._slice
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
"""A string like "1:2:3"."""
|
||||||
|
s = self._slice
|
||||||
|
indices: Union[tuple[int, int], tuple[int, int, int]]
|
||||||
|
if s.step is None:
|
||||||
|
if s.start is not None and s.stop == s.start + 1:
|
||||||
|
return str(s.start)
|
||||||
|
|
||||||
|
indices = s.start, s.stop
|
||||||
|
else:
|
||||||
|
indices = s.start, s.stop, s.step
|
||||||
|
return ":".join("" if i is None else str(i) for i in indices)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
"""A string like "PageRange('1:2:3')"."""
|
||||||
|
return "PageRange(" + repr(str(self)) + ")"
|
||||||
|
|
||||||
|
def indices(self, n: int) -> tuple[int, int, int]:
|
||||||
|
"""
|
||||||
|
Assuming a sequence of length n, calculate the start and stop indices,
|
||||||
|
and the stride length of the PageRange.
|
||||||
|
|
||||||
|
See help(slice.indices).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
n: the length of the list of pages to choose from.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Arguments for range().
|
||||||
|
|
||||||
|
"""
|
||||||
|
return self._slice.indices(n)
|
||||||
|
|
||||||
|
def __eq__(self, other: object) -> bool:
|
||||||
|
if not isinstance(other, PageRange):
|
||||||
|
return False
|
||||||
|
return self._slice == other._slice
|
||||||
|
|
||||||
|
def __hash__(self) -> int:
|
||||||
|
return hash((self.__class__, (self._slice.start, self._slice.stop, self._slice.step)))
|
||||||
|
|
||||||
|
def __add__(self, other: "PageRange") -> "PageRange":
|
||||||
|
if not isinstance(other, PageRange):
|
||||||
|
raise TypeError(f"Can't add PageRange and {type(other)}")
|
||||||
|
if self._slice.step is not None or other._slice.step is not None:
|
||||||
|
raise ValueError("Can't add PageRange with stride")
|
||||||
|
a = self._slice.start, self._slice.stop
|
||||||
|
b = other._slice.start, other._slice.stop
|
||||||
|
|
||||||
|
if a[0] > b[0]:
|
||||||
|
a, b = b, a
|
||||||
|
|
||||||
|
# Now a[0] is the smallest
|
||||||
|
if b[0] > a[1]:
|
||||||
|
# There is a gap between a and b.
|
||||||
|
raise ValueError("Can't add PageRanges with gap")
|
||||||
|
return PageRange(slice(a[0], max(a[1], b[1])))
|
||||||
|
|
||||||
|
|
||||||
|
PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
|
||||||
|
|
||||||
|
|
||||||
|
def parse_filename_page_ranges(
|
||||||
|
args: list[Union[str, PageRange, None]]
|
||||||
|
) -> list[tuple[str, PageRange]]:
|
||||||
|
"""
|
||||||
|
Given a list of filenames and page ranges, return a list of (filename, page_range) pairs.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
args: A list where the first element is a filename. The other elements are
|
||||||
|
filenames, page-range expressions, slice objects, or PageRange objects.
|
||||||
|
A filename not followed by a page range indicates all pages of the file.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of (filename, page_range) pairs.
|
||||||
|
|
||||||
|
"""
|
||||||
|
pairs: list[tuple[str, PageRange]] = []
|
||||||
|
pdf_filename: Union[str, None] = None
|
||||||
|
did_page_range = False
|
||||||
|
for arg in [*args, None]:
|
||||||
|
if PageRange.valid(arg):
|
||||||
|
if not pdf_filename:
|
||||||
|
raise ValueError(
|
||||||
|
"The first argument must be a filename, not a page range."
|
||||||
|
)
|
||||||
|
|
||||||
|
assert arg is not None
|
||||||
|
pairs.append((pdf_filename, PageRange(arg)))
|
||||||
|
did_page_range = True
|
||||||
|
else:
|
||||||
|
# New filename or end of list - use the complete previous file?
|
||||||
|
if pdf_filename and not did_page_range:
|
||||||
|
pairs.append((pdf_filename, PAGE_RANGE_ALL))
|
||||||
|
|
||||||
|
assert not isinstance(arg, PageRange), arg
|
||||||
|
pdf_filename = arg
|
||||||
|
did_page_range = False
|
||||||
|
return pairs
|
||||||
|
|
||||||
|
|
||||||
|
PageRangeSpec = Union[str, PageRange, tuple[int, int], tuple[int, int, int], list[int]]
|
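
The slice notation documented above can be sanity-checked directly; a short editor's sketch (the file names are hypothetical):

from pypdf import PageRange
from pypdf.pagerange import parse_filename_page_ranges

pr = PageRange("1:10:2")               # zero-based pages 1, 3, 5, 7, 9
print(pr.to_slice())                   # slice(1, 10, 2)
print(list(range(*pr.indices(6))))     # clamped to a 6-page document: [1, 3, 5]

# Pair CLI-style arguments with ranges; a bare filename means "all pages".
print(parse_filename_page_ranges(["a.pdf", "b.pdf", "0:3"]))
# [('a.pdf', PageRange(':')), ('b.pdf', PageRange('0:3'))]
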
venv/lib/python3.12/site-packages/pypdf/papersizes.py (new file, 52 lines)
@@ -0,0 +1,52 @@
"""Helper to get paper sizes."""
|
||||||
|
|
||||||
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
|
||||||
|
class Dimensions(NamedTuple):
|
||||||
|
width: int
|
||||||
|
height: int
|
||||||
|
|
||||||
|
|
||||||
|
class PaperSize:
|
||||||
|
"""(width, height) of the paper in portrait mode in pixels at 72 ppi."""
|
||||||
|
|
||||||
|
# Notes of how to calculate it:
|
||||||
|
# 1. Get the size of the paper in millimeters
|
||||||
|
# 2. Convert it to inches (25.4 millimeters is equal to 1 inch)
|
||||||
|
# 3. Convert it to pixels at 72dpi (1 inch is equal to 72 pixels)
|
||||||
|
|
||||||
|
# All Din-A paper sizes follow this pattern:
|
||||||
|
# 2 x A(n - 1) = A(n)
|
||||||
|
# So the height of the next bigger one is the width of the smaller one
|
||||||
|
# The ratio is always approximately 1:2**0.5
|
||||||
|
# Additionally, A0 is defined to have an area of 1 m**2
|
||||||
|
# https://en.wikipedia.org/wiki/ISO_216
|
||||||
|
# Be aware of rounding issues!
|
||||||
|
A0 = Dimensions(2384, 3370) # 841mm x 1189mm
|
||||||
|
A1 = Dimensions(1684, 2384)
|
||||||
|
A2 = Dimensions(1191, 1684)
|
||||||
|
A3 = Dimensions(842, 1191)
|
||||||
|
A4 = Dimensions(
|
||||||
|
595, 842
|
||||||
|
) # Printer paper, documents - this is by far the most common
|
||||||
|
A5 = Dimensions(420, 595) # Paperback books
|
||||||
|
A6 = Dimensions(298, 420) # Postcards
|
||||||
|
A7 = Dimensions(210, 298)
|
||||||
|
A8 = Dimensions(147, 210)
|
||||||
|
|
||||||
|
# Envelopes
|
||||||
|
C4 = Dimensions(649, 918)
|
||||||
|
|
||||||
|
|
||||||
|
_din_a = (
|
||||||
|
PaperSize.A0,
|
||||||
|
PaperSize.A1,
|
||||||
|
PaperSize.A2,
|
||||||
|
PaperSize.A3,
|
||||||
|
PaperSize.A4,
|
||||||
|
PaperSize.A5,
|
||||||
|
PaperSize.A6,
|
||||||
|
PaperSize.A7,
|
||||||
|
PaperSize.A8,
|
||||||
|
)
|
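
The conversion described in the comments is straightforward to reproduce; a quick editor's sketch confirming the A4 entry:

def mm_to_points(mm: float) -> int:
    # 25.4 mm per inch, 72 pixels per inch at 72 ppi
    return round(mm / 25.4 * 72)

print(mm_to_points(210), mm_to_points(297))  # 595 842, matching PaperSize.A4
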
venv/lib/python3.12/site-packages/pypdf/py.typed (new file, empty)

venv/lib/python3.12/site-packages/pypdf/types.py (new file, 80 lines)
@@ -0,0 +1,80 @@
"""Helpers for working with PDF types."""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from typing import Literal, Union
|
||||||
|
|
||||||
|
if sys.version_info[:2] >= (3, 10):
|
||||||
|
# Python 3.10+: https://www.python.org/dev/peps/pep-0484
|
||||||
|
from typing import TypeAlias
|
||||||
|
else:
|
||||||
|
from typing_extensions import TypeAlias
|
||||||
|
|
||||||
|
from .generic._base import NameObject, NullObject, NumberObject
|
||||||
|
from .generic._data_structures import ArrayObject, Destination
|
||||||
|
from .generic._outline import OutlineItem
|
||||||
|
|
||||||
|
BorderArrayType: TypeAlias = list[Union[NameObject, NumberObject, ArrayObject]]
|
||||||
|
|
||||||
|
OutlineItemType: TypeAlias = Union[OutlineItem, Destination]
|
||||||
|
|
||||||
|
FitType: TypeAlias = Literal[
|
||||||
|
"/XYZ", "/Fit", "/FitH", "/FitV", "/FitR", "/FitB", "/FitBH", "/FitBV"
|
||||||
|
]
|
||||||
|
# These go with the FitType, they specify values for the fit
|
||||||
|
ZoomArgType: TypeAlias = Union[NumberObject, NullObject, float]
|
||||||
|
ZoomArgsType: TypeAlias = list[ZoomArgType]
|
||||||
|
|
||||||
|
# Recursive types like the following are not yet supported by Sphinx:
|
||||||
|
# OutlineType = List[Union[Destination, "OutlineType"]]
|
||||||
|
# Hence use this for the moment:
|
||||||
|
OutlineType = list[Union[Destination, list[Union[Destination, list[Destination]]]]]
|
||||||
|
|
||||||
|
LayoutType: TypeAlias = Literal[
|
||||||
|
"/NoLayout",
|
||||||
|
"/SinglePage",
|
||||||
|
"/OneColumn",
|
||||||
|
"/TwoColumnLeft",
|
||||||
|
"/TwoColumnRight",
|
||||||
|
"/TwoPageLeft",
|
||||||
|
"/TwoPageRight",
|
||||||
|
]
|
||||||
|
|
||||||
|
PagemodeType: TypeAlias = Literal[
|
||||||
|
"/UseNone",
|
||||||
|
"/UseOutlines",
|
||||||
|
"/UseThumbs",
|
||||||
|
"/FullScreen",
|
||||||
|
"/UseOC",
|
||||||
|
"/UseAttachments",
|
||||||
|
]
|
||||||
|
|
||||||
|
AnnotationSubtype: TypeAlias = Literal[
|
||||||
|
"/Text",
|
||||||
|
"/Link",
|
||||||
|
"/FreeText",
|
||||||
|
"/Line",
|
||||||
|
"/Square",
|
||||||
|
"/Circle",
|
||||||
|
"/Polygon",
|
||||||
|
"/PolyLine",
|
||||||
|
"/Highlight",
|
||||||
|
"/Underline",
|
||||||
|
"/Squiggly",
|
||||||
|
"/StrikeOut",
|
||||||
|
"/Caret",
|
||||||
|
"/Stamp",
|
||||||
|
"/Ink",
|
||||||
|
"/Popup",
|
||||||
|
"/FileAttachment",
|
||||||
|
"/Sound",
|
||||||
|
"/Movie",
|
||||||
|
"/Screen",
|
||||||
|
"/Widget",
|
||||||
|
"/PrinterMark",
|
||||||
|
"/TrapNet",
|
||||||
|
"/Watermark",
|
||||||
|
"/3D",
|
||||||
|
"/Redact",
|
||||||
|
"/Projection",
|
||||||
|
"/RichMedia",
|
||||||
|
]
|
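
These aliases exist so user-facing signatures can constrain their string arguments. A hypothetical helper (editor's sketch, not part of pypdf) showing the intended use:

from pypdf.types import PagemodeType


def describe_page_mode(mode: PagemodeType) -> str:
    # Literal[...] lets static type checkers reject values outside the allowed set.
    return f"viewer opens with {mode.lstrip('/')}"


print(describe_page_mode("/UseOutlines"))  # viewer opens with UseOutlines
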
venv/lib/python3.12/site-packages/pypdf/xmp.py (new file, 748 lines)
@@ -0,0 +1,748 @@
"""
|
||||||
|
Anything related to Extensible Metadata Platform (XMP) metadata.
|
||||||
|
|
||||||
|
https://en.wikipedia.org/wiki/Extensible_Metadata_Platform
|
||||||
|
"""
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import decimal
|
||||||
|
import re
|
||||||
|
from collections.abc import Iterator
|
||||||
|
from typing import (
|
||||||
|
Any,
|
||||||
|
Callable,
|
||||||
|
Optional,
|
||||||
|
TypeVar,
|
||||||
|
Union,
|
||||||
|
)
|
||||||
|
from xml.dom.minidom import Document, parseString
|
||||||
|
from xml.dom.minidom import Element as XmlElement
|
||||||
|
from xml.parsers.expat import ExpatError
|
||||||
|
|
||||||
|
from ._protocols import XmpInformationProtocol
|
||||||
|
from ._utils import StreamType, deprecate_with_replacement, deprecation_no_replacement
|
||||||
|
from .errors import PdfReadError, XmpDocumentError
|
||||||
|
from .generic import ContentStream, PdfObject
|
||||||
|
|
||||||
|
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
|
||||||
|
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
|
||||||
|
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
|
||||||
|
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"
|
||||||
|
|
||||||
|
# What is the PDFX namespace, you might ask?
|
||||||
|
# It's documented here: https://github.com/adobe/xmp-docs/raw/master/XMPSpecifications/XMPSpecificationPart3.pdf
|
||||||
|
# This namespace is used to place "custom metadata"
|
||||||
|
# properties, which are arbitrary metadata properties with no semantic or
|
||||||
|
# documented meaning.
|
||||||
|
#
|
||||||
|
# Elements in the namespace are key/value-style storage,
|
||||||
|
# where the element name is the key and the content is the value. The keys
|
||||||
|
# are transformed into valid XML identifiers by substituting an invalid
|
||||||
|
# identifier character with \u2182 followed by the unicode hex ID of the
|
||||||
|
# original character. A key like "my car" is therefore "my\u21820020car".
|
||||||
|
#
|
||||||
|
# \u2182 is the unicode character \u{ROMAN NUMERAL TEN THOUSAND}
|
||||||
|
#
|
||||||
|
# The pdfx namespace should be avoided.
|
||||||
|
# A custom data schema and sensical XML elements could be used instead, as is
|
||||||
|
# suggested by Adobe's own documentation on XMP under "Extensibility of
|
||||||
|
# Schemas".
|
||||||
|
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"
|
||||||
|
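
# Editor's illustration of the escaping rule above (not part of pypdf):
# a space is U+0020 and is not a valid XML name character, so
#   "my car"  ->  "my" + "\u2182" + "0020" + "car"  ==  "my\u21820020car"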

# PDF/A
PDFAID_NAMESPACE = "http://www.aiim.org/pdfa/ns/id/"

# Internal mapping of namespace URI → prefix
_NAMESPACE_PREFIX_MAP = {
    DC_NAMESPACE: "dc",
    XMP_NAMESPACE: "xmp",
    PDF_NAMESPACE: "pdf",
    XMPMM_NAMESPACE: "xmpMM",
    PDFAID_NAMESPACE: "pdfaid",
    PDFX_NAMESPACE: "pdfx",
}

iso8601 = re.compile(
    """
    (?P<year>[0-9]{4})
    (-
     (?P<month>[0-9]{2})
     (-
      (?P<day>[0-9]+)
      (T
       (?P<hour>[0-9]{2}):
       (?P<minute>[0-9]{2})
       (:(?P<second>[0-9]{2}(.[0-9]+)?))?
       (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
      )?
     )?
    )?
    """,
    re.VERBOSE,
)


K = TypeVar("K")

# Minimal XMP template
_MINIMAL_XMP = f"""<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pypdf">
  <rdf:RDF xmlns:rdf="{RDF_NAMESPACE}">
    <rdf:Description rdf:about=""
        xmlns:dc="{DC_NAMESPACE}"
        xmlns:xmp="{XMP_NAMESPACE}"
        xmlns:pdf="{PDF_NAMESPACE}"
        xmlns:xmpMM="{XMPMM_NAMESPACE}"
        xmlns:pdfaid="{PDFAID_NAMESPACE}"
        xmlns:pdfx="{PDFX_NAMESPACE}">
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>
<?xpacket end="r"?>"""


def _identity(value: K) -> K:
    return value


def _converter_date(value: str) -> datetime.datetime:
    matches = iso8601.match(value)
    if matches is None:
        raise ValueError(f"Invalid date format: {value}")
    year = int(matches.group("year"))
    month = int(matches.group("month") or "1")
    day = int(matches.group("day") or "1")
    hour = int(matches.group("hour") or "0")
    minute = int(matches.group("minute") or "0")
    second = decimal.Decimal(matches.group("second") or "0")
    seconds_dec = second.to_integral(decimal.ROUND_FLOOR)
    # Despite the name, this feeds datetime's "microsecond" argument:
    # the fractional part of a second scaled by 1_000_000.
    milliseconds_dec = (second - seconds_dec) * 1_000_000

    seconds = int(seconds_dec)
    milliseconds = int(milliseconds_dec)

    tzd = matches.group("tzd") or "Z"
    dt = datetime.datetime(year, month, day, hour, minute, seconds, milliseconds)
    if tzd != "Z":
        tzd_hours, tzd_minutes = (int(x) for x in tzd.split(":"))
        tzd_hours *= -1
        if tzd_hours < 0:
            tzd_minutes *= -1
        dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
    return dt
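
# Editor's illustration (not part of pypdf): fractional seconds become the
# datetime's microseconds and the timezone offset is folded into UTC, e.g.
#   _converter_date("2024-01-02T03:04:05.5+02:00")
#   -> datetime.datetime(2024, 1, 2, 1, 4, 5, 500000)
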
def _format_datetime_utc(value: datetime.datetime) -> str:
    """Format a datetime as UTC with trailing 'Z'.

    - If the input is timezone-aware, convert to UTC first.
    - If naive, assume UTC.
    """
    if value.tzinfo is not None and value.utcoffset() is not None:
        value = value.astimezone(datetime.timezone.utc)

    value = value.replace(tzinfo=None)
    return value.strftime("%Y-%m-%dT%H:%M:%S.%fZ")


def _generic_get(
    element: XmlElement, self: "XmpInformation", list_type: str, converter: Callable[[Any], Any] = _identity
) -> Optional[list[str]]:
    containers = element.getElementsByTagNameNS(RDF_NAMESPACE, list_type)
    retval: list[Any] = []
    if len(containers):
        for container in containers:
            for item in container.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                value = self._get_text(item)
                value = converter(value)
                retval.append(value)
        return retval
    return None
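
# Editor's illustration (not part of pypdf): aware datetimes are normalized
# to UTC before serialization, e.g.
#   tz = datetime.timezone(datetime.timedelta(hours=2))
#   _format_datetime_utc(datetime.datetime(2024, 1, 2, 12, 0, tzinfo=tz))
#   -> "2024-01-02T10:00:00.000000Z"
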
class XmpInformation(XmpInformationProtocol, PdfObject):
    """
    An object that represents Extensible Metadata Platform (XMP) metadata.

    Usually accessed by :py:attr:`xmp_metadata()<pypdf.PdfReader.xmp_metadata>`.

    Raises:
        PdfReadError: if XML is invalid

    """

    def __init__(self, stream: ContentStream) -> None:
        self.stream = stream
        try:
            data = self.stream.get_data()
            doc_root: Document = parseString(data)  # noqa: S318
        except (AttributeError, ExpatError) as e:
            raise PdfReadError(f"XML in XmpInformation was invalid: {e}")
        self.rdf_root: XmlElement = doc_root.getElementsByTagNameNS(
            RDF_NAMESPACE, "RDF"
        )[0]
        self.cache: dict[Any, Any] = {}

    @classmethod
    def create(cls) -> "XmpInformation":
        """
        Create a new XmpInformation object with minimal structure.

        Returns:
            A new XmpInformation instance with empty metadata fields.

        """
        stream = ContentStream(None, None)
        stream.set_data(_MINIMAL_XMP.encode("utf-8"))
        return cls(stream)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        deprecate_with_replacement(
            "XmpInformation.write_to_stream",
            "PdfWriter.xmp_metadata",
            "6.0.0"
        )
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        self.stream.write_to_stream(stream)

    def get_element(self, about_uri: str, namespace: str, name: str) -> Iterator[Any]:
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                yield from desc.getElementsByTagNameNS(namespace, name)

    def get_nodes_in_namespace(self, about_uri: str, namespace: str) -> Iterator[Any]:
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr and attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _get_text(self, element: XmlElement) -> str:
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    def _get_single_value(
        self,
        namespace: str,
        name: str,
        converter: Callable[[str], Any] = _identity,
    ) -> Optional[Any]:
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        value = None
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                value = element.nodeValue
            else:
                value = self._get_text(element)
            break
        if value is not None:
            value = converter(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = value
        return value

    def _getter_bag(self, namespace: str, name: str) -> Optional[list[str]]:
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: list[str] = []
        for element in self.get_element("", namespace, name):
            if (bags := _generic_get(element, self, list_type="Bag")) is not None:
                retval.extend(bags)
            else:
                value = self._get_text(element)
                retval.append(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    def _get_seq_values(
        self,
        namespace: str,
        name: str,
        converter: Callable[[Any], Any] = _identity,
    ) -> Optional[list[Any]]:
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: list[Any] = []
        for element in self.get_element("", namespace, name):
            if (seqs := _generic_get(element, self, list_type="Seq", converter=converter)) is not None:
                retval.extend(seqs)
            elif (bags := _generic_get(element, self, list_type="Bag")) is not None:
                # See issue at https://github.com/py-pdf/pypdf/issues/3324
                # Some applications violate the XMP metadata standard regarding `dc:creator` which should
                # be an "ordered array" and thus a sequence, but use an unordered array (bag) instead.
                # This seems to stem from the fact that the original Dublin Core specification does indeed
                # use bags or direct values, while PDFs are expected to follow the XMP standard and ignore
                # the plain Dublin Core variant. For this reason, add a fallback here to deal with such
                # issues accordingly.
                retval.extend(bags)
            else:
                value = converter(self._get_text(element))
                retval.append(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    def _get_langalt_values(self, namespace: str, name: str) -> Optional[dict[Any, Any]]:
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: dict[Any, Any] = {}
        for element in self.get_element("", namespace, name):
            alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
            if len(alts):
                for alt in alts:
                    for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                        value = self._get_text(item)
                        retval[item.getAttribute("xml:lang")] = value
            else:
                retval["x-default"] = self._get_text(element)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    @property
    def dc_contributor(self) -> Optional[list[str]]:
        """Contributors to the resource (other than the authors)."""
        return self._getter_bag(DC_NAMESPACE, "contributor")

    @dc_contributor.setter
    def dc_contributor(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "contributor", values)

    @property
    def dc_coverage(self) -> Optional[str]:
        """Text describing the extent or scope of the resource."""
        return self._get_single_value(DC_NAMESPACE, "coverage")

    @dc_coverage.setter
    def dc_coverage(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "coverage", value)

    @property
    def dc_creator(self) -> Optional[list[str]]:
        """A sorted array of names of the authors of the resource, listed in order of precedence."""
        return self._get_seq_values(DC_NAMESPACE, "creator")

    @dc_creator.setter
    def dc_creator(self, values: Optional[list[str]]) -> None:
        self._set_seq_values(DC_NAMESPACE, "creator", values)

    @property
    def dc_date(self) -> Optional[list[datetime.datetime]]:
        """A sorted array of dates of significance to the resource. The dates and times are in UTC."""
        return self._get_seq_values(DC_NAMESPACE, "date", _converter_date)

    @dc_date.setter
    def dc_date(self, values: Optional[list[Union[str, datetime.datetime]]]) -> None:
        if values is None:
            self._set_seq_values(DC_NAMESPACE, "date", None)
        else:
            date_strings = []
            for value in values:
                if isinstance(value, datetime.datetime):
                    date_strings.append(_format_datetime_utc(value))
                else:
                    date_strings.append(str(value))
            self._set_seq_values(DC_NAMESPACE, "date", date_strings)

    @property
    def dc_description(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the content of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "description")

    @dc_description.setter
    def dc_description(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "description", values)

    @property
    def dc_format(self) -> Optional[str]:
        """The mime-type of the resource."""
        return self._get_single_value(DC_NAMESPACE, "format")

    @dc_format.setter
    def dc_format(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "format", value)

    @property
    def dc_identifier(self) -> Optional[str]:
        """Unique identifier of the resource."""
        return self._get_single_value(DC_NAMESPACE, "identifier")

    @dc_identifier.setter
    def dc_identifier(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "identifier", value)

    @property
    def dc_language(self) -> Optional[list[str]]:
        """An unordered array specifying the languages used in the resource."""
        return self._getter_bag(DC_NAMESPACE, "language")

    @dc_language.setter
    def dc_language(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "language", values)

    @property
    def dc_publisher(self) -> Optional[list[str]]:
        """An unordered array of publisher names."""
        return self._getter_bag(DC_NAMESPACE, "publisher")

    @dc_publisher.setter
    def dc_publisher(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "publisher", values)

    @property
    def dc_relation(self) -> Optional[list[str]]:
        """An unordered array of text descriptions of relationships to other documents."""
        return self._getter_bag(DC_NAMESPACE, "relation")

    @dc_relation.setter
    def dc_relation(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "relation", values)

    @property
    def dc_rights(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the rights the user has to this resource."""
        return self._get_langalt_values(DC_NAMESPACE, "rights")

    @dc_rights.setter
    def dc_rights(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "rights", values)

    @property
    def dc_source(self) -> Optional[str]:
        """Unique identifier of the work from which this resource was derived."""
        return self._get_single_value(DC_NAMESPACE, "source")

    @dc_source.setter
    def dc_source(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "source", value)

    @property
    def dc_subject(self) -> Optional[list[str]]:
        """An unordered array of descriptive phrases or keywords that specify the topic of the content."""
        return self._getter_bag(DC_NAMESPACE, "subject")

    @dc_subject.setter
    def dc_subject(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "subject", values)

    @property
    def dc_title(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of the title of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "title")

    @dc_title.setter
    def dc_title(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "title", values)

    @property
    def dc_type(self) -> Optional[list[str]]:
        """An unordered array of textual descriptions of the document type."""
        return self._getter_bag(DC_NAMESPACE, "type")

    @dc_type.setter
    def dc_type(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "type", values)

    @property
    def pdf_keywords(self) -> Optional[str]:
        """An unformatted text string representing document keywords."""
        return self._get_single_value(PDF_NAMESPACE, "Keywords")

    @pdf_keywords.setter
    def pdf_keywords(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Keywords", value)

    @property
    def pdf_pdfversion(self) -> Optional[str]:
        """The PDF file version, for example 1.0 or 1.3."""
        return self._get_single_value(PDF_NAMESPACE, "PDFVersion")

    @pdf_pdfversion.setter
    def pdf_pdfversion(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "PDFVersion", value)

    @property
    def pdf_producer(self) -> Optional[str]:
        """The name of the tool that saved the document as a PDF."""
        return self._get_single_value(PDF_NAMESPACE, "Producer")

    @pdf_producer.setter
    def pdf_producer(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Producer", value)

    @property
    def xmp_create_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was originally created. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "CreateDate", _converter_date)

    @xmp_create_date.setter
    def xmp_create_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "CreateDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "CreateDate", None)

    @property
    def xmp_modify_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was last modified. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "ModifyDate", _converter_date)

    @xmp_modify_date.setter
    def xmp_modify_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "ModifyDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "ModifyDate", None)

    @property
    def xmp_metadata_date(self) -> Optional[datetime.datetime]:
        """The date and time that any metadata for this resource was last changed. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "MetadataDate", _converter_date)

    @xmp_metadata_date.setter
    def xmp_metadata_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "MetadataDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "MetadataDate", None)

    @property
    def xmp_creator_tool(self) -> Optional[str]:
        """The name of the first known tool used to create the resource."""
        return self._get_single_value(XMP_NAMESPACE, "CreatorTool")

    @xmp_creator_tool.setter
    def xmp_creator_tool(self, value: Optional[str]) -> None:
        self._set_single_value(XMP_NAMESPACE, "CreatorTool", value)

    @property
    def xmpmm_document_id(self) -> Optional[str]:
        """The common identifier for all versions and renditions of this resource."""
        return self._get_single_value(XMPMM_NAMESPACE, "DocumentID")

    @xmpmm_document_id.setter
    def xmpmm_document_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "DocumentID", value)

    @property
    def xmpmm_instance_id(self) -> Optional[str]:
        """An identifier for a specific incarnation of a document, updated each time a file is saved."""
        return self._get_single_value(XMPMM_NAMESPACE, "InstanceID")

    @xmpmm_instance_id.setter
    def xmpmm_instance_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "InstanceID", value)

    @property
    def pdfaid_part(self) -> Optional[str]:
        """The part of the PDF/A standard that the document conforms to (e.g., 1, 2, 3)."""
        return self._get_single_value(PDFAID_NAMESPACE, "part")

    @pdfaid_part.setter
    def pdfaid_part(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "part", value)

    @property
    def pdfaid_conformance(self) -> Optional[str]:
        """The conformance level within the PDF/A standard (e.g., 'A', 'B', 'U')."""
        return self._get_single_value(PDFAID_NAMESPACE, "conformance")

    @pdfaid_conformance.setter
    def pdfaid_conformance(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "conformance", value)

    @property
    def custom_properties(self) -> dict[Any, Any]:
        """
        Retrieve custom metadata properties defined in the undocumented pdfx
        metadata schema.

        Returns:
            A dictionary of key/value items for custom metadata properties.

        """
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.get_nodes_in_namespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # see documentation about PDFX_NAMESPACE earlier in file
                    idx = key.find("\u2182")
                    if idx == -1:
                        break
                    key = (
                        key[:idx]
                        + chr(int(key[idx + 1 : idx + 5], base=16))
                        + key[idx + 5 :]
                    )
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._get_text(node)
                self._custom_properties[key] = value
        return self._custom_properties
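
    # Editor's illustration (not pypdf code): the loop above reverses the
    # escaping documented next to PDFX_NAMESPACE, e.g.
    #   "my\u21820020car" -> "my" + chr(0x0020) + "car" == "my car"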

    def _get_or_create_description(self, about_uri: str = "") -> XmlElement:
        """Get or create an rdf:Description element with the given about URI."""
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                return desc

        doc = self.rdf_root.ownerDocument
        if doc is None:
            raise XmpDocumentError("XMP Document is None")
        desc = doc.createElementNS(RDF_NAMESPACE, "rdf:Description")
        desc.setAttributeNS(RDF_NAMESPACE, "rdf:about", about_uri)
        self.rdf_root.appendChild(desc)
        return desc

    def _clear_cache_entry(self, namespace: str, name: str) -> None:
        """Remove a cached value for a given namespace/name if present."""
        ns_cache = self.cache.get(namespace)
        if ns_cache and name in ns_cache:
            del ns_cache[name]

    def _set_single_value(self, namespace: str, name: str, value: Optional[str]) -> None:
        """Set or remove a single metadata value."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if existing_attr := desc.getAttributeNodeNS(namespace, name):
            desc.removeAttributeNode(existing_attr)

        if value is not None:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            text_node = doc.createTextNode(str(value))
            elem.appendChild(text_node)
            desc.appendChild(elem)

        self._update_stream()

    def _set_bag_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove bag values (unordered array)."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            bag = doc.createElementNS(RDF_NAMESPACE, "rdf:Bag")

            for value in values:
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                bag.appendChild(li)

            elem.appendChild(bag)
            desc.appendChild(elem)

        self._update_stream()

    def _set_seq_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove sequence values (ordered array)."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            seq = doc.createElementNS(RDF_NAMESPACE, "rdf:Seq")

            for value in values:
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                seq.appendChild(li)

            elem.appendChild(seq)
            desc.appendChild(elem)

        self._update_stream()

    def _set_langalt_values(self, namespace: str, name: str, values: Optional[dict[str, str]]) -> None:
        """Set or remove language alternative values."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            alt = doc.createElementNS(RDF_NAMESPACE, "rdf:Alt")

            for lang, value in values.items():
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                li.setAttribute("xml:lang", lang)
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                alt.appendChild(li)

            elem.appendChild(alt)
            desc.appendChild(elem)

        self._update_stream()

    def _get_namespace_prefix(self, namespace: str) -> str:
        """Get the appropriate namespace prefix for a given namespace URI."""
        return _NAMESPACE_PREFIX_MAP.get(namespace, "unknown")

    def _update_stream(self) -> None:
        """Update the stream with the current XML content."""
        doc = self.rdf_root.ownerDocument
        if doc is None:
            raise XmpDocumentError("XMP Document is None")

        xml_data = doc.toxml(encoding="utf-8")
        self.stream.set_data(xml_data)
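
Taken together, the generated getters and setters round-trip through `_update_stream`. A minimal end-to-end sketch, assuming a writable `PdfWriter.xmp_metadata` as the deprecation notice above implies (the output file name is hypothetical):

from pypdf import PdfWriter
from pypdf.xmp import XmpInformation

writer = PdfWriter()
writer.add_blank_page(width=595, height=842)

xmp = XmpInformation.create()           # minimal packet built from _MINIMAL_XMP
xmp.dc_title = {"x-default": "Demo"}    # serialized as an rdf:Alt language array
xmp.dc_creator = ["Alice", "Bob"]       # serialized as an ordered rdf:Seq
writer.xmp_metadata = xmp

with open("demo.pdf", "wb") as fh:
    writer.write(fh)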