Update ashboard, dashboard, memory +1 more (+2 ~3)
This commit is contained in:
@@ -687,6 +687,11 @@
|
||||
border-color: var(--accent);
|
||||
}
|
||||
|
||||
.issue-checkbox.in-progress {
|
||||
background: rgba(59, 130, 246, 0.3);
|
||||
border-color: #3b82f6;
|
||||
}
|
||||
|
||||
.issue-checkbox svg {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
@@ -698,6 +703,14 @@
|
||||
display: block;
|
||||
}
|
||||
|
||||
.issue-checkbox.in-progress::after {
|
||||
content: '';
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
background: #3b82f6;
|
||||
}
|
||||
|
||||
.issue-content {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
@@ -738,6 +751,27 @@
|
||||
.issue-owner.marius { color: #22c55e; }
|
||||
.issue-owner.robert { color: #f59e0b; }
|
||||
|
||||
.issue-status {
|
||||
padding: 2px 8px;
|
||||
border-radius: var(--radius-sm);
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.3px;
|
||||
}
|
||||
.issue-status.todo {
|
||||
background: rgba(156, 163, 175, 0.2);
|
||||
color: #9ca3af;
|
||||
}
|
||||
.issue-status.in-progress {
|
||||
background: rgba(59, 130, 246, 0.2);
|
||||
color: #3b82f6;
|
||||
}
|
||||
.issue-status.done {
|
||||
background: rgba(34, 197, 94, 0.2);
|
||||
color: #22c55e;
|
||||
}
|
||||
|
||||
/* Todo's Panel */
|
||||
.todos-panel { border-left: 3px solid #8b5cf6; }
|
||||
.todo-section { margin-bottom: 16px; }
|
||||
@@ -1266,10 +1300,21 @@
|
||||
<option value="backlog">⚪ Backlog</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label class="form-label">Status</label>
|
||||
<select class="input" id="issueStatus">
|
||||
<option value="todo">Todo</option>
|
||||
<option value="in-progress">In Progress</option>
|
||||
<option value="done">Done</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-row">
|
||||
<div class="form-group">
|
||||
<label class="form-label">Deadline</label>
|
||||
<input type="date" class="input" id="issueDeadline">
|
||||
</div>
|
||||
<div class="form-group"></div>
|
||||
</div>
|
||||
<div class="modal-actions">
|
||||
<button class="btn btn-danger" id="issueDeleteBtn" onclick="deleteIssue()" style="margin-right: auto; display: none;">Șterge</button>
|
||||
@@ -2129,7 +2174,6 @@
|
||||
<div class="priority-group">
|
||||
<div class="priority-header ${isCollapsed ? 'collapsed' : ''}" onclick="togglePriority('${priority}')">
|
||||
<i data-lucide="chevron-down"></i>
|
||||
<span class="priority-dot ${priority}"></span>
|
||||
<span>${priorityLabels[priority]}</span>
|
||||
<span style="margin-left: auto; opacity: 0.7">${todoCount}/${issues.length}</span>
|
||||
</div>
|
||||
@@ -2146,18 +2190,23 @@
|
||||
|
||||
function renderIssueItem(issue) {
|
||||
const isDone = issue.status === 'done';
|
||||
const isInProgress = issue.status === 'in-progress';
|
||||
const ownerIcons = { 'clawdbot': '🤖', 'robert': '👷', 'marius': '👤' };
|
||||
const ownerIcon = ownerIcons[issue.owner] || '👤';
|
||||
const dateStr = new Date(issue.created).toLocaleDateString('ro-RO', { day: 'numeric', month: 'short' });
|
||||
const statusLabels = { 'todo': 'Todo', 'in-progress': 'In Progress', 'done': 'Done' };
|
||||
const statusLabel = statusLabels[issue.status] || 'Todo';
|
||||
const checkboxClass = isDone ? 'checked' : (isInProgress ? 'in-progress' : '');
|
||||
|
||||
return `
|
||||
<div class="issue-item ${isDone ? 'done' : ''}" data-id="${issue.id}">
|
||||
<div class="issue-checkbox ${isDone ? 'checked' : ''}" onclick="toggleIssue('${issue.id}')">
|
||||
<div class="issue-checkbox ${checkboxClass}" onclick="toggleIssue('${issue.id}')" title="Click pentru a schimba statusul">
|
||||
<i data-lucide="check"></i>
|
||||
</div>
|
||||
<div class="issue-content" onclick="editIssue('${issue.id}')">
|
||||
<div class="issue-title">${issue.title}</div>
|
||||
<div class="issue-meta">
|
||||
<span class="issue-status ${issue.status || 'todo'}">${statusLabel}</span>
|
||||
${issue.program ? `<span class="issue-tag program">${issue.program}</span>` : ''}
|
||||
<span class="issue-owner ${issue.owner}">${ownerIcon} ${issue.owner === 'clawdbot' ? 'Clawdbot' : (issue.owner === 'robert' ? 'Robert' : 'Marius')}</span>
|
||||
<span class="issue-date">${dateStr}</span>
|
||||
@@ -2180,17 +2229,27 @@
|
||||
const issue = issuesData.issues.find(i => i.id === id);
|
||||
if (!issue) return;
|
||||
|
||||
issue.status = issue.status === 'done' ? 'todo' : 'done';
|
||||
// Cycle: todo → in-progress → done → todo
|
||||
const statusCycle = { 'todo': 'in-progress', 'in-progress': 'done', 'done': 'todo' };
|
||||
const currentStatus = issue.status || 'todo';
|
||||
issue.status = statusCycle[currentStatus] || 'in-progress';
|
||||
|
||||
if (issue.status === 'done') {
|
||||
issue.completed = new Date().toISOString();
|
||||
} else {
|
||||
delete issue.completed;
|
||||
}
|
||||
|
||||
const statusMessages = {
|
||||
'in-progress': '🔄 In Progress',
|
||||
'done': '✅ Done!',
|
||||
'todo': '📋 Todo'
|
||||
};
|
||||
|
||||
renderIssues();
|
||||
updateIssuesCount();
|
||||
await saveIssues();
|
||||
showToast(issue.status === 'done' ? 'Issue finalizat! ✓' : 'Issue redeschis');
|
||||
showToast(statusMessages[issue.status]);
|
||||
}
|
||||
|
||||
// Filters
|
||||
@@ -2212,6 +2271,7 @@
|
||||
document.getElementById('issueProgram').value = '';
|
||||
document.getElementById('issueOwner').value = 'marius';
|
||||
document.getElementById('issuePriority').value = 'urgent-important';
|
||||
document.getElementById('issueStatus').value = 'todo';
|
||||
document.getElementById('issueDeadline').value = '';
|
||||
document.getElementById('issueDeleteBtn').style.display = 'none';
|
||||
document.getElementById('issueSaveBtn').textContent = 'Adaugă';
|
||||
@@ -2230,6 +2290,7 @@
|
||||
document.getElementById('issueProgram').value = issue.program || '';
|
||||
document.getElementById('issueOwner').value = issue.owner || 'marius';
|
||||
document.getElementById('issuePriority').value = issue.priority || 'backlog';
|
||||
document.getElementById('issueStatus').value = issue.status || 'todo';
|
||||
document.getElementById('issueDeadline').value = issue.deadline || '';
|
||||
document.getElementById('issueDeleteBtn').style.display = 'block';
|
||||
document.getElementById('issueSaveBtn').textContent = 'Salvează';
|
||||
@@ -2272,6 +2333,13 @@
|
||||
issue.program = document.getElementById('issueProgram').value;
|
||||
issue.owner = document.getElementById('issueOwner').value;
|
||||
issue.priority = document.getElementById('issuePriority').value;
|
||||
const newStatus = document.getElementById('issueStatus').value;
|
||||
if (newStatus === 'done' && issue.status !== 'done') {
|
||||
issue.completed = new Date().toISOString();
|
||||
} else if (newStatus !== 'done') {
|
||||
delete issue.completed;
|
||||
}
|
||||
issue.status = newStatus;
|
||||
issue.deadline = document.getElementById('issueDeadline').value || null;
|
||||
issue.updated = new Date().toISOString();
|
||||
}
|
||||
@@ -2285,7 +2353,7 @@
|
||||
program: document.getElementById('issueProgram').value,
|
||||
owner: document.getElementById('issueOwner').value,
|
||||
priority: document.getElementById('issuePriority').value,
|
||||
status: 'todo',
|
||||
status: document.getElementById('issueStatus').value || 'todo',
|
||||
created: new Date().toISOString(),
|
||||
deadline: document.getElementById('issueDeadline').value || null
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"lastUpdated": "2026-02-02T11:25:18.119Z",
|
||||
"lastUpdated": "2026-02-02T22:27:06.452Z",
|
||||
"programs": [
|
||||
"ROACONT",
|
||||
"ROAGEST",
|
||||
@@ -23,7 +23,8 @@
|
||||
"priority": "urgent-important",
|
||||
"status": "todo",
|
||||
"created": "2026-02-02T11:25:18.115Z",
|
||||
"deadline": "2026-02-02"
|
||||
"deadline": "2026-02-02",
|
||||
"updated": "2026-02-02T22:27:06.428Z"
|
||||
},
|
||||
{
|
||||
"id": "ROA-001",
|
||||
@@ -31,10 +32,11 @@
|
||||
"description": "RD 49 = în urma inspecției fiscale\nRD 50 = impozit precedent\nFormularul nu recalculează impozitul de 16%\nRD 40 se modifică și la 4.1",
|
||||
"program": "ROACONT",
|
||||
"owner": "marius",
|
||||
"priority": "urgent-important",
|
||||
"priority": "important",
|
||||
"status": "todo",
|
||||
"created": "2026-01-30T15:10:00Z",
|
||||
"deadline": null
|
||||
"deadline": "2026-02-06",
|
||||
"updated": "2026-02-02T22:26:59.690Z"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,52 +1,33 @@
|
||||
# 2026-02-02 - Note de sesiune
|
||||
# 2 Februarie 2026
|
||||
|
||||
## Decizii
|
||||
- Marius aprobă TOATE propunerile din raportul de seară ("Da")
|
||||
- A0 + A3 executate imediat
|
||||
- A1 + A2 (sesiuni TU+EU) de programat luni-joi 15:00-16:00
|
||||
|
||||
### Rapoarte pe EMAIL (nu Discord)
|
||||
- Morning-report și evening-report merg acum pe **email** (mmarius28@gmail.com)
|
||||
- Format nou cu două secțiuni:
|
||||
- **📚 Sinteză** - modele/concepte → fișier separat + link
|
||||
- **⚡ Acționabile** - task-uri cu CINE/CE/EFORT/REZULTAT clar
|
||||
- 3 răspunsuri predefinite (1/2/3) pentru 80/20
|
||||
- Job-uri actualizate: `morning-report`, `evening-report`
|
||||
## Executat
|
||||
- **A0:** Git commit și push (2 commits: TOOLS.md, KB index, coaching, email tool)
|
||||
- **A3:** Integrată întrebarea "Ce poveste despre tine ar trebui să renunți?" în insights pentru coaching dimineață
|
||||
|
||||
### Fix email_send.py
|
||||
- Problema: MailChannels + Gmail respingeau emailurile
|
||||
- Cauza: Emoji în FROM_NAME + header-e non-RFC
|
||||
- Fix:
|
||||
- `FROM_NAME = "Echo"` (fără emoji)
|
||||
- `Header(subject, 'utf-8')` pentru encoding
|
||||
- `formataddr((FROM_NAME, SMTP_USER))` pentru RFC compliance
|
||||
## De programat
|
||||
- **A1:** Sesiune "Dizolvarea lui Nu Merit" (30 min) - exercițiu Monica Ion
|
||||
- **A2:** Sistemul 5 pași pentru frici (15 min) - Zoltan Vereș
|
||||
|
||||
### Reguli sub-agenți (AGENTS.md)
|
||||
- Când lansez sub-agent, TREBUIE să-i dau tot contextul: AGENTS.md, SOUL.md, USER.md, memory relevant
|
||||
- Sub-agentul rulează izolat, nu are acces automat la fișierele mele
|
||||
## Feedback Marius
|
||||
1. **Email replies:** Nu primește email-urile de confirmare - de verificat flux
|
||||
2. **Insights → Rapoarte:** Raportul de seară a fost prea conservator - 22 insights extrase dar doar 4 propuneri în raport. De ajustat job-ul evening-report să propună mai multe.
|
||||
|
||||
## Fișiere create/modificate
|
||||
## Stats azi
|
||||
- 23 note YouTube în KB (20 procesate azi - Zoltan Vereș workshop)
|
||||
- 22 insights extrase în `memory/kb/insights/2026-02-02.md`
|
||||
- Job insights-extract funcționează, dar rapoartele nu folosesc toate
|
||||
|
||||
- `memory/kb/insights/2026-02-02.md` - 22 insights din 20 video-uri
|
||||
- `memory/kb/insights/sinteza-2026-02-02.md` - 16 modele/concepte (sinteză)
|
||||
- `tools/email_send.py` - fix RFC compliance
|
||||
- `AGENTS.md` - reguli sub-agenți
|
||||
- `TOOLS.md` - documentație joburi actualizată
|
||||
## De făcut
|
||||
- [x] Ajustez evening-report și morning-report să propună cu ZI și ORĂ concrete
|
||||
- [x] Adăugat listare insights disponibile în rapoarte
|
||||
- [ ] Programez A1 și A2 cu Marius
|
||||
|
||||
## Aprobat și executat (răspuns email: DA)
|
||||
|
||||
**Executat:**
|
||||
- ✅ A0: Git commit + push (54 fișiere)
|
||||
- ✅ A4: Template seară "10 lucruri" → memory/kb/projects/templates/template-seara-merit.md
|
||||
|
||||
**Programat mâine (job grup-sprijin-pregatire):**
|
||||
- A3: Fișă grup sprijin - starea de victimă (tema pregătită din insights)
|
||||
|
||||
**Programat miercuri-joi 15-16:**
|
||||
- A1: Lista eforturilor pt clienți noi (template + completăm împreună)
|
||||
- A2: Template valoare adusă clienți (template + completăm împreună)
|
||||
- A5: Sesiune film interior (30 min conversație)
|
||||
|
||||
## Învățat
|
||||
|
||||
- Email deliverability: MailChannels poate bloca emailuri de la hosting shared
|
||||
- Gmail e strict pe RFC 5322 - header-ele trebuie corect formatate
|
||||
- Rapoarte pe email > Discord pentru decizii care necesită gândire
|
||||
- Format "sinteză + acționabile + răspunsuri predefinite" = 80/20 friendly
|
||||
## Lecții învățate
|
||||
- **Rapoarte:** TOATE propunerile TU+EU/FAC TU trebuie să aibă zi și oră concrete
|
||||
- **Email flow:** Reply #1 imediat (confirmare primire), Reply #2 după execuție (ce s-a făcut)
|
||||
- **Insights:** Listează TOATE insight-urile disponibile, nu doar câteva
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
pip
|
||||
170
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/METADATA
Normal file
170
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/METADATA
Normal file
@@ -0,0 +1,170 @@
|
||||
Metadata-Version: 2.4
|
||||
Name: pypdf
|
||||
Version: 6.6.2
|
||||
Summary: A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files
|
||||
Author-email: Mathieu Fenniak <biziqe@mathieu.fenniak.net>
|
||||
Maintainer: stefan6419846
|
||||
Maintainer-email: Martin Thoma <info@martin-thoma.de>
|
||||
Requires-Python: >=3.9
|
||||
Description-Content-Type: text/markdown
|
||||
License-Expression: BSD-3-Clause
|
||||
Classifier: Development Status :: 5 - Production/Stable
|
||||
Classifier: Intended Audience :: Developers
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: 3.11
|
||||
Classifier: Programming Language :: Python :: 3.12
|
||||
Classifier: Programming Language :: Python :: 3.13
|
||||
Classifier: Programming Language :: Python :: 3.14
|
||||
Classifier: Operating System :: OS Independent
|
||||
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
||||
Classifier: Typing :: Typed
|
||||
License-File: LICENSE
|
||||
Requires-Dist: typing_extensions >= 4.0; python_version < '3.11'
|
||||
Requires-Dist: cryptography ; extra == "crypto"
|
||||
Requires-Dist: PyCryptodome ; extra == "cryptodome"
|
||||
Requires-Dist: black ; extra == "dev"
|
||||
Requires-Dist: flit ; extra == "dev"
|
||||
Requires-Dist: pip-tools ; extra == "dev"
|
||||
Requires-Dist: pre-commit ; extra == "dev"
|
||||
Requires-Dist: pytest-cov ; extra == "dev"
|
||||
Requires-Dist: pytest-socket ; extra == "dev"
|
||||
Requires-Dist: pytest-timeout ; extra == "dev"
|
||||
Requires-Dist: pytest-xdist ; extra == "dev"
|
||||
Requires-Dist: wheel ; extra == "dev"
|
||||
Requires-Dist: myst_parser ; extra == "docs"
|
||||
Requires-Dist: sphinx ; extra == "docs"
|
||||
Requires-Dist: sphinx_rtd_theme ; extra == "docs"
|
||||
Requires-Dist: cryptography ; extra == "full"
|
||||
Requires-Dist: Pillow>=8.0.0 ; extra == "full"
|
||||
Requires-Dist: Pillow>=8.0.0 ; extra == "image"
|
||||
Project-URL: Bug Reports, https://github.com/py-pdf/pypdf/issues
|
||||
Project-URL: Changelog, https://pypdf.readthedocs.io/en/latest/meta/CHANGELOG.html
|
||||
Project-URL: Documentation, https://pypdf.readthedocs.io/en/latest/
|
||||
Project-URL: Source, https://github.com/py-pdf/pypdf
|
||||
Provides-Extra: crypto
|
||||
Provides-Extra: cryptodome
|
||||
Provides-Extra: dev
|
||||
Provides-Extra: docs
|
||||
Provides-Extra: full
|
||||
Provides-Extra: image
|
||||
|
||||
[](https://badge.fury.io/py/pypdf)
|
||||
[](https://pypi.org/project/pypdf/)
|
||||
[](https://pypdf.readthedocs.io/en/stable/)
|
||||
[](https://github.com/py-pdf/pypdf)
|
||||
[](https://codecov.io/gh/py-pdf/pypdf)
|
||||
|
||||
# pypdf
|
||||
|
||||
pypdf is a free and open-source pure-python PDF library capable of splitting,
|
||||
[merging](https://pypdf.readthedocs.io/en/stable/user/merging-pdfs.html),
|
||||
[cropping, and transforming](https://pypdf.readthedocs.io/en/stable/user/cropping-and-transforming.html)
|
||||
the pages of PDF files. It can also add
|
||||
custom data, viewing options, and
|
||||
[passwords](https://pypdf.readthedocs.io/en/stable/user/encryption-decryption.html)
|
||||
to PDF files. pypdf can
|
||||
[retrieve text](https://pypdf.readthedocs.io/en/stable/user/extract-text.html)
|
||||
and
|
||||
[metadata](https://pypdf.readthedocs.io/en/stable/user/metadata.html)
|
||||
from PDFs as well.
|
||||
|
||||
See [pdfly](https://github.com/py-pdf/pdfly) for a CLI application that uses pypdf to interact with PDFs.
|
||||
|
||||
## Installation
|
||||
|
||||
Install pypdf using pip:
|
||||
|
||||
```
|
||||
pip install pypdf
|
||||
```
|
||||
|
||||
For using pypdf with AES encryption or decryption, install extra dependencies:
|
||||
|
||||
```
|
||||
pip install pypdf[crypto]
|
||||
```
|
||||
|
||||
> **NOTE**: `pypdf` 3.1.0 and above include significant improvements compared to
|
||||
> previous versions. Please refer to [the migration
|
||||
> guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html) for
|
||||
> more information.
|
||||
|
||||
## Usage
|
||||
|
||||
```python
|
||||
from pypdf import PdfReader
|
||||
|
||||
reader = PdfReader("example.pdf")
|
||||
number_of_pages = len(reader.pages)
|
||||
page = reader.pages[0]
|
||||
text = page.extract_text()
|
||||
```
|
||||
|
||||
pypdf can do a lot more, e.g. splitting, merging, reading and creating annotations, decrypting and encrypting. Check out the
|
||||
[documentation](https://pypdf.readthedocs.io/en/stable/) for additional usage
|
||||
examples!
|
||||
|
||||
For questions and answers, visit
|
||||
[StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
|
||||
(tagged with [pypdf](https://stackoverflow.com/questions/tagged/pypdf)).
|
||||
|
||||
## Contributions
|
||||
|
||||
Maintaining pypdf is a collaborative effort. You can support the project by
|
||||
writing documentation, helping to narrow down issues, and submitting code.
|
||||
See the [CONTRIBUTING.md](https://github.com/py-pdf/pypdf/blob/main/CONTRIBUTING.md) file for more information.
|
||||
|
||||
### Q&A
|
||||
|
||||
The experience pypdf users have covers the whole range from beginner to expert. You can contribute to the pypdf community by answering questions
|
||||
on [StackOverflow](https://stackoverflow.com/questions/tagged/pypdf),
|
||||
helping in [discussions](https://github.com/py-pdf/pypdf/discussions),
|
||||
and asking users who report issues for [MCVE](https://stackoverflow.com/help/minimal-reproducible-example)'s (Code + example PDF!).
|
||||
|
||||
|
||||
### Issues
|
||||
|
||||
A good bug ticket includes a MCVE - a minimal complete verifiable example.
|
||||
For pypdf, this means that you must upload a PDF that causes the bug to occur
|
||||
as well as the code you're executing with all of the output. Use
|
||||
`print(pypdf.__version__)` to tell us which version you're using.
|
||||
|
||||
### Code
|
||||
|
||||
All code contributions are welcome, but smaller ones have a better chance to
|
||||
get included in a timely manner. Adding unit tests for new features or test
|
||||
cases for bugs you've fixed help us to ensure that the Pull Request (PR) is fine.
|
||||
|
||||
pypdf includes a test suite which can be executed with `pytest`:
|
||||
|
||||
```bash
|
||||
$ pytest
|
||||
===================== test session starts =====================
|
||||
platform linux -- Python 3.6.15, pytest-7.0.1, pluggy-1.0.0
|
||||
rootdir: /home/moose/GitHub/Martin/pypdf
|
||||
plugins: cov-3.0.0
|
||||
collected 233 items
|
||||
|
||||
tests/test_basic_features.py .. [ 0%]
|
||||
tests/test_constants.py . [ 1%]
|
||||
tests/test_filters.py .................x..... [ 11%]
|
||||
tests/test_generic.py ................................. [ 25%]
|
||||
............. [ 30%]
|
||||
tests/test_javascript.py .. [ 31%]
|
||||
tests/test_merger.py . [ 32%]
|
||||
tests/test_page.py ......................... [ 42%]
|
||||
tests/test_pagerange.py ................ [ 49%]
|
||||
tests/test_papersizes.py .................. [ 57%]
|
||||
tests/test_reader.py .................................. [ 72%]
|
||||
............... [ 78%]
|
||||
tests/test_utils.py .................... [ 87%]
|
||||
tests/test_workflows.py .......... [ 91%]
|
||||
tests/test_writer.py ................. [ 98%]
|
||||
tests/test_xmp.py ... [100%]
|
||||
|
||||
========== 232 passed, 1 xfailed, 1 warning in 4.52s ==========
|
||||
```
|
||||
|
||||
117
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/RECORD
Normal file
117
venv/lib/python3.12/site-packages/pypdf-6.6.2.dist-info/RECORD
Normal file
@@ -0,0 +1,117 @@
|
||||
pypdf-6.6.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
pypdf-6.6.2.dist-info/METADATA,sha256=1Vu0OgjW3amj2S_YMUmD0Lj_7_GEw-f5VaIM-_9niK8,7149
|
||||
pypdf-6.6.2.dist-info/RECORD,,
|
||||
pypdf-6.6.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
pypdf-6.6.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
||||
pypdf-6.6.2.dist-info/licenses/LICENSE,sha256=qXrCMOXzPvEKU2eoUOsB-R8aCwZONHQsd5TSKUVX9SQ,1605
|
||||
pypdf/__init__.py,sha256=YS_1ZrQ3jBPHsRgMstqJrAts3lUApj_lMOMK5qiLG5w,1283
|
||||
pypdf/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_cmap.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_doc_common.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_encryption.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_font.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_page.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_page_labels.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_protocols.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_reader.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_utils.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_version.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_writer.cpython-312.pyc,,
|
||||
pypdf/__pycache__/_xobj_image_helpers.cpython-312.pyc,,
|
||||
pypdf/__pycache__/constants.cpython-312.pyc,,
|
||||
pypdf/__pycache__/errors.cpython-312.pyc,,
|
||||
pypdf/__pycache__/filters.cpython-312.pyc,,
|
||||
pypdf/__pycache__/pagerange.cpython-312.pyc,,
|
||||
pypdf/__pycache__/papersizes.cpython-312.pyc,,
|
||||
pypdf/__pycache__/types.cpython-312.pyc,,
|
||||
pypdf/__pycache__/xmp.cpython-312.pyc,,
|
||||
pypdf/_cmap.py,sha256=iaAvJQQKBxkqMj5-WdD4vZV-Zdz-Sba5j6q3oPQyLT0,11713
|
||||
pypdf/_codecs/__init__.py,sha256=PF1KlsLWCOF0cgdqns7G4X-l3zq5_OnZePw7RFIn1bE,1645
|
||||
pypdf/_codecs/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/_codecs.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/adobe_glyphs.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/core_fontmetrics.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/pdfdoc.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/std.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/symbol.cpython-312.pyc,,
|
||||
pypdf/_codecs/__pycache__/zapfding.cpython-312.pyc,,
|
||||
pypdf/_codecs/_codecs.py,sha256=46oRZJySwGxCJp1kjIer7js_TYSjj4Gs2i2Uce3v-eE,10555
|
||||
pypdf/_codecs/adobe_glyphs.py,sha256=t3cDFPDqwIz1w9B0gdVzjdc8eEK9AuRjk5f7laEw_fY,447213
|
||||
pypdf/_codecs/core_fontmetrics.py,sha256=qQvNRQi8V8FOBmSwGcsak4qyl9cQ80cDjbpD5TvhuBg,113269
|
||||
pypdf/_codecs/pdfdoc.py,sha256=xfSvMFYsvxuaSQ0Uu9vZDKaB0Wu85h1uCiB1i9rAcUU,4269
|
||||
pypdf/_codecs/std.py,sha256=DyQMuEpAGEpS9uy1jWf4cnj-kqShPOAij5sI7Q1YD8E,2630
|
||||
pypdf/_codecs/symbol.py,sha256=nIaGQIlhWCJiPMHrwUlmGHH-_fOXyEKvguRmuKXcGAk,3734
|
||||
pypdf/_codecs/zapfding.py,sha256=PQxjxRC616d41xF3exVxP1W8nM4QrZfjO3lmtLxpE_s,3742
|
||||
pypdf/_crypt_providers/__init__.py,sha256=K3Z6AuXhXVeXgLet-Tukq2gt9H66OgdupsvxIS1CmkI,3054
|
||||
pypdf/_crypt_providers/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/_crypt_providers/__pycache__/_base.cpython-312.pyc,,
|
||||
pypdf/_crypt_providers/__pycache__/_cryptography.cpython-312.pyc,,
|
||||
pypdf/_crypt_providers/__pycache__/_fallback.cpython-312.pyc,,
|
||||
pypdf/_crypt_providers/__pycache__/_pycryptodome.cpython-312.pyc,,
|
||||
pypdf/_crypt_providers/_base.py,sha256=_f53Mj6vivhEZMQ4vNxN5G0IOgFY-n5_leke0c_qiNU,1711
|
||||
pypdf/_crypt_providers/_cryptography.py,sha256=zT3WmbPzesvgHRkGcKAldqJ24MY3BwZViVbSc55Zxhw,4557
|
||||
pypdf/_crypt_providers/_fallback.py,sha256=vsYoowR1YCAV_q-HrdIZhkUcrCb6HvRBNMYm03QtCU8,3334
|
||||
pypdf/_crypt_providers/_pycryptodome.py,sha256=U1aQZ9iYBrZo-hKCjJUhGOPhwEFToiitowQ316TNrrA,3381
|
||||
pypdf/_doc_common.py,sha256=Cbsc2uczFhAi2JRioaICx0ISC4lCBkRdo_tKRGw3bpc,53243
|
||||
pypdf/_encryption.py,sha256=-LwFEKfhL3B10afkco6fXx-EqtjoXf67pAUgH2VBfDw,48762
|
||||
pypdf/_font.py,sha256=R5jQsBYa_eMrK7VezyoWCmbBARZyS5xp8jzD2XRvKeE,14146
|
||||
pypdf/_page.py,sha256=Tp2GyjjOHLFwQ1tw8bO-poyZA65PJn3k94BymXMmurw,89909
|
||||
pypdf/_page_labels.py,sha256=_HXqgEhSLTH_mMhy8m4QAOzIOHRQLV6_lYvg81-l9hI,8546
|
||||
pypdf/_protocols.py,sha256=7qz92LVdPrYkSpdUPpAp9U4GW5jxNBTfVcpUWwUhEOo,2123
|
||||
pypdf/_reader.py,sha256=KyeDHVEI5n4cZBHGVzbGIfhaPC1nZMiIU0W_ZNb0w_Y,55079
|
||||
pypdf/_text_extraction/__init__.py,sha256=a3Z33rQVTiMKGtwt7_bfXlPosbST8rzELoNnt053_Qw,8515
|
||||
pypdf/_text_extraction/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/__pycache__/_text_extractor.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/_layout_mode/__init__.py,sha256=RUQIwiUwzneNtcljnVM6jkRaem6pgP7mOD2-MBmtpvw,340
|
||||
pypdf/_text_extraction/_layout_mode/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/_layout_mode/__pycache__/_fixed_width_page.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_manager.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/_layout_mode/__pycache__/_text_state_params.cpython-312.pyc,,
|
||||
pypdf/_text_extraction/_layout_mode/_fixed_width_page.py,sha256=eJveDbyMooG970qJOhM5Rwb9ZoyyJDynzWpV9a7IS20,15370
|
||||
pypdf/_text_extraction/_layout_mode/_text_state_manager.py,sha256=XVrIjeTd5jSdMexBQxs0tL5I5RUOitRmN1mELOcKYm4,8221
|
||||
pypdf/_text_extraction/_layout_mode/_text_state_params.py,sha256=hyw6pnC8upBkoFVUJ3LH8hBIIHrNwiqaqcYyzIIyr6Y,5481
|
||||
pypdf/_text_extraction/_text_extractor.py,sha256=wRmFtgMYTbJFbZRJVG3j1-lQWhb6mUC5uiE73DLRhIo,14454
|
||||
pypdf/_utils.py,sha256=v579jJEHn-JophTC4Ej2MBFTEoQGitPWs_d507pyS6g,20194
|
||||
pypdf/_version.py,sha256=S2Qku7VqFDmWPW_O3fID47IPC76TVFqesX1qVVa575w,22
|
||||
pypdf/_writer.py,sha256=K7ANMEgNz-tPngYVMW9j07SEcksk5tFf1_tgi0JDRIg,129793
|
||||
pypdf/_xobj_image_helpers.py,sha256=y7EMrXlYqwbIeUtdQS2XH9nO_2R73DOLf9-T1IyHMIA,21450
|
||||
pypdf/annotations/__init__.py,sha256=f2k_-jAn39CCB27KxQ_e93GinnzkAHbUnnSeGJl1jyE,990
|
||||
pypdf/annotations/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/annotations/__pycache__/_base.cpython-312.pyc,,
|
||||
pypdf/annotations/__pycache__/_markup_annotations.cpython-312.pyc,,
|
||||
pypdf/annotations/__pycache__/_non_markup_annotations.cpython-312.pyc,,
|
||||
pypdf/annotations/_base.py,sha256=eeoc9v2w15jAUhKXj48l1bB66YgBgV-2v5IIUJH-vws,961
|
||||
pypdf/annotations/_markup_annotations.py,sha256=PLDCbsEWSgOmk6HTxepolEzj-Q3EE5J4hXMgnTDFaqc,9590
|
||||
pypdf/annotations/_non_markup_annotations.py,sha256=Z2IUvcCOcTcpJhSXrex_9riYM2D64XxFQ_vac10BNRU,3649
|
||||
pypdf/constants.py,sha256=_U_xkH1REx2rsgtx3jCOaKivhmyqPA25PLL7Z4A1_ZI,23260
|
||||
pypdf/errors.py,sha256=Bw1W9hxOsDgwqwU6YoQ2l0-JiUyTq6l5QjVCr-W4GFA,1947
|
||||
pypdf/filters.py,sha256=FzfrqdZK9bs3MjU75KJ2uIMPpx6VcxYQ4oV9wLh3j-w,29210
|
||||
pypdf/generic/__init__.py,sha256=VrqdYftQECePDU2rXVMgEqRaYFR8zOV_fvJgo19x_uw,3468
|
||||
pypdf/generic/__pycache__/__init__.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_appearance_stream.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_base.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_data_structures.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_files.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_fit.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_image_inline.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_link.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_outline.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_rectangle.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_utils.cpython-312.pyc,,
|
||||
pypdf/generic/__pycache__/_viewerpref.cpython-312.pyc,,
|
||||
pypdf/generic/_appearance_stream.py,sha256=ofXHlJC4-jSBCLOhkKztoeFiYlD-zi8QMdvRrMm3rdE,24867
|
||||
pypdf/generic/_base.py,sha256=N8O_NcqK5y5O70OF8-p6vsac9R1ykTDcBIksBY_9rnA,32531
|
||||
pypdf/generic/_data_structures.py,sha256=g1Jy5tpPSTHIhOme6HFXdMvxV2HuxbZx-HOsF2Awdc0,63602
|
||||
pypdf/generic/_files.py,sha256=NtSkRo6JBgisi4QOyrVneO891boVsuY25hRwij6X9RA,16238
|
||||
pypdf/generic/_fit.py,sha256=X_iADJj1YY4PUStS7rFWC2xR2LUVSvKtUAky0AFAIDM,5515
|
||||
pypdf/generic/_image_inline.py,sha256=4cADiCeaCYq2kgJu0wOYXRn5YZ27cCHb3hGFqFFT5D4,12787
|
||||
pypdf/generic/_link.py,sha256=ibdLhdU0mP_phneaJs-CzUDErkJuqnMT6TsQoHNOYiE,4951
|
||||
pypdf/generic/_outline.py,sha256=qKbMX42OWfqnopIiE6BUy6EvdTLGe3ZtjaiWN85JpaY,1094
|
||||
pypdf/generic/_rectangle.py,sha256=lOqSfFivQxgBN9LU9aqHoxPH8aCPTDUNgRZsNEUd6fc,3785
|
||||
pypdf/generic/_utils.py,sha256=vTDAesfG-cJNDKilz_kbgFodAITzd5ejppWHGjvConk,7258
|
||||
pypdf/generic/_viewerpref.py,sha256=6a_s0Avm9-XvV0wqxiW23cE92qK98ry3y6EPjfsFSdo,6758
|
||||
pypdf/pagerange.py,sha256=2bt21jQZm-9aq2bVf3TXuH8_wGVx7b9T6UrMFXCEJhQ,7108
|
||||
pypdf/papersizes.py,sha256=6Tz5sfNN_3JOUapY83U-lakohnpXYA0hSEQNmOVLFL8,1413
|
||||
pypdf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
pypdf/types.py,sha256=sJ7wHzk7ER_CJ7kP-s8u9axFnkCXnFpr8nzcj1AxTas,1915
|
||||
pypdf/xmp.py,sha256=gqh3IlgTNP7ZuyhvE59p2tsMvu4adGkq0G8RDg0OtQw,29238
|
||||
@@ -0,0 +1,4 @@
|
||||
Wheel-Version: 1.0
|
||||
Generator: flit 3.12.0
|
||||
Root-Is-Purelib: true
|
||||
Tag: py3-none-any
|
||||
@@ -0,0 +1,29 @@
|
||||
Copyright (c) 2006-2008, Mathieu Fenniak
|
||||
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
|
||||
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
* The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
48
venv/lib/python3.12/site-packages/pypdf/__init__.py
Normal file
48
venv/lib/python3.12/site-packages/pypdf/__init__.py
Normal file
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
pypdf is a free and open-source pure-python PDF library capable of splitting,
|
||||
merging, cropping, and transforming the pages of PDF files. It can also add
|
||||
custom data, viewing options, and passwords to PDF files. pypdf can retrieve
|
||||
text and metadata from PDFs as well.
|
||||
|
||||
You can read the full docs at https://pypdf.readthedocs.io/.
|
||||
"""
|
||||
|
||||
from ._crypt_providers import crypt_provider
|
||||
from ._doc_common import DocumentInformation
|
||||
from ._encryption import PasswordType
|
||||
from ._page import PageObject, Transformation
|
||||
from ._reader import PdfReader
|
||||
from ._text_extraction import mult
|
||||
from ._version import __version__
|
||||
from ._writer import ObjectDeletionFlag, PdfWriter
|
||||
from .constants import ImageType
|
||||
from .pagerange import PageRange, parse_filename_page_ranges
|
||||
from .papersizes import PaperSize
|
||||
|
||||
try:
|
||||
import PIL
|
||||
|
||||
pil_version = PIL.__version__
|
||||
except ImportError:
|
||||
pil_version = "none"
|
||||
|
||||
_debug_versions = (
|
||||
f"pypdf=={__version__}, {crypt_provider=}, PIL={pil_version}"
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"DocumentInformation",
|
||||
"ImageType",
|
||||
"ObjectDeletionFlag",
|
||||
"PageObject",
|
||||
"PageRange",
|
||||
"PaperSize",
|
||||
"PasswordType",
|
||||
"PdfReader",
|
||||
"PdfWriter",
|
||||
"Transformation",
|
||||
"__version__",
|
||||
"_debug_versions",
|
||||
"mult",
|
||||
"parse_filename_page_ranges",
|
||||
]
|
||||
338
venv/lib/python3.12/site-packages/pypdf/_cmap.py
Normal file
338
venv/lib/python3.12/site-packages/pypdf/_cmap.py
Normal file
@@ -0,0 +1,338 @@
|
||||
import binascii
|
||||
from binascii import Error as BinasciiError
|
||||
from binascii import unhexlify
|
||||
from math import ceil
|
||||
from typing import Any, Union, cast
|
||||
|
||||
from ._codecs import adobe_glyphs, charset_encoding
|
||||
from ._utils import logger_error, logger_warning
|
||||
from .generic import (
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NullObject,
|
||||
StreamObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
_predefined_cmap: dict[str, str] = {
|
||||
"/Identity-H": "utf-16-be",
|
||||
"/Identity-V": "utf-16-be",
|
||||
"/GB-EUC-H": "gbk",
|
||||
"/GB-EUC-V": "gbk",
|
||||
"/GBpc-EUC-H": "gb2312",
|
||||
"/GBpc-EUC-V": "gb2312",
|
||||
"/GBK-EUC-H": "gbk",
|
||||
"/GBK-EUC-V": "gbk",
|
||||
"/GBK2K-H": "gb18030",
|
||||
"/GBK2K-V": "gb18030",
|
||||
"/ETen-B5-H": "cp950",
|
||||
"/ETen-B5-V": "cp950",
|
||||
"/ETenms-B5-H": "cp950",
|
||||
"/ETenms-B5-V": "cp950",
|
||||
"/UniCNS-UTF16-H": "utf-16-be",
|
||||
"/UniCNS-UTF16-V": "utf-16-be",
|
||||
"/UniGB-UTF16-H": "gb18030",
|
||||
"/UniGB-UTF16-V": "gb18030",
|
||||
# UCS2 in code
|
||||
}
|
||||
|
||||
|
||||
def get_encoding(
|
||||
ft: DictionaryObject
|
||||
) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
|
||||
encoding = _parse_encoding(ft)
|
||||
map_dict, int_entry = _parse_to_unicode(ft)
|
||||
|
||||
# Apply rule from PDF ref 1.7 §5.9.1, 1st bullet:
|
||||
# if cmap not empty encoding should be discarded
|
||||
# (here transformed into identity for those characters)
|
||||
# If encoding is a string, it is expected to be an identity translation.
|
||||
if isinstance(encoding, dict):
|
||||
for x in int_entry:
|
||||
if x <= 255:
|
||||
encoding[x] = chr(x)
|
||||
|
||||
return encoding, map_dict
|
||||
|
||||
|
||||
def _parse_encoding(
|
||||
ft: DictionaryObject
|
||||
) -> Union[str, dict[int, str]]:
|
||||
encoding: Union[str, list[str], dict[int, str]] = []
|
||||
if "/Encoding" not in ft:
|
||||
if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
|
||||
encoding = dict(
|
||||
zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])])
|
||||
)
|
||||
else:
|
||||
encoding = "charmap"
|
||||
return encoding
|
||||
enc: Union[str, DictionaryObject, NullObject] = cast(
|
||||
Union[str, DictionaryObject, NullObject], ft["/Encoding"].get_object()
|
||||
)
|
||||
if isinstance(enc, str):
|
||||
try:
|
||||
# already done : enc = NameObject.unnumber(enc.encode()).decode()
|
||||
# for #xx decoding
|
||||
if enc in charset_encoding:
|
||||
encoding = charset_encoding[enc].copy()
|
||||
elif enc in _predefined_cmap:
|
||||
encoding = _predefined_cmap[enc]
|
||||
elif "-UCS2-" in enc:
|
||||
encoding = "utf-16-be"
|
||||
else:
|
||||
raise Exception("not found")
|
||||
except Exception:
|
||||
logger_error(f"Advanced encoding {enc} not implemented yet", __name__)
|
||||
encoding = enc
|
||||
elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
|
||||
try:
|
||||
encoding = charset_encoding[cast(str, enc["/BaseEncoding"])].copy()
|
||||
except Exception:
|
||||
logger_error(
|
||||
f"Advanced encoding {encoding} not implemented yet",
|
||||
__name__,
|
||||
)
|
||||
encoding = charset_encoding["/StandardEncoding"].copy()
|
||||
else:
|
||||
encoding = charset_encoding["/StandardEncoding"].copy()
|
||||
if isinstance(enc, DictionaryObject) and "/Differences" in enc:
|
||||
x: int = 0
|
||||
o: Union[int, str]
|
||||
for o in cast(DictionaryObject, enc["/Differences"]):
|
||||
if isinstance(o, int):
|
||||
x = o
|
||||
else: # isinstance(o, str):
|
||||
try:
|
||||
if x < len(encoding):
|
||||
encoding[x] = adobe_glyphs[o] # type: ignore
|
||||
except Exception:
|
||||
encoding[x] = o # type: ignore
|
||||
x += 1
|
||||
if isinstance(encoding, list):
|
||||
encoding = dict(zip(range(256), encoding))
|
||||
return encoding
|
||||
|
||||
|
||||
def _parse_to_unicode(
|
||||
ft: DictionaryObject
|
||||
) -> tuple[dict[Any, Any], list[int]]:
|
||||
# will store all translation code
|
||||
# and map_dict[-1] we will have the number of bytes to convert
|
||||
map_dict: dict[Any, Any] = {}
|
||||
|
||||
# will provide the list of cmap keys as int to correct encoding
|
||||
int_entry: list[int] = []
|
||||
|
||||
if "/ToUnicode" not in ft:
|
||||
if ft.get("/Subtype", "") == "/Type1":
|
||||
return _type1_alternative(ft, map_dict, int_entry)
|
||||
return {}, []
|
||||
process_rg: bool = False
|
||||
process_char: bool = False
|
||||
multiline_rg: Union[
|
||||
None, tuple[int, int]
|
||||
] = None # tuple = (current_char, remaining size) ; cf #1285 for example of file
|
||||
cm = prepare_cm(ft)
|
||||
for line in cm.split(b"\n"):
|
||||
process_rg, process_char, multiline_rg = process_cm_line(
|
||||
line.strip(b" \t"),
|
||||
process_rg,
|
||||
process_char,
|
||||
multiline_rg,
|
||||
map_dict,
|
||||
int_entry,
|
||||
)
|
||||
|
||||
return map_dict, int_entry
|
||||
|
||||
|
||||
def prepare_cm(ft: DictionaryObject) -> bytes:
|
||||
tu = ft["/ToUnicode"]
|
||||
cm: bytes
|
||||
if isinstance(tu, StreamObject):
|
||||
cm = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
|
||||
else: # if (tu is None) or cast(str, tu).startswith("/Identity"):
|
||||
# the full range 0000-FFFF will be processed
|
||||
cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
|
||||
if isinstance(cm, str):
|
||||
cm = cm.encode()
|
||||
# we need to prepare cm before due to missing return line in pdf printed
|
||||
# to pdf from word
|
||||
cm = (
|
||||
cm.strip()
|
||||
.replace(b"beginbfchar", b"\nbeginbfchar\n")
|
||||
.replace(b"endbfchar", b"\nendbfchar\n")
|
||||
.replace(b"beginbfrange", b"\nbeginbfrange\n")
|
||||
.replace(b"endbfrange", b"\nendbfrange\n")
|
||||
.replace(b"<<", b"\n{\n") # text between << and >> not used but
|
||||
.replace(b">>", b"\n}\n") # some solution to find it back
|
||||
)
|
||||
ll = cm.split(b"<")
|
||||
for i in range(len(ll)):
|
||||
j = ll[i].find(b">")
|
||||
if j >= 0:
|
||||
if j == 0:
|
||||
# string is empty: stash a placeholder here (see below)
|
||||
# see https://github.com/py-pdf/pypdf/issues/1111
|
||||
content = b"."
|
||||
else:
|
||||
content = ll[i][:j].replace(b" ", b"")
|
||||
ll[i] = content + b" " + ll[i][j + 1 :]
|
||||
cm = (
|
||||
(b" ".join(ll))
|
||||
.replace(b"[", b" [ ")
|
||||
.replace(b"]", b" ]\n ")
|
||||
.replace(b"\r", b"\n")
|
||||
)
|
||||
return cm
|
||||
|
||||
|
||||
def process_cm_line(
|
||||
line: bytes,
|
||||
process_rg: bool,
|
||||
process_char: bool,
|
||||
multiline_rg: Union[None, tuple[int, int]],
|
||||
map_dict: dict[Any, Any],
|
||||
int_entry: list[int],
|
||||
) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
|
||||
if line == b"" or line[0] == 37: # 37 = %
|
||||
return process_rg, process_char, multiline_rg
|
||||
line = line.replace(b"\t", b" ")
|
||||
if b"beginbfrange" in line:
|
||||
process_rg = True
|
||||
elif b"endbfrange" in line:
|
||||
process_rg = False
|
||||
elif b"beginbfchar" in line:
|
||||
process_char = True
|
||||
elif b"endbfchar" in line:
|
||||
process_char = False
|
||||
elif process_rg:
|
||||
try:
|
||||
multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
|
||||
except binascii.Error as error:
|
||||
logger_warning(f"Skipping broken line {line!r}: {error}", __name__)
|
||||
elif process_char:
|
||||
parse_bfchar(line, map_dict, int_entry)
|
||||
return process_rg, process_char, multiline_rg
|
||||
|
||||
|
||||
def parse_bfrange(
|
||||
line: bytes,
|
||||
map_dict: dict[Any, Any],
|
||||
int_entry: list[int],
|
||||
multiline_rg: Union[None, tuple[int, int]],
|
||||
) -> Union[None, tuple[int, int]]:
|
||||
lst = [x for x in line.split(b" ") if x]
|
||||
closure_found = False
|
||||
if multiline_rg is not None:
|
||||
fmt = b"%%0%dX" % (map_dict[-1] * 2)
|
||||
a = multiline_rg[0] # a, b not in the current line
|
||||
b = multiline_rg[1]
|
||||
for sq in lst:
|
||||
if sq == b"]":
|
||||
closure_found = True
|
||||
break
|
||||
map_dict[
|
||||
unhexlify(fmt % a).decode(
|
||||
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||
"surrogatepass",
|
||||
)
|
||||
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
|
||||
int_entry.append(a)
|
||||
a += 1
|
||||
else:
|
||||
a = int(lst[0], 16)
|
||||
b = int(lst[1], 16)
|
||||
nbi = max(len(lst[0]), len(lst[1]))
|
||||
map_dict[-1] = ceil(nbi / 2)
|
||||
fmt = b"%%0%dX" % (map_dict[-1] * 2)
|
||||
if lst[2] == b"[":
|
||||
for sq in lst[3:]:
|
||||
if sq == b"]":
|
||||
closure_found = True
|
||||
break
|
||||
map_dict[
|
||||
unhexlify(fmt % a).decode(
|
||||
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||
"surrogatepass",
|
||||
)
|
||||
] = unhexlify(sq).decode("utf-16-be", "surrogatepass")
|
||||
int_entry.append(a)
|
||||
a += 1
|
||||
else: # case without list
|
||||
c = int(lst[2], 16)
|
||||
fmt2 = b"%%0%dX" % max(4, len(lst[2]))
|
||||
closure_found = True
|
||||
while a <= b:
|
||||
map_dict[
|
||||
unhexlify(fmt % a).decode(
|
||||
"charmap" if map_dict[-1] == 1 else "utf-16-be",
|
||||
"surrogatepass",
|
||||
)
|
||||
] = unhexlify(fmt2 % c).decode("utf-16-be", "surrogatepass")
|
||||
int_entry.append(a)
|
||||
a += 1
|
||||
c += 1
|
||||
return None if closure_found else (a, b)
|
||||
|
||||
|
||||
def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
|
||||
lst = [x for x in line.split(b" ") if x]
|
||||
map_dict[-1] = len(lst[0]) // 2
|
||||
while len(lst) > 1:
|
||||
map_to = ""
|
||||
# placeholder (see above) means empty string
|
||||
if lst[1] != b".":
|
||||
try:
|
||||
map_to = unhexlify(lst[1]).decode(
|
||||
"charmap" if len(lst[1]) < 4 else "utf-16-be", "surrogatepass"
|
||||
) # join is here as some cases where the code was split
|
||||
except BinasciiError as exception:
|
||||
logger_warning(f"Got invalid hex string: {exception!s} ({lst[1]!r})", __name__)
|
||||
map_dict[
|
||||
unhexlify(lst[0]).decode(
|
||||
"charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"
|
||||
)
|
||||
] = map_to
|
||||
int_entry.append(int(lst[0], 16))
|
||||
lst = lst[2:]
|
||||
|
||||
|
||||
def _type1_alternative(
|
||||
ft: DictionaryObject,
|
||||
map_dict: dict[Any, Any],
|
||||
int_entry: list[int],
|
||||
) -> tuple[dict[Any, Any], list[int]]:
|
||||
if "/FontDescriptor" not in ft:
|
||||
return map_dict, int_entry
|
||||
ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
|
||||
if is_null_or_none(ft_desc):
|
||||
return map_dict, int_entry
|
||||
assert ft_desc is not None, "mypy"
|
||||
txt = ft_desc.get_object().get_data()
|
||||
txt = txt.split(b"eexec\n")[0] # only clear part
|
||||
txt = txt.split(b"/Encoding")[1] # to get the encoding part
|
||||
lines = txt.replace(b"\r", b"\n").split(b"\n")
|
||||
for li in lines:
|
||||
if li.startswith(b"dup"):
|
||||
words = [_w for _w in li.split(b" ") if _w != b""]
|
||||
if len(words) > 3 and words[3] != b"put":
|
||||
continue
|
||||
try:
|
||||
i = int(words[1])
|
||||
except ValueError: # pragma: no cover
|
||||
continue
|
||||
try:
|
||||
v = adobe_glyphs[words[2].decode()]
|
||||
except KeyError:
|
||||
if words[2].startswith(b"/uni"):
|
||||
try:
|
||||
v = chr(int(words[2][4:], 16))
|
||||
except ValueError: # pragma: no cover
|
||||
continue
|
||||
else:
|
||||
continue
|
||||
map_dict[chr(i)] = v
|
||||
int_entry.append(i)
|
||||
return map_dict, int_entry
|
||||
59
venv/lib/python3.12/site-packages/pypdf/_codecs/__init__.py
Normal file
59
venv/lib/python3.12/site-packages/pypdf/_codecs/__init__.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from .adobe_glyphs import adobe_glyphs
|
||||
from .pdfdoc import _pdfdoc_encoding
|
||||
from .std import _std_encoding
|
||||
from .symbol import _symbol_encoding
|
||||
from .zapfding import _zapfding_encoding
|
||||
|
||||
|
||||
def fill_from_encoding(enc: str) -> list[str]:
|
||||
lst: list[str] = []
|
||||
for x in range(256):
|
||||
try:
|
||||
lst += (bytes((x,)).decode(enc),)
|
||||
except Exception:
|
||||
lst += (chr(x),)
|
||||
return lst
|
||||
|
||||
|
||||
def rev_encoding(enc: list[str]) -> dict[str, int]:
|
||||
rev: dict[str, int] = {}
|
||||
for i in range(256):
|
||||
char = enc[i]
|
||||
if char == "\u0000":
|
||||
continue
|
||||
assert char not in rev, f"{char} at {i} already at {rev[char]}"
|
||||
rev[char] = i
|
||||
return rev
|
||||
|
||||
|
||||
_win_encoding = fill_from_encoding("cp1252")
|
||||
_mac_encoding = fill_from_encoding("mac_roman")
|
||||
|
||||
|
||||
_win_encoding_rev: dict[str, int] = rev_encoding(_win_encoding)
|
||||
_mac_encoding_rev: dict[str, int] = rev_encoding(_mac_encoding)
|
||||
_symbol_encoding_rev: dict[str, int] = rev_encoding(_symbol_encoding)
|
||||
_zapfding_encoding_rev: dict[str, int] = rev_encoding(_zapfding_encoding)
|
||||
_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)
|
||||
|
||||
|
||||
charset_encoding: dict[str, list[str]] = {
|
||||
"/StandardEncoding": _std_encoding,
|
||||
"/WinAnsiEncoding": _win_encoding,
|
||||
"/MacRomanEncoding": _mac_encoding,
|
||||
"/PDFDocEncoding": _pdfdoc_encoding,
|
||||
"/Symbol": _symbol_encoding,
|
||||
"/ZapfDingbats": _zapfding_encoding,
|
||||
}
|
||||
|
||||
__all__ = [
|
||||
"_mac_encoding",
|
||||
"_pdfdoc_encoding",
|
||||
"_pdfdoc_encoding_rev",
|
||||
"_std_encoding",
|
||||
"_symbol_encoding",
|
||||
"_win_encoding",
|
||||
"_zapfding_encoding",
|
||||
"adobe_glyphs",
|
||||
"charset_encoding",
|
||||
]
|
||||
281
venv/lib/python3.12/site-packages/pypdf/_codecs/_codecs.py
Normal file
281
venv/lib/python3.12/site-packages/pypdf/_codecs/_codecs.py
Normal file
@@ -0,0 +1,281 @@
|
||||
"""
|
||||
This module is for codecs only.
|
||||
|
||||
While the codec implementation can contain details of the PDF specification,
|
||||
the module should not do any PDF parsing.
|
||||
"""
|
||||
|
||||
import io
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from pypdf._utils import logger_warning
|
||||
from pypdf.errors import LimitReachedError
|
||||
|
||||
|
||||
class Codec(ABC):
|
||||
"""Abstract base class for all codecs."""
|
||||
|
||||
@abstractmethod
|
||||
def encode(self, data: bytes) -> bytes:
|
||||
"""
|
||||
Encode the input data.
|
||||
|
||||
Args:
|
||||
data: Data to encode.
|
||||
|
||||
Returns:
|
||||
Encoded data.
|
||||
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def decode(self, data: bytes) -> bytes:
|
||||
"""
|
||||
Decode the input data.
|
||||
|
||||
Args:
|
||||
data: Data to decode.
|
||||
|
||||
Returns:
|
||||
Decoded data.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class LzwCodec(Codec):
|
||||
"""Lempel-Ziv-Welch (LZW) adaptive compression codec."""
|
||||
|
||||
CLEAR_TABLE_MARKER = 256 # Special code to indicate table reset
|
||||
EOD_MARKER = 257 # End-of-data marker
|
||||
INITIAL_BITS_PER_CODE = 9 # Initial code bit width
|
||||
MAX_BITS_PER_CODE = 12 # Maximum code bit width
|
||||
|
||||
def __init__(self, max_output_length: int = 75_000_000) -> None:
|
||||
self.max_output_length = max_output_length
|
||||
|
||||
def _initialize_encoding_table(self) -> None:
|
||||
"""Initialize the encoding table and state to initial conditions."""
|
||||
self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
|
||||
self.next_code = self.EOD_MARKER + 1
|
||||
self.bits_per_code = self.INITIAL_BITS_PER_CODE
|
||||
self.max_code_value = (1 << self.bits_per_code) - 1
|
||||
|
||||
def _increase_next_code(self) -> None:
|
||||
"""Update bits_per_code and max_code_value if necessary."""
|
||||
self.next_code += 1
|
||||
if (
|
||||
self.next_code > self.max_code_value
|
||||
and self.bits_per_code < self.MAX_BITS_PER_CODE
|
||||
):
|
||||
self.bits_per_code += 1
|
||||
self.max_code_value = (1 << self.bits_per_code) - 1
|
||||
|
||||
def encode(self, data: bytes) -> bytes:
|
||||
"""
|
||||
Encode data using the LZW compression algorithm.
|
||||
|
||||
Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".
|
||||
"""
|
||||
result_codes: list[int] = []
|
||||
|
||||
# The encoder shall begin by issuing a clear-table code
|
||||
result_codes.append(self.CLEAR_TABLE_MARKER)
|
||||
self._initialize_encoding_table()
|
||||
|
||||
current_sequence = b""
|
||||
for byte in data:
|
||||
next_sequence = current_sequence + bytes([byte])
|
||||
|
||||
if next_sequence in self.encoding_table:
|
||||
# Extend current sequence if already in the table
|
||||
current_sequence = next_sequence
|
||||
else:
|
||||
# Output code for the current sequence
|
||||
result_codes.append(self.encoding_table[current_sequence])
|
||||
|
||||
# Add the new sequence to the table if there's room
|
||||
if self.next_code <= (1 << self.MAX_BITS_PER_CODE) - 1:
|
||||
self.encoding_table[next_sequence] = self.next_code
|
||||
self._increase_next_code()
|
||||
else:
|
||||
# If the table is full, emit a clear-table command
|
||||
result_codes.append(self.CLEAR_TABLE_MARKER)
|
||||
self._initialize_encoding_table()
|
||||
|
||||
# Start new sequence
|
||||
current_sequence = bytes([byte])
|
||||
|
||||
# Ensure everything actually is encoded
|
||||
if current_sequence:
|
||||
result_codes.append(self.encoding_table[current_sequence])
|
||||
result_codes.append(self.EOD_MARKER)
|
||||
|
||||
return self._pack_codes_into_bytes(result_codes)
|
||||
|
||||
def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
|
||||
"""
|
||||
Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
|
||||
The bit-width starts at 9 bits and expands as needed.
|
||||
"""
|
||||
self._initialize_encoding_table()
|
||||
buffer = 0
|
||||
bits_in_buffer = 0
|
||||
output = bytearray()
|
||||
|
||||
for code in codes:
|
||||
buffer = (buffer << self.bits_per_code) | code
|
||||
bits_in_buffer += self.bits_per_code
|
||||
|
||||
# Codes shall be packed into a continuous bit stream, high-order bit
|
||||
# first. This stream shall then be divided into bytes, high-order bit
|
||||
# first.
|
||||
while bits_in_buffer >= 8:
|
||||
bits_in_buffer -= 8
|
||||
output.append((buffer >> bits_in_buffer) & 0xFF)
|
||||
|
||||
if code == self.CLEAR_TABLE_MARKER:
|
||||
self._initialize_encoding_table()
|
||||
elif code == self.EOD_MARKER:
|
||||
continue
|
||||
else:
|
||||
self._increase_next_code()
|
||||
|
||||
# Flush any remaining bits in the buffer
|
||||
if bits_in_buffer > 0:
|
||||
output.append((buffer << (8 - bits_in_buffer)) & 0xFF)
|
||||
|
||||
return bytes(output)
|
||||
|
||||
def _initialize_decoding_table(self) -> None:
|
||||
self.max_code_value = (1 << self.MAX_BITS_PER_CODE) - 1
|
||||
self.decoding_table = [bytes([i]) for i in range(self.CLEAR_TABLE_MARKER)] + [
|
||||
b""
|
||||
] * (self.max_code_value - self.CLEAR_TABLE_MARKER + 1)
|
||||
self._table_index = self.EOD_MARKER + 1
|
||||
self._bits_to_get = 9
|
||||
|
||||
def _next_code_decode(self, data: bytes) -> int:
|
||||
self._next_data: int
|
||||
try:
|
||||
while self._next_bits < self._bits_to_get:
|
||||
self._next_data = (self._next_data << 8) | (
|
||||
data[self._byte_pointer]
|
||||
)
|
||||
self._byte_pointer += 1
|
||||
self._next_bits += 8
|
||||
|
||||
code = (
|
||||
self._next_data >> (self._next_bits - self._bits_to_get)
|
||||
) & self._and_table[self._bits_to_get - 9]
|
||||
self._next_bits -= self._bits_to_get
|
||||
|
||||
# Reduce data to get rid of the overhead,
|
||||
# which increases performance on large streams significantly.
|
||||
self._next_data = self._next_data & 0xFFFFF
|
||||
|
||||
return code
|
||||
except IndexError:
|
||||
return self.EOD_MARKER
|
||||
|
||||
# The following method has been converted to Python from PDFsharp:
|
||||
# https://github.com/empira/PDFsharp/blob/5fbf6ed14740bc4e16786816882d32e43af3ff5d/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
|
||||
#
|
||||
# Original license:
|
||||
#
|
||||
# -------------------------------------------------------------------------
|
||||
# Copyright (c) 2001-2024 empira Software GmbH, Troisdorf (Cologne Area),
|
||||
# Germany
|
||||
#
|
||||
# http://docs.pdfsharp.net
|
||||
#
|
||||
# MIT License
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
# --------------------------------------------------------------------------
|
||||
def decode(self, data: bytes) -> bytes:
|
||||
"""
|
||||
The following code was converted to Python from the following code:
|
||||
https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
|
||||
"""
|
||||
self._and_table = [511, 1023, 2047, 4095]
|
||||
self._table_index = 0
|
||||
self._bits_to_get = 9
|
||||
self._byte_pointer = 0
|
||||
self._next_data = 0
|
||||
self._next_bits = 0
|
||||
|
||||
output_stream = io.BytesIO()
|
||||
output_length = 0
|
||||
|
||||
self._initialize_decoding_table()
|
||||
self._byte_pointer = 0
|
||||
self._next_data = 0
|
||||
self._next_bits = 0
|
||||
old_code = self.CLEAR_TABLE_MARKER
|
||||
|
||||
while True:
|
||||
code = self._next_code_decode(data)
|
||||
if code == self.EOD_MARKER:
|
||||
break
|
||||
|
||||
if code == self.CLEAR_TABLE_MARKER:
|
||||
self._initialize_decoding_table()
|
||||
code = self._next_code_decode(data)
|
||||
if code == self.EOD_MARKER:
|
||||
break
|
||||
output_stream.write(decoded := self.decoding_table[code])
|
||||
old_code = code
|
||||
elif code < self._table_index:
|
||||
decoded = self.decoding_table[code]
|
||||
output_stream.write(decoded)
|
||||
if old_code != self.CLEAR_TABLE_MARKER:
|
||||
self._add_entry_decode(self.decoding_table[old_code], decoded[0])
|
||||
old_code = code
|
||||
else:
|
||||
# The code is not in the table and not one of the special codes
|
||||
decoded = (
|
||||
self.decoding_table[old_code] + self.decoding_table[old_code][:1]
|
||||
)
|
||||
output_stream.write(decoded)
|
||||
self._add_entry_decode(self.decoding_table[old_code], decoded[0])
|
||||
old_code = code
|
||||
|
||||
output_length += len(decoded)
|
||||
if output_length > self.max_output_length:
|
||||
raise LimitReachedError(
|
||||
f"Limit reached while decompressing: {output_length} > {self.max_output_length}"
|
||||
)
|
||||
|
||||
return output_stream.getvalue()
|
||||
|
||||
def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
|
||||
new_string = old_string + bytes([new_char])
|
||||
if self._table_index > self.max_code_value:
|
||||
logger_warning("Ignoring too large LZW table index.", __name__)
|
||||
return
|
||||
self.decoding_table[self._table_index] = new_string
|
||||
self._table_index += 1
|
||||
|
||||
# Update the number of bits to get based on the table index
|
||||
if self._table_index == 511:
|
||||
self._bits_to_get = 10
|
||||
elif self._table_index == 1023:
|
||||
self._bits_to_get = 11
|
||||
elif self._table_index == 2047:
|
||||
self._bits_to_get = 12
|
||||
13969
venv/lib/python3.12/site-packages/pypdf/_codecs/adobe_glyphs.py
Normal file
13969
venv/lib/python3.12/site-packages/pypdf/_codecs/adobe_glyphs.py
Normal file
File diff suppressed because it is too large
Load Diff
4441
venv/lib/python3.12/site-packages/pypdf/_codecs/core_fontmetrics.py
Normal file
4441
venv/lib/python3.12/site-packages/pypdf/_codecs/core_fontmetrics.py
Normal file
File diff suppressed because it is too large
Load Diff
264
venv/lib/python3.12/site-packages/pypdf/_codecs/pdfdoc.py
Normal file
264
venv/lib/python3.12/site-packages/pypdf/_codecs/pdfdoc.py
Normal file
@@ -0,0 +1,264 @@
|
||||
# PDFDocEncoding Character Set: Table D.2 of PDF Reference 1.7
|
||||
# C.1 Predefined encodings sorted by character name of another PDF reference
|
||||
# Some indices have '\u0000' although they should have something else:
|
||||
# 22: should be '\u0017'
|
||||
_pdfdoc_encoding = [
|
||||
"\u0000",
|
||||
"\u0001",
|
||||
"\u0002",
|
||||
"\u0003",
|
||||
"\u0004",
|
||||
"\u0005",
|
||||
"\u0006",
|
||||
"\u0007", # 0 - 7
|
||||
"\u0008",
|
||||
"\u0009",
|
||||
"\u000a",
|
||||
"\u000b",
|
||||
"\u000c",
|
||||
"\u000d",
|
||||
"\u000e",
|
||||
"\u000f", # 8 - 15
|
||||
"\u0010",
|
||||
"\u0011",
|
||||
"\u0012",
|
||||
"\u0013",
|
||||
"\u0014",
|
||||
"\u0015",
|
||||
"\u0000",
|
||||
"\u0017", # 16 - 23
|
||||
"\u02d8",
|
||||
"\u02c7",
|
||||
"\u02c6",
|
||||
"\u02d9",
|
||||
"\u02dd",
|
||||
"\u02db",
|
||||
"\u02da",
|
||||
"\u02dc", # 24 - 31
|
||||
"\u0020",
|
||||
"\u0021",
|
||||
"\u0022",
|
||||
"\u0023",
|
||||
"\u0024",
|
||||
"\u0025",
|
||||
"\u0026",
|
||||
"\u0027", # 32 - 39
|
||||
"\u0028",
|
||||
"\u0029",
|
||||
"\u002a",
|
||||
"\u002b",
|
||||
"\u002c",
|
||||
"\u002d",
|
||||
"\u002e",
|
||||
"\u002f", # 40 - 47
|
||||
"\u0030",
|
||||
"\u0031",
|
||||
"\u0032",
|
||||
"\u0033",
|
||||
"\u0034",
|
||||
"\u0035",
|
||||
"\u0036",
|
||||
"\u0037", # 48 - 55
|
||||
"\u0038",
|
||||
"\u0039",
|
||||
"\u003a",
|
||||
"\u003b",
|
||||
"\u003c",
|
||||
"\u003d",
|
||||
"\u003e",
|
||||
"\u003f", # 56 - 63
|
||||
"\u0040",
|
||||
"\u0041",
|
||||
"\u0042",
|
||||
"\u0043",
|
||||
"\u0044",
|
||||
"\u0045",
|
||||
"\u0046",
|
||||
"\u0047", # 64 - 71
|
||||
"\u0048",
|
||||
"\u0049",
|
||||
"\u004a",
|
||||
"\u004b",
|
||||
"\u004c",
|
||||
"\u004d",
|
||||
"\u004e",
|
||||
"\u004f", # 72 - 79
|
||||
"\u0050",
|
||||
"\u0051",
|
||||
"\u0052",
|
||||
"\u0053",
|
||||
"\u0054",
|
||||
"\u0055",
|
||||
"\u0056",
|
||||
"\u0057", # 80 - 87
|
||||
"\u0058",
|
||||
"\u0059",
|
||||
"\u005a",
|
||||
"\u005b",
|
||||
"\u005c",
|
||||
"\u005d",
|
||||
"\u005e",
|
||||
"\u005f", # 88 - 95
|
||||
"\u0060",
|
||||
"\u0061",
|
||||
"\u0062",
|
||||
"\u0063",
|
||||
"\u0064",
|
||||
"\u0065",
|
||||
"\u0066",
|
||||
"\u0067", # 96 - 103
|
||||
"\u0068",
|
||||
"\u0069",
|
||||
"\u006a",
|
||||
"\u006b",
|
||||
"\u006c",
|
||||
"\u006d",
|
||||
"\u006e",
|
||||
"\u006f", # 104 - 111
|
||||
"\u0070",
|
||||
"\u0071",
|
||||
"\u0072",
|
||||
"\u0073",
|
||||
"\u0074",
|
||||
"\u0075",
|
||||
"\u0076",
|
||||
"\u0077", # 112 - 119
|
||||
"\u0078",
|
||||
"\u0079",
|
||||
"\u007a",
|
||||
"\u007b",
|
||||
"\u007c",
|
||||
"\u007d",
|
||||
"\u007e",
|
||||
"\u0000", # 120 - 127
|
||||
"\u2022",
|
||||
"\u2020",
|
||||
"\u2021",
|
||||
"\u2026",
|
||||
"\u2014",
|
||||
"\u2013",
|
||||
"\u0192",
|
||||
"\u2044", # 128 - 135
|
||||
"\u2039",
|
||||
"\u203a",
|
||||
"\u2212",
|
||||
"\u2030",
|
||||
"\u201e",
|
||||
"\u201c",
|
||||
"\u201d",
|
||||
"\u2018", # 136 - 143
|
||||
"\u2019",
|
||||
"\u201a",
|
||||
"\u2122",
|
||||
"\ufb01",
|
||||
"\ufb02",
|
||||
"\u0141",
|
||||
"\u0152",
|
||||
"\u0160", # 144 - 151
|
||||
"\u0178",
|
||||
"\u017d",
|
||||
"\u0131",
|
||||
"\u0142",
|
||||
"\u0153",
|
||||
"\u0161",
|
||||
"\u017e",
|
||||
"\u0000", # 152 - 159
|
||||
"\u20ac",
|
||||
"\u00a1",
|
||||
"\u00a2",
|
||||
"\u00a3",
|
||||
"\u00a4",
|
||||
"\u00a5",
|
||||
"\u00a6",
|
||||
"\u00a7", # 160 - 167
|
||||
"\u00a8",
|
||||
"\u00a9",
|
||||
"\u00aa",
|
||||
"\u00ab",
|
||||
"\u00ac",
|
||||
"\u0000",
|
||||
"\u00ae",
|
||||
"\u00af", # 168 - 175
|
||||
"\u00b0",
|
||||
"\u00b1",
|
||||
"\u00b2",
|
||||
"\u00b3",
|
||||
"\u00b4",
|
||||
"\u00b5",
|
||||
"\u00b6",
|
||||
"\u00b7", # 176 - 183
|
||||
"\u00b8",
|
||||
"\u00b9",
|
||||
"\u00ba",
|
||||
"\u00bb",
|
||||
"\u00bc",
|
||||
"\u00bd",
|
||||
"\u00be",
|
||||
"\u00bf", # 184 - 191
|
||||
"\u00c0",
|
||||
"\u00c1",
|
||||
"\u00c2",
|
||||
"\u00c3",
|
||||
"\u00c4",
|
||||
"\u00c5",
|
||||
"\u00c6",
|
||||
"\u00c7", # 192 - 199
|
||||
"\u00c8",
|
||||
"\u00c9",
|
||||
"\u00ca",
|
||||
"\u00cb",
|
||||
"\u00cc",
|
||||
"\u00cd",
|
||||
"\u00ce",
|
||||
"\u00cf", # 200 - 207
|
||||
"\u00d0",
|
||||
"\u00d1",
|
||||
"\u00d2",
|
||||
"\u00d3",
|
||||
"\u00d4",
|
||||
"\u00d5",
|
||||
"\u00d6",
|
||||
"\u00d7", # 208 - 215
|
||||
"\u00d8",
|
||||
"\u00d9",
|
||||
"\u00da",
|
||||
"\u00db",
|
||||
"\u00dc",
|
||||
"\u00dd",
|
||||
"\u00de",
|
||||
"\u00df", # 216 - 223
|
||||
"\u00e0",
|
||||
"\u00e1",
|
||||
"\u00e2",
|
||||
"\u00e3",
|
||||
"\u00e4",
|
||||
"\u00e5",
|
||||
"\u00e6",
|
||||
"\u00e7", # 224 - 231
|
||||
"\u00e8",
|
||||
"\u00e9",
|
||||
"\u00ea",
|
||||
"\u00eb",
|
||||
"\u00ec",
|
||||
"\u00ed",
|
||||
"\u00ee",
|
||||
"\u00ef", # 232 - 239
|
||||
"\u00f0",
|
||||
"\u00f1",
|
||||
"\u00f2",
|
||||
"\u00f3",
|
||||
"\u00f4",
|
||||
"\u00f5",
|
||||
"\u00f6",
|
||||
"\u00f7", # 240 - 247
|
||||
"\u00f8",
|
||||
"\u00f9",
|
||||
"\u00fa",
|
||||
"\u00fb",
|
||||
"\u00fc",
|
||||
"\u00fd",
|
||||
"\u00fe",
|
||||
"\u00ff", # 248 - 255
|
||||
]
|
||||
|
||||
assert len(_pdfdoc_encoding) == 256
|
||||
258
venv/lib/python3.12/site-packages/pypdf/_codecs/std.py
Normal file
258
venv/lib/python3.12/site-packages/pypdf/_codecs/std.py
Normal file
@@ -0,0 +1,258 @@
|
||||
_std_encoding = [
|
||||
"\x00",
|
||||
"\x01",
|
||||
"\x02",
|
||||
"\x03",
|
||||
"\x04",
|
||||
"\x05",
|
||||
"\x06",
|
||||
"\x07",
|
||||
"\x08",
|
||||
"\t",
|
||||
"\n",
|
||||
"\x0b",
|
||||
"\x0c",
|
||||
"\r",
|
||||
"\x0e",
|
||||
"\x0f",
|
||||
"\x10",
|
||||
"\x11",
|
||||
"\x12",
|
||||
"\x13",
|
||||
"\x14",
|
||||
"\x15",
|
||||
"\x16",
|
||||
"\x17",
|
||||
"\x18",
|
||||
"\x19",
|
||||
"\x1a",
|
||||
"\x1b",
|
||||
"\x1c",
|
||||
"\x1d",
|
||||
"\x1e",
|
||||
"\x1f",
|
||||
" ",
|
||||
"!",
|
||||
'"',
|
||||
"#",
|
||||
"$",
|
||||
"%",
|
||||
"&",
|
||||
"’",
|
||||
"(",
|
||||
")",
|
||||
"*",
|
||||
"+",
|
||||
",",
|
||||
"-",
|
||||
".",
|
||||
"/",
|
||||
"0",
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4",
|
||||
"5",
|
||||
"6",
|
||||
"7",
|
||||
"8",
|
||||
"9",
|
||||
":",
|
||||
";",
|
||||
"<",
|
||||
"=",
|
||||
">",
|
||||
"?",
|
||||
"@",
|
||||
"A",
|
||||
"B",
|
||||
"C",
|
||||
"D",
|
||||
"E",
|
||||
"F",
|
||||
"G",
|
||||
"H",
|
||||
"I",
|
||||
"J",
|
||||
"K",
|
||||
"L",
|
||||
"M",
|
||||
"N",
|
||||
"O",
|
||||
"P",
|
||||
"Q",
|
||||
"R",
|
||||
"S",
|
||||
"T",
|
||||
"U",
|
||||
"V",
|
||||
"W",
|
||||
"X",
|
||||
"Y",
|
||||
"Z",
|
||||
"[",
|
||||
"\\",
|
||||
"]",
|
||||
"^",
|
||||
"_",
|
||||
"‘",
|
||||
"a",
|
||||
"b",
|
||||
"c",
|
||||
"d",
|
||||
"e",
|
||||
"f",
|
||||
"g",
|
||||
"h",
|
||||
"i",
|
||||
"j",
|
||||
"k",
|
||||
"l",
|
||||
"m",
|
||||
"n",
|
||||
"o",
|
||||
"p",
|
||||
"q",
|
||||
"r",
|
||||
"s",
|
||||
"t",
|
||||
"u",
|
||||
"v",
|
||||
"w",
|
||||
"x",
|
||||
"y",
|
||||
"z",
|
||||
"{",
|
||||
"|",
|
||||
"}",
|
||||
"~",
|
||||
"\x7f",
|
||||
"\x80",
|
||||
"\x81",
|
||||
"\x82",
|
||||
"\x83",
|
||||
"\x84",
|
||||
"\x85",
|
||||
"\x86",
|
||||
"\x87",
|
||||
"\x88",
|
||||
"\x89",
|
||||
"\x8a",
|
||||
"\x8b",
|
||||
"\x8c",
|
||||
"\x8d",
|
||||
"\x8e",
|
||||
"\x8f",
|
||||
"\x90",
|
||||
"\x91",
|
||||
"\x92",
|
||||
"\x93",
|
||||
"\x94",
|
||||
"\x95",
|
||||
"\x96",
|
||||
"\x97",
|
||||
"\x98",
|
||||
"\x99",
|
||||
"\x9a",
|
||||
"\x9b",
|
||||
"\x9c",
|
||||
"\x9d",
|
||||
"\x9e",
|
||||
"\x9f",
|
||||
"\xa0",
|
||||
"¡",
|
||||
"¢",
|
||||
"£",
|
||||
"⁄",
|
||||
"¥",
|
||||
"ƒ",
|
||||
"§",
|
||||
"¤",
|
||||
"'",
|
||||
"“",
|
||||
"«",
|
||||
"‹",
|
||||
"›",
|
||||
"fi",
|
||||
"fl",
|
||||
"°",
|
||||
"–",
|
||||
"†",
|
||||
"‡",
|
||||
"·",
|
||||
"µ",
|
||||
"¶",
|
||||
"•",
|
||||
"‚",
|
||||
"„",
|
||||
"”",
|
||||
"»",
|
||||
"…",
|
||||
"‰",
|
||||
"¾",
|
||||
"¿",
|
||||
"À",
|
||||
"`",
|
||||
"´",
|
||||
"ˆ",
|
||||
"˜",
|
||||
"¯",
|
||||
"˘",
|
||||
"˙",
|
||||
"¨",
|
||||
"É",
|
||||
"˚",
|
||||
"¸",
|
||||
"Ì",
|
||||
"˝",
|
||||
"˛",
|
||||
"ˇ",
|
||||
"—",
|
||||
"Ñ",
|
||||
"Ò",
|
||||
"Ó",
|
||||
"Ô",
|
||||
"Õ",
|
||||
"Ö",
|
||||
"×",
|
||||
"Ø",
|
||||
"Ù",
|
||||
"Ú",
|
||||
"Û",
|
||||
"Ü",
|
||||
"Ý",
|
||||
"Þ",
|
||||
"ß",
|
||||
"à",
|
||||
"Æ",
|
||||
"â",
|
||||
"ª",
|
||||
"ä",
|
||||
"å",
|
||||
"æ",
|
||||
"ç",
|
||||
"Ł",
|
||||
"Ø",
|
||||
"Œ",
|
||||
"º",
|
||||
"ì",
|
||||
"í",
|
||||
"î",
|
||||
"ï",
|
||||
"ð",
|
||||
"æ",
|
||||
"ò",
|
||||
"ó",
|
||||
"ô",
|
||||
"ı",
|
||||
"ö",
|
||||
"÷",
|
||||
"ł",
|
||||
"ø",
|
||||
"œ",
|
||||
"ß",
|
||||
"ü",
|
||||
"ý",
|
||||
"þ",
|
||||
"ÿ",
|
||||
]
|
||||
260
venv/lib/python3.12/site-packages/pypdf/_codecs/symbol.py
Normal file
260
venv/lib/python3.12/site-packages/pypdf/_codecs/symbol.py
Normal file
@@ -0,0 +1,260 @@
|
||||
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
|
||||
_symbol_encoding = [
|
||||
"\u0000",
|
||||
"\u0001",
|
||||
"\u0002",
|
||||
"\u0003",
|
||||
"\u0004",
|
||||
"\u0005",
|
||||
"\u0006",
|
||||
"\u0007",
|
||||
"\u0008",
|
||||
"\u0009",
|
||||
"\u000A",
|
||||
"\u000B",
|
||||
"\u000C",
|
||||
"\u000D",
|
||||
"\u000E",
|
||||
"\u000F",
|
||||
"\u0010",
|
||||
"\u0011",
|
||||
"\u0012",
|
||||
"\u0013",
|
||||
"\u0014",
|
||||
"\u0015",
|
||||
"\u0016",
|
||||
"\u0017",
|
||||
"\u0018",
|
||||
"\u0019",
|
||||
"\u001A",
|
||||
"\u001B",
|
||||
"\u001C",
|
||||
"\u001D",
|
||||
"\u001E",
|
||||
"\u001F",
|
||||
"\u0020",
|
||||
"\u0021",
|
||||
"\u2200",
|
||||
"\u0023",
|
||||
"\u2203",
|
||||
"\u0025",
|
||||
"\u0026",
|
||||
"\u220B",
|
||||
"\u0028",
|
||||
"\u0029",
|
||||
"\u2217",
|
||||
"\u002B",
|
||||
"\u002C",
|
||||
"\u2212",
|
||||
"\u002E",
|
||||
"\u002F",
|
||||
"\u0030",
|
||||
"\u0031",
|
||||
"\u0032",
|
||||
"\u0033",
|
||||
"\u0034",
|
||||
"\u0035",
|
||||
"\u0036",
|
||||
"\u0037",
|
||||
"\u0038",
|
||||
"\u0039",
|
||||
"\u003A",
|
||||
"\u003B",
|
||||
"\u003C",
|
||||
"\u003D",
|
||||
"\u003E",
|
||||
"\u003F",
|
||||
"\u2245",
|
||||
"\u0391",
|
||||
"\u0392",
|
||||
"\u03A7",
|
||||
"\u0394",
|
||||
"\u0395",
|
||||
"\u03A6",
|
||||
"\u0393",
|
||||
"\u0397",
|
||||
"\u0399",
|
||||
"\u03D1",
|
||||
"\u039A",
|
||||
"\u039B",
|
||||
"\u039C",
|
||||
"\u039D",
|
||||
"\u039F",
|
||||
"\u03A0",
|
||||
"\u0398",
|
||||
"\u03A1",
|
||||
"\u03A3",
|
||||
"\u03A4",
|
||||
"\u03A5",
|
||||
"\u03C2",
|
||||
"\u03A9",
|
||||
"\u039E",
|
||||
"\u03A8",
|
||||
"\u0396",
|
||||
"\u005B",
|
||||
"\u2234",
|
||||
"\u005D",
|
||||
"\u22A5",
|
||||
"\u005F",
|
||||
"\uF8E5",
|
||||
"\u03B1",
|
||||
"\u03B2",
|
||||
"\u03C7",
|
||||
"\u03B4",
|
||||
"\u03B5",
|
||||
"\u03C6",
|
||||
"\u03B3",
|
||||
"\u03B7",
|
||||
"\u03B9",
|
||||
"\u03D5",
|
||||
"\u03BA",
|
||||
"\u03BB",
|
||||
"\u00B5",
|
||||
"\u03BD",
|
||||
"\u03BF",
|
||||
"\u03C0",
|
||||
"\u03B8",
|
||||
"\u03C1",
|
||||
"\u03C3",
|
||||
"\u03C4",
|
||||
"\u03C5",
|
||||
"\u03D6",
|
||||
"\u03C9",
|
||||
"\u03BE",
|
||||
"\u03C8",
|
||||
"\u03B6",
|
||||
"\u007B",
|
||||
"\u007C",
|
||||
"\u007D",
|
||||
"\u223C",
|
||||
"\u007F",
|
||||
"\u0080",
|
||||
"\u0081",
|
||||
"\u0082",
|
||||
"\u0083",
|
||||
"\u0084",
|
||||
"\u0085",
|
||||
"\u0086",
|
||||
"\u0087",
|
||||
"\u0088",
|
||||
"\u0089",
|
||||
"\u008A",
|
||||
"\u008B",
|
||||
"\u008C",
|
||||
"\u008D",
|
||||
"\u008E",
|
||||
"\u008F",
|
||||
"\u0090",
|
||||
"\u0091",
|
||||
"\u0092",
|
||||
"\u0093",
|
||||
"\u0094",
|
||||
"\u0095",
|
||||
"\u0096",
|
||||
"\u0097",
|
||||
"\u0098",
|
||||
"\u0099",
|
||||
"\u009A",
|
||||
"\u009B",
|
||||
"\u009C",
|
||||
"\u009D",
|
||||
"\u009E",
|
||||
"\u009F",
|
||||
"\u20AC",
|
||||
"\u03D2",
|
||||
"\u2032",
|
||||
"\u2264",
|
||||
"\u2044",
|
||||
"\u221E",
|
||||
"\u0192",
|
||||
"\u2663",
|
||||
"\u2666",
|
||||
"\u2665",
|
||||
"\u2660",
|
||||
"\u2194",
|
||||
"\u2190",
|
||||
"\u2191",
|
||||
"\u2192",
|
||||
"\u2193",
|
||||
"\u00B0",
|
||||
"\u00B1",
|
||||
"\u2033",
|
||||
"\u2265",
|
||||
"\u00D7",
|
||||
"\u221D",
|
||||
"\u2202",
|
||||
"\u2022",
|
||||
"\u00F7",
|
||||
"\u2260",
|
||||
"\u2261",
|
||||
"\u2248",
|
||||
"\u2026",
|
||||
"\uF8E6",
|
||||
"\uF8E7",
|
||||
"\u21B5",
|
||||
"\u2135",
|
||||
"\u2111",
|
||||
"\u211C",
|
||||
"\u2118",
|
||||
"\u2297",
|
||||
"\u2295",
|
||||
"\u2205",
|
||||
"\u2229",
|
||||
"\u222A",
|
||||
"\u2283",
|
||||
"\u2287",
|
||||
"\u2284",
|
||||
"\u2282",
|
||||
"\u2286",
|
||||
"\u2208",
|
||||
"\u2209",
|
||||
"\u2220",
|
||||
"\u2207",
|
||||
"\uF6DA",
|
||||
"\uF6D9",
|
||||
"\uF6DB",
|
||||
"\u220F",
|
||||
"\u221A",
|
||||
"\u22C5",
|
||||
"\u00AC",
|
||||
"\u2227",
|
||||
"\u2228",
|
||||
"\u21D4",
|
||||
"\u21D0",
|
||||
"\u21D1",
|
||||
"\u21D2",
|
||||
"\u21D3",
|
||||
"\u25CA",
|
||||
"\u2329",
|
||||
"\uF8E8",
|
||||
"\uF8E9",
|
||||
"\uF8EA",
|
||||
"\u2211",
|
||||
"\uF8EB",
|
||||
"\uF8EC",
|
||||
"\uF8ED",
|
||||
"\uF8EE",
|
||||
"\uF8EF",
|
||||
"\uF8F0",
|
||||
"\uF8F1",
|
||||
"\uF8F2",
|
||||
"\uF8F3",
|
||||
"\uF8F4",
|
||||
"\u00F0",
|
||||
"\u232A",
|
||||
"\u222B",
|
||||
"\u2320",
|
||||
"\uF8F5",
|
||||
"\u2321",
|
||||
"\uF8F6",
|
||||
"\uF8F7",
|
||||
"\uF8F8",
|
||||
"\uF8F9",
|
||||
"\uF8FA",
|
||||
"\uF8FB",
|
||||
"\uF8FC",
|
||||
"\uF8FD",
|
||||
"\uF8FE",
|
||||
"\u00FF",
|
||||
]
|
||||
assert len(_symbol_encoding) == 256
|
||||
261
venv/lib/python3.12/site-packages/pypdf/_codecs/zapfding.py
Normal file
261
venv/lib/python3.12/site-packages/pypdf/_codecs/zapfding.py
Normal file
@@ -0,0 +1,261 @@
|
||||
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
|
||||
|
||||
_zapfding_encoding = [
|
||||
"\u0000",
|
||||
"\u0001",
|
||||
"\u0002",
|
||||
"\u0003",
|
||||
"\u0004",
|
||||
"\u0005",
|
||||
"\u0006",
|
||||
"\u0007",
|
||||
"\u0008",
|
||||
"\u0009",
|
||||
"\u000A",
|
||||
"\u000B",
|
||||
"\u000C",
|
||||
"\u000D",
|
||||
"\u000E",
|
||||
"\u000F",
|
||||
"\u0010",
|
||||
"\u0011",
|
||||
"\u0012",
|
||||
"\u0013",
|
||||
"\u0014",
|
||||
"\u0015",
|
||||
"\u0016",
|
||||
"\u0017",
|
||||
"\u0018",
|
||||
"\u0019",
|
||||
"\u001A",
|
||||
"\u001B",
|
||||
"\u001C",
|
||||
"\u001D",
|
||||
"\u001E",
|
||||
"\u001F",
|
||||
"\u0020",
|
||||
"\u2701",
|
||||
"\u2702",
|
||||
"\u2703",
|
||||
"\u2704",
|
||||
"\u260E",
|
||||
"\u2706",
|
||||
"\u2707",
|
||||
"\u2708",
|
||||
"\u2709",
|
||||
"\u261B",
|
||||
"\u261E",
|
||||
"\u270C",
|
||||
"\u270D",
|
||||
"\u270E",
|
||||
"\u270F",
|
||||
"\u2710",
|
||||
"\u2711",
|
||||
"\u2712",
|
||||
"\u2713",
|
||||
"\u2714",
|
||||
"\u2715",
|
||||
"\u2716",
|
||||
"\u2717",
|
||||
"\u2718",
|
||||
"\u2719",
|
||||
"\u271A",
|
||||
"\u271B",
|
||||
"\u271C",
|
||||
"\u271D",
|
||||
"\u271E",
|
||||
"\u271F",
|
||||
"\u2720",
|
||||
"\u2721",
|
||||
"\u2722",
|
||||
"\u2723",
|
||||
"\u2724",
|
||||
"\u2725",
|
||||
"\u2726",
|
||||
"\u2727",
|
||||
"\u2605",
|
||||
"\u2729",
|
||||
"\u272A",
|
||||
"\u272B",
|
||||
"\u272C",
|
||||
"\u272D",
|
||||
"\u272E",
|
||||
"\u272F",
|
||||
"\u2730",
|
||||
"\u2731",
|
||||
"\u2732",
|
||||
"\u2733",
|
||||
"\u2734",
|
||||
"\u2735",
|
||||
"\u2736",
|
||||
"\u2737",
|
||||
"\u2738",
|
||||
"\u2739",
|
||||
"\u273A",
|
||||
"\u273B",
|
||||
"\u273C",
|
||||
"\u273D",
|
||||
"\u273E",
|
||||
"\u273F",
|
||||
"\u2740",
|
||||
"\u2741",
|
||||
"\u2742",
|
||||
"\u2743",
|
||||
"\u2744",
|
||||
"\u2745",
|
||||
"\u2746",
|
||||
"\u2747",
|
||||
"\u2748",
|
||||
"\u2749",
|
||||
"\u274A",
|
||||
"\u274B",
|
||||
"\u25CF",
|
||||
"\u274D",
|
||||
"\u25A0",
|
||||
"\u274F",
|
||||
"\u2750",
|
||||
"\u2751",
|
||||
"\u2752",
|
||||
"\u25B2",
|
||||
"\u25BC",
|
||||
"\u25C6",
|
||||
"\u2756",
|
||||
"\u25D7",
|
||||
"\u2758",
|
||||
"\u2759",
|
||||
"\u275A",
|
||||
"\u275B",
|
||||
"\u275C",
|
||||
"\u275D",
|
||||
"\u275E",
|
||||
"\u007F",
|
||||
"\uF8D7",
|
||||
"\uF8D8",
|
||||
"\uF8D9",
|
||||
"\uF8DA",
|
||||
"\uF8DB",
|
||||
"\uF8DC",
|
||||
"\uF8DD",
|
||||
"\uF8DE",
|
||||
"\uF8DF",
|
||||
"\uF8E0",
|
||||
"\uF8E1",
|
||||
"\uF8E2",
|
||||
"\uF8E3",
|
||||
"\uF8E4",
|
||||
"\u008E",
|
||||
"\u008F",
|
||||
"\u0090",
|
||||
"\u0091",
|
||||
"\u0092",
|
||||
"\u0093",
|
||||
"\u0094",
|
||||
"\u0095",
|
||||
"\u0096",
|
||||
"\u0097",
|
||||
"\u0098",
|
||||
"\u0099",
|
||||
"\u009A",
|
||||
"\u009B",
|
||||
"\u009C",
|
||||
"\u009D",
|
||||
"\u009E",
|
||||
"\u009F",
|
||||
"\u00A0",
|
||||
"\u2761",
|
||||
"\u2762",
|
||||
"\u2763",
|
||||
"\u2764",
|
||||
"\u2765",
|
||||
"\u2766",
|
||||
"\u2767",
|
||||
"\u2663",
|
||||
"\u2666",
|
||||
"\u2665",
|
||||
"\u2660",
|
||||
"\u2460",
|
||||
"\u2461",
|
||||
"\u2462",
|
||||
"\u2463",
|
||||
"\u2464",
|
||||
"\u2465",
|
||||
"\u2466",
|
||||
"\u2467",
|
||||
"\u2468",
|
||||
"\u2469",
|
||||
"\u2776",
|
||||
"\u2777",
|
||||
"\u2778",
|
||||
"\u2779",
|
||||
"\u277A",
|
||||
"\u277B",
|
||||
"\u277C",
|
||||
"\u277D",
|
||||
"\u277E",
|
||||
"\u277F",
|
||||
"\u2780",
|
||||
"\u2781",
|
||||
"\u2782",
|
||||
"\u2783",
|
||||
"\u2784",
|
||||
"\u2785",
|
||||
"\u2786",
|
||||
"\u2787",
|
||||
"\u2788",
|
||||
"\u2789",
|
||||
"\u278A",
|
||||
"\u278B",
|
||||
"\u278C",
|
||||
"\u278D",
|
||||
"\u278E",
|
||||
"\u278F",
|
||||
"\u2790",
|
||||
"\u2791",
|
||||
"\u2792",
|
||||
"\u2793",
|
||||
"\u2794",
|
||||
"\u2192",
|
||||
"\u2194",
|
||||
"\u2195",
|
||||
"\u2798",
|
||||
"\u2799",
|
||||
"\u279A",
|
||||
"\u279B",
|
||||
"\u279C",
|
||||
"\u279D",
|
||||
"\u279E",
|
||||
"\u279F",
|
||||
"\u27A0",
|
||||
"\u27A1",
|
||||
"\u27A2",
|
||||
"\u27A3",
|
||||
"\u27A4",
|
||||
"\u27A5",
|
||||
"\u27A6",
|
||||
"\u27A7",
|
||||
"\u27A8",
|
||||
"\u27A9",
|
||||
"\u27AA",
|
||||
"\u27AB",
|
||||
"\u27AC",
|
||||
"\u27AD",
|
||||
"\u27AE",
|
||||
"\u27AF",
|
||||
"\u00F0",
|
||||
"\u27B1",
|
||||
"\u27B2",
|
||||
"\u27B3",
|
||||
"\u27B4",
|
||||
"\u27B5",
|
||||
"\u27B6",
|
||||
"\u27B7",
|
||||
"\u27B8",
|
||||
"\u27B9",
|
||||
"\u27BA",
|
||||
"\u27BB",
|
||||
"\u27BC",
|
||||
"\u27BD",
|
||||
"\u27BE",
|
||||
"\u00FF",
|
||||
]
|
||||
assert len(_zapfding_encoding) == 256
|
||||
@@ -0,0 +1,86 @@
|
||||
# Copyright (c) 2023, exiledkingcc
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from pypdf._crypt_providers._base import CryptBase, CryptIdentity
|
||||
|
||||
try:
|
||||
from pypdf._crypt_providers._cryptography import (
|
||||
CryptAES,
|
||||
CryptRC4,
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
aes_ecb_decrypt,
|
||||
aes_ecb_encrypt,
|
||||
crypt_provider,
|
||||
rc4_decrypt,
|
||||
rc4_encrypt,
|
||||
)
|
||||
from pypdf._utils import Version
|
||||
|
||||
if Version(crypt_provider[1]) <= Version("3.0"):
|
||||
# This is due to the backend parameter being required back then:
|
||||
# https://cryptography.io/en/latest/changelog/#v3-1
|
||||
raise ImportError("cryptography<=3.0 is not supported") # pragma: no cover
|
||||
except ImportError:
|
||||
try:
|
||||
from pypdf._crypt_providers._pycryptodome import ( # type: ignore
|
||||
CryptAES,
|
||||
CryptRC4,
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
aes_ecb_decrypt,
|
||||
aes_ecb_encrypt,
|
||||
crypt_provider,
|
||||
rc4_decrypt,
|
||||
rc4_encrypt,
|
||||
)
|
||||
except ImportError:
|
||||
from pypdf._crypt_providers._fallback import ( # type: ignore
|
||||
CryptAES,
|
||||
CryptRC4,
|
||||
aes_cbc_decrypt,
|
||||
aes_cbc_encrypt,
|
||||
aes_ecb_decrypt,
|
||||
aes_ecb_encrypt,
|
||||
crypt_provider,
|
||||
rc4_decrypt,
|
||||
rc4_encrypt,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"CryptAES",
|
||||
"CryptBase",
|
||||
"CryptIdentity",
|
||||
"CryptRC4",
|
||||
"aes_cbc_decrypt",
|
||||
"aes_cbc_encrypt",
|
||||
"aes_ecb_decrypt",
|
||||
"aes_ecb_encrypt",
|
||||
"crypt_provider",
|
||||
"rc4_decrypt",
|
||||
"rc4_encrypt",
|
||||
]
|
||||
@@ -0,0 +1,38 @@
|
||||
# Copyright (c) 2023, exiledkingcc
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
class CryptBase:
|
||||
def encrypt(self, data: bytes) -> bytes: # pragma: no cover
|
||||
return data
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes: # pragma: no cover
|
||||
return data
|
||||
|
||||
|
||||
class CryptIdentity(CryptBase):
|
||||
pass
|
||||
@@ -0,0 +1,118 @@
|
||||
# Copyright (c) 2023, exiledkingcc
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import secrets
|
||||
|
||||
from cryptography import __version__
|
||||
from cryptography.hazmat.primitives import padding
|
||||
from cryptography.hazmat.primitives.ciphers.algorithms import AES
|
||||
|
||||
try:
|
||||
# 43.0.0 - https://cryptography.io/en/latest/changelog/#v43-0-0
|
||||
from cryptography.hazmat.decrepit.ciphers.algorithms import ARC4
|
||||
except ImportError:
|
||||
from cryptography.hazmat.primitives.ciphers.algorithms import ARC4
|
||||
from cryptography.hazmat.primitives.ciphers.base import Cipher
|
||||
from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB
|
||||
|
||||
from pypdf._crypt_providers._base import CryptBase
|
||||
|
||||
crypt_provider = ("cryptography", __version__)
|
||||
|
||||
|
||||
class CryptRC4(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
self.cipher = Cipher(ARC4(key), mode=None)
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
encryptor = self.cipher.encryptor()
|
||||
return encryptor.update(data) + encryptor.finalize()
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
decryptor = self.cipher.decryptor()
|
||||
return decryptor.update(data) + decryptor.finalize()
|
||||
|
||||
|
||||
class CryptAES(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
self.alg = AES(key)
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
iv = secrets.token_bytes(16)
|
||||
pad = padding.PKCS7(128).padder()
|
||||
data = pad.update(data) + pad.finalize()
|
||||
|
||||
cipher = Cipher(self.alg, CBC(iv))
|
||||
encryptor = cipher.encryptor()
|
||||
return iv + encryptor.update(data) + encryptor.finalize()
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
iv = data[:16]
|
||||
data = data[16:]
|
||||
# for empty encrypted data
|
||||
if not data:
|
||||
return data
|
||||
|
||||
# just for robustness, it does not happen under normal circumstances
|
||||
if len(data) % 16 != 0:
|
||||
pad = padding.PKCS7(128).padder()
|
||||
data = pad.update(data) + pad.finalize()
|
||||
|
||||
cipher = Cipher(self.alg, CBC(iv))
|
||||
decryptor = cipher.decryptor()
|
||||
d = decryptor.update(data) + decryptor.finalize()
|
||||
return d[: -d[-1]]
|
||||
|
||||
|
||||
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
encryptor = Cipher(ARC4(key), mode=None).encryptor()
|
||||
return encryptor.update(data) + encryptor.finalize()
|
||||
|
||||
|
||||
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
decryptor = Cipher(ARC4(key), mode=None).decryptor()
|
||||
return decryptor.update(data) + decryptor.finalize()
|
||||
|
||||
|
||||
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
encryptor = Cipher(AES(key), mode=ECB()).encryptor()
|
||||
return encryptor.update(data) + encryptor.finalize()
|
||||
|
||||
|
||||
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
decryptor = Cipher(AES(key), mode=ECB()).decryptor()
|
||||
return decryptor.update(data) + decryptor.finalize()
|
||||
|
||||
|
||||
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
encryptor = Cipher(AES(key), mode=CBC(iv)).encryptor()
|
||||
return encryptor.update(data) + encryptor.finalize()
|
||||
|
||||
|
||||
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
decryptor = Cipher(AES(key), mode=CBC(iv)).decryptor()
|
||||
return decryptor.update(data) + decryptor.finalize()
|
||||
@@ -0,0 +1,93 @@
|
||||
# Copyright (c) 2023, exiledkingcc
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from pypdf._crypt_providers._base import CryptBase
|
||||
from pypdf.errors import DependencyError
|
||||
|
||||
_DEPENDENCY_ERROR_STR = "cryptography>=3.1 is required for AES algorithm"
|
||||
|
||||
|
||||
crypt_provider = ("local_crypt_fallback", "0.0.0")
|
||||
|
||||
|
||||
class CryptRC4(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
self.s = bytearray(range(256))
|
||||
j = 0
|
||||
for i in range(256):
|
||||
j = (j + self.s[i] + key[i % len(key)]) % 256
|
||||
self.s[i], self.s[j] = self.s[j], self.s[i]
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
s = bytearray(self.s)
|
||||
out = [0 for _ in range(len(data))]
|
||||
i, j = 0, 0
|
||||
for k in range(len(data)):
|
||||
i = (i + 1) % 256
|
||||
j = (j + s[i]) % 256
|
||||
s[i], s[j] = s[j], s[i]
|
||||
x = s[(s[i] + s[j]) % 256]
|
||||
out[k] = data[k] ^ x
|
||||
return bytes(out)
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
return self.encrypt(data)
|
||||
|
||||
|
||||
class CryptAES(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
pass
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
|
||||
|
||||
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
return CryptRC4(key).encrypt(data)
|
||||
|
||||
|
||||
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
return CryptRC4(key).decrypt(data)
|
||||
|
||||
|
||||
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
|
||||
|
||||
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
|
||||
|
||||
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
|
||||
|
||||
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
raise DependencyError(_DEPENDENCY_ERROR_STR)
|
||||
@@ -0,0 +1,97 @@
|
||||
# Copyright (c) 2023, exiledkingcc
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import secrets
|
||||
|
||||
from Crypto import __version__
|
||||
from Crypto.Cipher import AES, ARC4
|
||||
from Crypto.Util.Padding import pad
|
||||
|
||||
from pypdf._crypt_providers._base import CryptBase
|
||||
|
||||
crypt_provider = ("pycryptodome", __version__)
|
||||
|
||||
|
||||
class CryptRC4(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
self.key = key
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
return ARC4.ARC4Cipher(self.key).encrypt(data)
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
return ARC4.ARC4Cipher(self.key).decrypt(data)
|
||||
|
||||
|
||||
class CryptAES(CryptBase):
|
||||
def __init__(self, key: bytes) -> None:
|
||||
self.key = key
|
||||
|
||||
def encrypt(self, data: bytes) -> bytes:
|
||||
iv = secrets.token_bytes(16)
|
||||
data = pad(data, 16)
|
||||
aes = AES.new(self.key, AES.MODE_CBC, iv)
|
||||
return iv + aes.encrypt(data)
|
||||
|
||||
def decrypt(self, data: bytes) -> bytes:
|
||||
iv = data[:16]
|
||||
data = data[16:]
|
||||
# for empty encrypted data
|
||||
if not data:
|
||||
return data
|
||||
|
||||
# just for robustness, it does not happen under normal circumstances
|
||||
if len(data) % 16 != 0:
|
||||
data = pad(data, 16)
|
||||
|
||||
aes = AES.new(self.key, AES.MODE_CBC, iv)
|
||||
d = aes.decrypt(data)
|
||||
return d[: -d[-1]]
|
||||
|
||||
|
||||
def rc4_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
return ARC4.ARC4Cipher(key).encrypt(data)
|
||||
|
||||
|
||||
def rc4_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
return ARC4.ARC4Cipher(key).decrypt(data)
|
||||
|
||||
|
||||
def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
|
||||
return AES.new(key, AES.MODE_ECB).encrypt(data)
|
||||
|
||||
|
||||
def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
|
||||
return AES.new(key, AES.MODE_ECB).decrypt(data)
|
||||
|
||||
|
||||
def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
return AES.new(key, AES.MODE_CBC, iv).encrypt(data)
|
||||
|
||||
|
||||
def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
|
||||
return AES.new(key, AES.MODE_CBC, iv).decrypt(data)
|
||||
1461
venv/lib/python3.12/site-packages/pypdf/_doc_common.py
Normal file
1461
venv/lib/python3.12/site-packages/pypdf/_doc_common.py
Normal file
File diff suppressed because it is too large
Load Diff
1178
venv/lib/python3.12/site-packages/pypdf/_encryption.py
Normal file
1178
venv/lib/python3.12/site-packages/pypdf/_encryption.py
Normal file
File diff suppressed because it is too large
Load Diff
327
venv/lib/python3.12/site-packages/pypdf/_font.py
Normal file
327
venv/lib/python3.12/site-packages/pypdf/_font.py
Normal file
@@ -0,0 +1,327 @@
|
||||
from collections.abc import Sequence
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Optional, Union, cast
|
||||
|
||||
from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject
|
||||
|
||||
from ._cmap import get_encoding
|
||||
from ._codecs.adobe_glyphs import adobe_glyphs
|
||||
from ._utils import logger_warning
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class FontDescriptor:
|
||||
"""
|
||||
Represents the FontDescriptor dictionary as defined in the PDF specification.
|
||||
This contains both descriptive and metric information.
|
||||
|
||||
The defaults are derived from the mean values of the 14 core fonts, rounded
|
||||
to 100.
|
||||
"""
|
||||
|
||||
name: str = "Unknown"
|
||||
family: str = "Unknown"
|
||||
weight: str = "Unknown"
|
||||
|
||||
ascent: float = 700.0
|
||||
descent: float = -200.0
|
||||
cap_height: float = 600.0
|
||||
x_height: float = 500.0
|
||||
italic_angle: float = 0.0 # Non-italic
|
||||
flags: int = 32 # Non-serif, non-symbolic, not fixed width
|
||||
bbox: tuple[float, float, float, float] = field(default_factory=lambda: (-100.0, -200.0, 1000.0, 900.0))
|
||||
|
||||
character_widths: dict[str, int] = field(default_factory=lambda: {"default": 500})
|
||||
|
||||
@staticmethod
|
||||
def _parse_font_descriptor(font_kwargs: dict[str, Any], font_descriptor_obj: DictionaryObject) -> dict[str, Any]:
|
||||
font_descriptor_dict: DictionaryObject = (
|
||||
font_descriptor_obj.get_object()
|
||||
if isinstance(font_descriptor_obj, IndirectObject)
|
||||
else font_descriptor_obj
|
||||
)
|
||||
for source_key, target_key in [
|
||||
("/FontName", "name"),
|
||||
("/FontFamily", "family"),
|
||||
("/FontWeight", "weight"),
|
||||
("/Ascent", "ascent"),
|
||||
("/Descent", "descent"),
|
||||
("/CapHeight", "cap_height"),
|
||||
("/XHeight", "x_height"),
|
||||
("/ItalicAngle", "italic_angle"),
|
||||
("/Flags", "flags"),
|
||||
("/FontBBox", "bbox")
|
||||
]:
|
||||
if source_key in font_descriptor_dict:
|
||||
font_kwargs[target_key] = font_descriptor_dict[source_key]
|
||||
# Handle missing bbox gracefully - PDFs may have fonts without valid bounding boxes
|
||||
if "bbox" in font_kwargs:
|
||||
bbox_tuple = tuple(map(float, font_kwargs["bbox"]))
|
||||
assert len(bbox_tuple) == 4, bbox_tuple
|
||||
font_kwargs["bbox"] = bbox_tuple
|
||||
return font_kwargs
|
||||
|
||||
@staticmethod
|
||||
def _collect_tt_t1_character_widths(
|
||||
pdf_font_dict: DictionaryObject,
|
||||
char_map: dict[Any, Any],
|
||||
encoding: Union[str, dict[int, str]],
|
||||
current_widths: dict[str, int]
|
||||
) -> None:
|
||||
"""Parses a TrueType or Type1 font's /Widths array from a font dictionary and updates character widths"""
|
||||
widths_array = cast(ArrayObject, pdf_font_dict["/Widths"])
|
||||
first_char = pdf_font_dict.get("/FirstChar", 0)
|
||||
if not isinstance(encoding, str):
|
||||
# This means that encoding is a dict
|
||||
current_widths.update({
|
||||
encoding.get(idx + first_char, chr(idx + first_char)): width
|
||||
for idx, width in enumerate(widths_array)
|
||||
})
|
||||
return
|
||||
|
||||
# We map the character code directly to the character
|
||||
# using the string encoding
|
||||
for idx, width in enumerate(widths_array):
|
||||
# Often "idx == 0" will denote the .notdef character, but we add it anyway
|
||||
char_code = idx + first_char # This is a raw code
|
||||
# Get the "raw" character or byte representation
|
||||
raw_char = bytes([char_code]).decode(encoding, "surrogatepass")
|
||||
# Translate raw_char to the REAL Unicode character using the char_map
|
||||
unicode_char = char_map.get(raw_char)
|
||||
if unicode_char:
|
||||
current_widths[unicode_char] = int(width)
|
||||
else:
|
||||
current_widths[raw_char] = int(width)
|
||||
|
||||
@staticmethod
|
||||
def _collect_cid_character_widths(
|
||||
d_font: DictionaryObject, char_map: dict[Any, Any], current_widths: dict[str, int]
|
||||
) -> None:
|
||||
"""Parses the /W array from a DescendantFont dictionary and updates character widths."""
|
||||
ord_map = {
|
||||
ord(_target): _surrogate
|
||||
for _target, _surrogate in char_map.items()
|
||||
if isinstance(_target, str)
|
||||
}
|
||||
# /W width definitions have two valid formats which can be mixed and matched:
|
||||
# (1) A character start index followed by a list of widths, e.g.
|
||||
# `45 [500 600 700]` applies widths 500, 600, 700 to characters 45-47.
|
||||
# (2) A character start index, a character stop index, and a width, e.g.
|
||||
# `45 65 500` applies width 500 to characters 45-65.
|
||||
skip_count = 0
|
||||
_w = d_font.get("/W", [])
|
||||
for idx, w_entry in enumerate(_w):
|
||||
w_entry = w_entry.get_object()
|
||||
if skip_count:
|
||||
skip_count -= 1
|
||||
continue
|
||||
if not isinstance(w_entry, (int, float)):
|
||||
# We should never get here due to skip_count above. But
|
||||
# sometimes we do.
|
||||
logger_warning(f"Expected numeric value for width, got {w_entry}. Ignoring it.", __name__)
|
||||
continue
|
||||
# check for format (1): `int [int int int int ...]`
|
||||
w_next_entry = _w[idx + 1].get_object()
|
||||
if isinstance(w_next_entry, Sequence):
|
||||
start_idx, width_list = w_entry, w_next_entry
|
||||
current_widths.update(
|
||||
{
|
||||
ord_map[_cidx]: _width
|
||||
for _cidx, _width in zip(
|
||||
range(
|
||||
cast(int, start_idx),
|
||||
cast(int, start_idx) + len(width_list),
|
||||
1,
|
||||
),
|
||||
width_list,
|
||||
)
|
||||
if _cidx in ord_map
|
||||
}
|
||||
)
|
||||
skip_count = 1
|
||||
# check for format (2): `int int int`
|
||||
elif isinstance(w_next_entry, (int, float)) and isinstance(
|
||||
_w[idx + 2].get_object(), (int, float)
|
||||
):
|
||||
start_idx, stop_idx, const_width = (
|
||||
w_entry,
|
||||
w_next_entry,
|
||||
_w[idx + 2].get_object(),
|
||||
)
|
||||
current_widths.update(
|
||||
{
|
||||
ord_map[_cidx]: const_width
|
||||
for _cidx in range(
|
||||
cast(int, start_idx), cast(int, stop_idx + 1), 1
|
||||
)
|
||||
if _cidx in ord_map
|
||||
}
|
||||
)
|
||||
skip_count = 2
|
||||
else:
|
||||
# This handles the case of out of bounds (reaching the end of the width definitions
|
||||
# while expecting more elements).
|
||||
logger_warning(
|
||||
f"Invalid font width definition. Last element: {w_entry}.",
|
||||
__name__
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _add_default_width(current_widths: dict[str, int]) -> None:
|
||||
if not current_widths:
|
||||
current_widths["default"] = 500
|
||||
return
|
||||
|
||||
if "default" in current_widths:
|
||||
return
|
||||
|
||||
if " " in current_widths and current_widths[" "] != 0:
|
||||
# Setting default to twice the space width
|
||||
current_widths["default"] = int(2 * current_widths[" "])
|
||||
return
|
||||
|
||||
# Use the average width of existing glyph widths
|
||||
valid_widths = [w for w in current_widths.values() if w > 0]
|
||||
current_widths["default"] = sum(valid_widths) // len(valid_widths) if valid_widths else 500
|
||||
|
||||
@classmethod
|
||||
def from_font_resource(
|
||||
cls,
|
||||
pdf_font_dict: DictionaryObject,
|
||||
encoding: Optional[Union[str, dict[int, str]]] = None,
|
||||
char_map: Optional[dict[Any, Any]] = None
|
||||
) -> "FontDescriptor":
|
||||
from pypdf._codecs.core_fontmetrics import CORE_FONT_METRICS # noqa: PLC0415
|
||||
# Prioritize information from the PDF font dictionary
|
||||
font_name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/")
|
||||
font_kwargs: dict[str, Any] = {"character_widths": {}}
|
||||
|
||||
# Deal with fonts by type; Type1, TrueType and certain Type3
|
||||
if pdf_font_dict.get("/Subtype") in ("/Type1", "/MMType1", "/TrueType", "/Type3"):
|
||||
if "/Widths" in pdf_font_dict:
|
||||
if not (encoding and char_map):
|
||||
encoding, char_map = get_encoding(pdf_font_dict)
|
||||
cls._collect_tt_t1_character_widths(
|
||||
pdf_font_dict, char_map, encoding, font_kwargs["character_widths"]
|
||||
)
|
||||
elif font_name in CORE_FONT_METRICS:
|
||||
font_descriptor = CORE_FONT_METRICS[font_name]
|
||||
cls._add_default_width(font_descriptor.character_widths)
|
||||
|
||||
return font_descriptor
|
||||
|
||||
if "/FontDescriptor" in pdf_font_dict: # TODO: This does not account for some Type3 fonts;
|
||||
# see tests/test_cmap.py::test_ascii_charset
|
||||
font_descriptor_resource = pdf_font_dict.get("/FontDescriptor", DictionaryObject()).get_object()
|
||||
font_descriptor_obj = cast(DictionaryObject, font_descriptor_resource)
|
||||
if "/MissingWidth" in font_descriptor_obj:
|
||||
font_kwargs["character_widths"]["default"] = font_descriptor_obj["/MissingWidth"].get_object()
|
||||
font_kwargs = cls._parse_font_descriptor(
|
||||
font_kwargs, pdf_font_dict.get("/FontDescriptor", DictionaryObject())
|
||||
)
|
||||
if "default" not in font_kwargs["character_widths"]:
|
||||
cls._add_default_width(font_kwargs["character_widths"])
|
||||
|
||||
return cls(**font_kwargs)
|
||||
|
||||
# Composite font or CID font - CID fonts have a /W array mapping character codes
|
||||
# to widths stashed in /DescendantFonts. No need to test for /DescendantFonts though,
|
||||
# because all other fonts have already been dealt with.
|
||||
if not (encoding and char_map):
|
||||
encoding, char_map = get_encoding(pdf_font_dict)
|
||||
d_font: DictionaryObject
|
||||
for d_font_idx, d_font in enumerate(
|
||||
cast(ArrayObject, pdf_font_dict["/DescendantFonts"])
|
||||
):
|
||||
d_font = cast(DictionaryObject, d_font.get_object())
|
||||
cast(ArrayObject, pdf_font_dict["/DescendantFonts"])[d_font_idx] = d_font
|
||||
cls._collect_cid_character_widths(
|
||||
d_font, char_map, font_kwargs["character_widths"]
|
||||
)
|
||||
if "/DW" in d_font:
|
||||
font_kwargs["character_widths"]["default"] = d_font["/DW"].get_object()
|
||||
else:
|
||||
cls._add_default_width(font_kwargs["character_widths"])
|
||||
font_kwargs = cls._parse_font_descriptor(
|
||||
font_kwargs, d_font.get("/FontDescriptor", DictionaryObject())
|
||||
)
|
||||
|
||||
return cls(**font_kwargs)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Font:
|
||||
"""
|
||||
A font object for use during text extraction and for producing
|
||||
text appearance streams.
|
||||
|
||||
Attributes:
|
||||
name: Font name, derived from font["/BaseFont"]
|
||||
character_map: The font's character map
|
||||
encoding: Font encoding
|
||||
sub_type: The font type, such as Type1, TrueType, or Type3.
|
||||
font_descriptor: Font metrics, including a mapping of characters to widths
|
||||
character_widths: A mapping of characters to widths
|
||||
space_width: The width of a space, or an approximation
|
||||
interpretable: Default True. If False, the font glyphs cannot
|
||||
be translated to characters, e.g. Type3 fonts that do not define
|
||||
a '/ToUnicode' mapping.
|
||||
|
||||
"""
|
||||
|
||||
name: str
|
||||
encoding: Union[str, dict[int, str]]
|
||||
character_map: dict[Any, Any] = field(default_factory=dict)
|
||||
sub_type: str = "Unknown"
|
||||
font_descriptor: FontDescriptor = field(default_factory=FontDescriptor)
|
||||
character_widths: dict[str, int] = field(default_factory=dict)
|
||||
space_width: Union[float, int] = 250
|
||||
interpretable: bool = True
|
||||
|
||||
@classmethod
|
||||
def from_font_resource(
|
||||
cls,
|
||||
pdf_font_dict: DictionaryObject,
|
||||
) -> "Font":
|
||||
# Can collect base_font, name and encoding directly from font resource
|
||||
name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/")
|
||||
sub_type = pdf_font_dict.get("/Subtype", "Unknown").removeprefix("/")
|
||||
encoding, character_map = get_encoding(pdf_font_dict)
|
||||
|
||||
# Type3 fonts that do not specify a "/ToUnicode" mapping cannot be
|
||||
# reliably converted into character codes unless all named chars
|
||||
# in /CharProcs map to a standard adobe glyph. See §9.10.2 of the
|
||||
# PDF 1.7 standard.
|
||||
interpretable = True
|
||||
if sub_type == "Type3" and "/ToUnicode" not in pdf_font_dict:
|
||||
interpretable = all(
|
||||
cname in adobe_glyphs
|
||||
for cname in pdf_font_dict.get("/CharProcs") or []
|
||||
)
|
||||
|
||||
if interpretable:
|
||||
font_descriptor = FontDescriptor.from_font_resource(pdf_font_dict, encoding, character_map)
|
||||
else:
|
||||
font_descriptor = FontDescriptor() # Save some overhead if font is not interpretable
|
||||
character_widths = font_descriptor.character_widths
|
||||
|
||||
space_width = font_descriptor.character_widths.get(" ")
|
||||
if not space_width or space_width == 0:
|
||||
space_width = font_descriptor.character_widths["default"] // 2
|
||||
|
||||
return cls(
|
||||
name=name,
|
||||
sub_type=sub_type,
|
||||
encoding=encoding,
|
||||
font_descriptor=font_descriptor,
|
||||
character_map=character_map,
|
||||
character_widths=character_widths,
|
||||
space_width=space_width,
|
||||
interpretable=interpretable
|
||||
)
|
||||
|
||||
def text_width(self, text: str = "") -> float:
|
||||
"""Sum of character widths specified in PDF font for the supplied text."""
|
||||
return sum(
|
||||
[self.character_widths.get(char, self.character_widths["default"]) for char in text], 0.0
|
||||
)
|
||||
2353
venv/lib/python3.12/site-packages/pypdf/_page.py
Normal file
2353
venv/lib/python3.12/site-packages/pypdf/_page.py
Normal file
File diff suppressed because it is too large
Load Diff
289
venv/lib/python3.12/site-packages/pypdf/_page_labels.py
Normal file
289
venv/lib/python3.12/site-packages/pypdf/_page_labels.py
Normal file
@@ -0,0 +1,289 @@
|
||||
"""
|
||||
Page labels are shown by PDF viewers as "the page number".
|
||||
|
||||
A page has a numeric index, starting at 0. Additionally, the page
|
||||
has a label. In the most simple case:
|
||||
|
||||
label = index + 1
|
||||
|
||||
However, the title page and the table of contents might have Roman numerals as
|
||||
page labels. This makes things more complicated.
|
||||
|
||||
Example 1
|
||||
---------
|
||||
|
||||
>>> reader.root_object["/PageLabels"]["/Nums"]
|
||||
[0, IndirectObject(18, 0, 139929798197504),
|
||||
8, IndirectObject(19, 0, 139929798197504)]
|
||||
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
|
||||
{'/S': '/r'}
|
||||
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
|
||||
{'/S': '/D'}
|
||||
|
||||
Example 2
|
||||
---------
|
||||
The following is a document with pages labeled
|
||||
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...
|
||||
|
||||
1 0 obj
|
||||
<< /Type /Catalog
|
||||
/PageLabels << /Nums [
|
||||
0 << /S /r >>
|
||||
4 << /S /D >>
|
||||
7 << /S /D
|
||||
/P ( A- )
|
||||
/St 8
|
||||
>>
|
||||
% A number tree containing
|
||||
% three page label dictionaries
|
||||
]
|
||||
>>
|
||||
...
|
||||
>>
|
||||
endobj
|
||||
|
||||
|
||||
§12.4.2 PDF Specification 1.7 and 2.0
|
||||
=====================================
|
||||
|
||||
Entries in a page label dictionary
|
||||
----------------------------------
|
||||
The /S key:
|
||||
D Decimal Arabic numerals
|
||||
R Uppercase Roman numerals
|
||||
r Lowercase Roman numerals
|
||||
A Uppercase letters (A to Z for the first 26 pages,
|
||||
AA to ZZ for the next 26, and so on)
|
||||
a Lowercase letters (a to z for the first 26 pages,
|
||||
aa to zz for the next 26, and so on)
|
||||
"""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from typing import Optional, cast
|
||||
|
||||
from ._protocols import PdfCommonDocProtocol
|
||||
from ._utils import logger_warning
|
||||
from .generic import (
|
||||
ArrayObject,
|
||||
DictionaryObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
|
||||
def number2uppercase_roman_numeral(num: int) -> str:
|
||||
roman = [
|
||||
(1000, "M"),
|
||||
(900, "CM"),
|
||||
(500, "D"),
|
||||
(400, "CD"),
|
||||
(100, "C"),
|
||||
(90, "XC"),
|
||||
(50, "L"),
|
||||
(40, "XL"),
|
||||
(10, "X"),
|
||||
(9, "IX"),
|
||||
(5, "V"),
|
||||
(4, "IV"),
|
||||
(1, "I"),
|
||||
]
|
||||
|
||||
def roman_num(num: int) -> Iterator[str]:
|
||||
for decimal, roman_repr in roman:
|
||||
x, _ = divmod(num, decimal)
|
||||
yield roman_repr * x
|
||||
num -= decimal * x
|
||||
if num <= 0:
|
||||
break
|
||||
|
||||
return "".join(list(roman_num(num)))
|
||||
|
||||
|
||||
def number2lowercase_roman_numeral(number: int) -> str:
|
||||
return number2uppercase_roman_numeral(number).lower()
|
||||
|
||||
|
||||
def number2uppercase_letter(number: int) -> str:
|
||||
if number <= 0:
|
||||
raise ValueError("Expecting a positive number")
|
||||
alphabet = [chr(i) for i in range(ord("A"), ord("Z") + 1)]
|
||||
rep = ""
|
||||
while number > 0:
|
||||
remainder = number % 26
|
||||
if remainder == 0:
|
||||
remainder = 26
|
||||
rep = alphabet[remainder - 1] + rep
|
||||
# update
|
||||
number -= remainder
|
||||
number = number // 26
|
||||
return rep
|
||||
|
||||
|
||||
def number2lowercase_letter(number: int) -> str:
|
||||
return number2uppercase_letter(number).lower()
|
||||
|
||||
|
||||
def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
|
||||
# [Nums] shall be an array of the form
|
||||
# [ key_1 value_1 key_2 value_2 ... key_n value_n ]
|
||||
# where each key_i is an integer and the corresponding
|
||||
# value_i shall be the object associated with that key.
|
||||
# The keys shall be sorted in numerical order,
|
||||
# analogously to the arrangement of keys in a name tree
|
||||
# as described in 7.9.6, "Name Trees."
|
||||
nums = cast(ArrayObject, dictionary_object["/Nums"])
|
||||
i = 0
|
||||
value = None
|
||||
start_index = 0
|
||||
while i < len(nums):
|
||||
start_index = nums[i]
|
||||
value = nums[i + 1].get_object()
|
||||
if i + 2 == len(nums):
|
||||
break
|
||||
if nums[i + 2] > index:
|
||||
break
|
||||
i += 2
|
||||
m = {
|
||||
None: lambda _: "",
|
||||
"/D": lambda n: str(n),
|
||||
"/R": number2uppercase_roman_numeral,
|
||||
"/r": number2lowercase_roman_numeral,
|
||||
"/A": number2uppercase_letter,
|
||||
"/a": number2lowercase_letter,
|
||||
}
|
||||
# if /Nums array is not following the specification or if /Nums is empty
|
||||
if not isinstance(value, dict):
|
||||
return str(index + 1) # Fallback
|
||||
start = value.get("/St", 1)
|
||||
prefix = value.get("/P", "")
|
||||
return prefix + m[value.get("/S")](index - start_index + start)
|
||||
|
||||
|
||||
def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
|
||||
"""
|
||||
See 7.9.7 "Number Trees".
|
||||
|
||||
Args:
|
||||
reader: The PdfReader
|
||||
index: The index of the page
|
||||
|
||||
Returns:
|
||||
The label of the page, e.g. "iv" or "4".
|
||||
|
||||
"""
|
||||
root = cast(DictionaryObject, reader.root_object)
|
||||
if "/PageLabels" not in root:
|
||||
return str(index + 1) # Fallback
|
||||
number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
|
||||
if "/Nums" in number_tree:
|
||||
return get_label_from_nums(number_tree, index)
|
||||
if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
|
||||
# number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
|
||||
# Limit maximum depth.
|
||||
level = 0
|
||||
while level < 100:
|
||||
kids = cast(list[DictionaryObject], number_tree["/Kids"])
|
||||
for kid in kids:
|
||||
# kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
|
||||
limits = cast(list[int], kid["/Limits"])
|
||||
if limits[0] <= index <= limits[1]:
|
||||
if not is_null_or_none(kid.get("/Kids", None)):
|
||||
# Recursive definition.
|
||||
level += 1
|
||||
if level == 100: # pragma: no cover
|
||||
raise NotImplementedError(
|
||||
"Too deep nesting is not supported."
|
||||
)
|
||||
number_tree = kid
|
||||
# Exit the inner `for` loop and continue at the next level with the
|
||||
# next iteration of the `while` loop.
|
||||
break
|
||||
return get_label_from_nums(kid, index)
|
||||
else:
|
||||
# When there are no kids, make sure to exit the `while` loop directly
|
||||
# and continue with the fallback.
|
||||
break
|
||||
|
||||
logger_warning(f"Could not reliably determine page label for {index}.", __name__)
|
||||
return str(index + 1) # Fallback if neither /Nums nor /Kids is in the number_tree
|
||||
|
||||
|
||||
def nums_insert(
|
||||
key: NumberObject,
|
||||
value: DictionaryObject,
|
||||
nums: ArrayObject,
|
||||
) -> None:
|
||||
"""
|
||||
Insert a key, value pair in a Nums array.
|
||||
|
||||
See 7.9.7 "Number Trees".
|
||||
|
||||
Args:
|
||||
key: number key of the entry
|
||||
value: value of the entry
|
||||
nums: Nums array to modify
|
||||
|
||||
"""
|
||||
if len(nums) % 2 != 0:
|
||||
raise ValueError("A nums like array must have an even number of elements")
|
||||
|
||||
i = len(nums)
|
||||
while i != 0 and key <= nums[i - 2]:
|
||||
i = i - 2
|
||||
|
||||
if i < len(nums) and key == nums[i]:
|
||||
nums[i + 1] = value
|
||||
else:
|
||||
nums.insert(i, key)
|
||||
nums.insert(i + 1, value)
|
||||
|
||||
|
||||
def nums_clear_range(
|
||||
key: NumberObject,
|
||||
page_index_to: int,
|
||||
nums: ArrayObject,
|
||||
) -> None:
|
||||
"""
|
||||
Remove all entries in a number tree in a range after an entry.
|
||||
|
||||
See 7.9.7 "Number Trees".
|
||||
|
||||
Args:
|
||||
key: number key of the entry before the range
|
||||
page_index_to: The page index of the upper limit of the range
|
||||
nums: Nums array to modify
|
||||
|
||||
"""
|
||||
if len(nums) % 2 != 0:
|
||||
raise ValueError("A nums like array must have an even number of elements")
|
||||
if page_index_to < key:
|
||||
raise ValueError("page_index_to must be greater or equal than key")
|
||||
|
||||
i = nums.index(key) + 2
|
||||
while i < len(nums) and nums[i] <= page_index_to:
|
||||
nums.pop(i)
|
||||
nums.pop(i)
|
||||
|
||||
|
||||
def nums_next(
|
||||
key: NumberObject,
|
||||
nums: ArrayObject,
|
||||
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
|
||||
"""
|
||||
Return the (key, value) pair of the entry after the given one.
|
||||
|
||||
See 7.9.7 "Number Trees".
|
||||
|
||||
Args:
|
||||
key: number key of the entry
|
||||
nums: Nums array
|
||||
|
||||
"""
|
||||
if len(nums) % 2 != 0:
|
||||
raise ValueError("A nums like array must have an even number of elements")
|
||||
|
||||
i = nums.index(key) + 2
|
||||
if i < len(nums):
|
||||
return (nums[i], nums[i + 1])
|
||||
return (None, None)
|
||||
86
venv/lib/python3.12/site-packages/pypdf/_protocols.py
Normal file
86
venv/lib/python3.12/site-packages/pypdf/_protocols.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""Helpers for working with PDF types."""
|
||||
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import IO, Any, Optional, Protocol, Union
|
||||
|
||||
from ._utils import StrByteType, StreamType
|
||||
|
||||
|
||||
class PdfObjectProtocol(Protocol):
|
||||
indirect_reference: Any
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: Any,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Union[tuple[str, ...], list[str], None] = (),
|
||||
) -> Any:
|
||||
... # pragma: no cover
|
||||
|
||||
def _reference_clone(self, clone: Any, pdf_dest: Any) -> Any:
|
||||
... # pragma: no cover
|
||||
|
||||
def get_object(self) -> Optional["PdfObjectProtocol"]:
|
||||
... # pragma: no cover
|
||||
|
||||
def hash_value(self) -> bytes:
|
||||
... # pragma: no cover
|
||||
|
||||
def write_to_stream(
|
||||
self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
|
||||
) -> None:
|
||||
... # pragma: no cover
|
||||
|
||||
|
||||
class XmpInformationProtocol(PdfObjectProtocol):
|
||||
pass
|
||||
|
||||
|
||||
class PdfCommonDocProtocol(Protocol):
|
||||
@property
|
||||
def pdf_header(self) -> str:
|
||||
... # pragma: no cover
|
||||
|
||||
@property
|
||||
def pages(self) -> list[Any]:
|
||||
... # pragma: no cover
|
||||
|
||||
@property
|
||||
def root_object(self) -> PdfObjectProtocol:
|
||||
... # pragma: no cover
|
||||
|
||||
def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
|
||||
... # pragma: no cover
|
||||
|
||||
@property
|
||||
def strict(self) -> bool:
|
||||
... # pragma: no cover
|
||||
|
||||
|
||||
class PdfReaderProtocol(PdfCommonDocProtocol, Protocol):
|
||||
@property
|
||||
@abstractmethod
|
||||
def xref(self) -> dict[int, dict[int, Any]]:
|
||||
... # pragma: no cover
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def trailer(self) -> dict[str, Any]:
|
||||
... # pragma: no cover
|
||||
|
||||
|
||||
class PdfWriterProtocol(PdfCommonDocProtocol, Protocol):
|
||||
_objects: list[Any]
|
||||
_id_translated: dict[int, dict[int, int]]
|
||||
|
||||
incremental: bool
|
||||
_reader: Any # PdfReader
|
||||
|
||||
@abstractmethod
|
||||
def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
|
||||
... # pragma: no cover
|
||||
|
||||
@abstractmethod
|
||||
def _add_object(self, obj: Any) -> Any:
|
||||
... # pragma: no cover
|
||||
1352
venv/lib/python3.12/site-packages/pypdf/_reader.py
Normal file
1352
venv/lib/python3.12/site-packages/pypdf/_reader.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,245 @@
|
||||
"""
|
||||
Code related to text extraction.
|
||||
|
||||
Some parts are still in _page.py. In doubt, they will stay there.
|
||||
"""
|
||||
|
||||
import math
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
from .._font import Font
|
||||
from ..generic import DictionaryObject, TextStringObject, encode_pdfdocencoding
|
||||
|
||||
CUSTOM_RTL_MIN: int = -1
|
||||
CUSTOM_RTL_MAX: int = -1
|
||||
CUSTOM_RTL_SPECIAL_CHARS: list[int] = []
|
||||
LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS: int = 5
|
||||
|
||||
|
||||
class OrientationNotFoundError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
def set_custom_rtl(
|
||||
_min: Union[str, int, None] = None,
|
||||
_max: Union[str, int, None] = None,
|
||||
specials: Union[str, list[int], None] = None,
|
||||
) -> tuple[int, int, list[int]]:
|
||||
"""
|
||||
Change the Right-To-Left and special characters custom parameters.
|
||||
|
||||
Args:
|
||||
_min: The new minimum value for the range of custom characters that
|
||||
will be written right to left.
|
||||
If set to ``None``, the value will not be changed.
|
||||
If set to an integer or string, it will be converted to its ASCII code.
|
||||
The default value is -1, which sets no additional range to be converted.
|
||||
_max: The new maximum value for the range of custom characters that will
|
||||
be written right to left.
|
||||
If set to ``None``, the value will not be changed.
|
||||
If set to an integer or string, it will be converted to its ASCII code.
|
||||
The default value is -1, which sets no additional range to be converted.
|
||||
specials: The new list of special characters to be inserted in the
|
||||
current insertion order.
|
||||
If set to ``None``, the current value will not be changed.
|
||||
If set to a string, it will be converted to a list of ASCII codes.
|
||||
The default value is an empty list.
|
||||
|
||||
Returns:
|
||||
A tuple containing the new values for ``CUSTOM_RTL_MIN``,
|
||||
``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.
|
||||
|
||||
"""
|
||||
global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
|
||||
if isinstance(_min, int):
|
||||
CUSTOM_RTL_MIN = _min
|
||||
elif isinstance(_min, str):
|
||||
CUSTOM_RTL_MIN = ord(_min)
|
||||
if isinstance(_max, int):
|
||||
CUSTOM_RTL_MAX = _max
|
||||
elif isinstance(_max, str):
|
||||
CUSTOM_RTL_MAX = ord(_max)
|
||||
if isinstance(specials, str):
|
||||
CUSTOM_RTL_SPECIAL_CHARS = [ord(x) for x in specials]
|
||||
elif isinstance(specials, list):
|
||||
CUSTOM_RTL_SPECIAL_CHARS = specials
|
||||
return CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS
|
||||
|
||||
|
||||
def mult(m: list[float], n: list[float]) -> list[float]:
|
||||
return [
|
||||
m[0] * n[0] + m[1] * n[2],
|
||||
m[0] * n[1] + m[1] * n[3],
|
||||
m[2] * n[0] + m[3] * n[2],
|
||||
m[2] * n[1] + m[3] * n[3],
|
||||
m[4] * n[0] + m[5] * n[2] + n[4],
|
||||
m[4] * n[1] + m[5] * n[3] + n[5],
|
||||
]
|
||||
|
||||
|
||||
def orient(m: list[float]) -> int:
|
||||
if m[3] > 1e-6:
|
||||
return 0
|
||||
if m[3] < -1e-6:
|
||||
return 180
|
||||
if m[1] > 0:
|
||||
return 90
|
||||
return 270
|
||||
|
||||
|
||||
def crlf_space_check(
|
||||
text: str,
|
||||
cmtm_prev: tuple[list[float], list[float]],
|
||||
cmtm_matrix: tuple[list[float], list[float]],
|
||||
memo_cmtm: tuple[list[float], list[float]],
|
||||
font_resource: Optional[DictionaryObject],
|
||||
orientations: tuple[int, ...],
|
||||
output: str,
|
||||
font_size: float,
|
||||
visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
|
||||
str_widths: float,
|
||||
spacewidth: float,
|
||||
str_height: float,
|
||||
) -> tuple[str, str, list[float], list[float]]:
|
||||
cm_prev = cmtm_prev[0]
|
||||
tm_prev = cmtm_prev[1]
|
||||
cm_matrix = cmtm_matrix[0]
|
||||
tm_matrix = cmtm_matrix[1]
|
||||
memo_cm = memo_cmtm[0]
|
||||
memo_tm = memo_cmtm[1]
|
||||
|
||||
m_prev = mult(tm_prev, cm_prev)
|
||||
m = mult(tm_matrix, cm_matrix)
|
||||
orientation = orient(m)
|
||||
delta_x = m[4] - m_prev[4]
|
||||
delta_y = m[5] - m_prev[5]
|
||||
# Table 108 of the 1.7 reference ("Text positioning operators")
|
||||
scale_prev_x = math.sqrt(tm_prev[0]**2 + tm_prev[1]**2)
|
||||
scale_prev_y = math.sqrt(tm_prev[2]**2 + tm_prev[3]**2)
|
||||
scale_y = math.sqrt(tm_matrix[2]**2 + tm_matrix[3]**2)
|
||||
cm_prev = m
|
||||
|
||||
if orientation not in orientations:
|
||||
raise OrientationNotFoundError
|
||||
if orientation in (0, 180):
|
||||
moved_height: float = delta_y
|
||||
moved_width: float = delta_x
|
||||
elif orientation in (90, 270):
|
||||
moved_height = delta_x
|
||||
moved_width = delta_y
|
||||
try:
|
||||
if abs(moved_height) > 0.8 * min(str_height * scale_prev_y, font_size * scale_y):
|
||||
if (output + text)[-1] != "\n":
|
||||
output += text + "\n"
|
||||
if visitor_text is not None:
|
||||
visitor_text(
|
||||
text + "\n",
|
||||
memo_cm,
|
||||
memo_tm,
|
||||
font_resource,
|
||||
font_size,
|
||||
)
|
||||
text = ""
|
||||
elif (
|
||||
(moved_width >= (spacewidth + str_widths) * scale_prev_x)
|
||||
and (output + text)[-1] != " "
|
||||
):
|
||||
text += " "
|
||||
except Exception:
|
||||
pass
|
||||
tm_prev = tm_matrix.copy()
|
||||
cm_prev = cm_matrix.copy()
|
||||
return text, output, cm_prev, tm_prev
|
||||
|
||||
|
||||
def get_text_operands(
|
||||
operands: list[Union[str, TextStringObject]],
|
||||
cm_matrix: list[float],
|
||||
tm_matrix: list[float],
|
||||
font: Font,
|
||||
orientations: tuple[int, ...]
|
||||
) -> tuple[str, bool]:
|
||||
t: str = ""
|
||||
is_str_operands = False
|
||||
m = mult(tm_matrix, cm_matrix)
|
||||
orientation = orient(m)
|
||||
if orientation in orientations and len(operands) > 0:
|
||||
if isinstance(operands[0], str):
|
||||
t = operands[0]
|
||||
is_str_operands = True
|
||||
else:
|
||||
t = ""
|
||||
tt: bytes = (
|
||||
encode_pdfdocencoding(operands[0])
|
||||
if isinstance(operands[0], str)
|
||||
else operands[0]
|
||||
)
|
||||
if isinstance(font.encoding, str):
|
||||
try:
|
||||
t = tt.decode(font.encoding, "surrogatepass") # apply str encoding
|
||||
except Exception:
|
||||
# the data does not match the expectation,
|
||||
# we use the alternative ;
|
||||
# text extraction may not be good
|
||||
t = tt.decode(
|
||||
"utf-16-be" if font.encoding == "charmap" else "charmap",
|
||||
"surrogatepass",
|
||||
) # apply str encoding
|
||||
else: # apply dict encoding
|
||||
t = "".join(
|
||||
[font.encoding[x] if x in font.encoding else bytes((x,)).decode() for x in tt]
|
||||
)
|
||||
return (t, is_str_operands)
|
||||
|
||||
|
||||
def get_display_str(
    text: str,
    cm_matrix: list[float],
    tm_matrix: list[float],
    font_resource: Optional[DictionaryObject],
    font: Font,
    text_operands: str,
    font_size: float,
    rtl_dir: bool,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]]
) -> tuple[str, bool, float]:
    """
    Accumulate decoded characters into ``text``, handling mixed
    left-to-right / right-to-left runs, and sum the rendered glyph widths.

    Whenever the writing direction flips, the accumulated run is flushed to
    ``visitor_text`` (if provided) and a new run is started.

    Args:
        text: text already accumulated for the current run.
        cm_matrix: current transformation matrix (forwarded to visitor_text).
        tm_matrix: current text matrix (forwarded to visitor_text).
        font_resource: font resource dictionary (forwarded to visitor_text).
        font: active font; supplies character_map and glyph width data.
        text_operands: decoded characters from get_text_operands().
        font_size: current font size (forwarded to visitor_text).
        rtl_dir: True when the current run is right-to-left.
        visitor_text: optional callback invoked on each direction change.

    Returns:
        Tuple of (updated text, updated rtl_dir, accumulated width).

    """
    # "\u0590 - \u08FF \uFB50 - \uFDFF"
    widths: float = 0.0
    for x in [font.character_map.get(x, x) for x in text_operands]:
        # x can be a sequence of bytes ; ex: habibi.pdf
        if len(x) == 1:
            xx = ord(x)
        else:
            # multi-character mapping: use a neutral code point so the
            # current insertion order is preserved
            xx = 1
        # fmt: off
        if (
            # cases where the current inserting order is kept
            (xx <= 0x2F)  # punctuations but...
            or 0x3A <= xx <= 0x40  # numbers (x30-39)
            or 0x2000 <= xx <= 0x206F  # upper punctuations..
            or 0x20A0 <= xx <= 0x21FF  # but (numbers) indices/exponents
            or xx in CUSTOM_RTL_SPECIAL_CHARS  # customized....
        ):
            text = x + text if rtl_dir else text + x
        elif (  # right-to-left characters set
            0x0590 <= xx <= 0x08FF
            or 0xFB1D <= xx <= 0xFDFF
            or 0xFE70 <= xx <= 0xFEFF
            or CUSTOM_RTL_MIN <= xx <= CUSTOM_RTL_MAX
        ):
            if not rtl_dir:
                # direction change LTR -> RTL: flush the finished LTR run
                rtl_dir = True
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, font_resource, font_size)
                text = ""
            text = x + text
        else:  # left-to-right
            if rtl_dir:
                # direction change RTL -> LTR: flush the finished RTL run
                rtl_dir = False
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, font_resource, font_size)
                text = ""
            text = text + x
        widths += font.space_width if x == " " else font.text_width(x)
    # fmt: on
    return text, rtl_dir, widths
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Layout mode text extraction extension for pypdf"""
|
||||
from ..._font import Font
|
||||
from ._fixed_width_page import (
|
||||
fixed_char_width,
|
||||
fixed_width_page,
|
||||
text_show_operations,
|
||||
y_coordinate_groups,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Font",
|
||||
"fixed_char_width",
|
||||
"fixed_width_page",
|
||||
"text_show_operations",
|
||||
"y_coordinate_groups",
|
||||
]
|
||||
@@ -0,0 +1,400 @@
|
||||
"""Extract PDF text preserving the layout of the source PDF"""
|
||||
|
||||
from collections.abc import Iterator
|
||||
from itertools import groupby
|
||||
from math import ceil
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal, Optional, TypedDict
|
||||
|
||||
from ..._font import Font
|
||||
from ..._utils import logger_warning
|
||||
from .. import LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
|
||||
from ._text_state_manager import TextStateManager
|
||||
from ._text_state_params import TextStateParams
|
||||
|
||||
|
||||
class BTGroup(TypedDict):
    """
    Dict describing a line of text rendered within a BT/ET operator pair.
    If multiple text show operations render text on the same line, the text
    will be combined into a single BTGroup dict.

    Keys:
        tx: x coordinate of first character in BTGroup
        ty: y coordinate of first character in BTGroup
        font_size: nominal font size
        font_height: effective font height
        text: rendered text
        displaced_tx: x coordinate of last character in BTGroup
        flip_sort: -1 if page is upside down, else 1
    """

    tx: float
    ty: float
    font_size: float
    font_height: float
    text: str
    displaced_tx: float
    # Multiplier applied to ty when sorting/grouping so flipped (upside
    # down) pages sort in the correct reading order.
    flip_sort: Literal[-1, 1]
|
||||
|
||||
|
||||
def bt_group(tj_op: TextStateParams, rendered_text: str, dispaced_tx: float) -> BTGroup:
    """
    Build a BTGroup dict from a text state snapshot and its rendered text.

    Args:
        tj_op (TextStateParams): state captured for the text show operation
        rendered_text (str): text rendered by the operation
        dispaced_tx (float): x coordinate of the last character in the group

    Returns:
        BTGroup: dict describing one rendered line of text

    """
    # An inverted y axis sorts in the opposite direction during grouping.
    group: BTGroup = {
        "tx": tj_op.tx,
        "ty": tj_op.ty,
        "font_size": tj_op.font_size,
        "font_height": tj_op.font_height,
        "text": rendered_text,
        "displaced_tx": dispaced_tx,
        "flip_sort": -1 if tj_op.flip_vertical else 1,
    }
    return group
|
||||
|
||||
|
||||
def recurs_to_target_op(
    ops: Iterator[tuple[list[Any], bytes]],
    text_state_mgr: TextStateManager,
    end_target: Literal[b"Q", b"ET"],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
) -> tuple[list[BTGroup], list[TextStateParams]]:
    """
    Recurse operators between BT/ET and/or q/Q operators managing the transform
    stack and capturing text positioning and rendering data.

    Args:
        ops: iterator of operators in content stream
        text_state_mgr: a TextStateManager instance
        end_target: Either b"Q" (ends b"q" op) or b"ET" (ends b"BT" op)
        fonts: font dictionary as returned by PageObject._layout_mode_fonts()
        strip_rotated: drop text rotated w.r.t. the page. Defaults to True.

    Returns:
        tuple: list of BTGroup dicts + list of TextStateParams dataclass instances.

    """
    # 1 entry per line of text rendered within each BT/ET operation.
    bt_groups: list[BTGroup] = []

    # 1 entry per text show operator (Tj/TJ/'/")
    tj_ops: list[TextStateParams] = []

    if end_target == b"Q":
        # add new q level. cm's added at this level will be popped at next b'Q'
        text_state_mgr.add_q()

    for operands, op in ops:
        # The loop is broken by the end target, or exits normally when there are no more ops.
        if op == end_target:
            if op == b"Q":
                text_state_mgr.remove_q()
            if op == b"ET":
                if not tj_ops:
                    return bt_groups, tj_ops
                _text = ""
                bt_idx = 0  # idx of first tj in this bt group
                last_displaced_tx = tj_ops[bt_idx].displaced_tx
                last_ty = tj_ops[bt_idx].ty
                for _idx, _tj in enumerate(
                    tj_ops
                ):  # ... build text from new Tj operators
                    if strip_rotated and _tj.rotated:
                        continue
                    if not _tj.font.interpretable:  # generates warning
                        continue
                    # if the y position of the text is greater than the font height, assume
                    # the text is on a new line and start a new group
                    if abs(_tj.ty - last_ty) > _tj.font_height:
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        _text = ""

                    # if the x position of the text is less than the last x position by
                    # more than 5 spaces widths, assume the text order should be flipped
                    # and start a new group
                    if (
                        last_displaced_tx - _tj.tx
                        > _tj.space_tx * LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
                    ):
                        if _text.strip():
                            bt_groups.append(
                                bt_group(tj_ops[bt_idx], _text, last_displaced_tx)
                            )
                        bt_idx = _idx
                        last_displaced_tx = _tj.displaced_tx
                        _text = ""

                    # calculate excess x translation based on ending tx of previous Tj.
                    # multiply by bool (_idx != bt_idx) to ensure spaces aren't double
                    # applied to the first tj of a BTGroup in fixed_width_page().
                    excess_tx = round(_tj.tx - last_displaced_tx, 3) * (_idx != bt_idx)
                    # space_tx could be 0 if either Tz or font_size was 0 for this _tj.
                    spaces = int(excess_tx // _tj.space_tx) if _tj.space_tx else 0
                    new_text = f'{" " * spaces}{_tj.txt}'

                    last_ty = _tj.ty
                    _text = f"{_text}{new_text}"
                    last_displaced_tx = _tj.displaced_tx
                # flush the trailing group accumulated after the last split
                if _text:
                    bt_groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
                text_state_mgr.reset_tm()
            break
        if op == b"q":
            # nested graphics state: recurse until the matching b"Q"
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"cm":
            text_state_mgr.add_cm(*operands)
        elif op == b"BT":
            # nested text object: recurse until the matching b"ET"
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"ET", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tj":
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b"TJ":
            # TJ interleaves byte strings with numeric kerning offsets;
            # offsets adjust the rendering transform between strings.
            _tj = text_state_mgr.text_state_params()
            for tj_op in operands[0]:
                if isinstance(tj_op, bytes):
                    _tj = text_state_mgr.text_state_params(tj_op)
                    tj_ops.append(_tj)
                else:
                    text_state_mgr.add_trm(_tj.displacement_matrix(td_offset=tj_op))
        elif op == b"'":
            # ' = move to next line (by leading TL), then show text
            text_state_mgr.reset_trm()
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b'"':
            # " = set word/char spacing, move to next line, then show text
            text_state_mgr.reset_trm()
            text_state_mgr.set_state_param(b"Tw", operands[0])
            text_state_mgr.set_state_param(b"Tc", operands[1])
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[2]))
        elif op in (b"Td", b"Tm", b"TD", b"T*"):
            text_state_mgr.reset_trm()
            if op == b"Tm":
                text_state_mgr.reset_tm()
            elif op == b"TD":
                # TD also sets the leading to -ty before translating
                text_state_mgr.set_state_param(b"TL", -operands[1])
            elif op == b"T*":
                operands = [0, -text_state_mgr.TL]
            text_state_mgr.add_tm(operands)
        elif op == b"Tf":
            text_state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # handle Tc, Tw, Tz, TL, and Ts operators
            text_state_mgr.set_state_param(op, operands)
    else:
        # for/else: the iterator was exhausted without hitting end_target
        logger_warning(
            f"Unbalanced target operations, expected {end_target!r}.",
            __name__,
        )
    return bt_groups, tj_ops
|
||||
|
||||
|
||||
def y_coordinate_groups(
    bt_groups: list[BTGroup], debug_path: Optional[Path] = None
) -> dict[int, list[BTGroup]]:
    """
    Group text operations by rendered y coordinate, i.e. the line number.

    Args:
        bt_groups: list of dicts as returned by text_show_operations()
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        Dict[int, List[BTGroup]]: dict of lists of text rendered by each BT operator
        keyed by y coordinate

    """
    # Guard the empty case (page with no extractable text): without it,
    # next(iter(ty_groups)) below raises StopIteration.
    if not bt_groups:
        return {}
    # groupby() only merges *consecutive* equal keys; bt_groups arrives
    # pre-sorted by (ty, tx) from text_show_operations(), so this is safe.
    ty_groups = {
        ty: sorted(grp, key=lambda x: x["tx"])
        for ty, grp in groupby(
            bt_groups, key=lambda bt_grp: int(bt_grp["ty"] * bt_grp["flip_sort"])
        )
    }
    # combine groups whose y coordinates differ by less than the effective font height
    # (accounts for mixed fonts and other minor oddities)
    last_ty = next(iter(ty_groups))
    last_txs = {int(_t["tx"]) for _t in ty_groups[last_ty] if _t["text"].strip()}
    for ty in list(ty_groups)[1:]:
        fsz = min(ty_groups[_y][0]["font_height"] for _y in (ty, last_ty))
        txs = {int(_t["tx"]) for _t in ty_groups[ty] if _t["text"].strip()}
        # prevent merge if both groups are rendering in the same x position.
        no_text_overlap = not (txs & last_txs)
        offset_less_than_font_height = abs(ty - last_ty) < fsz
        if no_text_overlap and offset_less_than_font_height:
            ty_groups[last_ty] = sorted(
                ty_groups.pop(ty) + ty_groups[last_ty], key=lambda x: x["tx"]
            )
            last_txs |= txs
        else:
            last_ty = ty
            last_txs = txs
    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        debug_path.joinpath("bt_groups.json").write_text(
            json.dumps(ty_groups, indent=2, default=str), "utf-8"
        )
    return ty_groups
|
||||
|
||||
|
||||
def text_show_operations(
    ops: Iterator[tuple[list[Any], bytes]],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
    debug_path: Optional[Path] = None,
) -> list[BTGroup]:
    """
    Extract text from BT/ET operator pairs.

    Args:
        ops (Iterator[Tuple[List, bytes]]): iterator of operators in content stream
        fonts (Dict[str, Font]): font dictionary
        strip_rotated: Removes text if rotated w.r.t. to the page. Defaults to True.
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        List[BTGroup]: list of dicts of text rendered by each BT operator,
        sorted top-to-bottom then left-to-right and left-aligned at tx=0.

    """
    state_mgr = TextStateManager()  # transformation stack manager
    bt_groups: list[BTGroup] = []  # BT operator dict
    tj_ops: list[TextStateParams] = []  # Tj/TJ operator data
    for operands, op in ops:
        if op in (b"BT", b"q"):
            # recursion consumes ops up to (and including) the matching ET/Q
            bts, tjs = recurs_to_target_op(
                ops, state_mgr, b"ET" if op == b"BT" else b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tf":
            state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # set Tc, Tw, Tz, TL, and Ts if required. ignores all other ops
            state_mgr.set_state_param(op, operands)

    # warn once per page about conditions that degrade or truncate output
    if any(tj.rotated for tj in tj_ops):
        if strip_rotated:
            logger_warning(
                "Rotated text discovered. Output will be incomplete.", __name__
            )
        else:
            logger_warning(
                "Rotated text discovered. Layout will be degraded.", __name__
            )
    if not all(tj.font.interpretable for tj in tj_ops):
        logger_warning(
            "PDF contains an uninterpretable font. Output will be incomplete.", __name__
        )

    # left align the data, i.e. decrement all tx values by min(tx)
    # (dict(ogrp, ...) produces shallow copies, leaving the originals intact)
    min_x = min((x["tx"] for x in bt_groups), default=0.0)
    bt_groups = [
        dict(ogrp, tx=ogrp["tx"] - min_x, displaced_tx=ogrp["displaced_tx"] - min_x)  # type: ignore[misc]
        for ogrp in sorted(
            bt_groups, key=lambda x: (x["ty"] * x["flip_sort"], -x["tx"]), reverse=True
        )
    ]

    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        debug_path.joinpath("bts.json").write_text(
            json.dumps(bt_groups, indent=2, default=str), "utf-8"
        )
        debug_path.joinpath("tjs.json").write_text(
            json.dumps(
                tj_ops, indent=2, default=lambda x: getattr(x, "to_dict", str)(x)
            ),
            "utf-8",
        )
    return bt_groups
|
||||
|
||||
|
||||
def fixed_char_width(bt_groups: list[BTGroup], scale_weight: float = 1.25) -> float:
    """
    Calculate average character width weighted by the length of the rendered
    text in each sample for conversion to fixed-width layout.

    Args:
        bt_groups (List[BTGroup]): List of dicts of text rendered by each
            BT operator
        scale_weight (float): multiplier applied to each sample's character
            count when weighting. Defaults to 1.25.

    Returns:
        float: fixed character width

    """
    weighted_width_sum = 0.0
    weight_sum = 0.0
    for grp in bt_groups:
        # weight each sample by its (scaled) character count
        weight = len(grp["text"]) * scale_weight
        per_char_width = (grp["displaced_tx"] - grp["tx"]) / weight
        weighted_width_sum += per_char_width * weight
        weight_sum += weight
    return weighted_width_sum / weight_sum
|
||||
|
||||
|
||||
def fixed_width_page(
    ty_groups: dict[int, list[BTGroup]], char_width: float, space_vertically: bool, font_height_weight: float
) -> str:
    """
    Generate page text from text operations grouped by rendered y coordinate.

    Args:
        ty_groups: dict of text show ops as returned by y_coordinate_groups()
        char_width: fixed character width
        space_vertically: include blank lines inferred from y distance + font height.
        font_height_weight: multiplier for font height when calculating blank lines.

    Returns:
        str: page text in a fixed width format that closely adheres to the rendered
        layout in the source pdf.

    """
    lines: list[str] = []
    last_y_coord = 0
    # translation table mapping control characters 14-31 to spaces
    table = str.maketrans(dict.fromkeys(range(14, 32), " "))
    for y_coord, line_data in ty_groups.items():
        if space_vertically and lines:
            fh = line_data[0]["font_height"]
            # number of blank lines implied by the vertical gap; a negative
            # or zero count makes the extend() below a no-op
            blank_lines = 0 if fh == 0 else (
                int(abs(y_coord - last_y_coord) / (fh * font_height_weight)) - 1
            )
            lines.extend([""] * blank_lines)

        line_parts = []  # It uses a list to construct the line, avoiding string concatenation.
        current_len = 0  # Track the size with int instead of len(str) overhead.
        last_disp = 0.0
        for bt_op in line_data:
            tx = bt_op["tx"]
            # column this fragment should start at, in fixed-width units
            offset = int(tx // char_width)
            needed_spaces = offset - current_len
            # pad only when the previous fragment's end does not already
            # reach this fragment's start position
            if needed_spaces > 0 and ceil(last_disp) < int(tx):
                padding = " " * needed_spaces
                line_parts.append(padding)
                current_len += needed_spaces

            raw_text = bt_op["text"]
            text = raw_text.translate(table)
            line_parts.append(text)
            current_len += len(text)
            last_disp = bt_op["displaced_tx"]

        full_line = "".join(line_parts).rstrip()
        # blank leading lines are suppressed; interior blanks only kept
        # when vertical spacing is requested
        if full_line.strip() or (space_vertically and lines):
            lines.append(full_line)

        last_y_coord = y_coord

    return "\n".join(lines)
|
||||
@@ -0,0 +1,221 @@
|
||||
"""manage the PDF transform stack during "layout" mode text extraction"""
|
||||
|
||||
from collections import ChainMap, Counter
|
||||
from collections import ChainMap as ChainMapType
|
||||
from collections import Counter as CounterType
|
||||
from collections.abc import MutableMapping
|
||||
from typing import Any, Union
|
||||
|
||||
from ..._font import Font
|
||||
from ...errors import PdfReadError
|
||||
from .. import mult
|
||||
from ._text_state_params import TextStateParams
|
||||
|
||||
TextStateManagerChainMapType = ChainMapType[Union[int, str], Union[float, bool]]
|
||||
TextStateManagerDictType = MutableMapping[Union[int, str], Union[float, bool]]
|
||||
|
||||
|
||||
class TextStateManager:
    """
    Tracks the current text state including cm/tm/trm transformation matrices.

    Attributes:
        transform_stack (ChainMap): ChainMap of cm/tm transformation matrices
        q_queue (Counter[int]): Counter of q operators
        q_depth (List[int]): list of q operator nesting levels
        Tc (float): character spacing
        Tw (float): word spacing
        Tz (int): horizontal scaling
        TL (float): leading
        Ts (float): text rise
        font (Font): font object
        font_size (int | float): font size

    """

    def __init__(self) -> None:
        # transform_stack grows via new_child() for each cm/tm/trm and is
        # unwound by reset_tm()/reset_trm()/remove_q().
        self.transform_stack: TextStateManagerChainMapType = ChainMap(
            self.new_transform()
        )
        self.q_queue: CounterType[int] = Counter()
        self.q_depth = [0]
        self.Tc: float = 0.0
        self.Tw: float = 0.0
        self.Tz: float = 100.0
        self.TL: float = 0.0
        self.Ts: float = 0.0
        # (font, font_size) snapshots saved by add_q / restored by remove_q
        self.font_stack: list[tuple[Union[Font, None], Union[int, float]]] = []
        self.font: Union[Font, None] = None
        self.font_size: Union[int, float] = 0

    def set_state_param(self, op: bytes, value: Union[float, list[Any]]) -> None:
        """
        Set a text state parameter. Supports Tc, Tz, Tw, TL, and Ts operators.

        Args:
            op: operator read from PDF stream as bytes. No action is taken
                for unsupported operators (see supported operators above).
            value (float | List[Any]): new parameter value. If a list,
                value[0] is used.

        """
        if op not in [b"Tc", b"Tz", b"Tw", b"TL", b"Ts"]:
            return
        # attribute names intentionally mirror the PDF operator names
        self.__setattr__(op.decode(), value[0] if isinstance(value, list) else value)

    def set_font(self, font: Font, size: float) -> None:
        """
        Set the current font and font_size.

        Args:
            font (Font): a layout mode Font
            size (float): font size

        """
        self.font = font
        self.font_size = size

    def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams:
        """
        Create a TextStateParams instance to display a text string. Type[bytes] values
        will be decoded implicitly.

        Args:
            value (str | bytes): text to associate with the captured state.

        Raises:
            PdfReadError: if font not set (no Tf operator in incoming pdf content stream)

        Returns:
            TextStateParams: current text state parameters

        """
        if not isinstance(self.font, Font):
            raise PdfReadError(
                "font not set: is PDF missing a Tf operator?"
            )  # pragma: no cover
        if isinstance(value, bytes):
            try:
                if isinstance(self.font.encoding, str):
                    txt = value.decode(self.font.encoding, "surrogatepass")
                else:
                    # dict encoding: unmapped bytes fall back to decoding
                    # the single raw byte directly
                    txt = "".join(
                        self.font.encoding[x]
                        if x in self.font.encoding
                        else bytes((x,)).decode()
                        for x in value
                    )
            except (UnicodeEncodeError, UnicodeDecodeError):
                txt = value.decode("utf-8", "replace")
            # map decoded characters through the font's character map
            txt = "".join(
                self.font.character_map.get(x, x) for x in txt
            )
        else:
            txt = value
        return TextStateParams(
            txt,
            self.font,
            self.font_size,
            self.Tc,
            self.Tw,
            self.Tz,
            self.TL,
            self.Ts,
            self.effective_transform,
        )

    @staticmethod
    def raw_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
    ) -> dict[int, float]:
        """Only a/b/c/d/e/f matrix params"""
        return dict(zip(range(6), map(float, (_a, _b, _c, _d, _e, _f))))

    @staticmethod
    def new_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
        is_text: bool = False,
        is_render: bool = False,
    ) -> TextStateManagerDictType:
        """Standard a/b/c/d/e/f matrix params + 'is_text' and 'is_render' keys"""
        result: Any = TextStateManager.raw_transform(_a, _b, _c, _d, _e, _f)
        result.update({"is_text": is_text, "is_render": is_render})
        return result

    def reset_tm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_text==True or is_render==True"""
        while (
            self.transform_stack.maps[0]["is_text"]
            or self.transform_stack.maps[0]["is_render"]
        ):
            # .parents drops the top map; cm entries are never flagged,
            # so the loop stops at the innermost cm transform
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack

    def reset_trm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_render==True"""
        while self.transform_stack.maps[0]["is_render"]:
            self.transform_stack = self.transform_stack.parents
        return self.transform_stack

    def remove_q(self) -> TextStateManagerChainMapType:
        """Rewind to stack prior state after closing a 'q' with internal 'cm' ops"""
        # restore the font state saved by the matching add_q()
        self.font, self.font_size = self.font_stack.pop(-1)
        self.transform_stack = self.reset_tm()
        # drop as many cm maps as were added at this q depth (0 if none)
        self.transform_stack.maps = self.transform_stack.maps[
            self.q_queue.pop(self.q_depth.pop(), 0) :
        ]
        return self.transform_stack

    def add_q(self) -> None:
        """Add another level to q_queue"""
        self.font_stack.append((self.font, self.font_size))
        self.q_depth.append(len(self.q_depth))

    def add_cm(self, *args: Any) -> TextStateManagerChainMapType:
        """Concatenate an additional transform matrix"""
        self.transform_stack = self.reset_tm()
        # count this cm against the current q depth so remove_q can pop it
        self.q_queue.update(self.q_depth[-1:])
        self.transform_stack = self.transform_stack.new_child(self.new_transform(*args))
        return self.transform_stack

    def _complete_matrix(self, operands: list[float]) -> list[float]:
        """Adds a, b, c, and d to an "e/f only" operand set (e.g Td)"""
        if len(operands) == 2:  # this is a Td operator or equivalent
            operands = [1.0, 0.0, 0.0, 1.0, *operands]
        return operands

    def add_tm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    def add_trm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text rendering transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True, is_render=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    @property
    def effective_transform(self) -> list[float]:
        """Current effective transform accounting for cm, tm, and trm transforms"""
        # fold the stack from innermost (most recent) map outward
        eff_transform = [*self.transform_stack.maps[0].values()]
        for transform in self.transform_stack.maps[1:]:
            eff_transform = mult(eff_transform, transform)  # type: ignore[arg-type] # dict has int keys 0-5
        return eff_transform
|
||||
@@ -0,0 +1,135 @@
|
||||
"""A dataclass that captures the CTM and Text State for a tj operation"""
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Union
|
||||
|
||||
from ..._font import Font
|
||||
from .. import mult, orient
|
||||
|
||||
|
||||
@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.

    Attributes:
        txt (str): the text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (List[float]): effective transformation matrix.
        tx (float): x cood of rendered text, i.e. self.transform[4]
        ty (float): y cood of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.

    """

    txt: str
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: list[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    # the fields below are all derived in __post_init__, never caller-set
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)

    def __post_init__(self) -> None:
        # 90/270 degree orientation: cancel the rotation components so the
        # derived coordinates below are computed in page space
        if orient(self.transform) in (90, 270):
            self.transform = mult(
                [1.0, -self.transform[1], -self.transform[2], 1.0, 0.0, 0.0],
                self.transform,
            )
            self.rotated = True
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            self.transform = mult([-1.0, 0.0, 0.0, -1.0, 0.0, 0.0], self.transform)
            self.rotated = True
        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]
        self.space_tx = round(self.word_tx(" "), 3)
        if self.space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a td_offset of -1 * font.space_width where font.space_width is
            # the space_width calculated in _font.py.
            self.space_tx = round(self.word_tx("", -self.font.space_width), 3)
        self.font_height = self.font_size * math.sqrt(
            self.transform[1] ** 2 + self.transform[3] ** 2
        )
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis

    def font_size_matrix(self) -> list[float]:
        """Font size matrix"""
        return [
            self.font_size * (self.Tz / 100.0),
            0.0,
            0.0,
            self.font_size,
            0.0,
            self.Ts,
        ]

    def displaced_transform(self) -> list[float]:
        """Effective transform matrix after text has been rendered."""
        return mult(self.displacement_matrix(), self.transform)

    def render_transform(self) -> list[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        return mult(self.font_size_matrix(), self.transform)

    def displacement_matrix(
        self, word: Union[str, None] = None, td_offset: float = 0.0
    ) -> list[float]:
        """
        Text displacement matrix

        Args:
            word (str, optional): Defaults to None in which case self.txt displacement is
                returned.
            td_offset (float, optional): translation applied by TD operator. Defaults to 0.0.

        """
        word = word if word is not None else self.txt
        return [1.0, 0.0, 0.0, 1.0, self.word_tx(word, td_offset), 0.0]

    def word_tx(self, word: str, td_offset: float = 0.0) -> float:
        """Horizontal text displacement for any word according this text state"""
        width: float = 0.0
        for char in word:
            if char == " ":
                width += self.font.space_width
            else:
                width += self.font.text_width(char)
        # glyph widths are in 1/1000 text-space units; apply char spacing,
        # word spacing per space char, then horizontal scaling
        return (
            (self.font_size * ((width - td_offset) / 1000.0))
            + self.Tc
            + word.count(" ") * self.Tw
        ) * (self.Tz / 100.0)

    @staticmethod
    def to_dict(inst: "TextStateParams") -> dict[str, Any]:
        """Dataclass to dict for json.dumps serialization"""
        return {k: getattr(inst, k) for k in inst.__dataclass_fields__ if k != "font"}
|
||||
@@ -0,0 +1,351 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import math
|
||||
from typing import Any, Callable, Optional, Union
|
||||
|
||||
from .._font import Font, FontDescriptor
|
||||
from ..generic import DictionaryObject, TextStringObject
|
||||
from . import OrientationNotFoundError, crlf_space_check, get_display_str, get_text_operands, mult
|
||||
|
||||
|
||||
class TextExtraction:
    """
    A class to handle PDF text extraction operations.

    This class encapsulates all the state and operations needed for extracting
    text from PDF content streams, replacing the nested functions and nonlocal
    variables in the original implementation.

    State falls into three groups:
    - matrices (``cm_*``, ``tm_*``, ``memo_*``) tracking the current
      transformation and text positions;
    - font state (``font``, ``font_resource``, ``font_size`` and the
      Tz/Tw/TL scaling parameters);
    - accumulated text (``text`` for the fragment being built, ``output``
      for completed output).
    """

    def __init__(self) -> None:
        # Cache of per-font width data keyed by font identifier.
        self._font_width_maps: dict[str, tuple[dict[Any, float], str, float]] = {}

        # Text extraction state variables
        # Current transformation matrix (cm) and text matrix (tm), stored as
        # the six free coefficients [a, b, c, d, e, f] of the affine matrix.
        self.cm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        # Stack used by q/Q operators: saves the matrix plus the font and
        # spacing state that must be restored on Q.
        self.cm_stack: list[
            tuple[
                list[float],
                Optional[DictionaryObject],
                Font,
                float,
                float,
                float,
                float,
            ]
        ] = []

        # Store the last modified matrices; can be an intermediate position
        self.cm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        # Store the position at the beginning of building the text
        self.memo_cm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.memo_tm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        self.char_scale = 1.0  # horizontal scaling factor (Tz operator)
        self.space_scale = 1.0  # word-spacing factor (Tw operator)
        self._space_width: float = 500.0  # will be set correctly at first Tf
        self._actual_str_size: dict[str, float] = {
            "str_widths": 0.0,
            "str_height": 0.0,
        }  # will be set to string length calculation result
        self.TL = 0.0  # text leading (TL operator), scaled to text space
        self.font_size = 12.0  # init just in case of

        # Text extraction variables
        self.text: str = ""  # fragment accumulated since the last flush
        self.output: str = ""  # completed extracted text
        self.rtl_dir: bool = False  # right-to-left
        self.font_resource: Optional[DictionaryObject] = None
        # Placeholder font until the first Tf operator selects a real one.
        self.font = Font(
            name = "NotInitialized",
            sub_type="Unknown",
            encoding="charmap",
            font_descriptor=FontDescriptor(),
        )
        self.orientations: tuple[int, ...] = (0, 90, 180, 270)
        self.visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None
        self.font_resources: dict[str, DictionaryObject] = {}
        self.fonts: dict[str, Font] = {}

        # Dispatch table: content-stream operator bytes -> handler method.
        # Handlers for Td/Tm/T* return the pending string width; the others
        # return None (see process_operation).
        self.operation_handlers = {
            b"BT": self._handle_bt,
            b"ET": self._handle_et,
            b"q": self._handle_save_graphics_state,
            b"Q": self._handle_restore_graphics_state,
            b"cm": self._handle_cm,
            b"Tz": self._handle_tz,
            b"Tw": self._handle_tw,
            b"TL": self._handle_tl,
            b"Tf": self._handle_tf,
            b"Td": self._handle_td,
            b"Tm": self._handle_tm,
            b"T*": self._handle_t_star,
            b"Tj": self._handle_tj_operation,
        }

    def initialize_extraction(
        self,
        orientations: tuple[int, ...] = (0, 90, 180, 270),
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
        font_resources: Optional[dict[str, DictionaryObject]] = None,
        fonts: Optional[dict[str, Font]] = None
    ) -> None:
        """
        Initialize the extractor with extraction parameters.

        Args:
            orientations: Text orientations (in degrees) to extract.
            visitor_text: Optional callback invoked for each text fragment.
            font_resources: Mapping of font resource name to its dictionary.
            fonts: Mapping of font resource name to its Font object.
        """
        self.orientations = orientations
        self.visitor_text = visitor_text
        self.font_resources = font_resources or {}
        self.fonts = fonts or {}

        # Reset state
        self.text = ""
        self.output = ""
        self.rtl_dir = False

    def compute_str_widths(self, str_widths: float) -> float:
        # Widths are accumulated in thousandths of a text-space unit
        # (glyph-space convention), hence the division by 1000.
        return str_widths / 1000

    def process_operation(self, operator: bytes, operands: list[Any]) -> None:
        """Dispatch one content-stream operator; unknown operators are ignored."""
        if operator in self.operation_handlers:
            handler = self.operation_handlers[operator]
            str_widths = handler(operands)

            # Post-process operations that affect text positioning
            if operator in {b"Td", b"Tm", b"T*", b"Tj"}:
                self._post_process_text_operation(str_widths or 0.0)

    def _post_process_text_operation(self, str_widths: float) -> None:
        """Handle common post-processing for text positioning operations."""
        try:
            self.text, self.output, self.cm_prev, self.tm_prev = crlf_space_check(
                self.text,
                (self.cm_prev, self.tm_prev),
                (self.cm_matrix, self.tm_matrix),
                (self.memo_cm, self.memo_tm),
                self.font_resource,
                self.orientations,
                self.output,
                self.font_size,
                self.visitor_text,
                str_widths,
                self.compute_str_widths(self.font_size * self._space_width),
                self._actual_str_size["str_height"],
            )
            if self.text == "":
                # The fragment was flushed: remember the position where the
                # next fragment starts.
                self.memo_cm = self.cm_matrix.copy()
                self.memo_tm = self.tm_matrix.copy()
        except OrientationNotFoundError:
            # Text in an orientation not listed in self.orientations is
            # deliberately dropped.
            pass

    def _handle_tj(
        self,
        text: str,
        operands: list[Union[str, TextStringObject]],
        cm_matrix: list[float],
        tm_matrix: list[float],
        font_resource: Optional[DictionaryObject],
        font: Font,
        orientations: tuple[int, ...],
        font_size: float,
        rtl_dir: bool,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
        actual_str_size: dict[str, float],
    ) -> tuple[str, bool, dict[str, float]]:
        """
        Decode the operands of a text-showing operator and append the result.

        Returns:
            Updated ``(text, rtl_dir, actual_str_size)``; ``actual_str_size``
            accumulates the shown string's width and records its height.
        """
        text_operands, is_str_operands = get_text_operands(
            operands, cm_matrix, tm_matrix, font, orientations
        )
        if is_str_operands:
            text += text_operands
            # Sum per-character advances; spaces use the font's space width.
            font_widths = sum([font.space_width if x == " " else font.text_width(x) for x in text_operands])
        else:
            text, rtl_dir, font_widths = get_display_str(
                text,
                cm_matrix,
                tm_matrix,  # text matrix
                font_resource,
                font,
                text_operands,
                font_size,
                rtl_dir,
                visitor_text,
            )
        actual_str_size["str_widths"] += font_widths * font_size
        actual_str_size["str_height"] = font_size
        return text, rtl_dir, actual_str_size

    def _flush_text(self) -> None:
        """Flush accumulated text to output and call visitor if present."""
        self.output += self.text
        if self.visitor_text is not None:
            self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
        self.text = ""
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    # Operation handlers

    def _handle_bt(self, operands: list[Any]) -> None:
        """Handle BT (Begin Text) operation - Table 5.4 page 405."""
        # BT resets the text matrix to identity.
        self.tm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self._flush_text()

    def _handle_et(self, operands: list[Any]) -> None:
        """Handle ET (End Text) operation - Table 5.4 page 405."""
        self._flush_text()

    def _handle_save_graphics_state(self, operands: list[Any]) -> None:
        """Handle q (Save graphics state) operation - Table 4.7 page 219."""
        self.cm_stack.append(
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            )
        )

    def _handle_restore_graphics_state(self, operands: list[Any]) -> None:
        """Handle Q (Restore graphics state) operation - Table 4.7 page 219."""
        try:
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            ) = self.cm_stack.pop()
        except Exception:
            # NOTE(review): on stack underflow only cm_matrix is reset; the
            # font/spacing state keeps its previous values — confirm intended.
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

    def _handle_cm(self, operands: list[Any]) -> None:
        """Handle cm (Modify current matrix) operation - Table 4.7 page 219."""
        # A matrix change can move the drawing position, so flush first.
        self.output += self.text
        if self.visitor_text is not None:
            self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
        self.text = ""
        try:
            self.cm_matrix = mult([float(operand) for operand in operands[:6]], self.cm_matrix)
        except Exception:
            # Malformed operands: fall back to the identity matrix.
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    def _handle_tz(self, operands: list[Any]) -> None:
        """Handle Tz (Set horizontal text scaling) operation - Table 5.2 page 398."""
        # Operand is a percentage; stored as a factor.
        self.char_scale = float(operands[0]) / 100 if operands else 1.0

    def _handle_tw(self, operands: list[Any]) -> None:
        """Handle Tw (Set word spacing) operation - Table 5.2 page 398."""
        self.space_scale = 1.0 + float(operands[0] if operands else 0.0)

    def _handle_tl(self, operands: list[Any]) -> None:
        """Handle TL (Set Text Leading) operation - Table 5.2 page 398."""
        # Scale the leading by the current text-matrix x scale so it is
        # comparable with positions derived from tm_matrix.
        scale_x = math.sqrt(self.tm_matrix[0] ** 2 + self.tm_matrix[2] ** 2)
        self.TL = float(operands[0] if operands else 0.0) * self.font_size * scale_x

    def _handle_tf(self, operands: list[Any]) -> None:
        """Handle Tf (Set font size) operation - Table 5.2 page 398."""
        if self.text != "":
            # Font change ends the current fragment: flush it.
            self.output += self.text  # .translate(cmap)
            if self.visitor_text is not None:
                self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
            self.text = ""
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()
        try:
            self.font_resource = self.font_resources[operands[0]]
            self.font = self.fonts[operands[0]]
        except KeyError:  # font not found
            self.font_resource = None
            font_descriptor = FontDescriptor()
            # Fallback font: every byte maps to U+FFFD (replacement char).
            self.font = Font(
                "Unknown",
                space_width=250,
                encoding=dict.fromkeys(range(256), "\ufffd"),
                font_descriptor=font_descriptor,
                character_map={},
                character_widths=font_descriptor.character_widths
            )

        self._space_width = self.font.space_width / 2  # Actually the width of _half_ a space...
        try:
            self.font_size = float(operands[1])
        except Exception:
            pass  # keep previous size

    def _handle_td(self, operands: list[Any]) -> float:
        """Handle Td (Move text position) operation - Table 5.5 page 406."""
        # A special case is a translating only tm:
        # tm = [1, 0, 0, 1, e, f]
        # i.e. tm[4] += tx, tm[5] += ty.
        tx, ty = float(operands[0]), float(operands[1])
        self.tm_matrix[4] += tx * self.tm_matrix[0] + ty * self.tm_matrix[2]
        self.tm_matrix[5] += tx * self.tm_matrix[1] + ty * self.tm_matrix[3]
        # Hand the width accumulated so far to the caller and reset it.
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_tm(self, operands: list[Any]) -> float:
        """Handle Tm (Set text matrix) operation - Table 5.5 page 406."""
        self.tm_matrix = [float(operand) for operand in operands[:6]]
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_t_star(self, operands: list[Any]) -> float:
        """Handle T* (Move to next line) operation - Table 5.5 page 406."""
        # Translate by -TL along the text-space y axis.
        self.tm_matrix[4] -= self.TL * self.tm_matrix[2]
        self.tm_matrix[5] -= self.TL * self.tm_matrix[3]
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    def _handle_tj_operation(self, operands: list[Any]) -> float:
        """Handle Tj (Show text) operation - Table 5.5 page 406."""
        self.text, self.rtl_dir, self._actual_str_size = self._handle_tj(
            self.text,
            operands,
            self.cm_matrix,
            self.tm_matrix,
            self.font_resource,
            self.font,
            self.orientations,
            self.font_size,
            self.rtl_dir,
            self.visitor_text,
            self._actual_str_size,
        )
        return 0.0  # str_widths will be handled in post-processing
|
||||
631
venv/lib/python3.12/site-packages/pypdf/_utils.py
Normal file
631
venv/lib/python3.12/site-packages/pypdf/_utils.py
Normal file
@@ -0,0 +1,631 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""Utility functions for PDF library."""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
import functools
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from io import DEFAULT_BUFFER_SIZE
|
||||
from os import SEEK_CUR
|
||||
from re import Pattern
|
||||
from typing import (
|
||||
IO,
|
||||
Any,
|
||||
Optional,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
# Python 3.10+: https://www.python.org/dev/peps/pep-0484/
|
||||
from typing import TypeAlias
|
||||
else:
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
if sys.version_info >= (3, 11):
|
||||
from typing import Self
|
||||
else:
|
||||
from typing_extensions import Self
|
||||
|
||||
from .errors import (
|
||||
STREAM_TRUNCATED_PREMATURELY,
|
||||
DeprecationError,
|
||||
PdfStreamError,
|
||||
)
|
||||
|
||||
# A full 3x3 affine transformation matrix as nested tuples of floats.
TransformationMatrixType: TypeAlias = tuple[
    tuple[float, float, float], tuple[float, float, float], tuple[float, float, float]
]
# The six free coefficients (a, b, c, d, e, f) of a transformation matrix;
# the third column is implicitly (0, 0, 1).
CompressedTransformationMatrix: TypeAlias = tuple[
    float, float, float, float, float, float
]

# Any binary stream object (open file, BytesIO, ...).
StreamType = IO[Any]
# Either a string (e.g. a path) or an open stream.
StrByteType = Union[str, StreamType]
||||
|
||||
|
||||
def parse_iso8824_date(text: Optional[str]) -> Optional[datetime]:
    """
    Parse a PDF date string (D:YYYYMMDDHHmmSSOHH'mm') into a datetime.

    Accepts progressively truncated forms (year only, year+month, ...),
    a missing "D:" prefix, and "Z"/"z" as a UTC marker. Naive datetimes are
    returned when no offset is present; a "+0000" offset yields a
    timezone-aware UTC datetime.

    Args:
        text: The raw date string, or None/empty.

    Returns:
        The parsed datetime, or None for empty input.

    Raises:
        ValueError: If no supported format matches.
    """
    original = text
    if not text:
        return None
    # Normalize: ensure the "D:" prefix, expand a bare Z marker to "+0000",
    # and strip the apostrophes separating offset hours and minutes.
    if text[0].isdigit():
        text = "D:" + text
    if text.endswith(("Z", "z")):
        text += "0000"
    text = text.replace("z", "+").replace("Z", "+").replace("'", "")
    offset_pos = max(text.find("+"), text.find("-"))
    if offset_pos > 0 and offset_pos != len(text) - 5:
        # Offset present but minutes missing: pad with "00".
        text += "00"
    candidate_formats = (
        "D:%Y",
        "D:%Y%m",
        "D:%Y%m%d",
        "D:%Y%m%d%H",
        "D:%Y%m%d%H%M",
        "D:%Y%m%d%H%M%S",
        "D:%Y%m%d%H%M%S%z",
    )
    for fmt in candidate_formats:
        try:
            parsed = datetime.strptime(text, fmt)  # noqa: DTZ007
        except ValueError:
            continue
        if text.endswith("+0000"):
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed
    raise ValueError(f"Can not convert date: {original}")
|
||||
|
||||
|
||||
def format_iso8824_date(dt: datetime) -> str:
    """
    Convert a datetime object to PDF date string format.

    Produces D:YYYYMMDDHHmmSS, followed by a +HH'mm' / -HH'mm' offset
    when the datetime is timezone-aware, as specified in the PDF Reference.

    Args:
        dt: A datetime object to convert.

    Returns:
        A date string in PDF format.
    """
    result = dt.strftime("D:%Y%m%d%H%M%S")
    if dt.tzinfo is None:
        return result
    offset = dt.utcoffset()
    assert offset is not None
    total_seconds = int(offset.total_seconds())
    sign = "+" if total_seconds >= 0 else "-"
    hours = abs(total_seconds) // 3600
    minutes = (abs(total_seconds) % 3600) // 60
    return result + f"{sign}{hours:02d}'{minutes:02d}'"
|
||||
|
||||
|
||||
def _get_max_pdf_version_header(header1: str, header2: str) -> str:
|
||||
versions = (
|
||||
"%PDF-1.3",
|
||||
"%PDF-1.4",
|
||||
"%PDF-1.5",
|
||||
"%PDF-1.6",
|
||||
"%PDF-1.7",
|
||||
"%PDF-2.0",
|
||||
)
|
||||
pdf_header_indices = []
|
||||
if header1 in versions:
|
||||
pdf_header_indices.append(versions.index(header1))
|
||||
if header2 in versions:
|
||||
pdf_header_indices.append(versions.index(header2))
|
||||
if len(pdf_header_indices) == 0:
|
||||
raise ValueError(f"Neither {header1!r} nor {header2!r} are proper headers")
|
||||
return versions[max(pdf_header_indices)]
|
||||
|
||||
|
||||
# The whitespace characters recognized by the PDF tokenizer:
# NUL, tab, line feed, form feed, carriage return, and space.
WHITESPACES = (b"\x00", b"\t", b"\n", b"\f", b"\r", b" ")
# Same characters concatenated into one bytes object (for membership tests).
WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
# Regex character class matching any single whitespace byte.
WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]"
|
||||
|
||||
|
||||
def read_until_whitespace(stream: "StreamType", maxchars: Optional[int] = None) -> bytes:
    """
    Read non-whitespace characters and return them.

    Stops upon encountering whitespace (the whitespace byte is consumed but
    not returned), at end of stream, or when maxchars bytes were collected.

    Args:
        stream: The data stream from which was read.
        maxchars: The maximum number of bytes returned; by default unlimited.

    Returns:
        The data which was read.

    """
    collected = bytearray()
    while True:
        byte = stream.read(1)
        if not byte or byte.isspace():
            break
        collected += byte
        if len(collected) == maxchars:
            break
    return bytes(collected)
|
||||
|
||||
|
||||
def read_non_whitespace(stream: "StreamType") -> bytes:
    """
    Find and read the next non-whitespace character (ignores whitespace).

    Args:
        stream: The data stream from which was read.

    Returns:
        The first non-whitespace byte, or b"" at end of stream.

    """
    while True:
        byte = stream.read(1)
        # b"" (EOF) is never in WHITESPACES, so EOF terminates the loop too.
        if byte not in WHITESPACES:
            return byte
|
||||
|
||||
|
||||
def skip_over_whitespace(stream: "StreamType") -> bool:
    """
    Similar to read_non_whitespace, but return a boolean if at least one
    whitespace character was read.

    Note that the first non-whitespace byte is consumed as well.

    Args:
        stream: The data stream from which was read.

    Returns:
        True if one or more whitespace was skipped, otherwise return False.

    """
    skipped_any = False
    byte = stream.read(1)
    while byte in WHITESPACES:
        skipped_any = True
        byte = stream.read(1)
    return skipped_any
|
||||
|
||||
|
||||
def check_if_whitespace_only(value: bytes) -> bool:
    """
    Check if the given value consists of whitespace characters only.

    Args:
        value: The bytes to check.

    Returns:
        True if the value only has whitespace characters (this includes the
        empty bytes object), otherwise return False.

    """
    for byte in value:
        if byte not in WHITESPACES_AS_BYTES:
            return False
    return True
|
||||
|
||||
|
||||
def skip_over_comment(stream: "StreamType") -> None:
    """
    Skip a PDF comment if one starts at the current stream position.

    A comment runs from '%' to the end of the line; the terminating CR or LF
    is consumed. If the next byte is not '%', the position is left unchanged.

    Raises:
        PdfStreamError: If the stream ends inside a comment.
    """
    peeked = stream.read(1)
    stream.seek(-1, 1)
    if peeked != b"%":
        return
    byte = peeked
    while byte not in (b"\n", b"\r"):
        byte = stream.read(1)
        if byte == b"":
            raise PdfStreamError("File ended unexpectedly.")
|
||||
|
||||
|
||||
def read_until_regex(stream: "StreamType", regex: Pattern[bytes]) -> bytes:
    """
    Read until the regular expression pattern matched (ignore the match).
    Treats EOF on the underlying stream as the end of the token to be matched.

    The stream is left positioned at the start of the match (or at EOF).

    Args:
        regex: re.Pattern

    Returns:
        The read bytes.

    """
    buffered = b""
    while True:
        chunk = stream.read(16)
        if not chunk:
            # EOF: everything buffered is the token.
            return buffered
        combined = buffered + chunk
        match = regex.search(combined)
        if match is not None:
            # Rewind so the stream sits at the start of the match.
            stream.seek(match.start() - len(combined), 1)
            return combined[: match.start()]
        buffered = combined
|
||||
|
||||
|
||||
def read_block_backwards(stream: "StreamType", to_read: int) -> bytes:
    """
    Given a stream at position X, read a block of size to_read ending at position X.

    This changes the stream's position to the beginning of where the block was
    read.

    Args:
        stream: The stream to read from.
        to_read: Number of bytes to read backwards.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If fewer than to_read bytes precede the position.

    """
    if stream.tell() < to_read:
        raise PdfStreamError("Could not read malformed PDF file")
    # Jump back, read forward, then jump back again so the final position
    # is the start of the block just read.
    stream.seek(-to_read, SEEK_CUR)
    block = stream.read(to_read)
    stream.seek(-to_read, SEEK_CUR)
    return block
|
||||
|
||||
|
||||
def read_previous_line(stream: StreamType) -> bytes:
    """
    Given a byte stream with current position X, return the previous line.

    All characters between the first CR/LF byte found before X
    (or, the start of the file, if no such byte is found) and position X
    are returned.

    After this call, the stream will be positioned one byte after the
    first non-CRLF character found beyond the first CR/LF byte before X,
    or, if no such byte is found, at the beginning of the stream.

    Args:
        stream: The stream to read from, positioned at X.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If the stream is already at position 0.

    """
    line_content = []
    found_crlf = False
    if stream.tell() == 0:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    while True:
        # Scan backwards one buffer-sized block at a time.
        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
        if to_read == 0:
            break
        # Read the block. After this, our stream will be one
        # beyond the initial position.
        block = read_block_backwards(stream, to_read)
        idx = len(block) - 1
        if not found_crlf:
            # We haven't found our first CR/LF yet.
            # Read off characters until we hit one.
            while idx >= 0 and block[idx] not in b"\r\n":
                idx -= 1
            if idx >= 0:
                found_crlf = True
        if found_crlf:
            # We found our first CR/LF already (on this block or
            # a previous one).
            # Our combined line is the remainder of the block
            # plus any previously read blocks.
            line_content.append(block[idx + 1 :])
            # Continue to read off any more CRLF characters.
            while idx >= 0 and block[idx] in b"\r\n":
                idx -= 1
        else:
            # Didn't find CR/LF yet - add this block to our
            # previously read blocks and continue.
            line_content.append(block)
        if idx >= 0:
            # We found the next non-CRLF character.
            # Set the stream position correctly, then break
            stream.seek(idx + 1, SEEK_CUR)
            break
    # Join all the blocks in the line (which are in reverse order)
    return b"".join(line_content[::-1])
|
||||
|
||||
|
||||
def matrix_multiply(
    a: "TransformationMatrixType", b: "TransformationMatrixType"
) -> "TransformationMatrixType":
    """
    Multiply two 3x3 matrices, returning a @ b as nested tuples of floats.

    Args:
        a: Left-hand matrix.
        b: Right-hand matrix.

    Returns:
        The matrix product.
    """
    columns_of_b = list(zip(*b))
    rows = []
    for row in a:
        rows.append(
            tuple(
                sum(float(x) * float(y) for x, y in zip(row, col))
                for col in columns_of_b
            )
        )
    return tuple(rows)  # type: ignore[return-value]
|
||||
|
||||
|
||||
def mark_location(stream: "StreamType") -> None:
    """
    Create a text file showing the current stream location in context.

    Writes 5000 bytes before and after the current position to
    "pypdf_pdfLocation.txt" with a "HERE" marker between them, then restores
    the position relative to the bytes read. Mainly for debugging.
    """
    context_radius = 5000
    stream.seek(-context_radius, 1)
    with open("pypdf_pdfLocation.txt", "wb") as output_fh:
        output_fh.write(stream.read(context_radius))
        output_fh.write(b"HERE")
        output_fh.write(stream.read(context_radius))
    stream.seek(-context_radius, 1)
|
||||
|
||||
|
||||
@overload
def ord_(b: str) -> int:
    ...


@overload
def ord_(b: bytes) -> bytes:
    ...


@overload
def ord_(b: int) -> int:
    ...


def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
    """Return the code point of a one-character str; pass ints/bytes through."""
    return ord(b) if isinstance(b, str) else b
|
||||
|
||||
|
||||
def deprecate(msg: str, stacklevel: int = 3) -> None:
    """Emit a DeprecationWarning with the given message.

    Args:
        msg: The warning text.
        stacklevel: How many frames up to attribute the warning to.
    """
    warnings.warn(message=msg, category=DeprecationWarning, stacklevel=stacklevel)
|
||||
|
||||
|
||||
def deprecation(msg: str) -> None:
    """Raise a DeprecationError — for features that were already removed."""
    raise DeprecationError(msg)
|
||||
|
||||
|
||||
def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed, but has a replacement."""
    message = (
        f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead."
    )
    deprecate(message, 4)
|
||||
|
||||
|
||||
def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed, but has a replacement."""
    message = (
        f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead."
    )
    deprecation(message)
|
||||
|
||||
|
||||
def deprecate_no_replacement(name: str, removed_in: str) -> None:
    """Issue a warning that a feature will be removed without replacement."""
    message = f"{name} is deprecated and will be removed in pypdf {removed_in}."
    deprecate(message, 4)
|
||||
|
||||
|
||||
def deprecation_no_replacement(name: str, removed_in: str) -> None:
    """Raise an exception that a feature was already removed without replacement."""
    message = f"{name} is deprecated and was removed in pypdf {removed_in}."
    deprecation(message)
|
||||
|
||||
|
||||
def logger_error(msg: str, src: str) -> None:
    """
    Use this instead of logger.error directly.

    That allows people to overwrite it more easily.

    See the docs on when to use which:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html
    """
    source_logger = logging.getLogger(src)
    source_logger.error(msg)
|
||||
|
||||
|
||||
def logger_warning(msg: str, src: str) -> None:
    """
    Use this instead of logger.warning directly.

    That allows people to overwrite it more easily.

    ## Exception, warnings.warn, logger_warning
    - Exceptions should be used if the user should write code that deals with
      an error case, e.g. the PDF being completely broken.
    - warnings.warn should be used if the user needs to fix their code, e.g.
      DeprecationWarnings
    - logger_warning should be used if the user needs to know that an issue was
      handled by pypdf, e.g. a non-compliant PDF being read in a way that
      pypdf could apply a robustness fix to still read it. This applies mainly
      to strict=False mode.
    """
    source_logger = logging.getLogger(src)
    source_logger.warning(msg)
|
||||
|
||||
|
||||
def rename_kwargs(
    func_name: str, kwargs: dict[str, Any], aliases: dict[str, str], fail: bool = False
) -> None:
    """
    Helper function to deprecate arguments.

    Translates deprecated keyword-argument names to their new names, mutating
    ``kwargs`` in place and emitting a DeprecationWarning per translation.

    Args:
        func_name: Name of the function to be deprecated
        kwargs: The keyword arguments received by the caller (mutated in place).
        aliases: Mapping of deprecated name -> replacement name.
        fail: If True, raise DeprecationError instead of warning.

    Raises:
        TypeError: If both the deprecated and the new name were supplied.
    """
    for deprecated_name, replacement in aliases.items():
        if deprecated_name not in kwargs:
            continue
        if fail:
            raise DeprecationError(
                f"{deprecated_name} is deprecated as an argument. Use {replacement} instead"
            )
        if replacement in kwargs:
            raise TypeError(
                f"{func_name} received both {deprecated_name} and {replacement} as "
                f"an argument. {deprecated_name} is deprecated. "
                f"Use {replacement} instead."
            )
        kwargs[replacement] = kwargs.pop(deprecated_name)
        warnings.warn(
            message=(
                f"{deprecated_name} is deprecated as an argument. Use {replacement} instead"
            ),
            category=DeprecationWarning,
            stacklevel=3,
        )
|
||||
|
||||
|
||||
def _human_readable_bytes(bytes: int) -> str:
|
||||
if bytes < 10**3:
|
||||
return f"{bytes} Byte"
|
||||
if bytes < 10**6:
|
||||
return f"{bytes / 10**3:.1f} kB"
|
||||
if bytes < 10**9:
|
||||
return f"{bytes / 10**6:.1f} MB"
|
||||
return f"{bytes / 10**9:.1f} GB"
|
||||
|
||||
|
||||
# The following class has been copied from Django:
|
||||
# https://github.com/django/django/blob/adae619426b6f50046b3daaa744db52989c9d6db/django/utils/functional.py#L51-L65
|
||||
# It received some modifications to comply with our own coding standards.
|
||||
#
|
||||
# Original license:
|
||||
#
|
||||
# ---------------------------------------------------------------------------------
|
||||
# Copyright (c) Django Software Foundation and individual contributors.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without modification,
|
||||
# are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of Django nor the names of its contributors may be used
|
||||
# to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
# ---------------------------------------------------------------------------------
|
||||
class classproperty:  # noqa: N801
    """
    Decorator that converts a method with a single cls argument into a property
    that can be accessed directly from the class.
    """

    def __init__(self, method=None) -> None:  # type: ignore  # noqa: ANN001
        # The wrapped function; may also be attached later via getter().
        self.fget = method

    def __get__(self, instance, cls=None) -> Any:  # type: ignore  # noqa: ANN001
        # Always invoke with the class, regardless of instance access.
        return self.fget(cls)

    def getter(self, method) -> "Self":  # type: ignore  # noqa: ANN001
        """Set the wrapped function and return self (property-style chaining)."""
        self.fget = method
        return self
|
||||
|
||||
|
||||
@dataclass
class File:
    """A named binary payload extracted from a PDF, with an optional back-reference
    to the object that stores its stream."""

    # Imported in the class body so IndirectObject is available for the
    # annotation below without creating a module-level import cycle.
    from .generic import IndirectObject  # noqa: PLC0415

    name: str = ""
    """
    Filename as identified within the PDF file.
    """
    data: bytes = b""
    """
    Data as bytes.
    """
    indirect_reference: Optional[IndirectObject] = None
    """
    Reference to the object storing the stream.
    """

    def __str__(self) -> str:
        # Human-readable summary; data is shown as a formatted size, not raw bytes.
        return f"{self.__class__.__name__}(name={self.name}, data: {_human_readable_bytes(len(self.data))})"

    def __repr__(self) -> str:
        # Same as __str__ but with a content hash appended before the closing paren.
        return self.__str__()[:-1] + f", hash: {hash(self.data)})"
|
||||
|
||||
|
||||
@functools.total_ordering
class Version:
    """
    Compare dotted version strings component-wise, e.g. "1.10" > "1.9".

    Each dot-separated component is split into a leading integer and a
    textual suffix; integers compare numerically, suffixes lexicographically.
    """

    COMPONENT_PATTERN = re.compile(r"^(\d+)(.*)$")

    def __init__(self, version_str: str) -> None:
        self.version_str = version_str
        self.components = self._parse_version(version_str)

    def _parse_version(self, version_str: str) -> list[tuple[int, str]]:
        # Turn every dot-separated part into an (int prefix, str suffix) pair;
        # parts without a numeric prefix become (0, part).
        parsed: list[tuple[int, str]] = []
        for part in version_str.split("."):
            match = Version.COMPONENT_PATTERN.match(part)
            if match is None:
                parsed.append((0, part))
                continue
            prefix = match.group(1)
            if prefix is None:
                prefix = 0
            parsed.append((int(prefix), match.group(2)))
        return parsed

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Version) and self.components == other.components

    def __hash__(self) -> int:
        # Convert to tuple as lists cannot be hashed.
        return hash((self.__class__, tuple(self.components)))

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Version):
            raise ValueError(f"Version cannot be compared against {type(other)}")

        for (value_a, suffix_a), (value_b, suffix_b) in zip(
            self.components, other.components
        ):
            # Numeric prefix decides first, then the textual suffix.
            if value_a != value_b:
                return value_a < value_b
            if suffix_a != suffix_b:
                return suffix_a < suffix_b

        # All shared components equal: the shorter version is the smaller one.
        return len(self.components) < len(other.components)
1
venv/lib/python3.12/site-packages/pypdf/_version.py
Normal file
1
venv/lib/python3.12/site-packages/pypdf/_version.py
Normal file
@@ -0,0 +1 @@
|
||||
__version__ = "6.6.2"
|
||||
3307
venv/lib/python3.12/site-packages/pypdf/_writer.py
Normal file
3307
venv/lib/python3.12/site-packages/pypdf/_writer.py
Normal file
File diff suppressed because it is too large
Load Diff
577
venv/lib/python3.12/site-packages/pypdf/_xobj_image_helpers.py
Normal file
577
venv/lib/python3.12/site-packages/pypdf/_xobj_image_helpers.py
Normal file
@@ -0,0 +1,577 @@
|
||||
"""Functions to convert an image XObject to an image"""
|
||||
|
||||
import sys
|
||||
from io import BytesIO
|
||||
from typing import Any, Literal, Optional, Union, cast
|
||||
|
||||
from ._utils import check_if_whitespace_only, logger_warning
|
||||
from .constants import ColorSpaces, StreamAttributes
|
||||
from .constants import FilterTypes as FT
|
||||
from .constants import ImageAttributes as IA
|
||||
from .errors import EmptyImageDataError, PdfReadError
|
||||
from .generic import (
|
||||
ArrayObject,
|
||||
DecodedStreamObject,
|
||||
EncodedStreamObject,
|
||||
NullObject,
|
||||
TextStringObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
from typing import TypeAlias
|
||||
else:
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
|
||||
try:
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pillow is required to do image extraction. "
|
||||
"It can be installed via 'pip install pypdf[image]'"
|
||||
)
|
||||
|
||||
mode_str_type: TypeAlias = Literal[
|
||||
"", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
|
||||
]
|
||||
|
||||
MAX_IMAGE_MODE_NESTING_DEPTH: int = 10
|
||||
|
||||
|
||||
def _get_image_mode(
    color_space: Union[str, list[Any], Any],
    color_components: int,
    prev_mode: mode_str_type,
    depth: int = 0,
) -> tuple[mode_str_type, bool]:
    """
    Resolve a PDF color space specification to a Pillow image mode.

    Recurses through composite color spaces (/ICCBased, /Indexed,
    /Separation, /DeviceN) up to MAX_IMAGE_MODE_NESTING_DEPTH levels.

    Args:
        color_space: /ColorSpace entry — a name string, an array, or null.
        color_components: Number of color components (may index a fallback mode).
        prev_mode: Mode to fall back to if nothing else matches.
        depth: Current recursion depth (internal).

    Returns:
        Image mode, not taking into account mask (transparency).
        ColorInversion is required (like for some DeviceCMYK).

    Raises:
        PdfReadError: On excessive nesting or an uninterpretable color space.

    """
    if depth > MAX_IMAGE_MODE_NESTING_DEPTH:
        raise PdfReadError(
            "Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."
        )
    if is_null_or_none(color_space):
        return "", False
    color_space_str: str = ""
    if isinstance(color_space, str):
        color_space_str = color_space
    elif not isinstance(color_space, list):
        raise PdfReadError(
            "Cannot interpret color space", color_space
        )  # pragma: no cover
    elif not color_space:
        return "", False
    elif color_space[0].startswith("/Cal"):  # /CalRGB or /CalGray
        # Calibrated spaces are treated like their device counterparts.
        color_space_str = "/Device" + color_space[0][4:]
    elif color_space[0] == "/ICCBased":
        # The stream dictionary carries /N (component count) and an
        # optional /Alternate device space.
        icc_profile = color_space[1].get_object()
        color_components = cast(int, icc_profile["/N"])
        color_space_str = icc_profile.get("/Alternate", "")
    elif color_space[0] == "/Indexed":
        color_space_str = color_space[1].get_object()
        mode, invert_color = _get_image_mode(
            color_space_str, color_components, prev_mode, depth + 1
        )
        # Indexed RGB/CMYK data is palette data as far as Pillow is concerned.
        if mode in ("RGB", "CMYK"):
            mode = "P"
        return mode, invert_color
    elif color_space[0] == "/Separation":
        color_space_str = color_space[2].get_object()
        mode, invert_color = _get_image_mode(
            color_space_str, color_components, prev_mode, depth + 1
        )
        # Separation channels always require color inversion.
        return mode, True
    elif color_space[0] == "/DeviceN":
        original_color_space = color_space
        color_components = len(color_space[1])
        color_space_str = color_space[2].get_object()
        if color_space_str == "/DeviceCMYK" and color_components == 1:
            # Single-channel DeviceN over CMYK degrades to grayscale.
            if original_color_space[1][0] != "/Black":
                logger_warning(
                    f"Color {original_color_space[1][0]} converted to Gray. Please share PDF with pypdf dev team",
                    __name__,
                )
            return "L", True
        mode, invert_color = _get_image_mode(
            color_space_str, color_components, prev_mode, depth + 1
        )
        return mode, invert_color

    mode_map: dict[str, mode_str_type] = {
        "1bit": "1",  # must be zeroth position: color_components may index the values
        "/DeviceGray": "L",  # must be first position: color_components may index the values
        "palette": "P",  # must be second position: color_components may index the values
        "/DeviceRGB": "RGB",  # must be third position: color_components may index the values
        "/DeviceCMYK": "CMYK",  # must be fourth position: color_components may index the values
        "2bit": "2bits",
        "4bit": "4bits",
    }

    # Fall back in order: direct name lookup, then positional lookup by
    # component count (see the position comments above), then prev_mode.
    mode = (
        mode_map.get(color_space_str)
        or list(mode_map.values())[color_components]
        or prev_mode
    )

    return mode, mode == "CMYK"
def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:
    """
    Unpack pixel values of ``bits`` bits each into one byte per pixel.

    Rows are padded to a byte boundary, so reading restarts on a fresh
    byte at the start of every scanline.

    Args:
        data: Packed pixel data.
        size: Image dimensions as (width, height).
        bits: Bits per pixel (e.g. 1, 2 or 4).

    Returns:
        A bytes object of length width * height, one pixel value per byte.

    """
    width, height = size
    mask = (1 << bits) - 1
    output = bytearray(width * height)
    position = 0
    shift = 8 - bits
    for row in range(height):
        # A new row always begins on a byte boundary: skip any partially
        # consumed byte left over from the previous row.
        if shift != 8 - bits:
            position += 1
            shift = 8 - bits
        for column in range(width):
            output[column + row * width] = (data[position] >> shift) & mask
            shift -= bits
            if shift < 0:
                position += 1
                shift = 8 - bits
    return bytes(output)
def _extended_image_from_bytes(
    mode: str, size: tuple[int, int], data: bytes
) -> Image.Image:
    """
    Build a Pillow image from raw bytes, compensating for PDFs whose
    stream holds fewer samples per pixel than the target mode requires.

    Raises:
        EmptyImageDataError: If ``data`` is empty.
        ValueError: If the byte count cannot be reconciled with ``size``.

    """
    try:
        return Image.frombytes(mode, size, data)
    except ValueError as exc:
        pixel_count = size[0] * size[1]
        byte_count = len(data)
        if byte_count == 0:
            raise EmptyImageDataError(
                "Data is 0 bytes, cannot process an image from empty data."
            ) from exc
        if byte_count % pixel_count != 0:
            # Byte count does not divide evenly into pixels: unrecoverable.
            raise exc
        # Replicate every input byte so each pixel receives len(mode) samples.
        repeat = pixel_count * len(mode) / byte_count
        expanded = b"".join(bytes((value,) * int(repeat)) for value in data)
        return Image.frombytes(mode, size, expanded)
def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, Any]:
    """
    Split an /Indexed color space array into (color_space, base, hival, lookup).

    Supports the regular four-element form as well as a malformed
    three-element variant produced by some AutoDesk tools, where ``base``
    and ``hival`` are fused into one entry separated by a NUL byte
    (e.g. ``/DeviceRGB`` + NUL + ``255``).

    Raises:
        PdfReadError: If the array matches neither shape.

    """
    count = len(color_space)
    if count == 4:
        resolved = [entry.get_object() for entry in color_space]
        return resolved[0], resolved[1], resolved[2], resolved[3]

    second = color_space[1]
    if not isinstance(second, str):
        second = second.get_object()
    if count == 3 and "\x00" in second:
        # Strange AutoDesk fusion of base and hival in one string.
        base, hival = second.split("\x00")
        return (
            color_space[0].get_object(),
            base,
            int(hival),
            color_space[2].get_object(),
        )
    raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")
def _handle_flate(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
    obj_as_text: str,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process image encoded in flateEncode.

    Args:
        size: (width, height) in pixels.
        data: Decompressed stream bytes.
        mode: Pillow mode derived from the color space by the caller.
        color_space: The XObject's /ColorSpace entry.
        colors: The /Colors entry (samples per pixel).
        obj_as_text: Textual repr of the XObject, used in warnings.

    Returns:
        img, image_format, extension, color inversion (always False here).

    """
    extension = ".png"  # mime_type: "image/png"
    image_format = "PNG"
    lookup: Any
    base: Any
    hival: Any
    if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":
        color_space, base, hival, lookup = __handle_flate__indexed(color_space)
    # Sub-byte pixel depths must be unpacked to one byte per pixel first.
    if mode == "2bits":
        mode = "P"
        data = bits2byte(data, size, 2)
    elif mode == "4bits":
        mode = "P"
        data = bits2byte(data, size, 4)
    img = _extended_image_from_bytes(mode, size, data)
    if color_space == "/Indexed":
        # Normalize the lookup table to raw bytes regardless of how it
        # was stored (stream object, text string, or str).
        if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):
            lookup = lookup.get_data()
        if isinstance(lookup, TextStringObject):
            lookup = lookup.original_bytes
        if isinstance(lookup, str):
            lookup = lookup.encode()
        try:
            # nb: bytes per palette entry; conv: intermediate conversion
            # mode; mode: final target mode.
            nb, conv, mode = {  # type: ignore
                "1": (0, "", ""),
                "L": (1, "P", "L"),
                "P": (0, "", ""),
                "RGB": (3, "P", "RGB"),
                "CMYK": (4, "P", "CMYK"),
            }[_get_image_mode(base, 0, "")[0]]
        except KeyError:  # pragma: no cover
            logger_warning(
                f"Base {base} not coded please share the pdf file with pypdf dev team",
                __name__,
            )
            lookup = None
        else:
            if img.mode == "1":
                # Two values ("high" and "low").
                expected_count = 2 * nb
                actual_count = len(lookup)
                if actual_count != expected_count:
                    if actual_count < expected_count:
                        logger_warning(
                            f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                        lookup += bytes([0] * (expected_count - actual_count))
                    elif not check_if_whitespace_only(lookup[expected_count:]):
                        logger_warning(
                            f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                    lookup = lookup[:expected_count]
                # Map each 1-bit pixel to the corresponding palette entry.
                colors_arr = [lookup[:nb], lookup[nb:]]
                arr = b"".join(
                    b"".join(
                        colors_arr[1 if img.getpixel((x, y)) > 127 else 0]  # type: ignore[operator,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
                        for x in range(img.size[0])
                    )
                    for y in range(img.size[1])
                )
                img = Image.frombytes(mode, img.size, arr)
            else:
                img = img.convert(conv)
                if len(lookup) != (hival + 1) * nb:
                    logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)
                    lookup = None
                elif mode == "L":
                    # gray lookup does not work: it is converted to a similar RGB lookup
                    lookup = b"".join([bytes([b, b, b]) for b in lookup])
                    mode = "RGB"
                # TODO: https://github.com/py-pdf/pypdf/pull/2039
                # this is a work around until PIL is able to process CMYK images
                elif mode == "CMYK":
                    _rgb = []
                    for _c, _m, _y, _k in (
                        lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)
                    ):
                        _r = int(255 * (1 - _c / 255) * (1 - _k / 255))
                        _g = int(255 * (1 - _m / 255) * (1 - _k / 255))
                        _b = int(255 * (1 - _y / 255) * (1 - _k / 255))
                        _rgb.append(bytes((_r, _g, _b)))
                    lookup = b"".join(_rgb)
                    mode = "RGB"
                if lookup is not None:
                    img.putpalette(lookup, rawmode=mode)
                img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")
    elif not isinstance(color_space, NullObject) and color_space[0] == "/ICCBased":
        # Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary
        mode2 = _get_image_mode(color_space, colors, mode)[0]
        if mode != mode2:
            img = Image.frombytes(mode2, size, data)  # reloaded as mode may have changed
    if mode == "CMYK":
        extension = ".tif"
        image_format = "TIFF"
    return img, image_format, extension, False
def _handle_jpx(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process an image encoded with JPXDecode (JPEG 2000).

    Returns img, image_format, extension, inversion

    """
    extension = ".jp2"  # mime_type: "image/x-jp2"
    img1: Image.Image = Image.open(BytesIO(data), formats=("JPEG2000",))
    mode, invert_color = _get_image_mode(color_space, colors, mode)
    if mode == "":
        # No usable /ColorSpace information: trust the mode Pillow detected.
        mode = cast(mode_str_type, img1.mode)
        invert_color = mode in ("CMYK",)
    if img1.mode == "RGBA" and mode == "RGB":
        mode = "RGBA"
    # we need to convert to the good mode
    if img1.mode == mode or {img1.mode, mode} == {"L", "P"}:  # compare (unordered) sets
        # L and P are indexed modes which should not be changed.
        img = img1
    elif {img1.mode, mode} == {"RGBA", "CMYK"}:
        # RGBA / CMYK are 4bytes encoding where
        # the encoding should be corrected
        img = Image.frombytes(mode, img1.size, img1.tobytes())
    else:  # pragma: no cover
        img = img1.convert(mode)
    # CMYK conversion
    # https://stackoverflow.com/questions/38855022/conversion-from-cmyk-to-rgb-with-pillow-is-different-from-that-of-photoshop
    # not implemented for the moment as I need to get properly the ICC
    if img.mode == "CMYK":
        img = img.convert("RGB")
    image_format = "JPEG2000"
    return img, image_format, extension, invert_color
def _apply_decode(
    img: Image.Image,
    x_object_obj: dict[str, Any],
    lfilters: FT,
    color_space: Union[str, list[Any], Any],
    invert_color: bool,
) -> Image.Image:
    """
    Apply the XObject's /Decode array to ``img`` via a point lookup table.

    CMYK images and some other color spaces without an explicit decode
    require reverting the scale (cf p243,2§ last sentence), which is
    expressed here as a default decode of [1.0, 0.0] per band.
    """
    needs_default_inversion = (
        img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)
    ) or (invert_color and img.mode == "L")
    default = [1.0, 0.0] * len(img.getbands()) if needs_default_inversion else None
    decode = x_object_obj.get(IA.DECODE, default)

    if isinstance(color_space, ArrayObject):
        family = color_space[0].get_object()
        if family == "/Indexed":
            decode = None  # decode is meaningless if Indexed
        if family == "/Separation":
            decode = [1.0, 0.0] * len(img.getbands())

    # The identity decode [0, 1, 0, 1, ...] needs no remapping at all.
    is_identity = decode is not None and all(
        value == index % 2 for index, value in enumerate(decode)
    )
    if decode is not None and not is_identity:
        lut: list[int] = []
        for i in range(0, len(decode), 2):
            dmin = decode[i]
            dmax = decode[i + 1]
            # Map every 8-bit sample linearly into [dmin, dmax].
            lut.extend(
                round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)) for j in range(256)
            )
        img = img.point(lut)
    return img
def _get_mode_and_invert_color(
    x_object_obj: dict[str, Any], colors: int, color_space: Union[str, list[Any], Any]
) -> tuple[mode_str_type, bool]:
    """
    Derive the Pillow mode and color-inversion flag for an image XObject.

    Sub-byte /BitsPerComponent values are mapped through the "Nbit"
    pseudo color spaces understood by _get_image_mode.
    """
    if (
        IA.COLOR_SPACE in x_object_obj
        and x_object_obj[IA.COLOR_SPACE] == ColorSpaces.DEVICE_RGB
    ):
        # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes
        # NOTE(review): this assignment appears to be overwritten by both
        # branches of the if/else below — confirm whether it is dead code.
        mode: mode_str_type = "RGB"
    if x_object_obj.get("/BitsPerComponent", 8) < 8:
        mode, invert_color = _get_image_mode(
            f"{x_object_obj.get('/BitsPerComponent', 8)}bit", 0, ""
        )
    else:
        mode, invert_color = _get_image_mode(
            color_space,
            # A single-sample pixel in a non-gray color space is treated as
            # having 2 components for the positional mode lookup.
            2
            if (
                colors == 1
                and (
                    not is_null_or_none(color_space)
                    and "Gray" not in color_space
                )
            )
            else colors,
            "",
        )
    return mode, invert_color
def _xobj_to_image(
    x_object: dict[str, Any],
    pillow_parameters: Union[dict[str, Any], None] = None
) -> tuple[Optional[str], bytes, Any]:
    """
    Users need to have the pillow package installed.

    It's unclear if pypdf will keep this function here, hence it's private.
    It might get removed at any point.

    Args:
        x_object:
        pillow_parameters: parameters provided to Pillow Image.save() method,
            cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>

    Returns:
        Tuple[file extension, bytes, PIL.Image.Image]

    """
    def _apply_alpha(
        img: Image.Image,
        x_object: dict[str, Any],
        obj_as_text: str,
        image_format: str,
        extension: str,
    ) -> tuple[Image.Image, str, str]:
        # Merge the soft mask (/SMask), if any, into the image as an alpha
        # channel and switch to a format that supports transparency.
        alpha = None
        if IA.S_MASK in x_object:  # add alpha channel
            # The mask is itself an image XObject; recurse to decode it.
            alpha = _xobj_to_image(x_object[IA.S_MASK])[2]
            if img.size != alpha.size:
                logger_warning(
                    f"image and mask size not matching: {obj_as_text}", __name__
                )
            else:
                # TODO: implement mask
                if alpha.mode != "L":
                    alpha = alpha.convert("L")
                if img.mode == "P":
                    img = img.convert("RGB")
                elif img.mode == "1":
                    img = img.convert("L")
                img.putalpha(alpha)
            if "JPEG" in image_format:
                image_format = "JPEG2000"
                extension = ".jp2"
            else:
                image_format = "PNG"
                extension = ".png"
        return img, extension, image_format

    # For error reporting
    # NOTE(review): when x_object is None the first branch would itself raise
    # AttributeError on .indirect_reference — confirm the condition's intent.
    obj_as_text = (
        x_object.indirect_reference.__repr__()
        if x_object is None  # pragma: no cover
        else x_object.__repr__()
    )

    # Get size and data
    size = (cast(int, x_object[IA.WIDTH]), cast(int, x_object[IA.HEIGHT]))
    data = x_object.get_data()  # type: ignore
    if isinstance(data, str):  # pragma: no cover
        data = data.encode()
    # Strip a single trailing newline that some producers append.
    if len(data) % (size[0] * size[1]) == 1 and data[-1] == 0x0A:  # ie. '\n'
        data = data[:-1]

    # Get color properties
    colors = x_object.get("/Colors", 1)
    color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
    if isinstance(color_space, list) and len(color_space) == 1:
        color_space = color_space[0].get_object()

    mode, invert_color = _get_mode_and_invert_color(x_object, colors, color_space)

    # Get filters
    filters = x_object.get(StreamAttributes.FILTER, NullObject()).get_object()
    # Only the last filter of a chain determines the image encoding.
    lfilters = filters[-1] if isinstance(filters, list) else filters
    # NOTE(review): decode_parms is normalized below but never read afterwards
    # in this function — possibly leftover; verify before removing.
    decode_parms = x_object.get(StreamAttributes.DECODE_PARMS, None)
    if decode_parms and isinstance(decode_parms, (tuple, list)):
        decode_parms = decode_parms[0]
    else:
        decode_parms = {}
    if not isinstance(decode_parms, dict):
        decode_parms = {}

    extension = None
    # Dispatch on the (last) stream filter to decode the pixel data.
    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
        img, image_format, extension, _ = _handle_flate(
            size,
            data,
            mode,
            color_space,
            colors,
            obj_as_text,
        )
    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE):
        # I'm not sure if the following logic is correct.
        # There might not be any relationship between the filters and the
        # extension
        if lfilters == FT.LZW_DECODE:
            image_format = "TIFF"
            extension = ".tiff"  # mime_type = "image/tiff"
        else:
            image_format = "PNG"
            extension = ".png"  # mime_type = "image/png"
        try:
            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
        except UnidentifiedImageError:
            img = _extended_image_from_bytes(mode, size, data)
    elif lfilters == FT.DCT_DECODE:
        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
        # invert_color kept unchanged
    elif lfilters == FT.JPX_DECODE:
        img, image_format, extension, invert_color = _handle_jpx(
            size, data, mode, color_space, colors
        )
    elif lfilters == FT.CCITT_FAX_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("TIFF",)),
            "TIFF",
            ".tiff",
            False,
        )
    elif lfilters == FT.JBIG2_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("PNG", "PPM")),
            "PNG",
            ".png",
            False,
        )
    elif mode == "CMYK":
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "TIFF",
            ".tif",
            False,
        )
    elif mode == "":
        raise PdfReadError(f"ColorSpace field not found in {x_object}")
    else:
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "PNG",
            ".png",
            False,
        )

    img = _apply_decode(img, x_object, lfilters, color_space, invert_color)
    img, extension, image_format = _apply_alpha(
        img, x_object, obj_as_text, image_format, extension
    )

    if pillow_parameters is None:
        pillow_parameters = {}
    # Preserve JPEG image quality - see issue #3515.
    if image_format == "JPEG":
        # This prevents: Cannot use 'keep' when original image is not a JPEG:
        # "JPEG" is the value of PIL.JpegImagePlugin.JpegImageFile.format
        img.format = "JPEG"
        if "quality" not in pillow_parameters:
            pillow_parameters["quality"] = "keep"

    # Save image to bytes
    img_byte_arr = BytesIO()
    try:
        img.save(img_byte_arr, format=image_format, **pillow_parameters)
    except OSError:  # pragma: no cover # covered with pillow 10.3
        # in case of we convert to RGBA and then to PNG
        img1 = img.convert("RGBA")
        image_format = "PNG"
        extension = ".png"
        img_byte_arr = BytesIO()
        img1.save(img_byte_arr, format=image_format)
    data = img_byte_arr.getvalue()

    try:  # temporary try/except until other fixes of images
        img = Image.open(BytesIO(data))
    except Exception as exception:
        logger_warning(f"Failed loading image: {exception}", __name__)
        img = None  # type: ignore[assignment,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
    return extension, data, img
@@ -0,0 +1,42 @@
|
||||
"""
|
||||
PDF specifies several annotation types which pypdf makes available here.
|
||||
|
||||
The names of the annotations and their attributes do not reflect the names in
|
||||
the specification in all cases. For example, the PDF standard defines a
|
||||
'Square' annotation that does not actually need to be square. For this reason,
|
||||
pypdf calls it 'Rectangle'.
|
||||
|
||||
At their core, all annotation types are DictionaryObjects. That means if pypdf
|
||||
does not implement a feature, users can easily extend the given functionality.
|
||||
"""
|
||||
|
||||
|
||||
from ._base import NO_FLAGS, AnnotationDictionary
|
||||
from ._markup_annotations import (
|
||||
Ellipse,
|
||||
FreeText,
|
||||
Highlight,
|
||||
Line,
|
||||
MarkupAnnotation,
|
||||
Polygon,
|
||||
PolyLine,
|
||||
Rectangle,
|
||||
Text,
|
||||
)
|
||||
from ._non_markup_annotations import Link, Popup
|
||||
|
||||
__all__ = [
|
||||
"NO_FLAGS",
|
||||
"AnnotationDictionary",
|
||||
"Ellipse",
|
||||
"FreeText",
|
||||
"Highlight",
|
||||
"Line",
|
||||
"Link",
|
||||
"MarkupAnnotation",
|
||||
"PolyLine",
|
||||
"Polygon",
|
||||
"Popup",
|
||||
"Rectangle",
|
||||
"Text",
|
||||
]
|
||||
29
venv/lib/python3.12/site-packages/pypdf/annotations/_base.py
Normal file
29
venv/lib/python3.12/site-packages/pypdf/annotations/_base.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from abc import ABC
|
||||
|
||||
from ..constants import AnnotationFlag
|
||||
from ..generic import NameObject, NumberObject
|
||||
from ..generic._data_structures import DictionaryObject
|
||||
|
||||
|
||||
class AnnotationDictionary(DictionaryObject, ABC):
    """
    Common base for all annotation dictionaries.

    Sets ``/Type = /Annot``; concrete subclasses add ``/Subtype`` and
    their type-specific entries.
    """

    def __init__(self) -> None:
        super().__init__()

        # The redundant in-method `from ..generic._base import NameObject`
        # was removed: NameObject is already imported at module level.

        # /Rect should not be added here as Polygon and PolyLine can automatically set it
        self[NameObject("/Type")] = NameObject("/Annot")
        # The flags were NOT added to the constructor on purpose:
        # We expect that most users don't want to change the default.
        # If they do, they can use the property. The default is 0.

    @property
    def flags(self) -> AnnotationFlag:
        """Annotation flags (the /F entry); defaults to no flags set."""
        return self.get(NameObject("/F"), AnnotationFlag(0))

    @flags.setter
    def flags(self, value: AnnotationFlag) -> None:
        self[NameObject("/F")] = NumberObject(value)


NO_FLAGS = AnnotationFlag(0)
||||
@@ -0,0 +1,305 @@
|
||||
import sys
|
||||
from abc import ABC
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from ..constants import AnnotationFlag
|
||||
from ..generic import ArrayObject, DictionaryObject
|
||||
from ..generic._base import (
|
||||
BooleanObject,
|
||||
FloatObject,
|
||||
NameObject,
|
||||
NumberObject,
|
||||
TextStringObject,
|
||||
)
|
||||
from ..generic._rectangle import RectangleObject
|
||||
from ..generic._utils import hex_to_rgb
|
||||
from ._base import NO_FLAGS, AnnotationDictionary
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
from typing import TypeAlias
|
||||
else:
|
||||
# PEP 613 introduced typing.TypeAlias with Python 3.10
|
||||
# For older Python versions, the backport typing_extensions is necessary:
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
|
||||
Vertex: TypeAlias = tuple[float, float]
|
||||
|
||||
|
||||
def _get_bounding_rectangle(vertices: list[Vertex]) -> RectangleObject:
    """Return the smallest axis-aligned rectangle enclosing all vertices."""
    x_min, y_min = vertices[0]
    x_max, y_max = x_min, y_min
    for x, y in vertices:
        x_min = min(x_min, x)
        y_min = min(y_min, y)
        x_max = max(x_max, x)
        y_max = max(y_max, y)
    return RectangleObject((x_min, y_min, x_max, y_max))
class MarkupAnnotation(AnnotationDictionary, ABC):
    """
    Common ancestor of all markup annotations.

    Args:
        title_bar: Text to be displayed in the title bar of the annotation;
            by convention this is the name of the author

    """

    def __init__(self, *, title_bar: Optional[str] = None) -> None:
        if title_bar is None:
            return
        self[NameObject("/T")] = TextStringObject(title_bar)
class Text(MarkupAnnotation):
    """
    A text annotation.

    Args:
        rect: array of four integers ``[xLL, yLL, xUR, yUR]``
            specifying the clickable rectangular area
        text: The text that is added to the document
        open: Whether the annotation popup is initially open
        flags: Annotation flags value

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = NO_FLAGS,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = (
            ("/Subtype", NameObject("/Text")),
            ("/Rect", RectangleObject(rect)),
            ("/Contents", TextStringObject(text)),
            ("/Open", BooleanObject(open)),
            ("/Flags", NumberObject(flags)),
        )
        for key, value in entries:
            self[NameObject(key)] = value
class FreeText(MarkupAnnotation):
    """
    A FreeText annotation.

    Args:
        text: Text displayed in the annotation.
        rect: Array of four numbers ``[xLL, yLL, xUR, yUR]`` giving the
            annotation rectangle.
        font: Font family name.
        bold: Whether to request a bold font style.
        italic: Whether to request an italic font style.
        font_size: CSS-style size string, e.g. ``"14pt"``.
        font_color: Hex RGB text color, e.g. ``"000000"``.
        border_color: Hex RGB border color, or None for no border.
        background_color: Hex RGB fill color, or None for no fill entry.

    """

    def __init__(
        self,
        *,
        text: str,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: Optional[str] = "000000",
        background_color: Optional[str] = "ffffff",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # Fix: /Subtype and /Rect were previously assigned twice (once
        # individually and again in the update() call below); the duplicate
        # standalone assignments have been removed.

        # Table 225 of the 1.7 reference ("CSS2 style attributes used in rich text strings")
        font_str = "font: "
        if italic:
            font_str = f"{font_str}italic "
        else:
            font_str = f"{font_str}normal "
        if bold:
            font_str = f"{font_str}bold "
        else:
            font_str = f"{font_str}normal "
        font_str = f"{font_str}{font_size} {font}"
        font_str = f"{font_str};text-align:left;color:#{font_color}"

        # /DA holds the default appearance: the border color as an "rg"
        # operator sequence, or empty when no border color is given.
        default_appearance_string = ""
        if border_color:
            for st in hex_to_rgb(border_color):
                default_appearance_string = f"{default_appearance_string}{st} "
            default_appearance_string = f"{default_appearance_string}rg"

        self.update(
            {
                NameObject("/Subtype"): NameObject("/FreeText"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Contents"): TextStringObject(text),
                # font size color
                NameObject("/DS"): TextStringObject(font_str),
                NameObject("/DA"): TextStringObject(default_appearance_string),
            }
        )
        if border_color is None:
            # Border Style
            self[NameObject("/BS")] = DictionaryObject(
                {
                    # width of 0 means no border
                    NameObject("/W"): NumberObject(0)
                }
            )
        if background_color is not None:
            self[NameObject("/C")] = ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(background_color)]
            )
class Line(MarkupAnnotation):
    """
    A straight-line annotation between two points.

    Args:
        p1: Start point as (x, y).
        p2: End point as (x, y).
        rect: Bounding rectangle ``[xLL, yLL, xUR, yUR]``.
        text: Optional annotation contents.

    """

    def __init__(
        self,
        p1: Vertex,
        p2: Vertex,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str = "",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self[NameObject("/Subtype")] = NameObject("/Line")
        self[NameObject("/Rect")] = RectangleObject(rect)
        # /L holds the line endpoints: x1 y1 x2 y2.
        self[NameObject("/L")] = ArrayObject(
            [
                FloatObject(p1[0]),
                FloatObject(p1[1]),
                FloatObject(p2[0]),
                FloatObject(p2[1]),
            ]
        )
        # No line-ending decorations on either end.
        self[NameObject("/LE")] = ArrayObject(
            [NameObject("/None"), NameObject("/None")]
        )
        # Mid-gray interior color.
        self[NameObject("/IC")] = ArrayObject(
            [FloatObject(0.5), FloatObject(0.5), FloatObject(0.5)]
        )
        self[NameObject("/Contents")] = TextStringObject(text)
class PolyLine(MarkupAnnotation):
    """
    A polyline annotation through the given vertices.

    Args:
        vertices: Non-empty list of (x, y) points.

    Raises:
        ValueError: If ``vertices`` is empty.

    """

    def __init__(
        self,
        vertices: list[Vertex],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if not vertices:
            raise ValueError("A polyline needs at least 1 vertex with two coordinates")
        # Flatten the (x, y) pairs into the x1 y1 x2 y2 ... form of /Vertices.
        flattened: list[NumberObject] = []
        for x, y in vertices:
            flattened += [NumberObject(x), NumberObject(y)]
        self.update(
            {
                NameObject("/Subtype"): NameObject("/PolyLine"),
                NameObject("/Vertices"): ArrayObject(flattened),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )
class Rectangle(MarkupAnnotation):
    """A ``/Square`` annotation covering ``rect``, optionally filled."""

    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Square"),
                NameObject("/Rect"): RectangleObject(rect),
            }
        )
        if interior_color:
            # /IC: fill colour as RGB components derived from the hex string.
            components = hex_to_rgb(interior_color)
            self[NameObject("/IC")] = ArrayObject(
                FloatObject(component) for component in components
            )
|
||||
|
||||
|
||||
class Highlight(MarkupAnnotation):
    """A ``/Highlight`` annotation over the regions given by ``quad_points``."""

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        quad_points: ArrayObject,
        highlight_color: str = "ff0000",
        printing: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        # /C: highlight colour as RGB components from the hex string.
        color = ArrayObject(
            FloatObject(component) for component in hex_to_rgb(highlight_color)
        )
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Highlight"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/QuadPoints"): quad_points,
                NameObject("/C"): color,
            }
        )
        if printing:
            self.flags = AnnotationFlag.PRINT
|
||||
|
||||
|
||||
class Ellipse(MarkupAnnotation):
    """A ``/Circle`` annotation inscribed in ``rect``, optionally filled."""

    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Circle"),
                NameObject("/Rect"): RectangleObject(rect),
            }
        )
        if interior_color:
            # /IC: fill colour as RGB components derived from the hex string.
            self[NameObject("/IC")] = ArrayObject(
                FloatObject(component) for component in hex_to_rgb(interior_color)
            )
|
||||
|
||||
|
||||
class Polygon(MarkupAnnotation):
    """A closed ``/Polygon`` annotation through the given vertices."""

    def __init__(
        self,
        vertices: list[tuple[float, float]],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if not vertices:
            raise ValueError("A polygon needs at least 1 vertex with two coordinates")
        # Flatten the (x, y) pairs into the [x1, y1, x2, y2, ...] form
        # the /Vertices entry expects.
        flattened = [NumberObject(value) for x, y in vertices for value in (x, y)]
        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Polygon"),
                NameObject("/Vertices"): ArrayObject(flattened),
                NameObject("/IT"): NameObject("/PolygonCloud"),
                NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
            }
        )
|
||||
@@ -0,0 +1,106 @@
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
from ..generic._base import (
|
||||
BooleanObject,
|
||||
NameObject,
|
||||
NumberObject,
|
||||
TextStringObject,
|
||||
)
|
||||
from ..generic._data_structures import ArrayObject, DictionaryObject
|
||||
from ..generic._fit import DEFAULT_FIT, Fit
|
||||
from ..generic._rectangle import RectangleObject
|
||||
from ._base import AnnotationDictionary
|
||||
|
||||
|
||||
class Link(AnnotationDictionary):
    """A ``/Link`` annotation targeting either an external URL or a page of
    this document.

    Exactly one of ``url`` (external URI action) or ``target_page_index``
    (internal destination) must be provided; supplying neither or both
    raises ``ValueError``.
    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if TYPE_CHECKING:
            from ..types import BorderArrayType  # noqa: PLC0415

        # The two targeting modes are mutually exclusive.
        is_external = url is not None
        is_internal = target_page_index is not None
        if not is_external and not is_internal:
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. Both were None."
            )
        if is_external and is_internal:
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. "
                f"{url=}, {target_page_index=}"
            )

        # /Border: the first three numbers of the caller's array, plus an
        # optional dash-pattern sub-array when a fourth element is present.
        border_arr: BorderArrayType
        if border is not None:
            border_arr = [NumberObject(n) for n in border[:3]]
            if len(border) == 4:
                dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
                border_arr.append(dash_pattern)
        else:
            # Default: all zeros, i.e. no visible border.
            border_arr = [NumberObject(0)] * 3

        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Link"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Border"): ArrayObject(border_arr),
            }
        )
        if is_external:
            # External links become a /URI action dictionary under /A.
            self[NameObject("/A")] = DictionaryObject(
                {
                    NameObject("/S"): NameObject("/URI"),
                    NameObject("/Type"): NameObject("/Action"),
                    NameObject("/URI"): TextStringObject(url),
                }
            )
        if is_internal:
            # This needs to be updated later!
            # NOTE(review): the plain-string keys below are not PDF names,
            # which suggests this /Dest dict is a placeholder that a writer
            # resolves into a real destination once the target page object
            # exists -- confirm against the writer's annotation handling.
            dest_deferred = DictionaryObject(
                {
                    "target_page_index": NumberObject(target_page_index),
                    "fit": NameObject(fit.fit_type),
                    "fit_args": fit.fit_args,
                }
            )
            self[NameObject("/Dest")] = dest_deferred
|
||||
|
||||
|
||||
class Popup(AnnotationDictionary):
    """A ``/Popup`` annotation, optionally tied to a parent annotation."""

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        parent: Optional[DictionaryObject] = None,
        open: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Popup"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Open"): BooleanObject(open),
            }
        )
        if parent:
            # This needs to be an indirect object
            try:
                self[NameObject("/Parent")] = parent.indirect_reference
            except AttributeError:
                # Best-effort: a parent lacking indirect_reference (not yet
                # registered with a writer) only triggers a warning; the
                # /Parent entry is simply omitted.
                from .._utils import logger_warning  # noqa: PLC0415

                logger_warning(
                    "Unregistered Parent object : No Parent field set",
                    __name__,
                )
|
||||
796
venv/lib/python3.12/site-packages/pypdf/constants.py
Normal file
796
venv/lib/python3.12/site-packages/pypdf/constants.py
Normal file
@@ -0,0 +1,796 @@
|
||||
"""Various constants, enums, and flags to aid readability."""
|
||||
|
||||
from enum import Enum, IntFlag, auto, unique
|
||||
|
||||
|
||||
class StrEnum(str, Enum):  # Once we are on Python 3.11+: enum.StrEnum
    """String-valued enum whose ``str()`` form is the raw member value."""

    def __str__(self) -> str:
        # Bypass Enum's default "Class.NAME" rendering.
        raw = self.value
        return str(raw)
|
||||
|
||||
|
||||
class Core:
    """Keywords that don't quite belong anywhere else."""

    OUTLINES = "/Outlines"
    THREADS = "/Threads"
    PAGE = "/Page"
    PAGES = "/Pages"
    CATALOG = "/Catalog"


class TrailerKeys:
    """Keys of the file trailer dictionary."""

    SIZE = "/Size"
    PREV = "/Prev"
    ROOT = "/Root"
    ENCRYPT = "/Encrypt"
    INFO = "/Info"
    ID = "/ID"


class CatalogAttributes:
    """Name/destination entries of the document catalog."""

    NAMES = "/Names"
    DESTS = "/Dests"


class EncryptionDictAttributes:
    """
    Additional encryption dictionary entries for the standard security handler.

    Table 3.19, Page 122.
    Table 21 of the 2.0 manual.
    """

    R = "/R"  # number, required; revision of the standard security handler
    O = "/O"  # 32-byte string, required # noqa: E741
    U = "/U"  # 32-byte string, required
    P = "/P"  # integer flag, required; permitted operations
    ENCRYPT_METADATA = "/EncryptMetadata"  # boolean flag, optional
|
||||
|
||||
|
||||
class UserAccessPermissions(IntFlag):
    """
    Table 3.20 User access permissions.
    Table 22 of the 2.0 manual.

    ``R*`` members are reserved bits with spec-mandated values; all of them
    except R1/R2 default to 1. Member definition order is significant: it
    drives the key order produced by :meth:`to_dict`.
    """

    R1 = 1
    R2 = 2
    PRINT = 4
    MODIFY = 8
    EXTRACT = 16
    ADD_OR_MODIFY = 32
    R7 = 64
    R8 = 128
    FILL_FORM_FIELDS = 256
    EXTRACT_TEXT_AND_GRAPHICS = 512
    ASSEMBLE_DOC = 1024
    PRINT_TO_REPRESENTATION = 2048
    R13 = 2**12
    R14 = 2**13
    R15 = 2**14
    R16 = 2**15
    R17 = 2**16
    R18 = 2**17
    R19 = 2**18
    R20 = 2**19
    R21 = 2**20
    R22 = 2**21
    R23 = 2**22
    R24 = 2**23
    R25 = 2**24
    R26 = 2**25
    R27 = 2**26
    R28 = 2**27
    R29 = 2**28
    R30 = 2**29
    R31 = 2**30
    R32 = 2**31

    @classmethod
    def _is_reserved(cls, name: str) -> bool:
        """Check if the given name corresponds to a reserved flag entry."""
        return name.startswith("R") and name[1:].isdigit()

    @classmethod
    def _is_active(cls, name: str) -> bool:
        """Check if the given reserved name defaults to 1 = active."""
        return name not in {"R1", "R2"}

    def to_dict(self) -> dict[str, bool]:
        """Convert the given flag value to a corresponding verbose name mapping."""
        cls = UserAccessPermissions
        return {
            name.lower(): (self & flag) == flag
            for name, flag in cls.__members__.items()
            if not cls._is_reserved(name)
        }

    @classmethod
    def from_dict(cls, value: dict[str, bool]) -> "UserAccessPermissions":
        """Convert the verbose name mapping to the corresponding flag value."""
        remaining = value.copy()
        result = cls(0)
        for name, flag in cls.__members__.items():
            if cls._is_reserved(name):
                # Reserved names have a required value. Use it.
                if cls._is_active(name):
                    result |= flag
            elif remaining.pop(name.lower(), False):
                result |= flag
        if remaining:
            raise ValueError(f"Unknown dictionary keys: {remaining!r}")
        return result

    @classmethod
    def all(cls) -> "UserAccessPermissions":
        """All 32 bits set except the always-zero reserved bits R1 and R2."""
        return cls((2**32 - 1) - cls.R1 - cls.R2)
|
||||
|
||||
|
||||
class Resources:
|
||||
"""
|
||||
Table 3.30 Entries in a resource dictionary.
|
||||
Table 34 in the 2.0 reference.
|
||||
"""
|
||||
|
||||
EXT_G_STATE = "/ExtGState" # dictionary, optional
|
||||
COLOR_SPACE = "/ColorSpace" # dictionary, optional
|
||||
PATTERN = "/Pattern" # dictionary, optional
|
||||
SHADING = "/Shading" # dictionary, optional
|
||||
XOBJECT = "/XObject" # dictionary, optional
|
||||
FONT = "/Font" # dictionary, optional
|
||||
PROC_SET = "/ProcSet" # array, optional
|
||||
PROPERTIES = "/Properties" # dictionary, optional
|
||||
|
||||
|
||||
class PagesAttributes:
|
||||
"""§7.7.3.2 of the 1.7 and 2.0 reference."""
|
||||
|
||||
TYPE = "/Type" # name, required; must be /Pages
|
||||
PARENT = "/Parent" # dictionary, required; indirect reference to pages object
|
||||
KIDS = "/Kids" # array, required; List of indirect references
|
||||
COUNT = "/Count"
|
||||
# integer, required; the number of leaf nodes (page objects)
|
||||
# that are descendants of this node within the page tree
|
||||
|
||||
|
||||
class PageAttributes:
|
||||
"""§7.7.3.3 of the 1.7 and 2.0 reference."""
|
||||
|
||||
TYPE = "/Type" # name, required; must be /Page
|
||||
PARENT = "/Parent" # dictionary, required; a pages object
|
||||
LAST_MODIFIED = (
|
||||
"/LastModified" # date, optional; date and time of last modification
|
||||
)
|
||||
RESOURCES = "/Resources" # dictionary, required if there are any
|
||||
MEDIABOX = "/MediaBox" # rectangle, required; rectangle specifying page size
|
||||
CROPBOX = "/CropBox" # rectangle, optional
|
||||
BLEEDBOX = "/BleedBox" # rectangle, optional
|
||||
TRIMBOX = "/TrimBox" # rectangle, optional
|
||||
ARTBOX = "/ArtBox" # rectangle, optional
|
||||
BOX_COLOR_INFO = "/BoxColorInfo" # dictionary, optional
|
||||
CONTENTS = "/Contents" # stream or array, optional
|
||||
ROTATE = "/Rotate" # integer, optional; page rotation in degrees
|
||||
GROUP = "/Group" # dictionary, optional; page group
|
||||
THUMB = "/Thumb" # stream, optional; indirect reference to image of the page
|
||||
B = "/B" # array, optional
|
||||
DUR = "/Dur" # number, optional
|
||||
TRANS = "/Trans" # dictionary, optional
|
||||
ANNOTS = "/Annots" # array, optional; an array of annotations
|
||||
AA = "/AA" # dictionary, optional
|
||||
METADATA = "/Metadata" # stream, optional
|
||||
PIECE_INFO = "/PieceInfo" # dictionary, optional
|
||||
STRUCT_PARENTS = "/StructParents" # integer, optional
|
||||
ID = "/ID" # byte string, optional
|
||||
PZ = "/PZ" # number, optional
|
||||
SEPARATION_INFO = "/SeparationInfo" # dictionary, optional
|
||||
TABS = "/Tabs" # name, optional
|
||||
TEMPLATE_INSTANTIATED = "/TemplateInstantiated" # name, optional
|
||||
PRES_STEPS = "/PresSteps" # dictionary, optional
|
||||
USER_UNIT = "/UserUnit" # number, optional
|
||||
VP = "/VP" # dictionary, optional
|
||||
AF = "/AF" # array of dictionaries, optional
|
||||
OUTPUT_INTENTS = "/OutputIntents" # array, optional
|
||||
D_PART = "/DPart" # dictionary, required, if this page is within the range of a DPart, not permitted otherwise
|
||||
|
||||
|
||||
class FileSpecificationDictionaryEntries:
|
||||
"""Table 3.41 Entries in a file specification dictionary."""
|
||||
|
||||
Type = "/Type"
|
||||
FS = "/FS" # The name of the file system to be used to interpret this file specification
|
||||
F = "/F" # A file specification string of the form described in §3.10.1
|
||||
UF = "/UF" # A Unicode string of the file as described in §3.10.1
|
||||
DOS = "/DOS"
|
||||
Mac = "/Mac"
|
||||
Unix = "/Unix"
|
||||
ID = "/ID"
|
||||
V = "/V"
|
||||
EF = "/EF" # dictionary, containing a subset of the keys F, UF, DOS, Mac, and Unix
|
||||
RF = "/RF" # dictionary, containing arrays of /EmbeddedFile
|
||||
DESC = "/Desc" # description of the file
|
||||
Cl = "/Cl"
|
||||
|
||||
|
||||
class StreamAttributes:
|
||||
"""
|
||||
Table 4.2.
|
||||
Table 5 in the 2.0 reference.
|
||||
"""
|
||||
|
||||
LENGTH = "/Length" # integer, required
|
||||
FILTER = "/Filter" # name or array of names, optional
|
||||
DECODE_PARMS = "/DecodeParms" # variable, optional -- 'decodeParams is wrong
|
||||
|
||||
|
||||
# @unique rejects accidental duplicate member values at class-creation time.
@unique
class FilterTypes(StrEnum):
    """§7.4 of the 1.7 and 2.0 references."""

    ASCII_HEX_DECODE = "/ASCIIHexDecode"  # abbreviation: AHx
    ASCII_85_DECODE = "/ASCII85Decode"  # abbreviation: A85
    LZW_DECODE = "/LZWDecode"  # abbreviation: LZW
    FLATE_DECODE = "/FlateDecode"  # abbreviation: Fl
    RUN_LENGTH_DECODE = "/RunLengthDecode"  # abbreviation: RL
    CCITT_FAX_DECODE = "/CCITTFaxDecode"  # abbreviation: CCF
    DCT_DECODE = "/DCTDecode"  # abbreviation: DCT
    JPX_DECODE = "/JPXDecode"
    JBIG2_DECODE = "/JBIG2Decode"


class FilterTypeAbbreviations:
    """§8.9.7 of the 1.7 and 2.0 references (inline-image short filter names)."""

    AHx = "/AHx"
    A85 = "/A85"
    LZW = "/LZW"
    FL = "/Fl"
    RL = "/RL"
    CCF = "/CCF"
    DCT = "/DCT"
|
||||
|
||||
|
||||
class LzwFilterParameters:
|
||||
"""
|
||||
Table 4.4.
|
||||
Table 8 in the 2.0 reference.
|
||||
"""
|
||||
|
||||
PREDICTOR = "/Predictor" # integer
|
||||
COLORS = "/Colors" # integer
|
||||
BITS_PER_COMPONENT = "/BitsPerComponent" # integer
|
||||
COLUMNS = "/Columns" # integer
|
||||
EARLY_CHANGE = "/EarlyChange" # integer
|
||||
|
||||
|
||||
class CcittFaxDecodeParameters:
|
||||
"""
|
||||
Table 4.5.
|
||||
Table 11 in the 2.0 reference.
|
||||
"""
|
||||
|
||||
K = "/K" # integer
|
||||
END_OF_LINE = "/EndOfLine" # boolean
|
||||
ENCODED_BYTE_ALIGN = "/EncodedByteAlign" # boolean
|
||||
COLUMNS = "/Columns" # integer
|
||||
ROWS = "/Rows" # integer
|
||||
END_OF_BLOCK = "/EndOfBlock" # boolean
|
||||
BLACK_IS_1 = "/BlackIs1" # boolean
|
||||
DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError" # integer
|
||||
|
||||
|
||||
class ImageAttributes:
|
||||
"""§11.6.5 of the 1.7 and 2.0 references."""
|
||||
|
||||
TYPE = "/Type" # name, required; must be /XObject
|
||||
SUBTYPE = "/Subtype" # name, required; must be /Image
|
||||
NAME = "/Name" # name, required
|
||||
WIDTH = "/Width" # integer, required
|
||||
HEIGHT = "/Height" # integer, required
|
||||
BITS_PER_COMPONENT = "/BitsPerComponent" # integer, required
|
||||
COLOR_SPACE = "/ColorSpace" # name, required
|
||||
DECODE = "/Decode" # array, optional
|
||||
INTENT = "/Intent" # string, optional
|
||||
INTERPOLATE = "/Interpolate" # boolean, optional
|
||||
IMAGE_MASK = "/ImageMask" # boolean, optional
|
||||
MASK = "/Mask" # 1-bit image mask stream
|
||||
S_MASK = "/SMask" # dictionary or name, optional
|
||||
|
||||
|
||||
class ColorSpaces:
|
||||
DEVICE_RGB = "/DeviceRGB"
|
||||
DEVICE_CMYK = "/DeviceCMYK"
|
||||
DEVICE_GRAY = "/DeviceGray"
|
||||
|
||||
|
||||
class TypArguments:
|
||||
"""Table 8.2 of the PDF 1.7 reference."""
|
||||
|
||||
LEFT = "/Left"
|
||||
RIGHT = "/Right"
|
||||
BOTTOM = "/Bottom"
|
||||
TOP = "/Top"
|
||||
|
||||
|
||||
class TypFitArguments:
|
||||
"""Table 8.2 of the PDF 1.7 reference."""
|
||||
|
||||
XYZ = "/XYZ"
|
||||
FIT = "/Fit"
|
||||
FIT_H = "/FitH"
|
||||
FIT_V = "/FitV"
|
||||
FIT_R = "/FitR"
|
||||
FIT_B = "/FitB"
|
||||
FIT_BH = "/FitBH"
|
||||
FIT_BV = "/FitBV"
|
||||
|
||||
|
||||
class GoToActionArguments:
|
||||
S = "/S" # name, required: type of action
|
||||
D = "/D" # name, byte string, or array, required: destination to jump to
|
||||
SD = "/SD" # array, optional: structure destination to jump to
|
||||
|
||||
|
||||
class AnnotationDictionaryAttributes:
|
||||
"""Table 8.15 Entries common to all annotation dictionaries."""
|
||||
|
||||
Type = "/Type"
|
||||
Subtype = "/Subtype"
|
||||
Rect = "/Rect"
|
||||
Contents = "/Contents"
|
||||
P = "/P"
|
||||
NM = "/NM"
|
||||
M = "/M"
|
||||
F = "/F"
|
||||
AP = "/AP"
|
||||
AS = "/AS"
|
||||
DA = "/DA"
|
||||
Border = "/Border"
|
||||
C = "/C"
|
||||
StructParent = "/StructParent"
|
||||
OC = "/OC"
|
||||
|
||||
|
||||
class InteractiveFormDictEntries:
|
||||
Fields = "/Fields"
|
||||
NeedAppearances = "/NeedAppearances"
|
||||
SigFlags = "/SigFlags"
|
||||
CO = "/CO"
|
||||
DR = "/DR"
|
||||
DA = "/DA"
|
||||
Q = "/Q"
|
||||
XFA = "/XFA"
|
||||
|
||||
|
||||
class FieldDictionaryAttributes:
|
||||
"""
|
||||
Entries common to all field dictionaries (Table 8.69 PDF 1.7 reference)
|
||||
(*very partially documented here*).
|
||||
|
||||
FFBits provides the constants used for `/Ff` from Table 8.70/8.75/8.77/8.79
|
||||
"""
|
||||
|
||||
FT = "/FT" # name, required for terminal fields
|
||||
Parent = "/Parent" # dictionary, required for children
|
||||
Kids = "/Kids" # array, sometimes required
|
||||
T = "/T" # text string, optional
|
||||
TU = "/TU" # text string, optional
|
||||
TM = "/TM" # text string, optional
|
||||
Ff = "/Ff" # integer, optional
|
||||
V = "/V" # text string or array, optional
|
||||
DV = "/DV" # text string, optional
|
||||
AA = "/AA" # dictionary, optional
|
||||
Opt = "/Opt" # array, optional
|
||||
|
||||
class FfBits(IntFlag):
|
||||
"""
|
||||
Ease building /Ff flags
|
||||
Some entries may be specific to:
|
||||
|
||||
* Text (Tx) (Table 8.75 PDF 1.7 reference)
|
||||
* Buttons (Btn) (Table 8.77 PDF 1.7 reference)
|
||||
* Choice (Ch) (Table 8.79 PDF 1.7 reference)
|
||||
"""
|
||||
|
||||
ReadOnly = 1 << 0
|
||||
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||
Required = 1 << 1
|
||||
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||
NoExport = 1 << 2
|
||||
"""common to Tx/Btn/Ch in Table 8.70"""
|
||||
|
||||
Multiline = 1 << 12
|
||||
"""Tx"""
|
||||
Password = 1 << 13
|
||||
"""Tx"""
|
||||
|
||||
NoToggleToOff = 1 << 14
|
||||
"""Btn"""
|
||||
Radio = 1 << 15
|
||||
"""Btn"""
|
||||
Pushbutton = 1 << 16
|
||||
"""Btn"""
|
||||
|
||||
Combo = 1 << 17
|
||||
"""Ch"""
|
||||
Edit = 1 << 18
|
||||
"""Ch"""
|
||||
Sort = 1 << 19
|
||||
"""Ch"""
|
||||
|
||||
FileSelect = 1 << 20
|
||||
"""Tx"""
|
||||
|
||||
MultiSelect = 1 << 21
|
||||
"""Tx"""
|
||||
|
||||
DoNotSpellCheck = 1 << 22
|
||||
"""Tx/Ch"""
|
||||
DoNotScroll = 1 << 23
|
||||
"""Tx"""
|
||||
Comb = 1 << 24
|
||||
"""Tx"""
|
||||
|
||||
RadiosInUnison = 1 << 25
|
||||
"""Btn"""
|
||||
|
||||
RichText = 1 << 25
|
||||
"""Tx"""
|
||||
|
||||
CommitOnSelChange = 1 << 26
|
||||
"""Ch"""
|
||||
|
||||
@classmethod
|
||||
def attributes(cls) -> tuple[str, ...]:
|
||||
"""
|
||||
Get a tuple of all the attributes present in a Field Dictionary.
|
||||
|
||||
This method returns a tuple of all the attribute constants defined in
|
||||
the FieldDictionaryAttributes class. These attributes correspond to the
|
||||
entries that are common to all field dictionaries as specified in the
|
||||
PDF 1.7 reference.
|
||||
|
||||
Returns:
|
||||
A tuple containing all the attribute constants.
|
||||
|
||||
"""
|
||||
return (
|
||||
cls.TM,
|
||||
cls.T,
|
||||
cls.FT,
|
||||
cls.Parent,
|
||||
cls.TU,
|
||||
cls.Ff,
|
||||
cls.V,
|
||||
cls.DV,
|
||||
cls.Kids,
|
||||
cls.AA,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def attributes_dict(cls) -> dict[str, str]:
|
||||
"""
|
||||
Get a dictionary of attribute keys and their human-readable names.
|
||||
|
||||
This method returns a dictionary where the keys are the attribute
|
||||
constants defined in the FieldDictionaryAttributes class and the values
|
||||
are their corresponding human-readable names. These attributes
|
||||
correspond to the entries that are common to all field dictionaries as
|
||||
specified in the PDF 1.7 reference.
|
||||
|
||||
Returns:
|
||||
A dictionary containing attribute keys and their names.
|
||||
|
||||
"""
|
||||
return {
|
||||
cls.FT: "Field Type",
|
||||
cls.Parent: "Parent",
|
||||
cls.T: "Field Name",
|
||||
cls.TU: "Alternate Field Name",
|
||||
cls.TM: "Mapping Name",
|
||||
cls.Ff: "Field Flags",
|
||||
cls.V: "Value",
|
||||
cls.DV: "Default Value",
|
||||
}
|
||||
|
||||
|
||||
class CheckboxRadioButtonAttributes:
    """Table 8.76 Field flags common to all field types."""

    Opt = "/Opt"  # Options, Optional

    @classmethod
    def attributes(cls) -> tuple[str, ...]:
        """
        Return the attribute constants of this class as a tuple.

        These correspond to the checkbox/radio-button specific field
        dictionary entries of the PDF 1.7 reference.

        Returns:
            A tuple containing all the attribute constants.

        """
        names = (cls.Opt,)
        return names

    @classmethod
    def attributes_dict(cls) -> dict[str, str]:
        """
        Map each attribute constant of this class to a human-readable name.

        These correspond to the checkbox/radio-button specific field
        dictionary entries of the PDF 1.7 reference.

        Returns:
            A dictionary containing attribute keys and their names.

        """
        readable = {cls.Opt: "Options"}
        return readable
|
||||
|
||||
|
||||
class FieldFlag(IntFlag):
|
||||
"""Table 8.70 Field flags common to all field types."""
|
||||
|
||||
READ_ONLY = 1
|
||||
REQUIRED = 2
|
||||
NO_EXPORT = 4
|
||||
|
||||
|
||||
class DocumentInformationAttributes:
|
||||
"""Table 10.2 Entries in the document information dictionary."""
|
||||
|
||||
TITLE = "/Title" # text string, optional
|
||||
AUTHOR = "/Author" # text string, optional
|
||||
SUBJECT = "/Subject" # text string, optional
|
||||
KEYWORDS = "/Keywords" # text string, optional
|
||||
CREATOR = "/Creator" # text string, optional
|
||||
PRODUCER = "/Producer" # text string, optional
|
||||
CREATION_DATE = "/CreationDate" # date, optional
|
||||
MOD_DATE = "/ModDate" # date, optional
|
||||
TRAPPED = "/Trapped" # name, optional
|
||||
|
||||
|
||||
class PageLayouts:
|
||||
"""
|
||||
Page 84, PDF 1.4 reference.
|
||||
Page 115, PDF 2.0 reference.
|
||||
"""
|
||||
|
||||
SINGLE_PAGE = "/SinglePage"
|
||||
ONE_COLUMN = "/OneColumn"
|
||||
TWO_COLUMN_LEFT = "/TwoColumnLeft"
|
||||
TWO_COLUMN_RIGHT = "/TwoColumnRight"
|
||||
TWO_PAGE_LEFT = "/TwoPageLeft" # (PDF 1.5)
|
||||
TWO_PAGE_RIGHT = "/TwoPageRight" # (PDF 1.5)
|
||||
|
||||
|
||||
class GraphicsStateParameters:
|
||||
"""Table 58 – Entries in a Graphics State Parameter Dictionary"""
|
||||
|
||||
TYPE = "/Type" # name, optional
|
||||
LW = "/LW" # number, optional
|
||||
LC = "/LC" # integer, optional
|
||||
LJ = "/LJ" # integer, optional
|
||||
ML = "/ML" # number, optional
|
||||
D = "/D" # array, optional
|
||||
RI = "/RI" # name, optional
|
||||
OP = "/OP"
|
||||
op = "/op"
|
||||
OPM = "/OPM"
|
||||
FONT = "/Font" # array, optional
|
||||
BG = "/BG"
|
||||
BG2 = "/BG2"
|
||||
UCR = "/UCR"
|
||||
UCR2 = "/UCR2"
|
||||
TR = "/TR"
|
||||
TR2 = "/TR2"
|
||||
HT = "/HT"
|
||||
FL = "/FL"
|
||||
SM = "/SM"
|
||||
SA = "/SA"
|
||||
BM = "/BM"
|
||||
S_MASK = "/SMask" # dictionary or name, optional
|
||||
CA = "/CA"
|
||||
ca = "/ca"
|
||||
AIS = "/AIS"
|
||||
TK = "/TK"
|
||||
|
||||
|
||||
class CatalogDictionary:
|
||||
"""§7.7.2 of the 1.7 and 2.0 references."""
|
||||
|
||||
TYPE = "/Type" # name, required; must be /Catalog
|
||||
VERSION = "/Version" # name
|
||||
EXTENSIONS = "/Extensions" # dictionary, optional; ISO 32000-1
|
||||
PAGES = "/Pages" # dictionary, required
|
||||
PAGE_LABELS = "/PageLabels" # number tree, optional
|
||||
NAMES = "/Names" # dictionary, optional
|
||||
DESTS = "/Dests" # dictionary, optional
|
||||
VIEWER_PREFERENCES = "/ViewerPreferences" # dictionary, optional
|
||||
PAGE_LAYOUT = "/PageLayout" # name, optional
|
||||
PAGE_MODE = "/PageMode" # name, optional
|
||||
OUTLINES = "/Outlines" # dictionary, optional
|
||||
THREADS = "/Threads" # array, optional
|
||||
OPEN_ACTION = "/OpenAction" # array or dictionary or name, optional
|
||||
AA = "/AA" # dictionary, optional
|
||||
URI = "/URI" # dictionary, optional
|
||||
ACRO_FORM = "/AcroForm" # dictionary, optional
|
||||
METADATA = "/Metadata" # stream, optional
|
||||
STRUCT_TREE_ROOT = "/StructTreeRoot" # dictionary, optional
|
||||
MARK_INFO = "/MarkInfo" # dictionary, optional
|
||||
LANG = "/Lang" # text string, optional
|
||||
SPIDER_INFO = "/SpiderInfo" # dictionary, optional
|
||||
OUTPUT_INTENTS = "/OutputIntents" # array, optional
|
||||
PIECE_INFO = "/PieceInfo" # dictionary, optional
|
||||
OC_PROPERTIES = "/OCProperties" # dictionary, optional
|
||||
PERMS = "/Perms" # dictionary, optional
|
||||
LEGAL = "/Legal" # dictionary, optional
|
||||
REQUIREMENTS = "/Requirements" # array, optional
|
||||
COLLECTION = "/Collection" # dictionary, optional
|
||||
NEEDS_RENDERING = "/NeedsRendering" # boolean, optional
|
||||
DSS = "/DSS" # dictionary, optional
|
||||
AF = "/AF" # array of dictionaries, optional
|
||||
D_PART_ROOT = "/DPartRoot" # dictionary, optional
|
||||
|
||||
|
||||
class OutlineFontFlag(IntFlag):
|
||||
"""A class used as an enumerable flag for formatting an outline font."""
|
||||
|
||||
italic = 1
|
||||
bold = 2
|
||||
|
||||
|
||||
class PageLabelStyle:
|
||||
"""
|
||||
Table 8.10 in the 1.7 reference.
|
||||
Table 161 in the 2.0 reference.
|
||||
"""
|
||||
|
||||
DECIMAL = "/D" # Decimal Arabic numerals
|
||||
UPPERCASE_ROMAN = "/R" # Uppercase Roman numerals
|
||||
LOWERCASE_ROMAN = "/r" # Lowercase Roman numerals
|
||||
UPPERCASE_LETTER = "/A" # Uppercase letters
|
||||
LOWERCASE_LETTER = "/a" # Lowercase letters
|
||||
|
||||
|
||||
class AnnotationFlag(IntFlag):
    """See §12.5.3 "Annotation Flags"."""

    INVISIBLE = 1  # bit 1
    HIDDEN = 2  # bit 2
    PRINT = 4  # bit 3
    NO_ZOOM = 8  # bit 4
    NO_ROTATE = 16  # bit 5
    NO_VIEW = 32  # bit 6
    READ_ONLY = 64  # bit 7
    LOCKED = 128  # bit 8
    TOGGLE_NO_VIEW = 256  # bit 9
    LOCKED_CONTENTS = 512  # bit 10
|
||||
|
||||
|
||||
# The constant-holder classes defined above, gathered into one tuple
# (alphabetical order) so the set of known PDF key namespaces can be
# inspected in a single place.
PDF_KEYS = (
    AnnotationDictionaryAttributes,
    CatalogAttributes,
    CatalogDictionary,
    CcittFaxDecodeParameters,
    CheckboxRadioButtonAttributes,
    ColorSpaces,
    Core,
    DocumentInformationAttributes,
    EncryptionDictAttributes,
    FieldDictionaryAttributes,
    FileSpecificationDictionaryEntries,
    FilterTypeAbbreviations,
    FilterTypes,
    GoToActionArguments,
    GraphicsStateParameters,
    ImageAttributes,
    InteractiveFormDictEntries,
    LzwFilterParameters,
    PageAttributes,
    PageLayouts,
    PagesAttributes,
    Resources,
    StreamAttributes,
    TrailerKeys,
    TypArguments,
    TypFitArguments,
)
|
||||
|
||||
|
||||
class ImageType(IntFlag):
    """Bit flags naming image categories; ``ALL`` combines the three."""

    NONE = 0
    XOBJECT_IMAGES = auto()
    INLINE_IMAGES = auto()
    DRAWING_IMAGES = auto()
    ALL = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
    IMAGES = ALL  # for consistency with ObjectDeletionFlag
|
||||
|
||||
|
||||
_INLINE_IMAGE_VALUE_MAPPING = {
|
||||
"/G": "/DeviceGray",
|
||||
"/RGB": "/DeviceRGB",
|
||||
"/CMYK": "/DeviceCMYK",
|
||||
"/I": "/Indexed",
|
||||
"/AHx": "/ASCIIHexDecode",
|
||||
"/A85": "/ASCII85Decode",
|
||||
"/LZW": "/LZWDecode",
|
||||
"/Fl": "/FlateDecode",
|
||||
"/RL": "/RunLengthDecode",
|
||||
"/CCF": "/CCITTFaxDecode",
|
||||
"/DCT": "/DCTDecode",
|
||||
"/DeviceGray": "/DeviceGray",
|
||||
"/DeviceRGB": "/DeviceRGB",
|
||||
"/DeviceCMYK": "/DeviceCMYK",
|
||||
"/Indexed": "/Indexed",
|
||||
"/ASCIIHexDecode": "/ASCIIHexDecode",
|
||||
"/ASCII85Decode": "/ASCII85Decode",
|
||||
"/LZWDecode": "/LZWDecode",
|
||||
"/FlateDecode": "/FlateDecode",
|
||||
"/RunLengthDecode": "/RunLengthDecode",
|
||||
"/CCITTFaxDecode": "/CCITTFaxDecode",
|
||||
"/DCTDecode": "/DCTDecode",
|
||||
"/RelativeColorimetric": "/RelativeColorimetric",
|
||||
}
|
||||
|
||||
_INLINE_IMAGE_KEY_MAPPING = {
|
||||
"/BPC": "/BitsPerComponent",
|
||||
"/CS": "/ColorSpace",
|
||||
"/D": "/Decode",
|
||||
"/DP": "/DecodeParms",
|
||||
"/F": "/Filter",
|
||||
"/H": "/Height",
|
||||
"/W": "/Width",
|
||||
"/I": "/Interpolate",
|
||||
"/Intent": "/Intent",
|
||||
"/IM": "/ImageMask",
|
||||
"/BitsPerComponent": "/BitsPerComponent",
|
||||
"/ColorSpace": "/ColorSpace",
|
||||
"/Decode": "/Decode",
|
||||
"/DecodeParms": "/DecodeParms",
|
||||
"/Filter": "/Filter",
|
||||
"/Height": "/Height",
|
||||
"/Width": "/Width",
|
||||
"/Interpolate": "/Interpolate",
|
||||
"/ImageMask": "/ImageMask",
|
||||
}
|
||||
|
||||
|
||||
class AFRelationship:
|
||||
"""
|
||||
Associated file relationship types, defining the relationship between
|
||||
the PDF component and the associated file.
|
||||
|
||||
Defined in table 43 of the PDF 2.0 reference.
|
||||
"""
|
||||
|
||||
SOURCE = "/Source" # Original content source
|
||||
DATA = "/Data" # Base data for visual presentation
|
||||
ALTERNATIVE = "/Alternative" # Alternative content representation
|
||||
SUPPLEMENT = "/Supplement" # Supplemental representation of original source/data
|
||||
ENCRYPTED_PAYLOAD = "/EncryptedPayload" # Encrypted payload document
|
||||
FORM_DATA = "/FormData" # Data associated with AcroForm of this PDF
|
||||
SCHEMA = "/Schema" # Schema definition for associated object
|
||||
UNSPECIFIED = "/Unspecified" # Not known or cannot be described with values
|
||||
|
||||
|
||||
class BorderStyles:
|
||||
"""
|
||||
A class defining border styles used in PDF documents.
|
||||
|
||||
Defined in table 168 of the PDF 2.0 reference.
|
||||
"""
|
||||
|
||||
BEVELED = "/B"
|
||||
DASHED = "/D"
|
||||
INSET = "/I"
|
||||
SOLID = "/S"
|
||||
UNDERLINED = "/U"
|
||||
74
venv/lib/python3.12/site-packages/pypdf/errors.py
Normal file
74
venv/lib/python3.12/site-packages/pypdf/errors.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""
|
||||
All errors/exceptions pypdf raises and all of the warnings it uses.
|
||||
|
||||
Please note that broken PDF files might cause other Exceptions.
|
||||
"""
|
||||
|
||||
|
||||
class DeprecationError(Exception):
|
||||
"""Raised when a deprecated feature is used."""
|
||||
|
||||
|
||||
class DependencyError(Exception):
|
||||
"""
|
||||
Raised when a required dependency (a library or module that pypdf depends on)
|
||||
is not available or cannot be imported.
|
||||
"""
|
||||
|
||||
|
||||
class PyPdfError(Exception):
|
||||
"""Base class for all exceptions raised by pypdf."""
|
||||
|
||||
|
||||
class PdfReadError(PyPdfError):
|
||||
"""Raised when there is an issue reading a PDF file."""
|
||||
|
||||
|
||||
class PageSizeNotDefinedError(PyPdfError):
|
||||
"""Raised when the page size of a PDF document is not defined."""
|
||||
|
||||
|
||||
class PdfReadWarning(UserWarning):
|
||||
"""Issued when there is a potential issue reading a PDF file, but it can still be read."""
|
||||
|
||||
|
||||
class PdfStreamError(PdfReadError):
|
||||
"""Raised when there is an issue reading the stream of data in a PDF file."""
|
||||
|
||||
|
||||
class ParseError(PyPdfError):
|
||||
"""
|
||||
Raised when there is an issue parsing (analyzing and understanding the
|
||||
structure and meaning of) a PDF file.
|
||||
"""
|
||||
|
||||
|
||||
class FileNotDecryptedError(PdfReadError):
|
||||
"""
|
||||
Raised when a PDF file that has been encrypted
|
||||
(meaning it requires a password to be accessed) has not been successfully
|
||||
decrypted.
|
||||
"""
|
||||
|
||||
|
||||
class WrongPasswordError(FileNotDecryptedError):
|
||||
"""Raised when the wrong password is used to try to decrypt an encrypted PDF file."""
|
||||
|
||||
|
||||
class EmptyFileError(PdfReadError):
|
||||
"""Raised when a PDF file is empty or has no content."""
|
||||
|
||||
|
||||
class EmptyImageDataError(PyPdfError):
|
||||
"""Raised when trying to process an image that has no data."""
|
||||
|
||||
|
||||
STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"
|
||||
|
||||
|
||||
class LimitReachedError(PyPdfError):
|
||||
"""Raised when a limit is reached."""
|
||||
|
||||
|
||||
class XmpDocumentError(PyPdfError, RuntimeError):
|
||||
"""Raised when the XMP XML document context is invalid or missing."""
|
||||
815
venv/lib/python3.12/site-packages/pypdf/filters.py
Normal file
815
venv/lib/python3.12/site-packages/pypdf/filters.py
Normal file
@@ -0,0 +1,815 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
"""
|
||||
Implementation of stream filters; §7.4 Filters of the PDF 2.0 specification.
|
||||
|
||||
§8.9.7 Inline images of the PDF 2.0 specification has abbreviations that can be
|
||||
used for the names of filters in an inline image object.
|
||||
"""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
import math
|
||||
import os
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import zlib
|
||||
from base64 import a85decode
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Any, Optional, Union, cast
|
||||
|
||||
from ._codecs._codecs import LzwCodec as _LzwCodec
|
||||
from ._utils import (
|
||||
WHITESPACES_AS_BYTES,
|
||||
deprecation_with_replacement,
|
||||
logger_warning,
|
||||
)
|
||||
from .constants import CcittFaxDecodeParameters as CCITT
|
||||
from .constants import FilterTypeAbbreviations as FTA
|
||||
from .constants import FilterTypes as FT
|
||||
from .constants import ImageAttributes as IA
|
||||
from .constants import LzwFilterParameters as LZW
|
||||
from .constants import StreamAttributes as SA
|
||||
from .errors import DependencyError, LimitReachedError, PdfReadError, PdfStreamError
|
||||
from .generic import (
|
||||
ArrayObject,
|
||||
DictionaryObject,
|
||||
IndirectObject,
|
||||
NullObject,
|
||||
StreamObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
JBIG2_MAX_OUTPUT_LENGTH = 75_000_000
|
||||
LZW_MAX_OUTPUT_LENGTH = 75_000_000
|
||||
ZLIB_MAX_OUTPUT_LENGTH = 75_000_000
|
||||
|
||||
|
||||
|
||||
def _decompress_with_limit(data: bytes) -> bytes:
|
||||
decompressor = zlib.decompressobj()
|
||||
result = decompressor.decompress(data, max_length=ZLIB_MAX_OUTPUT_LENGTH)
|
||||
if decompressor.unconsumed_tail:
|
||||
raise LimitReachedError(
|
||||
f"Limit reached while decompressing. {len(decompressor.unconsumed_tail)} bytes remaining."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def decompress(data: bytes) -> bytes:
|
||||
"""
|
||||
Decompress the given data using zlib.
|
||||
|
||||
Attempts to decompress the input data using zlib.
|
||||
If the decompression fails due to a zlib error, it falls back
|
||||
to using a decompression object with a larger window size.
|
||||
|
||||
Please note that the output length is limited to avoid memory
|
||||
issues. If you need to process larger content streams, consider
|
||||
adapting ``pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH``. In case you
|
||||
are only dealing with trusted inputs and/or want to disable these
|
||||
limits, set the value to `0`.
|
||||
|
||||
Args:
|
||||
data: The input data to be decompressed.
|
||||
|
||||
Returns:
|
||||
The decompressed data.
|
||||
|
||||
"""
|
||||
try:
|
||||
return _decompress_with_limit(data)
|
||||
except zlib.error:
|
||||
# First quick approach: There are known issues with faulty added bytes to the
|
||||
# tail of the encoded stream from early Adobe Distiller or Pitstop versions
|
||||
# with CR char as the default line separator (assumed by reverse engineering)
|
||||
# that breaks the decoding process in the end.
|
||||
#
|
||||
# Try first to cut off some of the tail byte by byte, but limited to not
|
||||
# iterate through too many loops and kill the performance for large streams,
|
||||
# to then allow the final fallback to run. Added this intermediate attempt,
|
||||
# because starting from the head of the stream byte by byte kills completely
|
||||
# the performance for large streams (e.g., 6 MB) with the tail-byte-issue
|
||||
# and takes ages. This solution is really fast:
|
||||
max_tail_cut_off_bytes: int = 8
|
||||
for i in range(1, min(max_tail_cut_off_bytes + 1, len(data))):
|
||||
try:
|
||||
return _decompress_with_limit(data[:-i])
|
||||
except zlib.error:
|
||||
pass
|
||||
|
||||
# If still failing, then try with increased window size.
|
||||
decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
|
||||
result_str = b""
|
||||
remaining_limit = ZLIB_MAX_OUTPUT_LENGTH
|
||||
data_single_bytes = [data[i : i + 1] for i in range(len(data))]
|
||||
known_errors = set()
|
||||
for index, b in enumerate(data_single_bytes):
|
||||
try:
|
||||
decompressed = decompressor.decompress(b, max_length=remaining_limit)
|
||||
result_str += decompressed
|
||||
remaining_limit -= len(decompressed)
|
||||
if remaining_limit <= 0:
|
||||
raise LimitReachedError(
|
||||
f"Limit reached while decompressing. {len(data_single_bytes) - index} bytes remaining."
|
||||
)
|
||||
except zlib.error as error:
|
||||
error_str = str(error)
|
||||
if error_str in known_errors:
|
||||
continue
|
||||
logger_warning(error_str, __name__)
|
||||
known_errors.add(error_str)
|
||||
return result_str
|
||||
|
||||
|
||||
class FlateDecode:
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decode data which is flate-encoded.
|
||||
|
||||
Args:
|
||||
data: flate-encoded data.
|
||||
decode_parms: a dictionary of values, understanding the
|
||||
"/Predictor":<int> key only
|
||||
|
||||
Returns:
|
||||
The flate-decoded data.
|
||||
|
||||
Raises:
|
||||
PdfReadError:
|
||||
|
||||
"""
|
||||
str_data = decompress(data)
|
||||
predictor = 1
|
||||
|
||||
if decode_parms:
|
||||
try:
|
||||
predictor = decode_parms.get("/Predictor", 1)
|
||||
except (AttributeError, TypeError): # Type Error is NullObject
|
||||
pass # Usually an array with a null object was read
|
||||
# predictor 1 == no predictor
|
||||
if predictor != 1:
|
||||
# /Columns, the number of samples in each row, has a default value of 1;
|
||||
# §7.4.4.3, ISO 32000.
|
||||
DEFAULT_BITS_PER_COMPONENT = 8
|
||||
try:
|
||||
columns = cast(int, decode_parms[LZW.COLUMNS].get_object()) # type: ignore
|
||||
except (TypeError, KeyError):
|
||||
columns = 1
|
||||
try:
|
||||
colors = cast(int, decode_parms[LZW.COLORS].get_object()) # type: ignore
|
||||
except (TypeError, KeyError):
|
||||
colors = 1
|
||||
try:
|
||||
bits_per_component = cast(
|
||||
int,
|
||||
decode_parms[LZW.BITS_PER_COMPONENT].get_object(), # type: ignore
|
||||
)
|
||||
except (TypeError, KeyError):
|
||||
bits_per_component = DEFAULT_BITS_PER_COMPONENT
|
||||
|
||||
# PNG predictor can vary by row and so is the lead byte on each row
|
||||
rowlength = (
|
||||
math.ceil(columns * colors * bits_per_component / 8) + 1
|
||||
) # number of bytes
|
||||
|
||||
# TIFF prediction:
|
||||
if predictor == 2:
|
||||
rowlength -= 1 # remove the predictor byte
|
||||
bpp = rowlength // columns
|
||||
str_data = bytearray(str_data)
|
||||
for i in range(len(str_data)):
|
||||
if i % rowlength >= bpp:
|
||||
str_data[i] = (str_data[i] + str_data[i - bpp]) % 256
|
||||
str_data = bytes(str_data)
|
||||
# PNG prediction:
|
||||
elif 10 <= predictor <= 15:
|
||||
str_data = FlateDecode._decode_png_prediction(
|
||||
str_data, columns, rowlength
|
||||
)
|
||||
else:
|
||||
raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
|
||||
return str_data
|
||||
|
||||
@staticmethod
|
||||
def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
|
||||
# PNG prediction can vary from row to row
|
||||
if (remainder := len(data) % rowlength) != 0:
|
||||
logger_warning("Image data is not rectangular. Adding padding.", __name__)
|
||||
data += b"\x00" * (rowlength - remainder)
|
||||
assert len(data) % rowlength == 0
|
||||
output = []
|
||||
prev_rowdata = (0,) * rowlength
|
||||
bpp = (rowlength - 1) // columns # recomputed locally to not change params
|
||||
for row in range(0, len(data), rowlength):
|
||||
rowdata: list[int] = list(data[row : row + rowlength])
|
||||
filter_byte = rowdata[0]
|
||||
|
||||
if filter_byte == 0:
|
||||
# PNG None Predictor
|
||||
pass
|
||||
elif filter_byte == 1:
|
||||
# PNG Sub Predictor
|
||||
for i in range(bpp + 1, rowlength):
|
||||
rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
|
||||
elif filter_byte == 2:
|
||||
# PNG Up Predictor
|
||||
for i in range(1, rowlength):
|
||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||
elif filter_byte == 3:
|
||||
# PNG Average Predictor
|
||||
for i in range(1, bpp + 1):
|
||||
floor = prev_rowdata[i] // 2
|
||||
rowdata[i] = (rowdata[i] + floor) % 256
|
||||
for i in range(bpp + 1, rowlength):
|
||||
left = rowdata[i - bpp]
|
||||
floor = (left + prev_rowdata[i]) // 2
|
||||
rowdata[i] = (rowdata[i] + floor) % 256
|
||||
elif filter_byte == 4:
|
||||
# PNG Paeth Predictor
|
||||
for i in range(1, bpp + 1):
|
||||
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
|
||||
for i in range(bpp + 1, rowlength):
|
||||
left = rowdata[i - bpp]
|
||||
up = prev_rowdata[i]
|
||||
up_left = prev_rowdata[i - bpp]
|
||||
|
||||
p = left + up - up_left
|
||||
dist_left = abs(p - left)
|
||||
dist_up = abs(p - up)
|
||||
dist_up_left = abs(p - up_left)
|
||||
|
||||
if dist_left <= dist_up and dist_left <= dist_up_left:
|
||||
paeth = left
|
||||
elif dist_up <= dist_up_left:
|
||||
paeth = up
|
||||
else:
|
||||
paeth = up_left
|
||||
|
||||
rowdata[i] = (rowdata[i] + paeth) % 256
|
||||
else:
|
||||
raise PdfReadError(
|
||||
f"Unsupported PNG filter {filter_byte!r}"
|
||||
) # pragma: no cover
|
||||
prev_rowdata = tuple(rowdata)
|
||||
output.extend(rowdata[1:])
|
||||
return bytes(output)
|
||||
|
||||
@staticmethod
|
||||
def encode(data: bytes, level: int = -1) -> bytes:
|
||||
"""
|
||||
Compress the input data using zlib.
|
||||
|
||||
Args:
|
||||
data: The data to be compressed.
|
||||
level: See https://docs.python.org/3/library/zlib.html#zlib.compress
|
||||
|
||||
Returns:
|
||||
The compressed data.
|
||||
|
||||
"""
|
||||
return zlib.compress(data, level)
|
||||
|
||||
|
||||
class ASCIIHexDecode:
|
||||
"""
|
||||
The ASCIIHexDecode filter decodes data that has been encoded in ASCII
|
||||
hexadecimal form into a base-7 ASCII format.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: Union[str, bytes],
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decode an ASCII-Hex encoded data stream.
|
||||
|
||||
Args:
|
||||
data: a str sequence of hexadecimal-encoded values to be
|
||||
converted into a base-7 ASCII string
|
||||
decode_parms: this filter does not use parameters.
|
||||
|
||||
Returns:
|
||||
A string conversion in base-7 ASCII, where each of its values
|
||||
v is such that 0 <= ord(v) <= 127.
|
||||
|
||||
Raises:
|
||||
PdfStreamError:
|
||||
|
||||
"""
|
||||
if isinstance(data, str):
|
||||
data = data.encode()
|
||||
retval = b""
|
||||
hex_pair = b""
|
||||
index = 0
|
||||
while True:
|
||||
if index >= len(data):
|
||||
logger_warning(
|
||||
"missing EOD in ASCIIHexDecode, check if output is OK", __name__
|
||||
)
|
||||
break # Reached end of string without an EOD
|
||||
char = data[index : index + 1]
|
||||
if char == b">":
|
||||
break
|
||||
if char.isspace():
|
||||
index += 1
|
||||
continue
|
||||
hex_pair += char
|
||||
if len(hex_pair) == 2:
|
||||
retval += bytes((int(hex_pair, base=16),))
|
||||
hex_pair = b""
|
||||
index += 1
|
||||
# If the filter encounters the EOD marker after reading
|
||||
# an odd number of hexadecimal digits,
|
||||
# it shall behave as if a 0 (zero) followed the last digit.
|
||||
# For every even number of hexadecimal digits, hex_pair is reset to b"".
|
||||
if hex_pair != b"":
|
||||
hex_pair += b"0"
|
||||
retval += bytes((int(hex_pair, base=16),))
|
||||
return retval
|
||||
|
||||
|
||||
class RunLengthDecode:
|
||||
"""
|
||||
The RunLengthDecode filter decodes data that has been encoded in a
|
||||
simple byte-oriented format based on run length.
|
||||
The encoded data is a sequence of runs, where each run consists of
|
||||
a length byte followed by 1 to 128 bytes of data. If the length byte is
|
||||
in the range 0 to 127,
|
||||
the following length + 1 (1 to 128) bytes are copied literally during
|
||||
decompression.
|
||||
If length is in the range 129 to 255, the following single byte is to be
|
||||
copied 257 − length (2 to 128) times during decompression. A length value
|
||||
of 128 denotes EOD.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decode a run length encoded data stream.
|
||||
|
||||
Args:
|
||||
data: a bytes sequence of length/data
|
||||
decode_parms: this filter does not use parameters.
|
||||
|
||||
Returns:
|
||||
A bytes decompressed sequence.
|
||||
|
||||
Raises:
|
||||
PdfStreamError:
|
||||
|
||||
"""
|
||||
lst = []
|
||||
index = 0
|
||||
while True:
|
||||
if index >= len(data):
|
||||
logger_warning(
|
||||
"missing EOD in RunLengthDecode, check if output is OK", __name__
|
||||
)
|
||||
break # Reached end of string without an EOD
|
||||
length = data[index]
|
||||
index += 1
|
||||
if length == 128:
|
||||
data_length = len(data)
|
||||
if index < data_length:
|
||||
# We should first check, if we have an inner stream from a multi-encoded
|
||||
# stream with a faulty trailing newline that we can decode properly.
|
||||
# We will just ignore the last byte and raise a warning ...
|
||||
if (index == data_length - 1) and (data[index : index + 1] == b"\n"):
|
||||
logger_warning(
|
||||
"Found trailing newline in stream data, check if output is OK", __name__
|
||||
)
|
||||
break
|
||||
# Raising an exception here breaks all image extraction for this file, which might
|
||||
# not be desirable. For this reason, indicate that the output is most likely wrong,
|
||||
# as processing stopped after the first EOD marker. See issue #3517.
|
||||
logger_warning(
|
||||
"Early EOD in RunLengthDecode, check if output is OK", __name__
|
||||
)
|
||||
break
|
||||
if length < 128:
|
||||
length += 1
|
||||
lst.append(data[index : (index + length)])
|
||||
index += length
|
||||
else: # >128
|
||||
length = 257 - length
|
||||
lst.append(bytes((data[index],)) * length)
|
||||
index += 1
|
||||
return b"".join(lst)
|
||||
|
||||
|
||||
class LZWDecode:
|
||||
class Decoder:
|
||||
STOP = 257
|
||||
CLEARDICT = 256
|
||||
|
||||
def __init__(self, data: bytes) -> None:
|
||||
self.data = data
|
||||
|
||||
def decode(self) -> bytes:
|
||||
return _LzwCodec(max_output_length=LZW_MAX_OUTPUT_LENGTH).decode(self.data)
|
||||
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decode an LZW encoded data stream.
|
||||
|
||||
Args:
|
||||
data: ``bytes`` or ``str`` text to decode.
|
||||
decode_parms: a dictionary of parameter values.
|
||||
|
||||
Returns:
|
||||
decoded data.
|
||||
|
||||
"""
|
||||
# decode_parms is unused here
|
||||
return LZWDecode.Decoder(data).decode()
|
||||
|
||||
|
||||
class ASCII85Decode:
|
||||
"""Decodes string ASCII85-encoded data into a byte format."""
|
||||
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: Union[str, bytes],
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decode an Ascii85 encoded data stream.
|
||||
|
||||
Args:
|
||||
data: ``bytes`` or ``str`` text to decode.
|
||||
decode_parms: this filter does not use parameters.
|
||||
|
||||
Returns:
|
||||
decoded data.
|
||||
|
||||
"""
|
||||
if isinstance(data, str):
|
||||
data = data.encode()
|
||||
data = data.strip(WHITESPACES_AS_BYTES)
|
||||
if len(data) > 2 and data.endswith(b">"):
|
||||
data = data[:-1].rstrip(WHITESPACES_AS_BYTES) + data[-1:]
|
||||
try:
|
||||
return a85decode(data, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
|
||||
except ValueError as error:
|
||||
if error.args[0] == "Ascii85 encoded byte sequences must end with b'~>'":
|
||||
logger_warning("Ignoring missing Ascii85 end marker.", __name__)
|
||||
return a85decode(data, adobe=False, ignorechars=WHITESPACES_AS_BYTES)
|
||||
raise
|
||||
|
||||
|
||||
class DCTDecode:
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decompresses data encoded using a DCT (discrete cosine transform)
|
||||
technique based on the JPEG standard (IS0/IEC 10918),
|
||||
reproducing image sample data that approximates the original data.
|
||||
|
||||
Args:
|
||||
data: text to decode.
|
||||
decode_parms: this filter does not use parameters.
|
||||
|
||||
Returns:
|
||||
decoded data.
|
||||
|
||||
"""
|
||||
return data
|
||||
|
||||
|
||||
class JPXDecode:
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
"""
|
||||
Decompresses data encoded using the wavelet-based JPEG 2000 standard,
|
||||
reproducing the original image data.
|
||||
|
||||
Args:
|
||||
data: text to decode.
|
||||
decode_parms: this filter does not use parameters.
|
||||
|
||||
Returns:
|
||||
decoded data.
|
||||
|
||||
"""
|
||||
return data
|
||||
|
||||
|
||||
@dataclass
|
||||
class CCITTParameters:
|
||||
"""§7.4.6, optional parameters for the CCITTFaxDecode filter."""
|
||||
|
||||
K: int = 0
|
||||
columns: int = 1728
|
||||
rows: int = 0
|
||||
EndOfLine: Union[bool, None] = False
|
||||
EncodedByteAlign: Union[bool, None] = False
|
||||
EndOfBlock: Union[bool, None] = True
|
||||
BlackIs1: bool = False
|
||||
DamagedRowsBeforeError: Union[int, None] = 0
|
||||
|
||||
@property
|
||||
def group(self) -> int:
|
||||
if self.K < 0:
|
||||
# Pure two-dimensional encoding (Group 4)
|
||||
CCITTgroup = 4
|
||||
else:
|
||||
# K == 0: Pure one-dimensional encoding (Group 3, 1-D)
|
||||
# K > 0: Mixed one- and two-dimensional encoding (Group 3, 2-D)
|
||||
CCITTgroup = 3
|
||||
return CCITTgroup
|
||||
|
||||
|
||||
def __create_old_class_instance(
|
||||
K: int = 0,
|
||||
columns: int = 0,
|
||||
rows: int = 0
|
||||
) -> CCITTParameters:
|
||||
deprecation_with_replacement("CCITParameters", "CCITTParameters", "6.0.0")
|
||||
return CCITTParameters(K, columns, rows)
|
||||
|
||||
|
||||
# Create an alias for the old class name
|
||||
CCITParameters = __create_old_class_instance
|
||||
|
||||
|
||||
class CCITTFaxDecode:
|
||||
"""
|
||||
§7.4.6, CCITTFaxDecode filter (ISO 32000).
|
||||
|
||||
Either Group 3 or Group 4 CCITT facsimile (fax) encoding.
|
||||
CCITT encoding is bit-oriented, not byte-oriented.
|
||||
|
||||
§7.4.6, optional parameters for the CCITTFaxDecode filter.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def _get_parameters(
|
||||
parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject],
|
||||
rows: Union[int, IndirectObject],
|
||||
) -> CCITTParameters:
|
||||
ccitt_parameters = CCITTParameters(rows=int(rows))
|
||||
if parameters:
|
||||
parameters_unwrapped = cast(
|
||||
Union[ArrayObject, DictionaryObject], parameters.get_object()
|
||||
)
|
||||
if isinstance(parameters_unwrapped, ArrayObject):
|
||||
for decode_parm in parameters_unwrapped:
|
||||
if CCITT.K in decode_parm:
|
||||
ccitt_parameters.K = decode_parm[CCITT.K].get_object()
|
||||
if CCITT.COLUMNS in decode_parm:
|
||||
ccitt_parameters.columns = decode_parm[CCITT.COLUMNS].get_object()
|
||||
if CCITT.BLACK_IS_1 in decode_parm:
|
||||
ccitt_parameters.BlackIs1 = decode_parm[CCITT.BLACK_IS_1].get_object().value
|
||||
else:
|
||||
if CCITT.K in parameters_unwrapped:
|
||||
ccitt_parameters.K = parameters_unwrapped[CCITT.K].get_object() # type: ignore
|
||||
if CCITT.COLUMNS in parameters_unwrapped:
|
||||
ccitt_parameters.columns = parameters_unwrapped[CCITT.COLUMNS].get_object() # type: ignore
|
||||
if CCITT.BLACK_IS_1 in parameters_unwrapped:
|
||||
ccitt_parameters.BlackIs1 = parameters_unwrapped[CCITT.BLACK_IS_1].get_object().value # type: ignore
|
||||
return ccitt_parameters
|
||||
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
height: int = 0,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
params = CCITTFaxDecode._get_parameters(decode_parms, height)
|
||||
|
||||
img_size = len(data)
|
||||
tiff_header_struct = "<2shlh" + "hhll" * 8 + "h"
|
||||
tiff_header = struct.pack(
|
||||
tiff_header_struct,
|
||||
b"II", # Byte order indication: Little endian
|
||||
42, # Version number (always 42)
|
||||
8, # Offset to the first image file directory (IFD)
|
||||
8, # Number of tags in IFD
|
||||
256, # ImageWidth, LONG, 1, width
|
||||
4,
|
||||
1,
|
||||
params.columns,
|
||||
257, # ImageLength, LONG, 1, length
|
||||
4,
|
||||
1,
|
||||
params.rows,
|
||||
258, # BitsPerSample, SHORT, 1, 1
|
||||
3,
|
||||
1,
|
||||
1,
|
||||
259, # Compression, SHORT, 1, compression Type
|
||||
3,
|
||||
1,
|
||||
params.group,
|
||||
262, # Thresholding, SHORT, 1, 0 = BlackIs1
|
||||
3,
|
||||
1,
|
||||
int(params.BlackIs1),
|
||||
273, # StripOffsets, LONG, 1, length of header
|
||||
4,
|
||||
1,
|
||||
struct.calcsize(
|
||||
tiff_header_struct
|
||||
),
|
||||
278, # RowsPerStrip, LONG, 1, length
|
||||
4,
|
||||
1,
|
||||
params.rows,
|
||||
279, # StripByteCounts, LONG, 1, size of image
|
||||
4,
|
||||
1,
|
||||
img_size,
|
||||
0, # last IFD
|
||||
)
|
||||
|
||||
return tiff_header + data
|
||||
|
||||
|
||||
JBIG2DEC_BINARY = shutil.which("jbig2dec")
|
||||
|
||||
|
||||
class JBIG2Decode:
|
||||
@staticmethod
|
||||
def decode(
|
||||
data: bytes,
|
||||
decode_parms: Optional[DictionaryObject] = None,
|
||||
**kwargs: Any,
|
||||
) -> bytes:
|
||||
if JBIG2DEC_BINARY is None:
|
||||
raise DependencyError("jbig2dec binary is not available.")
|
||||
|
||||
with TemporaryDirectory() as tempdir:
|
||||
directory = Path(tempdir)
|
||||
paths: list[Path] = []
|
||||
|
||||
if decode_parms and "/JBIG2Globals" in decode_parms:
|
||||
jbig2_globals = decode_parms["/JBIG2Globals"]
|
||||
if not is_null_or_none(jbig2_globals) and not is_null_or_none(pointer := jbig2_globals.get_object()):
|
||||
assert pointer is not None, "mypy"
|
||||
if isinstance(pointer, StreamObject):
|
||||
path = directory.joinpath("globals.jbig2")
|
||||
path.write_bytes(pointer.get_data())
|
||||
paths.append(path)
|
||||
|
||||
path = directory.joinpath("image.jbig2")
|
||||
path.write_bytes(data)
|
||||
paths.append(path)
|
||||
|
||||
environment = os.environ.copy()
|
||||
environment["LC_ALL"] = "C"
|
||||
result = subprocess.run( # noqa: S603
|
||||
[
|
||||
JBIG2DEC_BINARY,
|
||||
"--embedded",
|
||||
"--format", "png",
|
||||
"--output", "-",
|
||||
"-M", str(JBIG2_MAX_OUTPUT_LENGTH),
|
||||
*paths
|
||||
],
|
||||
capture_output=True,
|
||||
env=environment,
|
||||
)
|
||||
if b"unrecognized option '--embedded'" in result.stderr or b"unrecognized option '-M'" in result.stderr:
|
||||
raise DependencyError("jbig2dec>=0.19 is required.")
|
||||
if b"FATAL ERROR failed to allocate image data buffer" in result.stderr:
|
||||
raise LimitReachedError(
|
||||
f"Memory limit reached while reading JBIG2 data:\n{result.stderr.decode('utf-8')}"
|
||||
)
|
||||
if result.stderr:
|
||||
for line in result.stderr.decode("utf-8").splitlines():
|
||||
logger_warning(line, __name__)
|
||||
if result.returncode != 0:
|
||||
raise PdfStreamError(f"Unable to decode JBIG2 data. Exit code: {result.returncode}")
|
||||
return result.stdout
|
||||
|
||||
@staticmethod
|
||||
def _is_binary_compatible() -> bool:
|
||||
if not JBIG2DEC_BINARY: # pragma: no cover
|
||||
return False
|
||||
result = subprocess.run( # noqa: S603
|
||||
[JBIG2DEC_BINARY, "--version"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
version = result.stdout.split(" ", maxsplit=1)[1]
|
||||
|
||||
from ._utils import Version # noqa: PLC0415
|
||||
return Version(version) >= Version("0.19")
|
||||
|
||||
|
||||
def decode_stream_data(stream: Any) -> bytes:
|
||||
"""
|
||||
Decode the stream data based on the specified filters.
|
||||
|
||||
This function decodes the stream data using the filters provided in the
|
||||
stream.
|
||||
|
||||
Args:
|
||||
stream: The input stream object containing the data and filters.
|
||||
|
||||
Returns:
|
||||
The decoded stream data.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If an unsupported filter type is encountered.
|
||||
|
||||
"""
|
||||
filters = stream.get(SA.FILTER, ())
|
||||
if isinstance(filters, IndirectObject):
|
||||
filters = cast(ArrayObject, filters.get_object())
|
||||
if not isinstance(filters, ArrayObject):
|
||||
# We have a single filter instance
|
||||
filters = (filters,)
|
||||
decode_parms = stream.get(SA.DECODE_PARMS, ({},) * len(filters))
|
||||
if not isinstance(decode_parms, (list, tuple)):
|
||||
decode_parms = (decode_parms,)
|
||||
data: bytes = stream._data
|
||||
# If there is no data to decode, we should not try to decode it.
|
||||
if not data:
|
||||
return data
|
||||
for filter_name, params in zip(filters, decode_parms):
|
||||
if isinstance(params, NullObject):
|
||||
params = {}
|
||||
if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
|
||||
data = ASCIIHexDecode.decode(data)
|
||||
elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
|
||||
data = ASCII85Decode.decode(data)
|
||||
elif filter_name in (FT.LZW_DECODE, FTA.LZW):
|
||||
data = LZWDecode.decode(data, params)
|
||||
elif filter_name in (FT.FLATE_DECODE, FTA.FL):
|
||||
data = FlateDecode.decode(data, params)
|
||||
elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
|
||||
data = RunLengthDecode.decode(data)
|
||||
elif filter_name == FT.CCITT_FAX_DECODE:
|
||||
height = stream.get(IA.HEIGHT, ())
|
||||
data = CCITTFaxDecode.decode(data, params, height)
|
||||
elif filter_name == FT.DCT_DECODE:
|
||||
data = DCTDecode.decode(data)
|
||||
elif filter_name == FT.JPX_DECODE:
|
||||
data = JPXDecode.decode(data)
|
||||
elif filter_name == FT.JBIG2_DECODE:
|
||||
data = JBIG2Decode.decode(data, params)
|
||||
elif filter_name == "/Crypt":
|
||||
if "/Name" in params or "/Type" in params:
|
||||
raise NotImplementedError(
|
||||
"/Crypt filter with /Name or /Type not supported yet"
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError(f"Unsupported filter {filter_name}")
|
||||
return data
|
||||
115
venv/lib/python3.12/site-packages/pypdf/generic/__init__.py
Normal file
115
venv/lib/python3.12/site-packages/pypdf/generic/__init__.py
Normal file
@@ -0,0 +1,115 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
from ..constants import OutlineFontFlag
|
||||
from ._base import (
|
||||
BooleanObject,
|
||||
ByteStringObject,
|
||||
FloatObject,
|
||||
IndirectObject,
|
||||
NameObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
PdfObject,
|
||||
TextStringObject,
|
||||
encode_pdfdocencoding,
|
||||
is_null_or_none,
|
||||
)
|
||||
from ._data_structures import (
|
||||
ArrayObject,
|
||||
ContentStream,
|
||||
DecodedStreamObject,
|
||||
Destination,
|
||||
DictionaryObject,
|
||||
EncodedStreamObject,
|
||||
Field,
|
||||
StreamObject,
|
||||
TreeObject,
|
||||
read_object,
|
||||
)
|
||||
from ._files import EmbeddedFile
|
||||
from ._fit import Fit
|
||||
from ._link import DirectReferenceLink, NamedReferenceLink, ReferenceLink, extract_links
|
||||
from ._outline import OutlineItem
|
||||
from ._rectangle import RectangleObject
|
||||
from ._utils import (
|
||||
create_string_object,
|
||||
decode_pdfdocencoding,
|
||||
hex_to_rgb,
|
||||
read_hex_string_from_stream,
|
||||
read_string_from_stream,
|
||||
)
|
||||
from ._viewerpref import ViewerPreferences
|
||||
|
||||
PAGE_FIT = Fit.fit()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"PAGE_FIT",
|
||||
"ArrayObject",
|
||||
"BooleanObject",
|
||||
"ByteStringObject",
|
||||
"ContentStream",
|
||||
"DecodedStreamObject",
|
||||
"Destination",
|
||||
"DictionaryObject",
|
||||
"DirectReferenceLink",
|
||||
"EmbeddedFile",
|
||||
"EncodedStreamObject",
|
||||
"Field",
|
||||
"Fit",
|
||||
"FloatObject",
|
||||
"IndirectObject",
|
||||
"NameObject",
|
||||
"NamedReferenceLink",
|
||||
"NullObject",
|
||||
"NumberObject",
|
||||
"OutlineFontFlag",
|
||||
"OutlineItem",
|
||||
"PdfObject",
|
||||
"RectangleObject",
|
||||
"ReferenceLink",
|
||||
"StreamObject",
|
||||
"TextStringObject",
|
||||
"TreeObject",
|
||||
"ViewerPreferences",
|
||||
# Utility functions
|
||||
"create_string_object",
|
||||
"decode_pdfdocencoding",
|
||||
"encode_pdfdocencoding",
|
||||
"extract_links",
|
||||
"hex_to_rgb",
|
||||
"is_null_or_none",
|
||||
"read_hex_string_from_stream",
|
||||
# Data structures core functions
|
||||
"read_object",
|
||||
"read_string_from_stream",
|
||||
]
|
||||
@@ -0,0 +1,547 @@
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import IntEnum
|
||||
from typing import Any, Optional, Union, cast
|
||||
|
||||
from .._codecs import fill_from_encoding
|
||||
from .._codecs.core_fontmetrics import CORE_FONT_METRICS
|
||||
from .._font import Font
|
||||
from .._utils import logger_warning
|
||||
from ..constants import AnnotationDictionaryAttributes, BorderStyles, FieldDictionaryAttributes
|
||||
from ..generic import (
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NameObject,
|
||||
NumberObject,
|
||||
RectangleObject,
|
||||
)
|
||||
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none
|
||||
|
||||
DEFAULT_FONT_SIZE_IN_MULTILINE = 12
|
||||
|
||||
|
||||
@dataclass
|
||||
class BaseStreamConfig:
|
||||
"""A container representing the basic layout of an appearance stream."""
|
||||
rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0)
|
||||
border_width: int = 1 # The width of the border in points
|
||||
border_style: str = BorderStyles.SOLID
|
||||
|
||||
|
||||
class BaseStreamAppearance(DecodedStreamObject):
|
||||
"""A class representing the very base of an appearance stream, that is, a rectangle and a border."""
|
||||
|
||||
def __init__(self, layout: Optional[BaseStreamConfig] = None) -> None:
|
||||
"""
|
||||
Takes the appearance stream layout as an argument.
|
||||
|
||||
Args:
|
||||
layout: The basic layout parameters.
|
||||
"""
|
||||
super().__init__()
|
||||
self._layout = layout or BaseStreamConfig()
|
||||
self[NameObject("/Type")] = NameObject("/XObject")
|
||||
self[NameObject("/Subtype")] = NameObject("/Form")
|
||||
self[NameObject("/BBox")] = RectangleObject(self._layout.rectangle)
|
||||
|
||||
|
||||
class TextAlignment(IntEnum):
|
||||
"""Defines the alignment options for text within a form field's appearance stream."""
|
||||
|
||||
LEFT = 0
|
||||
CENTER = 1
|
||||
RIGHT = 2
|
||||
|
||||
|
||||
class TextStreamAppearance(BaseStreamAppearance):
|
||||
"""
|
||||
A class representing the appearance stream for a text-based form field.
|
||||
|
||||
This class generates the content stream (the `ap_stream_data`) that dictates
|
||||
how text is rendered within a form field's bounding box. It handles properties
|
||||
like font, font size, color, multiline text, and text selection highlighting.
|
||||
"""
|
||||
|
||||
def _scale_text(
|
||||
self,
|
||||
font: Font,
|
||||
font_size: float,
|
||||
leading_factor: float,
|
||||
field_width: float,
|
||||
field_height: float,
|
||||
text: str,
|
||||
min_font_size: float,
|
||||
font_size_step: float = 0.2
|
||||
) -> tuple[list[tuple[float, str]], float]:
|
||||
"""
|
||||
Takes a piece of text and scales it to field_width or field_height, given font_name
|
||||
and font_size. Wraps text where necessary.
|
||||
|
||||
Args:
|
||||
font: The font to be used.
|
||||
font_size: The font size in points.
|
||||
leading_factor: The line distance.
|
||||
field_width: The width of the field in which to fit the text.
|
||||
field_height: The height of the field in which to fit the text.
|
||||
text: The text to fit with the field.
|
||||
min_font_size: The minimum font size at which to scale the text.
|
||||
font_size_step: The amount by which to decrement font size per step while scaling.
|
||||
|
||||
Returns:
|
||||
The text in the form of list of tuples, each tuple containing the length of a line
|
||||
and its contents, and the font_size for these lines and lengths.
|
||||
"""
|
||||
orig_text = text
|
||||
paragraphs = text.replace("\n", "\r").split("\r")
|
||||
wrapped_lines = []
|
||||
current_line_words: list[str] = []
|
||||
current_line_width: float = 0
|
||||
space_width = font.space_width * font_size / 1000
|
||||
for paragraph in paragraphs:
|
||||
if not paragraph.strip():
|
||||
wrapped_lines.append((0.0, ""))
|
||||
continue
|
||||
words = paragraph.split(" ")
|
||||
for i, word in enumerate(words):
|
||||
word_width = font.text_width(word) * font_size / 1000
|
||||
test_width = current_line_width + word_width + (space_width if i else 0)
|
||||
if test_width > field_width and current_line_words:
|
||||
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||
current_line_words = [word]
|
||||
current_line_width = word_width
|
||||
elif not current_line_words and word_width > field_width:
|
||||
wrapped_lines.append((word_width, word))
|
||||
current_line_words = []
|
||||
current_line_width = 0
|
||||
else:
|
||||
if current_line_words:
|
||||
current_line_width += space_width
|
||||
current_line_words.append(word)
|
||||
current_line_width += word_width
|
||||
if current_line_words:
|
||||
wrapped_lines.append((current_line_width, " ".join(current_line_words)))
|
||||
current_line_words = []
|
||||
current_line_width = 0
|
||||
# Estimate total height.
|
||||
estimated_total_height = font_size + (len(wrapped_lines) - 1) * leading_factor * font_size
|
||||
if estimated_total_height > field_height:
|
||||
# Text overflows height; Retry with smaller font size.
|
||||
new_font_size = font_size - font_size_step
|
||||
if new_font_size >= min_font_size:
|
||||
return self._scale_text(
|
||||
font,
|
||||
new_font_size,
|
||||
leading_factor,
|
||||
field_width,
|
||||
field_height,
|
||||
orig_text,
|
||||
min_font_size,
|
||||
font_size_step
|
||||
)
|
||||
return wrapped_lines, round(font_size, 1)
|
||||
|
||||
def _generate_appearance_stream_data(
|
||||
self,
|
||||
text: str,
|
||||
selection: Union[list[str], None],
|
||||
font: Font,
|
||||
font_glyph_byte_map: Optional[dict[str, bytes]] = None,
|
||||
font_name: str = "/Helv",
|
||||
font_size: float = 0.0,
|
||||
font_color: str = "0 g",
|
||||
is_multiline: bool = False,
|
||||
alignment: TextAlignment = TextAlignment.LEFT,
|
||||
is_comb: bool = False,
|
||||
max_length: Optional[int] = None
|
||||
) -> bytes:
|
||||
"""
|
||||
Generates the raw bytes of the PDF appearance stream for a text field.
|
||||
|
||||
This private method assembles the PDF content stream operators to draw
|
||||
the provided text within the specified rectangle. It handles text positioning,
|
||||
font application, color, and special formatting like selected text.
|
||||
|
||||
Args:
|
||||
text: The text to be rendered in the form field.
|
||||
selection: An optional list of strings that should be highlighted as selected.
|
||||
font: The font to use.
|
||||
font_glyph_byte_map: An optional dictionary mapping characters to their
|
||||
byte representation for glyph encoding.
|
||||
font_name: The name of the font resource to use (e.g., "/Helv").
|
||||
font_size: The font size. If 0, it is automatically calculated
|
||||
based on whether the field is multiline or not.
|
||||
font_color: The color to apply to the font, represented as a PDF
|
||||
graphics state string (e.g., "0 g" for black).
|
||||
is_multiline: A boolean indicating if the text field is multiline.
|
||||
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
|
||||
is_comb: Boolean that designates fixed-length fields, where every character
|
||||
fills one "cell", such as in a postcode.
|
||||
max_length: Used if is_comb is set. The maximum number of characters for a fixed-
|
||||
length field.
|
||||
|
||||
Returns:
|
||||
A byte string containing the PDF content stream data.
|
||||
|
||||
"""
|
||||
rectangle = self._layout.rectangle
|
||||
font_glyph_byte_map = font_glyph_byte_map or {}
|
||||
if isinstance(rectangle, tuple):
|
||||
rectangle = RectangleObject(rectangle)
|
||||
leading_factor = (font.font_descriptor.bbox[3] - font.font_descriptor.bbox[1]) / 1000.0
|
||||
|
||||
# Set margins based on border width and style, but never less than 1 point
|
||||
factor = 2 if self._layout.border_style in {"/B", "/I"} else 1
|
||||
margin = max(self._layout.border_width * factor, 1)
|
||||
field_height = rectangle.height - 2 * margin
|
||||
field_width = rectangle.width - 4 * margin
|
||||
|
||||
# If font_size is 0, apply the logic for multiline or large-as-possible font
|
||||
if font_size == 0:
|
||||
min_font_size = 4.0 # The mininum font size
|
||||
if selection: # Don't wrap text when dealing with a /Ch field, in order to prevent problems
|
||||
is_multiline = False # with matching "selection" with "line" later on.
|
||||
if is_multiline:
|
||||
font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
|
||||
lines, font_size = self._scale_text(
|
||||
font,
|
||||
font_size,
|
||||
leading_factor,
|
||||
field_width,
|
||||
field_height,
|
||||
text,
|
||||
min_font_size
|
||||
)
|
||||
else:
|
||||
max_vertical_size = field_height / leading_factor
|
||||
text_width_unscaled = font.text_width(text) / 1000
|
||||
max_horizontal_size = field_width / (text_width_unscaled or 1)
|
||||
font_size = round(max(min(max_vertical_size, max_horizontal_size), min_font_size), 1)
|
||||
lines = [(text_width_unscaled * font_size, text)]
|
||||
elif is_comb:
|
||||
if max_length and len(text) > max_length:
|
||||
logger_warning (
|
||||
f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
|
||||
__name__
|
||||
)
|
||||
# We act as if each character is one line, because we draw it separately later on
|
||||
lines = [(
|
||||
font.text_width(char) * font_size / 1000,
|
||||
char
|
||||
) for index, char in enumerate(text) if index < (max_length or len(text))]
|
||||
else:
|
||||
lines = [(
|
||||
font.text_width(line) * font_size / 1000,
|
||||
line
|
||||
) for line in text.replace("\n", "\r").split("\r")]
|
||||
|
||||
# Set the vertical offset
|
||||
if is_multiline:
|
||||
y_offset = rectangle.height + margin - font.font_descriptor.bbox[3] * font_size / 1000.0
|
||||
else:
|
||||
y_offset = margin + ((field_height - font.font_descriptor.ascent * font_size / 1000) / 2)
|
||||
default_appearance = f"{font_name} {font_size} Tf {font_color}"
|
||||
|
||||
ap_stream = (
|
||||
f"q\n/Tx BMC \nq\n{2 * margin} {margin} {field_width} {field_height} "
|
||||
f"re\nW\nBT\n{default_appearance}\n"
|
||||
).encode()
|
||||
current_x_pos: float = 0 # Initial virtual position within the text object.
|
||||
|
||||
for line_number, (line_width, line) in enumerate(lines):
|
||||
if selection and line in selection:
|
||||
# Might be improved, but cannot find how to get fill working => replaced with lined box
|
||||
ap_stream += (
|
||||
f"1 {y_offset - (line_number * font_size * leading_factor) - 1} "
|
||||
f"{rectangle.width - 2} {font_size + 2} re\n"
|
||||
f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
|
||||
).encode()
|
||||
|
||||
# Calculate the desired absolute starting X for the current line
|
||||
desired_abs_x_start: float = 0
|
||||
if is_comb and max_length:
|
||||
# Calculate the width of a cell for one character
|
||||
cell_width = rectangle.width / max_length
|
||||
# Space from the left edge of the cell to the character's baseline start
|
||||
# line_width here is the *actual* character width in points for the single character 'line'
|
||||
centering_offset_in_cell = (cell_width - line_width) / 2
|
||||
# Absolute start X = (Cell Index, i.e., line_number * Cell Width) + Centering Offset
|
||||
desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
|
||||
elif alignment == TextAlignment.RIGHT:
|
||||
desired_abs_x_start = rectangle.width - margin * 2 - line_width
|
||||
elif alignment == TextAlignment.CENTER:
|
||||
desired_abs_x_start = (rectangle.width - line_width) / 2
|
||||
else: # Left aligned; default
|
||||
desired_abs_x_start = margin * 2
|
||||
# Calculate x_rel_offset: how much to move from the current_x_pos
|
||||
# to reach the desired_abs_x_start.
|
||||
x_rel_offset = desired_abs_x_start - current_x_pos
|
||||
|
||||
# Y-offset:
|
||||
y_rel_offset: float = 0
|
||||
if line_number == 0:
|
||||
y_rel_offset = y_offset # Initial vertical position
|
||||
elif is_comb:
|
||||
y_rel_offset = 0.0 # DO NOT move vertically for subsequent characters
|
||||
else:
|
||||
y_rel_offset = - font_size * leading_factor # Move down by line height
|
||||
|
||||
# Td is a relative translation (Tx and Ty).
|
||||
# It updates the current text position.
|
||||
ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode()
|
||||
# Update current_x_pos based on the Td operation for the next iteration.
|
||||
# This is the X position where the *current line* will start.
|
||||
current_x_pos = desired_abs_x_start
|
||||
|
||||
encoded_line: list[bytes] = [
|
||||
font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
|
||||
]
|
||||
if any(len(c) >= 2 for c in encoded_line):
|
||||
ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n"
|
||||
else:
|
||||
ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n"
|
||||
ap_stream += b"ET\nQ\nEMC\nQ\n"
|
||||
return ap_stream
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
layout: Optional[BaseStreamConfig] = None,
|
||||
text: str = "",
|
||||
selection: Optional[list[str]] = None,
|
||||
font_resource: Optional[DictionaryObject] = None,
|
||||
font_name: str = "/Helv",
|
||||
font_size: float = 0.0,
|
||||
font_color: str = "0 g",
|
||||
is_multiline: bool = False,
|
||||
alignment: TextAlignment = TextAlignment.LEFT,
|
||||
is_comb: bool = False,
|
||||
max_length: Optional[int] = None
|
||||
) -> None:
|
||||
"""
|
||||
Initializes a TextStreamAppearance object.
|
||||
|
||||
This constructor creates a new PDF stream object configured as an XObject
|
||||
of subtype Form. It uses the `_appearance_stream_data` method to generate
|
||||
the content for the stream.
|
||||
|
||||
Args:
|
||||
layout: The basic layout parameters.
|
||||
text: The text to be rendered in the form field.
|
||||
selection: An optional list of strings that should be highlighted as selected.
|
||||
font_resource: An optional variable that represents a PDF font dictionary.
|
||||
font_name: The name of the font resource, e.g., "/Helv".
|
||||
font_size: The font size. If 0, it's auto-calculated.
|
||||
font_color: The font color string.
|
||||
is_multiline: A boolean indicating if the text field is multiline.
|
||||
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
|
||||
is_comb: Boolean that designates fixed-length fields, where every character
|
||||
fills one "cell", such as in a postcode.
|
||||
max_length: Used if is_comb is set. The maximum number of characters for a fixed-
|
||||
length field.
|
||||
|
||||
"""
|
||||
super().__init__(layout)
|
||||
|
||||
# If a font resource was added, get the font character map
|
||||
if font_resource:
|
||||
font_resource = cast(DictionaryObject, font_resource.get_object())
|
||||
font = Font.from_font_resource(font_resource)
|
||||
else:
|
||||
logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
|
||||
font_name = "/Helv"
|
||||
font_resource = DictionaryObject({
|
||||
NameObject("/Subtype"): NameObject("/Type1"),
|
||||
NameObject("/Name"): NameObject("/Helv"),
|
||||
NameObject("/Type"): NameObject("/Font"),
|
||||
NameObject("/BaseFont"): NameObject("/Helvetica"),
|
||||
NameObject("/Encoding"): NameObject("/WinAnsiEncoding")
|
||||
})
|
||||
font_descriptor = CORE_FONT_METRICS["Helvetica"]
|
||||
font_descriptor.character_widths["default"] = 2 * font_descriptor.character_widths[" "]
|
||||
font = Font(
|
||||
name="Helvetica",
|
||||
character_map={},
|
||||
encoding=dict(zip(range(256), fill_from_encoding("cp1252"))), # WinAnsiEncoding
|
||||
sub_type="Type1",
|
||||
font_descriptor = font_descriptor,
|
||||
character_widths = font_descriptor.character_widths
|
||||
)
|
||||
|
||||
font_glyph_byte_map: dict[str, bytes]
|
||||
if isinstance(font.encoding, str):
|
||||
font_glyph_byte_map = {
|
||||
v: k.encode(font.encoding) for k, v in font.character_map.items()
|
||||
}
|
||||
else:
|
||||
font_glyph_byte_map = {v: bytes((k,)) for k, v in font.encoding.items()}
|
||||
font_encoding_rev = {v: bytes((k,)) for k, v in font.encoding.items()}
|
||||
for key, value in font.character_map.items():
|
||||
font_glyph_byte_map[value] = font_encoding_rev.get(key, key)
|
||||
|
||||
ap_stream_data = self._generate_appearance_stream_data(
|
||||
text,
|
||||
selection,
|
||||
font,
|
||||
font_glyph_byte_map,
|
||||
font_name=font_name,
|
||||
font_size=font_size,
|
||||
font_color=font_color,
|
||||
is_multiline=is_multiline,
|
||||
alignment=alignment,
|
||||
is_comb=is_comb,
|
||||
max_length=max_length
|
||||
)
|
||||
|
||||
self.set_data(ByteStringObject(ap_stream_data))
|
||||
self[NameObject("/Length")] = NumberObject(len(ap_stream_data))
|
||||
# Update Resources with font information
|
||||
self[NameObject("/Resources")] = DictionaryObject({
|
||||
NameObject("/Font"): DictionaryObject({
|
||||
NameObject(font_name): getattr(font_resource, "indirect_reference", font_resource)
|
||||
})
|
||||
})
|
||||
|
||||
@classmethod
|
||||
def from_text_annotation(
|
||||
cls,
|
||||
acro_form: DictionaryObject, # _root_object[CatalogDictionary.ACRO_FORM])
|
||||
field: DictionaryObject,
|
||||
annotation: DictionaryObject,
|
||||
user_font_name: str = "",
|
||||
user_font_size: float = -1,
|
||||
) -> "TextStreamAppearance":
|
||||
"""
|
||||
Creates a TextStreamAppearance object from a text field annotation.
|
||||
|
||||
This class method is a factory for creating a `TextStreamAppearance`
|
||||
instance by extracting all necessary information (bounding box, font,
|
||||
text content, etc.) from the PDF field and annotation dictionaries.
|
||||
It respects inheritance for properties like default appearance (`/DA`).
|
||||
|
||||
Args:
|
||||
acro_form: The root AcroForm dictionary from the PDF catalog.
|
||||
field: The field dictionary object.
|
||||
annotation: The widget annotation dictionary object associated with the field.
|
||||
user_font_name: An optional user-provided font name to override the
|
||||
default. Defaults to an empty string.
|
||||
user_font_size: An optional user-provided font size to override the
|
||||
default. A value of -1 indicates no override.
|
||||
|
||||
Returns:
|
||||
A new `TextStreamAppearance` instance configured for the given field.
|
||||
|
||||
"""
|
||||
# Calculate rectangle dimensions
|
||||
_rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
|
||||
rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))
|
||||
|
||||
# Get default appearance dictionary from annotation
|
||||
default_appearance = annotation.get_inherited(
|
||||
AnnotationDictionaryAttributes.DA,
|
||||
acro_form.get(AnnotationDictionaryAttributes.DA, None),
|
||||
)
|
||||
if not default_appearance:
|
||||
# Create a default appearance if none was found in the annotation
|
||||
default_appearance = TextStringObject("/Helv 0 Tf 0 g")
|
||||
else:
|
||||
default_appearance = default_appearance.get_object()
|
||||
|
||||
# Derive font name, size and color from the default appearance. Also set
|
||||
# user-provided font name and font size in the default appearance, if given.
|
||||
# For a font name, this presumes that we can find an associated font resource
|
||||
# dictionary. Uses the variable font_properties as an intermediate.
|
||||
# As per the PDF spec:
|
||||
# "At a minimum, the string [that is, default_appearance] shall include a Tf (text
|
||||
# font) operator along with its two operands, font and size" (Section 12.7.4.3
|
||||
# "Variable text" of the PDF 2.0 specification).
|
||||
font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
|
||||
font_name = font_properties.pop(font_properties.index("Tf") - 2)
|
||||
font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
|
||||
font_properties.remove("Tf")
|
||||
font_color = " ".join(font_properties)
|
||||
# Determine the font name to use, prioritizing the user's input
|
||||
if user_font_name:
|
||||
font_name = user_font_name
|
||||
# Determine the font size to use, prioritizing the user's input
|
||||
if user_font_size > 0:
|
||||
font_size = user_font_size
|
||||
|
||||
# Try to find a resource dictionary for the font
|
||||
document_resources: Any = cast(
|
||||
DictionaryObject,
|
||||
cast(
|
||||
DictionaryObject,
|
||||
annotation.get_inherited(
|
||||
"/DR",
|
||||
acro_form.get("/DR", DictionaryObject()),
|
||||
),
|
||||
).get_object(),
|
||||
)
|
||||
document_font_resources = document_resources.get("/Font", DictionaryObject()).get_object()
|
||||
# CORE_FONT_METRICS is the dict with Standard font metrics
|
||||
if font_name not in document_font_resources and font_name.removeprefix("/") not in CORE_FONT_METRICS:
|
||||
# ...or AcroForm dictionary
|
||||
document_resources = cast(
|
||||
dict[Any, Any],
|
||||
acro_form.get("/DR", {}),
|
||||
)
|
||||
document_font_resources = document_resources.get_object().get("/Font", DictionaryObject()).get_object()
|
||||
font_resource = document_font_resources.get(font_name, None)
|
||||
if not is_null_or_none(font_resource):
|
||||
font_resource = cast(DictionaryObject, font_resource.get_object())
|
||||
|
||||
# Retrieve field text and selected values
|
||||
field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
|
||||
if (
|
||||
field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
|
||||
field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
|
||||
):
|
||||
text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
|
||||
selection = field.get("/V", [])
|
||||
if not isinstance(selection, list):
|
||||
selection = [selection]
|
||||
else: # /Tx
|
||||
text = field.get("/V", "")
|
||||
selection = []
|
||||
|
||||
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
|
||||
text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
|
||||
|
||||
# Retrieve formatting information
|
||||
is_comb = False
|
||||
max_length = None
|
||||
if field_flags & FieldDictionaryAttributes.FfBits.Comb:
|
||||
is_comb = True
|
||||
max_length = annotation.get("/MaxLen")
|
||||
is_multiline = False
|
||||
if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
|
||||
is_multiline = True
|
||||
alignment = field.get("/Q", TextAlignment.LEFT)
|
||||
border_width = 1
|
||||
border_style = BorderStyles.SOLID
|
||||
if "/BS" in field:
|
||||
border_width = cast(DictionaryObject, field["/BS"]).get("/W", border_width)
|
||||
border_style = cast(DictionaryObject, field["/BS"]).get("/S", border_style)
|
||||
|
||||
# Create the TextStreamAppearance instance
|
||||
layout = BaseStreamConfig(rectangle=rectangle, border_width=border_width, border_style=border_style)
|
||||
new_appearance_stream = cls(
|
||||
layout,
|
||||
text,
|
||||
selection,
|
||||
font_resource,
|
||||
font_name=font_name,
|
||||
font_size=font_size,
|
||||
font_color=font_color,
|
||||
is_multiline=is_multiline,
|
||||
alignment=alignment,
|
||||
is_comb=is_comb,
|
||||
max_length=max_length
|
||||
)
|
||||
if AnnotationDictionaryAttributes.AP in annotation:
|
||||
for key, value in (
|
||||
cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
|
||||
):
|
||||
if key not in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
|
||||
new_appearance_stream[key] = value
|
||||
|
||||
return new_appearance_stream
|
||||
937
venv/lib/python3.12/site-packages/pypdf/generic/_base.py
Normal file
937
venv/lib/python3.12/site-packages/pypdf/generic/_base.py
Normal file
@@ -0,0 +1,937 @@
|
||||
# Copyright (c) 2006, Mathieu Fenniak
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
import binascii
|
||||
import codecs
|
||||
import hashlib
|
||||
import re
|
||||
import sys
|
||||
from binascii import unhexlify
|
||||
from collections.abc import Sequence
|
||||
from math import log10
|
||||
from struct import iter_unpack
|
||||
from typing import Any, Callable, ClassVar, Optional, Union, cast
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
from typing import TypeGuard
|
||||
else:
|
||||
from typing_extensions import TypeGuard # PEP 647
|
||||
|
||||
from .._codecs import _pdfdoc_encoding_rev
|
||||
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
|
||||
from .._utils import (
|
||||
StreamType,
|
||||
classproperty,
|
||||
deprecation_no_replacement,
|
||||
deprecation_with_replacement,
|
||||
logger_warning,
|
||||
read_non_whitespace,
|
||||
read_until_regex,
|
||||
)
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError
|
||||
|
||||
__author__ = "Mathieu Fenniak"
|
||||
__author_email__ = "biziqe@mathieu.fenniak.net"
|
||||
|
||||
|
||||
class PdfObject(PdfObjectProtocol):
|
||||
# function for calculating a hash value
|
||||
hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
|
||||
indirect_reference: Optional["IndirectObject"]
|
||||
|
||||
def hash_bin(self) -> int:
|
||||
"""
|
||||
Used to detect modified object.
|
||||
|
||||
Returns:
|
||||
Hash considering type and value.
|
||||
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__.__name__} does not implement .hash_bin() so far"
|
||||
)
|
||||
|
||||
def hash_value_data(self) -> bytes:
|
||||
return f"{self}".encode()
|
||||
|
||||
def hash_value(self) -> bytes:
|
||||
return (
|
||||
f"{self.__class__.__name__}:"
|
||||
f"{self.hash_func(self.hash_value_data()).hexdigest()}"
|
||||
).encode()
|
||||
|
||||
def replicate(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
) -> "PdfObject":
|
||||
"""
|
||||
Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
|
||||
without ensuring links. This is used in clone_document_from_root with incremental = True.
|
||||
|
||||
Args:
|
||||
pdf_dest: Target to clone to.
|
||||
|
||||
Returns:
|
||||
The cloned PdfObject
|
||||
|
||||
"""
|
||||
return self.clone(pdf_dest)
|
||||
|
||||
def clone(
|
||||
self,
|
||||
pdf_dest: PdfWriterProtocol,
|
||||
force_duplicate: bool = False,
|
||||
ignore_fields: Optional[Sequence[Union[str, int]]] = (),
|
||||
) -> "PdfObject":
|
||||
"""
|
||||
Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).
|
||||
|
||||
By default, this method will call ``_reference_clone`` (see ``_reference``).
|
||||
|
||||
|
||||
Args:
|
||||
pdf_dest: Target to clone to.
|
||||
force_duplicate: By default, if the object has already been cloned and referenced,
|
||||
the copy will be returned; when ``True``, a new copy will be created.
|
||||
(Default value = ``False``)
|
||||
ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
|
||||
during cloning (applies to children duplication as well). If fields are to be
|
||||
considered for a limited number of levels, you have to add it as integer, for
|
||||
example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
|
||||
level only but ``"/TOTO"`` on all levels.
|
||||
|
||||
Returns:
|
||||
The cloned PdfObject
|
||||
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__.__name__} does not implement .clone so far"
|
||||
)
|
||||
|
||||
def _reference_clone(
|
||||
self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
|
||||
) -> PdfObjectProtocol:
|
||||
"""
|
||||
Reference the object within the _objects of pdf_dest only if
|
||||
indirect_reference attribute exists (which means the objects was
|
||||
already identified in xref/xobjstm) if object has been already
|
||||
referenced do nothing.
|
||||
|
||||
Args:
|
||||
clone:
|
||||
pdf_dest:
|
||||
|
||||
Returns:
|
||||
The clone
|
||||
|
||||
"""
|
||||
try:
|
||||
if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
|
||||
return clone
|
||||
except Exception:
|
||||
pass
|
||||
# if hasattr(clone, "indirect_reference"):
|
||||
try:
|
||||
ind = self.indirect_reference
|
||||
except AttributeError:
|
||||
return clone
|
||||
if (
|
||||
pdf_dest.incremental
|
||||
and ind is not None
|
||||
and ind.pdf == pdf_dest._reader
|
||||
and ind.idnum <= len(pdf_dest._objects)
|
||||
):
|
||||
i = ind.idnum
|
||||
else:
|
||||
i = len(pdf_dest._objects) + 1
|
||||
if ind is not None:
|
||||
if id(ind.pdf) not in pdf_dest._id_translated:
|
||||
pdf_dest._id_translated[id(ind.pdf)] = {}
|
||||
pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf # type: ignore[index]
|
||||
if (
|
||||
not force_duplicate
|
||||
and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
|
||||
):
|
||||
obj = pdf_dest.get_object(
|
||||
pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
|
||||
)
|
||||
assert obj is not None
|
||||
return obj
|
||||
pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
|
||||
try:
|
||||
pdf_dest._objects[i - 1] = clone
|
||||
except IndexError:
|
||||
pdf_dest._objects.append(clone)
|
||||
i = len(pdf_dest._objects)
|
||||
clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
|
||||
return clone
|
||||
|
||||
def get_object(self) -> Optional["PdfObject"]:
    """Resolve indirect references; a direct object resolves to itself."""
    return self
|
||||
|
||||
def write_to_stream(
    self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
) -> None:
    # Abstract: each concrete PdfObject subclass provides its own
    # serialization to `stream`.
    raise NotImplementedError
|
||||
|
||||
|
||||
class NullObject(PdfObject):
    """The PDF `null` object."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        duplicate = NullObject()
        return cast(
            "NullObject", self._reference_clone(duplicate, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # All NullObjects are interchangeable; only the type matters.
        return hash((self.__class__,))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        if stream.read(4) != b"null":
            raise PdfReadError("Could not read Null object")
        return NullObject()

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        # Every NullObject compares equal to every other NullObject.
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        return self.hash_bin()
|
||||
|
||||
|
||||
class BooleanObject(PdfObject):
    """The PDF `true`/`false` object."""

    def __init__(self, value: Any) -> None:
        # Truthiness of `value` determines how the object serializes.
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = BooleanObject(self.value)
        return cast(
            "BooleanObject",
            self._reference_clone(duplicate, pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.value))

    def __eq__(self, o: object, /) -> bool:
        # Equal to another BooleanObject with the same value, or to a
        # plain bool with the same value; unequal to everything else.
        if isinstance(o, BooleanObject):
            return self.value == o.value
        return self.value == o if isinstance(o, bool) else False

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        return "True" if self.value else "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        word = stream.read(4)
        if word == b"true":
            return BooleanObject(True)
        if word == b"fals":
            stream.read(1)  # consume the trailing byte of "false"
            return BooleanObject(False)
        raise PdfReadError("Could not read Boolean object")
|
||||
|
||||
|
||||
class IndirectObject(PdfObject):
    """
    A reference to another PDF object, identified by (idnum, generation)
    within the owning reader/writer ``pdf``.

    Most dunder methods delegate to the pointed-to object so that an
    IndirectObject can be used transparently in many places.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        # Re-bind the reference to another document without cloning the target.
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """Clone object into pdf_dest."""
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}
            pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf  # type: ignore[index]

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # case observed : a pointed object can not be found
            if obj is None:
                # substitute a NullObject so cloning can proceed
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        return self

    def get_object(self) -> Optional["PdfObject"]:
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # The reference triple is immutable in practice; no deep copy of pdf.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # Fixed return annotation (was erroneously `-> str`).
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        return (
            other is not None
            and isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        # Parse "<idnum> <generation> R" byte by byte.
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)
|
||||
|
||||
|
||||
FLOAT_WRITE_PRECISION = 8  # significant digits written for floats (keep >= 5); module-level so users can adjust
|
||||
|
||||
|
||||
class FloatObject(float, PdfObject):
    """A PDF real number; invalid input falls back to 0.0 with a warning."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> "FloatObject":
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # Fixed: call as_numeric() instead of hashing the bound method
        # object itself; now consistent with NumberObject.hash_bin.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        # Render with FLOAT_WRITE_PRECISION significant digits, then strip
        # trailing zeros and a trailing decimal point.
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))
|
||||
|
||||
|
||||
class NumberObject(int, PdfObject):
    """A PDF integer; invalid input falls back to 0 with a warning."""

    # NOTE: inside the character class, "+-." is a range (0x2B-0x2E) and so
    # also accepts "," — kept as-is for backward compatibility.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> "NumberObject":
        try:
            return int.__new__(cls, int(value))
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            return int.__new__(cls, 0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        return cast(
            "NumberObject",
            self._reference_clone(NumberObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.as_numeric()))

    def as_numeric(self) -> int:
        # Simplified from int(repr(self).encode("utf8")): the repr/encode
        # round-trip produced the same integer value for every input.
        return int(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(repr(self).encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        num = read_until_regex(stream, NumberObject.NumberPattern)
        if b"." in num:
            return FloatObject(num)
        return NumberObject(num)
|
||||
|
||||
|
||||
class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        duplicate = ByteStringObject(bytes(self))
        return cast(
            "ByteStringObject",
            self._reference_clone(duplicate, pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, bytes(self)))

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        # Serialize as a hex string: <AABBCC...>
        stream.write(b"<")
        stream.write(binascii.hexlify(self))
        stream.write(b">")

    def __str__(self) -> str:
        # Try UTF-16 first (BOM-aware), then the configured fallbacks.
        for enc in ("utf-16", *list(NameObject.CHARSETS)):
            try:
                return self.decode(enc)
            except UnicodeDecodeError:
                pass
        raise PdfReadError("Cannot decode ByteStringObject.")
|
||||
|
||||
|
||||
class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # True when the string round-trips through PDFDocEncoding.
    autodetect_pdfdocencoding: bool
    # True when the string must be serialized as UTF-16.
    autodetect_utf16: bool
    # BOM to prepend on UTF-16 serialization (b"" when not applicable).
    utf16_bom: bytes
    # Raw bytes the object was constructed from, when input was bytes.
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> "TextStringObject":
        original_bytes = None
        if isinstance(value, bytes):
            original_bytes = value
            # "charmap" maps each byte 1:1 to a code point, so this never fails.
            value = value.decode("charmap")
        text_string_object = str.__new__(cls, value)
        text_string_object._original_bytes = original_bytes
        text_string_object.autodetect_utf16 = False
        text_string_object.autodetect_pdfdocencoding = False
        text_string_object.utf16_bom = b""
        if original_bytes is not None and original_bytes[:2] in {codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE}:
            # The value of `original_bytes` is only set for inputs being `bytes`.
            # If this is UTF-16 data according to the BOM (first two characters),
            # perform special handling. All other cases should not need any special conversion
            # due to already being a string.
            try:
                text_string_object = str.__new__(cls, original_bytes.decode("utf-16"))
            except UnicodeDecodeError as exception:
                # Malformed UTF-16: keep the decodable prefix and warn.
                logger_warning(
                    f"{exception!s}\ninitial string:{exception.object!r}",
                    __name__,
                )
                text_string_object = str.__new__(cls, exception.object[: exception.start].decode("utf-16"))
            text_string_object._original_bytes = original_bytes
            text_string_object.autodetect_utf16 = True
            text_string_object.utf16_bom = original_bytes[:2]
        else:
            # No BOM: prefer PDFDocEncoding when the text fits, else UTF-16BE.
            try:
                encode_pdfdocencoding(text_string_object)
                text_string_object.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                text_string_object.autodetect_utf16 = True
                text_string_object.utf16_bom = codecs.BOM_UTF16_BE
        return text_string_object

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest."""
        # Copy the autodetection state so the clone serializes identically.
        obj = TextStringObject(self)
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Force the UTF-16 path below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        # Serialize as a literal string, octal-escaping every byte that is
        # not alphanumeric or a space.
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")
|
||||
|
||||
|
||||
class NameObject(str, PdfObject):  # noqa: SLOT000
    """
    A PDF name object (e.g. ``/Type``), stored as the decoded string
    including the leading slash.
    """

    # Characters that terminate a name token when reading from a stream.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Maps characters that must be #XX-escaped when serializing:
    # the PDF delimiter/special characters plus all control chars (< 33).
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """Serialize the name, #XX-escaping special and non-ASCII characters."""
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        for c in self[1:]:
            if c > "~":
                # Non-ASCII: escape each UTF-8 byte individually.
                for x in c.encode("utf-8"):
                    out += f"#{x:02X}".encode()
            else:
                try:
                    out += self.renumber_table[c]
                except KeyError:
                    out += c.encode("utf-8")
        return out

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen). The _sanitize method replaces
        spaces and any non-alphanumeric/non-underscore/non-hyphen with
        underscores.

        Returns:
            NameObject with sanitized name.
        """
        name = str(self).removeprefix("/")
        name = re.sub(r"\ ", "_", name)
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        # Deprecated misspelled alias of `prefix`.
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """Replace every valid #XX escape in `sin` with the raw byte."""
        i = sin.find(b"#", 0)
        while i >= 0:
            try:
                sin = sin[:i] + unhexlify(sin[i + 1 : i + 3]) + sin[i + 3 :]
                i = sin.find(b"#", i + 1)
            except ValueError:
                # if the 2 characters after # can not be converted to hex
                # we change nothing and carry on
                i = i + 1
        return sin

    # Fallback encodings tried (in order) when decoding a name's bytes.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            # Non-strict readers fall back to a lossless 1:1 byte decode.
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e
|
||||
|
||||
|
||||
def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """Encode `unicode_string` with PDFDocEncoding; raise UnicodeEncodeError if any character has no mapping."""
    try:
        return bytes(_pdfdoc_encoding_rev[character] for character in unicode_string)
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
|
||||
|
||||
|
||||
def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Returns:
        True if x is None or NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    resolved = x.get_object()
    return resolved is None or isinstance(resolved, NullObject)
|
||||
1757
venv/lib/python3.12/site-packages/pypdf/generic/_data_structures.py
Normal file
1757
venv/lib/python3.12/site-packages/pypdf/generic/_data_structures.py
Normal file
File diff suppressed because it is too large
Load Diff
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
401
venv/lib/python3.12/site-packages/pypdf/generic/_files.py
Normal file
@@ -0,0 +1,401 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import bisect
|
||||
from functools import cached_property
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from pypdf._utils import format_iso8824_date, parse_iso8824_date
|
||||
from pypdf.constants import CatalogAttributes as CA
|
||||
from pypdf.constants import FileSpecificationDictionaryEntries
|
||||
from pypdf.constants import PageAttributes as PG
|
||||
from pypdf.errors import PdfReadError, PyPdfError
|
||||
from pypdf.generic import (
|
||||
ArrayObject,
|
||||
ByteStringObject,
|
||||
DecodedStreamObject,
|
||||
DictionaryObject,
|
||||
NameObject,
|
||||
NullObject,
|
||||
NumberObject,
|
||||
StreamObject,
|
||||
TextStringObject,
|
||||
is_null_or_none,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import datetime
|
||||
from collections.abc import Generator
|
||||
|
||||
from pypdf._writer import PdfWriter
|
||||
|
||||
|
||||
class EmbeddedFile:
|
||||
"""
|
||||
Container holding the information on an embedded file.
|
||||
|
||||
Attributes are evaluated lazily if possible.
|
||||
|
||||
Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
|
||||
"""
|
||||
def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
    """
    Args:
        name: The (primary) name as provided in the name tree.
        pdf_object: The corresponding PDF object to allow retrieving further data.
        parent: The parent list.
    """
    # Primary name; exposed read-only through the `name` property.
    self._name = name
    # The /Filespec dictionary backing this embedded file.
    self.pdf_object = pdf_object
    # The names array this entry lives in (when known).
    self._parent = parent
|
||||
|
||||
@property
def name(self) -> str:
    """
    The (primary) name of the embedded file as provided in the name tree.

    Alternative names from the /UF or /F keys are available through
    ``alternative_name``.
    """
    return self._name
|
||||
|
||||
@classmethod
def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
    """
    Create a new embedded file and add it to the PdfWriter.

    Args:
        writer: The PdfWriter instance to add the embedded file to.
        name: The filename to display.
        content: The data in the file.

    Returns:
        EmbeddedFile instance for the newly created embedded file.
    """
    # Convert string content to bytes if needed.
    # NOTE(review): latin-1 restricts str content to code points <= 0xFF;
    # wider text raises UnicodeEncodeError here — presumably intentional.
    if isinstance(content, str):
        content = content.encode("latin-1")

    # Create the file entry (the actual embedded file stream)
    file_entry = DecodedStreamObject()
    file_entry.set_data(content)
    file_entry.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})

    # Create the /EF entry
    ef_entry = DictionaryObject()
    ef_entry.update({NameObject("/F"): writer._add_object(file_entry)})

    # Create the filespec dictionary
    from pypdf.generic import create_string_object  # noqa: PLC0415
    filespec = DictionaryObject()
    filespec_reference = writer._add_object(filespec)
    name_object = cast(TextStringObject, create_string_object(name))
    filespec.update(
        {
            NameObject(PG.TYPE): NameObject("/Filespec"),
            NameObject(FileSpecificationDictionaryEntries.F): name_object,
            NameObject(FileSpecificationDictionaryEntries.EF): ef_entry,
        }
    )

    # Add the name and filespec to the names array.
    # We use the inverse order for insertion, as this allows us to re-use the
    # same index.
    names_array = cls._get_names_array(writer)
    insertion_index = cls._get_insertion_index(names_array, name_object)
    names_array.insert(insertion_index, filespec_reference)
    names_array.insert(insertion_index, name_object)

    # Return an EmbeddedFile instance
    return cls(name=name, pdf_object=filespec, parent=names_array)
|
||||
|
||||
@classmethod
def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
    """Get the names array for embedded files, possibly creating and flattening it."""
    if CA.NAMES not in writer.root_object:
        # Add the /Names entry to the catalog.
        writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())

    names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])
    if "/EmbeddedFiles" not in names_dict:
        # We do not yet have an entry for embedded files. Create and return it.
        names = ArrayObject()
        embedded_files_names_dictionary = DictionaryObject(
            {NameObject(CA.NAMES): names}
        )
        names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
        return names

    # We have an existing embedded files entry.
    embedded_files_names_tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
    if "/Names" in embedded_files_names_tree:
        # Simple case: We already have a flat list.
        return cast(ArrayObject, embedded_files_names_tree[NameObject(CA.NAMES)])
    if "/Kids" not in embedded_files_names_tree:
        # Invalid case: This is no name tree.
        raise PdfReadError("Got neither Names nor Kids in embedded files tree.")

    # Complex case: Convert a /Kids-based name tree to a /Names-based one.
    # /Name-based ones are much easier to handle and allow us to simplify the
    # actual insertion logic by only having to consider one case.
    names = ArrayObject()
    kids = cast(ArrayObject, embedded_files_names_tree["/Kids"].get_object())
    embedded_files_names_dictionary = DictionaryObject(
        {NameObject(CA.NAMES): names}
    )
    names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(embedded_files_names_dictionary)
    for kid in kids:
        # Write the flattened file entries. As we do not change the actual files,
        # this should not have any impact on references to them.
        # There might be further (nested) kids here.
        # Wait for an example before evaluating an implementation.
        for name in kid.get_object().get("/Names", []):
            names.append(name)
    return names
|
||||
|
||||
@classmethod
def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
    """
    Return the index in the flat [name, filespec, ...] array at which a new
    entry for `name` should be inserted to keep the keys sorted (by their
    UTF-8 byte representation), placing duplicates after existing entries.

    Args:
        names_array: Flat name-tree array of alternating name/filespec entries.
        name: The new entry's name.

    Returns:
        Even index into `names_array`.
    """
    keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
    name_bytes = name.encode("utf-8")
    # Simplified: the previous start/end branching always reduced to
    # bisect_right * 2 (start == 0 implies end == 0, and
    # start == len(keys) with start == end gives the same product).
    return bisect.bisect_right(keys, name_bytes) * 2
|
||||
|
||||
@property
def alternative_name(self) -> str | None:
    """The alternative file name from the file specification, or None.

    Per the PDF 2.0 reference, table 43, a reader shall prefer the /UF key
    over /F when it is present and non-null.
    """
    for candidate_key in (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F):
        if candidate_key not in self.pdf_object:
            continue
        candidate = self.pdf_object[candidate_key].get_object()
        if is_null_or_none(candidate):
            continue
        return cast(str, candidate)
    return None
|
||||
|
||||
@alternative_name.setter
def alternative_name(self, value: TextStringObject | None) -> None:
    """Set the alternative name; None nulls out existing /UF and /F entries."""
    uf_key = NameObject(FileSpecificationDictionaryEntries.UF)
    f_key = NameObject(FileSpecificationDictionaryEntries.F)
    if value is not None:
        self.pdf_object[uf_key] = value
        self.pdf_object[f_key] = value
        return
    # Only null out entries that actually exist; do not create new keys.
    if FileSpecificationDictionaryEntries.UF in self.pdf_object:
        self.pdf_object[uf_key] = NullObject()
    if FileSpecificationDictionaryEntries.F in self.pdf_object:
        self.pdf_object[f_key] = NullObject()
|
||||
|
||||
@property
def description(self) -> str | None:
    """The file description, or None when absent or stored as PDF null."""
    value = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
    return None if is_null_or_none(value) else value
|
||||
|
||||
@description.setter
def description(self, value: TextStringObject | None) -> None:
    """Set the description; None stores an explicit PDF null."""
    key = NameObject(FileSpecificationDictionaryEntries.DESC)
    self.pdf_object[key] = NullObject() if value is None else value
|
||||
|
||||
@property
def associated_file_relationship(self) -> str:
    """Relationship of the referring document to this embedded file.

    Defaults to "/Unspecified" when no /AFRelationship entry exists.
    """
    return self.pdf_object.get("/AFRelationship", "/Unspecified")
|
||||
|
||||
@associated_file_relationship.setter
def associated_file_relationship(self, value: NameObject) -> None:
    """Set the /AFRelationship of the referring document to this embedded file."""
    self.pdf_object[NameObject("/AFRelationship")] = value
|
||||
|
||||
@property
def _embedded_file(self) -> StreamObject:
    """The actual embedded file stream from the /EF dictionary.

    Raises:
        PdfReadError: When no /EF entry exists, or it holds neither a /UF
            nor an /F stream.
    """
    if "/EF" not in self.pdf_object:
        raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
    ef = cast(DictionaryObject, self.pdf_object["/EF"])
    for key in (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F):
        if key in ef:
            return cast(StreamObject, ef[key].get_object())
    raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")
|
||||
|
||||
@property
def _params(self) -> DictionaryObject:
    """The file-specific /Params dictionary (a fresh empty one if absent)."""
    return self._embedded_file.get("/Params", DictionaryObject()).get_object()
|
||||
|
||||
@cached_property
def _ensure_params(self) -> DictionaryObject:
    """Create the /Params dictionary on first access if missing, then return it."""
    stream = self._embedded_file
    if "/Params" not in stream:
        stream[NameObject("/Params")] = DictionaryObject()
    return cast(DictionaryObject, stream["/Params"])
|
||||
|
||||
@property
def subtype(self) -> str | None:
    """The subtype (a MIME media type, prefixed by a slash), or None."""
    value = self._embedded_file.get("/Subtype")
    return None if is_null_or_none(value) else value
|
||||
|
||||
@subtype.setter
def subtype(self, value: NameObject | None) -> None:
    """Set the subtype (slash-prefixed MIME type); None stores a PDF null."""
    self._embedded_file[NameObject("/Subtype")] = NullObject() if value is None else value
|
||||
|
||||
@property
def content(self) -> bytes:
    """The decoded bytes of the embedded file."""
    return self._embedded_file.get_data()
|
||||
|
||||
@content.setter
def content(self, value: str | bytes) -> None:
    """Set the file content; str input is encoded as Latin-1 before storing."""
    data = value.encode("latin-1") if isinstance(value, str) else value
    self._embedded_file.set_data(data)
|
||||
|
||||
@property
def size(self) -> int | None:
    """Size of the uncompressed file in bytes, or None when unrecorded."""
    value = self._params.get("/Size")
    return None if is_null_or_none(value) else value
|
||||
|
||||
@size.setter
def size(self, value: NumberObject | None) -> None:
    """Record the uncompressed file size; None stores a PDF null."""
    self._ensure_params[NameObject("/Size")] = NullObject() if value is None else value
|
||||
|
||||
@property
def creation_date(self) -> datetime.datetime | None:
    """The file creation datetime parsed from /CreationDate, or None."""
    return parse_iso8824_date(self._params.get("/CreationDate"))
|
||||
|
||||
@creation_date.setter
def creation_date(self, value: datetime.datetime | None) -> None:
    """Set /CreationDate from a datetime; None stores a PDF null."""
    params = self._ensure_params
    key = NameObject("/CreationDate")
    if value is None:
        params[key] = NullObject()
    else:
        params[key] = TextStringObject(format_iso8824_date(value))
|
||||
|
||||
@property
def modification_date(self) -> datetime.datetime | None:
    """The datetime of the last file modification from /ModDate, or None."""
    return parse_iso8824_date(self._params.get("/ModDate"))
|
||||
|
||||
@modification_date.setter
def modification_date(self, value: datetime.datetime | None) -> None:
    """Set /ModDate from a datetime; None stores a PDF null."""
    params = self._ensure_params
    key = NameObject("/ModDate")
    if value is None:
        params[key] = NullObject()
    else:
        params[key] = TextStringObject(format_iso8824_date(value))
|
||||
|
||||
@property
def checksum(self) -> bytes | None:
    """The recorded MD5 checksum of the (uncompressed) file, or None."""
    value = self._params.get("/CheckSum")
    return None if is_null_or_none(value) else value
|
||||
|
||||
@checksum.setter
def checksum(self, value: ByteStringObject | None) -> None:
    """Record the MD5 checksum of the (uncompressed) file; None stores a PDF null."""
    self._ensure_params[NameObject("/CheckSum")] = NullObject() if value is None else value
|
||||
|
||||
def delete(self) -> None:
    """Remove this file from the parent name tree and invalidate the instance.

    Raises:
        PyPdfError: When no parent array is known, or the file specification
            cannot be located inside it.
    """
    if not self._parent:
        raise PyPdfError("Parent required to delete file from document.")
    indirect_reference = getattr(self.pdf_object, "indirect_reference", None)
    if self.pdf_object in self._parent:
        index = self._parent.index(self.pdf_object)
    elif indirect_reference is not None and indirect_reference in self._parent:
        index = self._parent.index(indirect_reference)
    else:
        raise PyPdfError("File not found in parent object.")
    # The flat name tree stores [name, filespec] pairs: drop both entries.
    self._parent.pop(index)  # Reference.
    self._parent.pop(index - 1)  # Name.
    self.pdf_object = DictionaryObject()  # Invalidate.
|
||||
|
||||
def __repr__(self) -> str:
    """Debug representation carrying the embedded file's name."""
    return f"<{self.__class__.__name__} name={self.name!r}>"
|
||||
|
||||
@classmethod
def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
    """
    Yield an EmbeddedFile instance for every entry of a flat /Names array.

    The array alternates [name_1, reference_1, name_2, reference_2, ...]; we
    iterate over the reference slots and fetch each preceding name by index.

    Args:
        names: The name tree to load the data from.

    Returns:
        Iterable of class instances for the files found.

    """
    for index, entry in enumerate(names):
        if isinstance(entry, str):
            # Name slots are plain strings; they are consumed together with
            # the following reference slot instead of on their own.
            continue
        file_dictionary = entry.get_object()
        direct_name = names[index - 1].get_object()
        yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)
|
||||
|
||||
@classmethod
def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
    """
    Load the embedded files for the given document catalog.

    This method and its signature are considered internal API and thus not exposed publicly for now.

    Args:
        catalog: The document catalog to load from.

    Returns:
        Iterable of class instances for the files found.

    """
    try:
        container = cast(
            DictionaryObject,
            cast(DictionaryObject, catalog["/Names"])["/EmbeddedFiles"],
        )
    except KeyError:
        # No /Names or no /EmbeddedFiles entry: nothing to yield.
        return

    if "/Kids" in container:
        for kid in cast(ArrayObject, container["/Kids"].get_object()):
            # There might be further (nested) kids here.
            # Wait for an example before evaluating an implementation.
            node = kid.get_object()
            if "/Names" in node:
                yield from cls._load_from_names(cast(ArrayObject, node["/Names"]))
    if "/Names" in container:
        yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))
|
||||
174
venv/lib/python3.12/site-packages/pypdf/generic/_fit.py
Normal file
174
venv/lib/python3.12/site-packages/pypdf/generic/_fit.py
Normal file
@@ -0,0 +1,174 @@
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from ._base import is_null_or_none
|
||||
|
||||
|
||||
class Fit:
    """A destination "fit" mode: how a viewer should position and zoom the target page."""

    def __init__(
        self, fit_type: str, fit_args: tuple[Union[None, float, Any], ...] = ()
    ) -> None:
        from ._base import FloatObject, NameObject, NullObject, NumberObject  # noqa: PLC0415

        self.fit_type = NameObject(fit_type)
        # A null argument means "keep the viewer's current value".
        self.fit_args: list[Union[NullObject, FloatObject, NumberObject]] = [
            NullObject() if is_null_or_none(arg) else FloatObject(arg)
            for arg in fit_args
        ]

    @classmethod
    def xyz(
        cls,
        left: Optional[float] = None,
        top: Optional[float] = None,
        zoom: Optional[float] = None,
    ) -> "Fit":
        """
        Position the coordinates (left, top) at the upper-left corner of the
        window, with the page contents magnified by the factor zoom.

        A null value for any of left, top, or zoom keeps the current value of
        that parameter unchanged; a zoom of 0 means the same as null.

        Args:
            left:
            top:
            zoom:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/XYZ", fit_args=(left, top, zoom))

    @classmethod
    def fit(cls) -> "Fit":
        """
        Magnify the page contents just enough to fit the entire page within
        the window, both horizontally and vertically.

        If the two magnification factors differ, the smaller one is used and
        the page is centered within the window in the other dimension.
        """
        return Fit(fit_type="/Fit")

    @classmethod
    def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Position the vertical coordinate top at the top edge of the window and
        magnify the contents just enough to fit the entire page width.

        A null value for ``top`` keeps the current value of that parameter
        unchanged.

        Args:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitH", fit_args=(top,))

    @classmethod
    def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
        # Analogous to fit_horizontally: fit the full page height with `left`
        # positioned at the left edge of the window.
        return Fit(fit_type="/FitV", fit_args=(left,))

    @classmethod
    def fit_rectangle(
        cls,
        left: Optional[float] = None,
        bottom: Optional[float] = None,
        right: Optional[float] = None,
        top: Optional[float] = None,
    ) -> "Fit":
        """
        Magnify the page contents just enough to fit the rectangle given by
        (left, bottom, right, top) entirely within the window, both
        horizontally and vertically.

        If the two magnification factors differ, the smaller one is used and
        the rectangle is centered within the window in the other dimension.

        A null value for any of the parameters may result in unpredictable
        behavior.

        Args:
            left:
            bottom:
            right:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitR", fit_args=(left, bottom, right, top))

    @classmethod
    def fit_box(cls) -> "Fit":
        """
        Magnify the page contents just enough to fit the page's bounding box
        entirely within the window, both horizontally and vertically.

        If the two magnification factors differ, the smaller one is used and
        the bounding box is centered within the window in the other dimension.
        """
        return Fit(fit_type="/FitB")

    @classmethod
    def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Position the vertical coordinate top at the top edge of the window and
        magnify the contents just enough to fit the full width of the page's
        bounding box within the window.

        A null value for top keeps the current value of that parameter
        unchanged.

        Args:
            top:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitBH", fit_args=(top,))

    @classmethod
    def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Position the horizontal coordinate left at the left edge of the window
        and magnify the contents just enough to fit the full height of the
        page's bounding box within the window.

        A null value for left keeps the current value of that parameter
        unchanged.

        Args:
            left:

        Returns:
            The created fit object.

        """
        return Fit(fit_type="/FitBV", fit_args=(left,))

    def __str__(self) -> str:
        if not self.fit_args:
            return f"Fit({self.fit_type})"
        return f"Fit({self.fit_type}, {self.fit_args})"
|
||||
|
||||
|
||||
# Module-level default destination fit: plain /Fit (whole page scaled to the window).
DEFAULT_FIT = Fit.fit()
|
||||
314
venv/lib/python3.12/site-packages/pypdf/generic/_image_inline.py
Normal file
314
venv/lib/python3.12/site-packages/pypdf/generic/_image_inline.py
Normal file
@@ -0,0 +1,314 @@
|
||||
# Copyright (c) 2024, pypdf contributors
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import IO
|
||||
|
||||
from .._utils import (
|
||||
WHITESPACES,
|
||||
WHITESPACES_AS_BYTES,
|
||||
StreamType,
|
||||
logger_warning,
|
||||
read_non_whitespace,
|
||||
)
|
||||
from ..errors import PdfReadError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# An inline image should be used only for small images (4096 bytes or less),
# but allow twice this for cases where this has been exceeded.
BUFFER_SIZE = 8192
|
||||
|
||||
|
||||
def _check_end_image_marker(stream: StreamType) -> bool:
    """Peek ahead (restoring the position) and report whether the next token
    is an `EI` end-of-image marker followed by whitespace or end of data."""
    marker = read_non_whitespace(stream)
    marker += stream.read(2)
    stream.seek(-3, 1)
    return marker[:2] == b"EI" and (marker[2:3] == b"" or marker[2:3] in WHITESPACES)
|
||||
|
||||
|
||||
def extract_inline__ascii_hex_decode(stream: StreamType) -> bytes:
    """
    Extract HexEncoded stream from inline image.
    The stream will be moved onto the EI.
    """
    result: bytes = b""
    # Consume data until the `>` delimiter, with `EI` detection as a backup.
    while True:
        chunk = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not chunk:
            raise PdfReadError("Unexpected end of stream")
        delimiter_pos = chunk.find(b">")
        if delimiter_pos >= 0:
            # Found the closing `>`: keep it and rewind past the remainder.
            result += chunk[: delimiter_pos + 1]
            stream.seek(-len(chunk) + delimiter_pos + 1, 1)
            break
        ei_pos = chunk.find(b"EI")
        if ei_pos >= 0:
            # Found a bare `EI` without `>`: strip trailing whitespace before it.
            stream.seek(-len(chunk) + ei_pos - 1, 1)
            char = stream.read(1)
            while char in WHITESPACES:
                stream.seek(-2, 1)
                char = stream.read(1)
                ei_pos -= 1
            result += chunk[:ei_pos]
            break
        if len(chunk) == 2:
            result += chunk
            raise PdfReadError("Unexpected end of stream")
        # Neither `>` nor `EI` found: keep all but the last two bytes in case a
        # marker straddles the buffer boundary, then rewind those two bytes.
        result += chunk[:-2]
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return result
|
||||
|
||||
|
||||
def extract_inline__ascii85_decode(stream: StreamType) -> bytes:
    """
    Extract A85 stream from inline image.
    The stream will be moved onto the EI.
    """
    result: bytes = b""
    # Consume data until the `~>` end-of-data marker.
    while True:
        chunk = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not chunk:
            raise PdfReadError("Unexpected end of stream")
        marker_pos = chunk.find(b"~>")
        if marker_pos >= 0:
            # Found the end-of-data marker: keep it, rewind past the rest.
            result += chunk[: marker_pos + 2]
            stream.seek(-len(chunk) + marker_pos + 2, 1)
            break
        if len(chunk) == 2:  # end of buffer
            result += chunk
            raise PdfReadError("Unexpected end of stream")
        # Keep all but the final two bytes in case `~>` straddles the buffer
        # boundary, then rewind those two bytes.
        result += chunk[:-2]
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return result
|
||||
|
||||
|
||||
def extract_inline__run_length_decode(stream: StreamType) -> bytes:
    """
    Extract RL (RunLengthDecode) stream from inline image.
    The stream will be moved onto the EI.
    """
    result: bytes = b""
    # Consume data until the EOD byte 128 (0x80).
    while True:
        chunk = stream.read(BUFFER_SIZE)
        if not chunk:
            raise PdfReadError("Unexpected end of stream")
        eod_pos = chunk.find(b"\x80")
        if eod_pos >= 0:
            # Ideally 0x80 alone would mark the EOD, but issue #3517 showed
            # inline images containing dozens of such bytes. Be resilient:
            # accept the EOD only if an `EI` marker follows (allowing at most
            # one whitespace character in between); otherwise fall back to
            # plain `EI` detection. The fallback omits special `EI` handling
            # within the stream, but having both corner cases at once is
            # assumed to be very unlikely (and such a stream is broken anyway).
            following = chunk[eod_pos + 1 : eod_pos + 4]
            if following.startswith(b"EI") or following.endswith(b"EI"):
                result += chunk[: eod_pos + 1]
                stream.seek(-len(chunk) + eod_pos + 1, 1)
            else:
                logger_warning("Early EOD in RunLengthDecode of inline image, using fallback.", __name__)
                ei_pos = chunk.find(b"EI")
                if ei_pos > 0:
                    result += chunk[:ei_pos]
                    stream.seek(-len(chunk) + ei_pos - 1, 1)
            break
        result += chunk

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return result
|
||||
|
||||
|
||||
def extract_inline__dct_decode(stream: StreamType) -> bytes:
    """
    Extract DCT (JPEG) stream from inline image.
    The stream will be moved onto the EI.
    """
    def read(length: int) -> bytes:
        # stream.read may return fewer bytes at end of file, or None when the
        # underlying object is in non-blocking mode.
        buffer = stream.read(length)
        if buffer is None or len(buffer) != length:
            raise PdfReadError("Unexpected end of stream")
        return buffer

    result: bytes = b""
    # Walk the JPEG marker segments (ID/size/payload) until ID = FF/D9 (EOI).
    # https://www.digicamsoft.com/itu/itu-t81-36.html
    seen_first_marker = False
    while True:
        byte = read(1)
        if seen_first_marker or byte == b"\xff":
            result += byte
        if byte != b"\xff":
            continue
        seen_first_marker = True
        byte = read(1)
        result += byte
        if byte == b"\xff":
            stream.seek(-1, 1)  # pragma: no cover
        elif byte == b"\x00":  # stuffing
            pass
        elif byte == b"\xd9":  # end
            break
        elif byte in (
            b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
            b"\xda\xdb\xdc\xdd\xde\xdf"
            b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
        ):
            # Marker with a two-byte big-endian length field; copy the payload.
            byte = read(2)
            result += byte
            segment_size = byte[0] * 256 + byte[1]
            result += read(segment_size - 2)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return result
|
||||
|
||||
|
||||
def extract_inline_default(stream: StreamType) -> bytes:
    """Legacy method, used by default"""
    output = BytesIO()
    # Copy the inline image while scanning for the EI (End Image) operator.
    while True:
        chunk = stream.read(BUFFER_SIZE)
        if not chunk:
            raise PdfReadError("Unexpected end of stream")
        # Search for a bare "E": the full "EI" may straddle the buffer boundary.
        pos_e = chunk.find(b"E")

        if pos_e == -1:
            output.write(chunk)
        else:
            # Write out everything including E (the one from EI to be removed).
            output.write(chunk[0 : pos_e + 1])
            e_offset_in_output = output.tell() - 1
            # Seek back in the stream to read the E next.
            stream.seek(pos_e + 1 - len(chunk), 1)
            saved_pos = stream.tell()
            # Check for End Image.
            tok_i = stream.read(1)  # I of "EI"
            if tok_i != b"I":
                stream.seek(saved_pos, 0)
                continue
            tok_after = stream.read(1)  # possible space after "EI"
            if tok_after not in WHITESPACES:
                stream.seek(saved_pos, 0)
                continue
            while tok_after in WHITESPACES:
                tok_after = stream.read(1)
            if chunk[pos_e - 1 : pos_e] not in WHITESPACES and tok_after not in {
                b"Q",
                b"E",
            }:  # for Q or EMC
                stream.seek(saved_pos, 0)
                continue
            if is_followed_by_binary_data(stream):
                # Inline image contains `EI ` sequence usually marking the end of it, but
                # is followed by binary data which does not make sense for the actual end.
                stream.seek(saved_pos, 0)
                continue
            # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficient.
            # Remove the E(I) wrongly written earlier.
            stream.seek(saved_pos - 1, 0)
            output.truncate(e_offset_in_output)
            break

    return output.getvalue()
|
||||
|
||||
|
||||
def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
    """
    Check if the next bytes of the stream look like binary image data or regular page content.

    This is just some heuristics due to the PDF specification being too imprecise about
    inline images containing the `EI` marker which would end an image. Starting with PDF 2.0,
    we finally get a mandatory length field, but with (proper) PDF 2.0 support being very limited
    everywhere, we should not expect to be able to remove such hacks in the near future - especially
    considering legacy documents as well.

    The actual implementation draws some inspiration from
    https://github.com/itext/itext-java/blob/9.1.0/kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java
    """
    position = stream.tell()
    peeked = stream.read(length)
    stream.seek(position)
    if not peeked:
        return False
    operator_start = None
    operator_end = None

    for index, byte in enumerate(peeked):
        if byte < 32 and byte not in WHITESPACES_AS_BYTES:
            # Non-displayable and not whitespace: certainly binary data.
            return True
        is_whitespace = byte in WHITESPACES_AS_BYTES
        if operator_start is None and not is_whitespace:
            # Any other non-whitespace byte starts a candidate operator.
            operator_start = index
        if operator_start is not None and is_whitespace:
            # Whitespace terminates the operator. Assume an inline image with
            # tons of leading whitespace is rather unlikely.
            operator_end = index
            break

    if operator_start is None:
        # Only whitespace seen; inline images should not contain that much of it.
        return False
    if operator_end is None:
        # We probably are inside an operation.
        operator_end = length
    operator_length = operator_end - operator_start
    operator = peeked[operator_start:operator_end]
    if operator.startswith(b"/") and operator_length > 1:
        # Name object.
        return False
    if operator.replace(b".", b"").isdigit():
        # Graphics operator, for example a move. A number (integer or float).
        return False
    if operator_length > 3:  # noqa: SIM103
        # Content-stream operators rarely exceed three characters, especially
        # right after an inline image.
        return True
    return False
|
||||
118
venv/lib/python3.12/site-packages/pypdf/generic/_link.py
Normal file
118
venv/lib/python3.12/site-packages/pypdf/generic/_link.py
Normal file
@@ -0,0 +1,118 @@
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
# This module contains code used by _writer.py to track links in pages
|
||||
# being added to the writer until the links can be resolved.
|
||||
|
||||
from typing import TYPE_CHECKING, Optional, Union, cast
|
||||
|
||||
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .._page import PageObject
|
||||
from .._reader import PdfReader
|
||||
from .._writer import PdfWriter
|
||||
|
||||
|
||||
class NamedReferenceLink:
    """Named reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """reference: TextStringObject with named reference"""
        self._reference = reference
        self._source_pdf = source_pdf

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        # Resolve the named destination in the source document, if present.
        destination = self._source_pdf.named_destinations.get(str(self._reference))
        return destination.page if destination else None

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """target_pdf: PdfWriter which the new link went into"""
        # Point the named destination in the new PDF to the new page, unless
        # a destination of that name already exists there.
        if str(self._reference) not in target_pdf.named_destinations:
            target_pdf.add_named_destination(str(self._reference), new_page.page_number)
|
||||
|
||||
|
||||
class DirectReferenceLink:
    """Direct reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: ArrayObject) -> None:
        """reference: an ArrayObject whose first element is the Page indirect object"""
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        # The destination array's first element is the target page reference.
        return self._reference[0]

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """target_pdf: PdfWriter which the new link went into"""
        self._reference[0] = new_page
|
||||
|
||||
|
||||
# Either link flavour; both expose find_referenced_page() and patch_reference().
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]
|
||||
|
||||
|
||||
def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """Extracts links from two pages on the assumption that the two pages are
    the same. Produces one list of (new link, old link) tuples.
    """
    pairs: list[tuple[ReferenceLink, ReferenceLink]] = []
    new_annots = new_page.get("/Annots", [])
    old_annots = old_page.get("/Annots", [])
    # zip truncates to the shorter annotation list, mirroring the "pages are
    # the same" assumption; annotations that are not GoTo links yield None
    # from _build_link and are skipped.
    for new_annot, old_annot in zip(new_annots, old_annots):
        new_link = _build_link(new_annot, new_page)
        old_link = _build_link(old_annot, old_page)
        if new_link and old_link:
            pairs.append((new_link, old_link))
    return pairs
||||
|
||||
|
||||
def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """Build a ReferenceLink for a /Link annotation, or None if the
    annotation is not an in-document GoTo link."""
    src = cast("PdfReader", page.pdf)
    link = cast(DictionaryObject, indirect_object.get_object())
    if not isinstance(link, DictionaryObject) or link.get("/Subtype") != "/Link":
        return None

    if "/A" in link:
        # Action-based link: only /GoTo actions with a destination count.
        action = cast(DictionaryObject, link["/A"])
        if action.get("/S") != "/GoTo" or "/D" not in action:
            return None
        return _create_link(action["/D"], src)

    if "/Dest" in link:
        # Direct-destination link.
        return _create_link(link["/Dest"], src)

    return None  # Nothing to do here
|
||||
def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """Wrap a raw /D or /Dest value in the matching link-preservation helper."""
    if isinstance(reference, TextStringObject):
        # A name: must be looked up in the reader's named-destination tree.
        return NamedReferenceLink(reference, source_pdf)
    if isinstance(reference, ArrayObject):
        # An explicit destination array referencing the page directly.
        return DirectReferenceLink(reference)
    # Any other destination type cannot be preserved.
    return None
|
||||
33
venv/lib/python3.12/site-packages/pypdf/generic/_outline.py
Normal file
33
venv/lib/python3.12/site-packages/pypdf/generic/_outline.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from typing import Union
|
||||
|
||||
from .._utils import StreamType, deprecation_no_replacement
|
||||
from ._base import NameObject
|
||||
from ._data_structures import Destination
|
||||
|
||||
|
||||
class OutlineItem(Destination):
    """A Destination that serializes itself as an outline (bookmark) entry."""

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        # Write the optional linkage keys that are actually present.
        for name in ["/Title", "/Parent", "/First", "/Last", "/Next", "/Prev"]:
            if name not in self:
                continue
            key = NameObject(name)
            key.write_to_stream(stream)
            stream.write(b" ")
            self.raw_get(key).write_to_stream(stream)
            stream.write(b"\n")
        # /Dest is always emitted, derived from dest_array.
        NameObject("/Dest").write_to_stream(stream)
        stream.write(b" ")
        self.dest_array.write_to_stream(stream)
        stream.write(b"\n")
        stream.write(b">>")
|
||||
132
venv/lib/python3.12/site-packages/pypdf/generic/_rectangle.py
Normal file
132
venv/lib/python3.12/site-packages/pypdf/generic/_rectangle.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from typing import Any, Union
|
||||
|
||||
from ._base import FloatObject, NumberObject
|
||||
from ._data_structures import ArrayObject
|
||||
|
||||
|
||||
class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in pypdf.

    These boxes include:

    * :attr:`artbox <pypdf._page.PageObject.artbox>`
    * :attr:`bleedbox <pypdf._page.PageObject.bleedbox>`
    * :attr:`cropbox <pypdf._page.PageObject.cropbox>`
    * :attr:`mediabox <pypdf._page.PageObject.mediabox>`
    * :attr:`trimbox <pypdf._page.PageObject.trimbox>`
    """

    def __init__(
        self, arr: Union["RectangleObject", tuple[float, float, float, float]]
    ) -> None:
        # A page box is always [left, bottom, right, top]: must have four points.
        assert len(arr) == 4
        # Coerce plain numbers into PDF number objects where needed.
        ArrayObject.__init__(self, [self._ensure_is_number(item) for item in arr])

    def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
        # Pass existing PDF number objects through; wrap anything else.
        if isinstance(value, (FloatObject, NumberObject)):
            return value
        return FloatObject(value)

    def scale(self, sx: float, sy: float) -> "RectangleObject":
        """Return a new box with x-coordinates scaled by sx, y-coordinates by sy."""
        return RectangleObject(
            (
                float(self.left) * sx,
                float(self.bottom) * sy,
                float(self.right) * sx,
                float(self.top) * sy,
            )
        )

    def __repr__(self) -> str:
        return f"RectangleObject({list(self)!r})"

    @property
    def left(self) -> FloatObject:
        """x-coordinate of the left edge."""
        return self[0]

    @left.setter
    def left(self, f: float) -> None:
        self[0] = FloatObject(f)

    @property
    def bottom(self) -> FloatObject:
        """y-coordinate of the bottom edge."""
        return self[1]

    @bottom.setter
    def bottom(self, f: float) -> None:
        self[1] = FloatObject(f)

    @property
    def right(self) -> FloatObject:
        """x-coordinate of the right edge."""
        return self[2]

    @right.setter
    def right(self, f: float) -> None:
        self[2] = FloatObject(f)

    @property
    def top(self) -> FloatObject:
        """y-coordinate of the top edge."""
        return self[3]

    @top.setter
    def top(self, f: float) -> None:
        self[3] = FloatObject(f)

    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Property to read and modify the lower left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value: tuple[float, float]) -> None:
        x, y = value
        self[0] = self._ensure_is_number(x)
        self[1] = self._ensure_is_number(y)

    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Property to read and modify the lower right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value: tuple[float, float]) -> None:
        x, y = value
        self[2] = self._ensure_is_number(x)
        self[1] = self._ensure_is_number(y)

    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Property to read and modify the upper left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value: tuple[float, float]) -> None:
        x, y = value
        self[0] = self._ensure_is_number(x)
        self[3] = self._ensure_is_number(y)

    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Property to read and modify the upper right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value: tuple[float, float]) -> None:
        x, y = value
        self[2] = self._ensure_is_number(x)
        self[3] = self._ensure_is_number(y)

    @property
    def width(self) -> float:
        """Horizontal extent (right - left)."""
        return self.right - self.left

    @property
    def height(self) -> float:
        """Vertical extent (top - bottom)."""
        return self.top - self.bottom
|
||||
208
venv/lib/python3.12/site-packages/pypdf/generic/_utils.py
Normal file
208
venv/lib/python3.12/site-packages/pypdf/generic/_utils.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import codecs
|
||||
from typing import Union
|
||||
|
||||
from .._codecs import _pdfdoc_encoding
|
||||
from .._utils import StreamType, logger_warning, read_non_whitespace
|
||||
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
|
||||
from ._base import ByteStringObject, TextStringObject
|
||||
|
||||
|
||||
def hex_to_rgb(value: str) -> tuple[float, float, float]:
    """Convert a '#RRGGBB' (or 'RRGGBB') hex color into (r, g, b) floats in [0, 1]."""
    digits = value.lstrip("#")
    return tuple(int(digits[i : i + 2], 16) / 255.0 for i in (0, 2, 4))  # type: ignore
|
||||
|
||||
|
||||
def read_hex_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """Read a PDF hexadecimal string `<...>`; the stream is positioned at '<'."""
    stream.read(1)  # consume the opening "<"
    data = []
    pair = b""
    while True:
        tok = read_non_whitespace(stream)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b">":
            break
        pair += tok
        if len(pair) == 2:
            data.append(int(pair, base=16))
            pair = b""
    # A trailing odd digit is padded with "0" (PDF treats <A> as <A0>).
    if len(pair) == 1:
        pair += b"0"
    if pair != b"":
        data.append(int(pair, base=16))
    return create_string_object(bytes(data), forced_encoding)
|
||||
|
||||
|
||||
# Maps the byte following a backslash in a PDF literal string to the byte
# value it denotes. Includes the standard escapes plus several characters
# that some producers escape defensively.
__ESCAPE_DICT__ = {
    b"n": ord(b"\n"),
    b"r": ord(b"\r"),
    b"t": ord(b"\t"),
    b"b": ord(b"\b"),
    b"f": ord(b"\f"),
    b"(": ord(b"("),
    b")": ord(b")"),
    b"/": ord(b"/"),
    b"\\": ord(b"\\"),
    b" ": ord(b" "),
    b"%": ord(b"%"),
    b"<": ord(b"<"),
    b">": ord(b">"),
    b"[": ord(b"["),
    b"]": ord(b"]"),
    b"#": ord(b"#"),
    b"_": ord(b"_"),
    b"&": ord(b"&"),
    b"$": ord(b"$"),
}
# ASCII code of the backslash character, emitted for unrecognized escapes.
__BACKSLASH_CODE__ = 92
|
||||
|
||||
|
||||
def read_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    # Parse a PDF literal string "(...)"; the stream is positioned at the
    # opening parenthesis, which the first read consumes.
    tok = stream.read(1)
    parens = 1  # nesting depth of (balanced, unescaped) parentheses
    txt = []  # accumulated byte values of the decoded string
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b"(":
            parens += 1
        elif tok == b")":
            parens -= 1
            if parens == 0:
                # Matching close of the outermost parenthesis: string ends.
                break
        elif tok == b"\\":
            tok = stream.read(1)
            try:
                # Single-character escape such as \n, \t, \( ...
                txt.append(__ESCAPE_DICT__[tok])
                continue
            except KeyError:
                if b"0" <= tok <= b"7":
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    sav = stream.tell() - 1
                    for _ in range(2):
                        ntok = stream.read(1)
                        if b"0" <= ntok <= b"7":
                            tok += ntok
                        else:
                            stream.seek(-1, 1)  # ntok has to be analyzed
                            break
                    i = int(tok, base=8)
                    if i > 255:
                        # Overflowing octal escape: emit the backslash
                        # literally and rewind to re-read the digits as
                        # ordinary characters.
                        txt.append(__BACKSLASH_CODE__)
                        stream.seek(sav)
                    else:
                        txt.append(i)
                    continue
                if tok in b"\n\r":
                    # This case is hit when a backslash followed by a line
                    # break occurs. If it's a multi-char EOL, consume the
                    # second character:
                    tok = stream.read(1)
                    if tok not in b"\n\r":
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    continue
                msg = f"Unexpected escaped string: {tok.decode('utf-8', 'ignore')}"
                logger_warning(msg, __name__)
                # Unknown escape: keep the backslash, then fall through so the
                # escaped character itself is appended below.
                txt.append(__BACKSLASH_CODE__)
        txt.append(ord(tok))
    return create_string_object(bytes(txt), forced_encoding)
|
||||
|
||||
|
||||
def create_string_object(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:
    """
    Create a ByteStringObject or a TextStringObject from a string to represent the string.

    Args:
        string: The data being used
        forced_encoding: Typically None, or an encoding string

    Returns:
        A ByteStringObject

    Raises:
        TypeError: If string is not of type str or bytes.

    """
    if isinstance(string, str):
        return TextStringObject(string)
    if isinstance(string, bytes):
        if isinstance(forced_encoding, (list, dict)):
            # Caller supplied an explicit byte -> character map.
            out = ""
            for x in string:
                try:
                    out += forced_encoding[x]
                except Exception:
                    # Bytes missing from the map fall back to charmap.
                    out += bytes((x,)).decode("charmap")
            obj = TextStringObject(out)
            obj._original_bytes = string
            return obj
        if isinstance(forced_encoding, str):
            if forced_encoding == "bytes":
                # Caller explicitly wants the raw bytes preserved.
                return ByteStringObject(string)
            obj = TextStringObject(string.decode(forced_encoding))
            obj._original_bytes = string
            return obj
        try:
            if string.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
                # Explicit BOM: let the utf-16 codec pick the byte order.
                retval = TextStringObject(string.decode("utf-16"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = string[:2]
                return retval
            if string.startswith(b"\x00"):
                # Leading NUL byte suggests BOM-less big-endian UTF-16.
                retval = TextStringObject(string.decode("utf-16be"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = codecs.BOM_UTF16_BE
                return retval
            if string[1:2] == b"\x00":
                # NUL in the second byte suggests BOM-less little-endian UTF-16.
                retval = TextStringObject(string.decode("utf-16le"))
                retval._original_bytes = string
                retval.autodetect_utf16 = True
                retval.utf16_bom = codecs.BOM_UTF16_LE
                return retval

            # This is probably a big performance hit here, but we need
            # to convert string objects into the text/unicode-aware
            # version if possible... and the only way to check if that's
            # possible is to try.
            # Some strings are strings, some are just byte arrays.
            retval = TextStringObject(decode_pdfdocencoding(string))
            retval._original_bytes = string
            retval.autodetect_pdfdocencoding = True
            return retval
        except UnicodeDecodeError:
            return ByteStringObject(string)
    else:
        raise TypeError("create_string_object should have str or unicode arg")
|
||||
|
||||
|
||||
def decode_pdfdocencoding(byte_array: bytes) -> str:
    """Decode bytes in PDFDocEncoding to str.

    Args:
        byte_array: The raw bytes to decode.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If a byte has no mapping in the encoding table.

    """
    # Build the result with a list + join instead of repeated str concatenation
    # (the original `retval += c` loop is quadratic in the worst case).
    chars = []
    for index, b in enumerate(byte_array):
        c = _pdfdoc_encoding[b]
        if c == "\u0000":
            # Bug fix: the original passed ``bytearray(b)`` (b is an *int*,
            # producing b zero bytes) and -1/-1 positions, so the raised error
            # misreported the offending data. Report the real input and the
            # position of the unmappable byte instead; the exception type is
            # unchanged, so create_string_object still catches it.
            raise UnicodeDecodeError(
                "pdfdocencoding",
                bytes(byte_array),
                index,
                index + 1,
                "does not exist in translation table",
            )
        chars.append(c)
    return "".join(chars)
|
||||
163
venv/lib/python3.12/site-packages/pypdf/generic/_viewerpref.py
Normal file
163
venv/lib/python3.12/site-packages/pypdf/generic/_viewerpref.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# Copyright (c) 2023, Pubpub-ZZ
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
#
|
||||
# * Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# * The name of the author may not be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from typing import (
|
||||
Any,
|
||||
Optional,
|
||||
)
|
||||
|
||||
from ._base import BooleanObject, NameObject, NumberObject, is_null_or_none
|
||||
from ._data_structures import ArrayObject, DictionaryObject
|
||||
|
||||
# Shared default for the boolean viewer preferences (the PDF default is false).
f_obj = BooleanObject(False)


class ViewerPreferences(DictionaryObject):
    # Dictionary of /ViewerPreferences entries. The public properties
    # (hide_toolbar, direction, ...) are attached dynamically in __new__ below.
    def __init__(self, obj: Optional[DictionaryObject] = None) -> None:
        # NOTE(review): passing ``self`` to DictionaryObject.__init__ appears
        # to seed the dict from the (empty) instance itself — presumably
        # intentional; verify against DictionaryObject's constructor.
        super().__init__(self)
        if not is_null_or_none(obj):
            # Copy the entries (and, where present, the indirect reference)
            # of an existing /ViewerPreferences dictionary.
            self.update(obj.items())  # type: ignore
            try:
                self.indirect_reference = obj.indirect_reference  # type: ignore
            except AttributeError:
                pass

    def _get_bool(self, key: str, default: Optional[BooleanObject]) -> Optional[BooleanObject]:
        # Generic getter backing the boolean properties.
        return self.get(key, default)

    def _set_bool(self, key: str, v: bool) -> None:
        # Normalize any truthy value to a strict True/False BooleanObject.
        self[NameObject(key)] = BooleanObject(v is True)

    def _get_name(self, key: str, default: Optional[NameObject]) -> Optional[NameObject]:
        # Generic getter backing the name-valued properties.
        return self.get(key, default)

    def _set_name(self, key: str, lst: list[str], v: NameObject) -> None:
        # Validate name values; an empty ``lst`` means any name is accepted.
        if v[0] != "/":
            raise ValueError(f"{v} does not start with '/'")
        if lst != [] and v not in lst:
            raise ValueError(f"{v} is an unacceptable value")
        self[NameObject(key)] = NameObject(v)

    def _get_arr(self, key: str, default: Optional[list[Any]]) -> Optional[ArrayObject]:
        # Generic getter backing the array-valued properties.
        return self.get(key, None if default is None else ArrayObject(default))

    def _set_arr(self, key: str, v: Optional[ArrayObject]) -> None:
        # Assigning None removes the entry entirely.
        if v is None:
            try:
                del self[NameObject(key)]
            except KeyError:
                pass
            return
        if not isinstance(v, ArrayObject):
            raise ValueError("ArrayObject is expected")
        self[NameObject(key)] = v

    def _get_int(self, key: str, default: Optional[NumberObject]) -> Optional[NumberObject]:
        # Generic getter backing the integer-valued properties.
        return self.get(key, default)

    def _set_int(self, key: str, v: int) -> None:
        self[NameObject(key)] = NumberObject(v)

    @property
    def PRINT_SCALING(self) -> NameObject:
        # Convenience constant for the /PrintScaling key name.
        return NameObject("/PrintScaling")

    def __new__(cls: Any, value: Any = None) -> "ViewerPreferences":
        # The class's public properties are (re)attached each time an instance
        # is created; each nested factory builds one getter/setter pair that
        # closes over the PDF key name (and, where relevant, allowed values).
        def _add_prop_bool(key: str, default: Optional[BooleanObject]) -> property:
            return property(
                lambda self: self._get_bool(key, default),
                lambda self, v: self._set_bool(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        def _add_prop_name(
            key: str, lst: list[str], default: Optional[NameObject]
        ) -> property:
            return property(
                lambda self: self._get_name(key, default),
                lambda self, v: self._set_name(key, lst, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined.
            Acceptable values: {lst}
            """,
            )

        def _add_prop_arr(key: str, default: Optional[ArrayObject]) -> property:
            return property(
                lambda self: self._get_arr(key, default),
                lambda self, v: self._set_arr(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        def _add_prop_int(key: str, default: Optional[int]) -> property:
            return property(
                lambda self: self._get_int(key, default),
                lambda self, v: self._set_int(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        # Boolean preferences (default false per the PDF specification).
        cls.hide_toolbar = _add_prop_bool("/HideToolbar", f_obj)
        cls.hide_menubar = _add_prop_bool("/HideMenubar", f_obj)
        cls.hide_windowui = _add_prop_bool("/HideWindowUI", f_obj)
        cls.fit_window = _add_prop_bool("/FitWindow", f_obj)
        cls.center_window = _add_prop_bool("/CenterWindow", f_obj)
        cls.display_doctitle = _add_prop_bool("/DisplayDocTitle", f_obj)

        # Name-valued preferences with their accepted value sets.
        cls.non_fullscreen_pagemode = _add_prop_name(
            "/NonFullScreenPageMode",
            ["/UseNone", "/UseOutlines", "/UseThumbs", "/UseOC"],
            NameObject("/UseNone"),
        )
        cls.direction = _add_prop_name(
            "/Direction", ["/L2R", "/R2L"], NameObject("/L2R")
        )
        cls.view_area = _add_prop_name("/ViewArea", [], None)
        cls.view_clip = _add_prop_name("/ViewClip", [], None)
        cls.print_area = _add_prop_name("/PrintArea", [], None)
        cls.print_clip = _add_prop_name("/PrintClip", [], None)
        cls.print_scaling = _add_prop_name("/PrintScaling", [], None)
        cls.duplex = _add_prop_name(
            "/Duplex", ["/Simplex", "/DuplexFlipShortEdge", "/DuplexFlipLongEdge"], None
        )
        cls.pick_tray_by_pdfsize = _add_prop_bool("/PickTrayByPDFSize", None)
        cls.print_pagerange = _add_prop_arr("/PrintPageRange", None)
        cls.num_copies = _add_prop_int("/NumCopies", None)

        cls.enforce = _add_prop_arr("/Enforce", ArrayObject())

        return DictionaryObject.__new__(cls)
|
||||
200
venv/lib/python3.12/site-packages/pypdf/pagerange.py
Normal file
200
venv/lib/python3.12/site-packages/pypdf/pagerange.py
Normal file
@@ -0,0 +1,200 @@
|
||||
"""
|
||||
Representation and utils for ranges of PDF file pages.
|
||||
|
||||
Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
|
||||
All rights reserved. This software is available under a BSD license;
|
||||
see https://github.com/py-pdf/pypdf/blob/main/LICENSE
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Any, Union
|
||||
|
||||
from .errors import ParseError
|
||||
|
||||
_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
# groups:    12     34     5 6     7 8


class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

    - PageRange(str) parses a string representing a page range.
    - PageRange(slice) directly "imports" a slice.
    - to_slice() gives the equivalent slice.
    - str() and repr() allow printing.
    - indices(n) is like slice.indices(n).
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
        "int", "[int]:[int]" or "[int]:[int]:[int]",
        where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:

            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        if not isinstance(arg, str):
            raise ParseError(arg)
        match = re.match(PAGE_RANGE_RE, arg)
        if match is None:
            raise ParseError(arg)
        if match.group(2):
            # Special case: just an int means a range of one page.
            start = int(match.group(2))
            stop = start + 1 if start != -1 else None
            self._slice = slice(start, stop)
        else:
            # Colon form: the optional start/stop/step groups become the
            # corresponding slice fields.
            self._slice = slice(
                *[int(g) if g else None for g in match.group(4, 6, 8)]
            )

    @staticmethod
    def valid(input: Any) -> bool:
        """
        True if input is a valid initializer for a PageRange.

        Args:
            input: A possible PageRange string or a PageRange object.

        Returns:
            True, if the ``input`` is a valid PageRange.

        """
        if isinstance(input, (slice, PageRange)):
            return True
        return isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        s = self._slice
        parts: Union[tuple[int, int], tuple[int, int, int]]
        if s.step is None:
            # A one-page range collapses back to a single int.
            if s.start is not None and s.stop == s.start + 1:
                return str(s.start)
            parts = s.start, s.stop
        else:
            parts = s.start, s.stop, s.step
        return ":".join("" if p is None else str(p) for p in parts)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return "PageRange(" + repr(str(self)) + ")"

    def indices(self, n: int) -> tuple[int, int, int]:
        """
        Assuming a sequence of length n, calculate the start and stop indices,
        and the stride length of the PageRange.

        See help(slice.indices).

        Args:
            n: the length of the list of pages to choose from.

        Returns:
            Arguments for range().

        """
        return self._slice.indices(n)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, PageRange) and self._slice == other._slice

    def __hash__(self) -> int:
        return hash((self.__class__, (self._slice.start, self._slice.stop, self._slice.step)))

    def __add__(self, other: "PageRange") -> "PageRange":
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")
        first = self._slice.start, self._slice.stop
        second = other._slice.start, other._slice.stop

        # Order the two ranges so ``first`` starts no later than ``second``.
        if first[0] > second[0]:
            first, second = second, first

        if second[0] > first[1]:
            # There is a gap between the two ranges.
            raise ValueError("Can't add PageRanges with gap")
        return PageRange(slice(first[0], max(first[1], second[1])))
|
||||
|
||||
|
||||
PAGE_RANGE_ALL = PageRange(":") # The range of all pages.
|
||||
|
||||
|
||||
def parse_filename_page_ranges(
    args: list[Union[str, PageRange, None]]
) -> list[tuple[str, PageRange]]:
    """
    Given a list of filenames and page ranges, return a list of (filename, page_range) pairs.

    Args:
        args: A list where the first element is a filename. The other elements are
            filenames, page-range expressions, slice objects, or PageRange objects.
            A filename not followed by a page range indicates all pages of the file.

    Returns:
        A list of (filename, page_range) pairs.

    """
    pairs: list[tuple[str, PageRange]] = []
    current_file: Union[str, None] = None
    saw_range = False
    # A trailing None acts as an end marker so the final file gets flushed.
    for arg in [*args, None]:
        if PageRange.valid(arg):
            if not current_file:
                raise ValueError(
                    "The first argument must be a filename, not a page range."
                )

            assert arg is not None
            pairs.append((current_file, PageRange(arg)))
            saw_range = True
        else:
            # New filename (or end marker): flush the previous file with an
            # implicit "all pages" range if it never got an explicit one.
            if current_file and not saw_range:
                pairs.append((current_file, PAGE_RANGE_ALL))

            assert not isinstance(arg, PageRange), arg
            current_file = arg
            saw_range = False
    return pairs
|
||||
|
||||
|
||||
PageRangeSpec = Union[str, PageRange, tuple[int, int], tuple[int, int, int], list[int]]
|
||||
52
venv/lib/python3.12/site-packages/pypdf/papersizes.py
Normal file
52
venv/lib/python3.12/site-packages/pypdf/papersizes.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Helper to get paper sizes."""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
||||
|
||||
class Dimensions(NamedTuple):
    # Width and height in pixels at 72 ppi (1 pixel == 1 PDF point).
    width: int
    height: int
|
||||
|
||||
|
||||
class PaperSize:
    """(width, height) of the paper in portrait mode in pixels at 72 ppi."""

    # Notes of how to calculate it:
    # 1. Get the size of the paper in millimeters
    # 2. Convert it to inches (25.4 millimeters is equal to 1 inch)
    # 3. Convert it to pixels at 72dpi (1 inch is equal to 72 pixels)

    # All Din-A paper sizes follow this pattern:
    # 2 x A(n - 1) = A(n)
    # So the height of the next bigger one is the width of the smaller one
    # The ratio is always approximately 1:2**0.5
    # Additionally, A0 is defined to have an area of 1 m**2
    # https://en.wikipedia.org/wiki/ISO_216
    # Be aware of rounding issues!
    A0 = Dimensions(2384, 3370)  # 841mm x 1189mm
    A1 = Dimensions(1684, 2384)
    A2 = Dimensions(1191, 1684)
    A3 = Dimensions(842, 1191)
    A4 = Dimensions(
        595, 842
    )  # Printer paper, documents - this is by far the most common
    A5 = Dimensions(420, 595)  # Paperback books
    A6 = Dimensions(298, 420)  # Postcards
    A7 = Dimensions(210, 298)
    A8 = Dimensions(147, 210)

    # Envelopes
    C4 = Dimensions(649, 918)
|
||||
|
||||
|
||||
# DIN A series from largest (A0) to smallest (A8); the tuple index equals
# the "n" in "An".
_din_a = (
    PaperSize.A0,
    PaperSize.A1,
    PaperSize.A2,
    PaperSize.A3,
    PaperSize.A4,
    PaperSize.A5,
    PaperSize.A6,
    PaperSize.A7,
    PaperSize.A8,
)
|
||||
0
venv/lib/python3.12/site-packages/pypdf/py.typed
Normal file
0
venv/lib/python3.12/site-packages/pypdf/py.typed
Normal file
80
venv/lib/python3.12/site-packages/pypdf/types.py
Normal file
80
venv/lib/python3.12/site-packages/pypdf/types.py
Normal file
@@ -0,0 +1,80 @@
|
||||
"""Helpers for working with PDF types."""
|
||||
|
||||
import sys
|
||||
from typing import Literal, Union
|
||||
|
||||
if sys.version_info[:2] >= (3, 10):
|
||||
# Python 3.10+: https://www.python.org/dev/peps/pep-0484
|
||||
from typing import TypeAlias
|
||||
else:
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from .generic._base import NameObject, NullObject, NumberObject
|
||||
from .generic._data_structures import ArrayObject, Destination
|
||||
from .generic._outline import OutlineItem
|
||||
|
||||
BorderArrayType: TypeAlias = list[Union[NameObject, NumberObject, ArrayObject]]
|
||||
|
||||
OutlineItemType: TypeAlias = Union[OutlineItem, Destination]
|
||||
|
||||
FitType: TypeAlias = Literal[
|
||||
"/XYZ", "/Fit", "/FitH", "/FitV", "/FitR", "/FitB", "/FitBH", "/FitBV"
|
||||
]
|
||||
# These go with the FitType, they specify values for the fit
|
||||
ZoomArgType: TypeAlias = Union[NumberObject, NullObject, float]
|
||||
ZoomArgsType: TypeAlias = list[ZoomArgType]
|
||||
|
||||
# Recursive types like the following are not yet supported by Sphinx:
|
||||
# OutlineType = List[Union[Destination, "OutlineType"]]
|
||||
# Hence use this for the moment:
|
||||
OutlineType = list[Union[Destination, list[Union[Destination, list[Destination]]]]]
|
||||
|
||||
LayoutType: TypeAlias = Literal[
|
||||
"/NoLayout",
|
||||
"/SinglePage",
|
||||
"/OneColumn",
|
||||
"/TwoColumnLeft",
|
||||
"/TwoColumnRight",
|
||||
"/TwoPageLeft",
|
||||
"/TwoPageRight",
|
||||
]
|
||||
|
||||
PagemodeType: TypeAlias = Literal[
|
||||
"/UseNone",
|
||||
"/UseOutlines",
|
||||
"/UseThumbs",
|
||||
"/FullScreen",
|
||||
"/UseOC",
|
||||
"/UseAttachments",
|
||||
]
|
||||
|
||||
AnnotationSubtype: TypeAlias = Literal[
|
||||
"/Text",
|
||||
"/Link",
|
||||
"/FreeText",
|
||||
"/Line",
|
||||
"/Square",
|
||||
"/Circle",
|
||||
"/Polygon",
|
||||
"/PolyLine",
|
||||
"/Highlight",
|
||||
"/Underline",
|
||||
"/Squiggly",
|
||||
"/StrikeOut",
|
||||
"/Caret",
|
||||
"/Stamp",
|
||||
"/Ink",
|
||||
"/Popup",
|
||||
"/FileAttachment",
|
||||
"/Sound",
|
||||
"/Movie",
|
||||
"/Screen",
|
||||
"/Widget",
|
||||
"/PrinterMark",
|
||||
"/TrapNet",
|
||||
"/Watermark",
|
||||
"/3D",
|
||||
"/Redact",
|
||||
"/Projection",
|
||||
"/RichMedia",
|
||||
]
|
||||
748
venv/lib/python3.12/site-packages/pypdf/xmp.py
Normal file
748
venv/lib/python3.12/site-packages/pypdf/xmp.py
Normal file
@@ -0,0 +1,748 @@
|
||||
"""
|
||||
Anything related to Extensible Metadata Platform (XMP) metadata.
|
||||
|
||||
https://en.wikipedia.org/wiki/Extensible_Metadata_Platform
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import decimal
|
||||
import re
|
||||
from collections.abc import Iterator
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Optional,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
from xml.dom.minidom import Document, parseString
|
||||
from xml.dom.minidom import Element as XmlElement
|
||||
from xml.parsers.expat import ExpatError
|
||||
|
||||
from ._protocols import XmpInformationProtocol
|
||||
from ._utils import StreamType, deprecate_with_replacement, deprecation_no_replacement
|
||||
from .errors import PdfReadError, XmpDocumentError
|
||||
from .generic import ContentStream, PdfObject
|
||||
|
||||
# XMP/RDF namespace URIs used throughout this module.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"

# What is the PDFX namespace, you might ask?
# It's documented here: https://github.com/adobe/xmp-docs/raw/master/XMPSpecifications/XMPSpecificationPart3.pdf
# This namespace is used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.
#
# Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value. The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character. A key like "my car" is therefore "my\u21820020car".
#
# \u2182 is the unicode character \u{ROMAN NUMERAL TEN THOUSAND}
#
# The pdfx namespace should be avoided.
# A custom data schema and sensical XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP under "Extensibility of
# Schemas".
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"

# PDF/A
PDFAID_NAMESPACE = "http://www.aiim.org/pdfa/ns/id/"

# Internal mapping of namespace URI → prefix
_NAMESPACE_PREFIX_MAP = {
    DC_NAMESPACE: "dc",
    XMP_NAMESPACE: "xmp",
    PDF_NAMESPACE: "pdf",
    XMPMM_NAMESPACE: "xmpMM",
    PDFAID_NAMESPACE: "pdfaid",
    PDFX_NAMESPACE: "pdfx",
}

# Permissive ISO 8601 matcher; re.VERBOSE makes the layout whitespace
# insignificant.  Everything after the year is optional.
iso8601 = re.compile(
    """
    (?P<year>[0-9]{4})
    (-
    (?P<month>[0-9]{2})
    (-
    (?P<day>[0-9]+)
    (T
    (?P<hour>[0-9]{2}):
    (?P<minute>[0-9]{2})
    (:(?P<second>[0-9]{2}(.[0-9]+)?))?
    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
    )?
    )?
    )?
    """,
    re.VERBOSE,
)


K = TypeVar("K")

# Minimal XMP template
_MINIMAL_XMP = f"""<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pypdf">
<rdf:RDF xmlns:rdf="{RDF_NAMESPACE}">
<rdf:Description rdf:about=""
xmlns:dc="{DC_NAMESPACE}"
xmlns:xmp="{XMP_NAMESPACE}"
xmlns:pdf="{PDF_NAMESPACE}"
xmlns:xmpMM="{XMPMM_NAMESPACE}"
xmlns:pdfaid="{PDFAID_NAMESPACE}"
xmlns:pdfx="{PDFX_NAMESPACE}">
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
<?xpacket end="r"?>"""
||||
|
||||
|
||||
def _identity(value: K) -> K:
    """Default converter: return *value* unchanged."""
    return value
|
||||
|
||||
|
||||
def _converter_date(value: str) -> datetime.datetime:
    """
    Parse an ISO 8601 date/time string into a naive datetime normalized to UTC.

    Args:
        value: ISO 8601 string, e.g. ``2024-01-02T03:04:05.5+02:00``.
            Missing components (month, day, time) default to January 1st,
            midnight.

    Returns:
        A timezone-naive ``datetime.datetime`` in UTC.

    Raises:
        ValueError: If the value does not match the ISO 8601 pattern.

    """
    matches = iso8601.match(value)
    if matches is None:
        raise ValueError(f"Invalid date format: {value}")
    year = int(matches.group("year"))
    month = int(matches.group("month") or "1")
    day = int(matches.group("day") or "1")
    hour = int(matches.group("hour") or "0")
    minute = int(matches.group("minute") or "0")
    second = decimal.Decimal(matches.group("second") or "0")
    # Split the (possibly fractional) seconds into whole seconds and
    # microseconds — datetime's seventh positional argument is `microsecond`,
    # so the fraction is scaled by 1e6.  (The old local name "milliseconds"
    # was misleading; the value was always microseconds.)
    whole_seconds_dec = second.to_integral(decimal.ROUND_FLOOR)
    microseconds_dec = (second - whole_seconds_dec) * 1_000_000

    seconds = int(whole_seconds_dec)
    microseconds = int(microseconds_dec)

    tzd = matches.group("tzd") or "Z"
    dt = datetime.datetime(year, month, day, hour, minute, seconds, microseconds)
    if tzd != "Z":
        # Apply the inverse of the timezone offset to normalize to UTC:
        # "+05:30" subtracts 5h30m, "-05:30" adds 5h30m.
        tzd_hours, tzd_minutes = (int(x) for x in tzd.split(":"))
        tzd_hours *= -1
        if tzd_hours < 0:
            tzd_minutes *= -1
        dt = dt + datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
    return dt
|
||||
|
||||
|
||||
def _format_datetime_utc(value: datetime.datetime) -> str:
|
||||
"""Format a datetime as UTC with trailing 'Z'.
|
||||
|
||||
- If the input is timezone-aware, convert to UTC first.
|
||||
- If naive, assume UTC.
|
||||
"""
|
||||
if value.tzinfo is not None and value.utcoffset() is not None:
|
||||
value = value.astimezone(datetime.timezone.utc)
|
||||
|
||||
value = value.replace(tzinfo=None)
|
||||
return value.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
|
||||
|
||||
|
||||
def _generic_get(
    element: XmlElement, self: "XmpInformation", list_type: str, converter: Callable[[Any], Any] = _identity
) -> Optional[list[str]]:
    """
    Collect the converted text of every rdf:li inside *list_type* containers.

    Returns None when *element* contains no container of the requested type
    (rdf:Bag / rdf:Seq / ...); otherwise returns the (possibly empty) list of
    converted item values.
    """
    containers = element.getElementsByTagNameNS(RDF_NAMESPACE, list_type)
    if not len(containers):
        return None
    values: list[Any] = []
    for container in containers:
        for item in container.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
            values.append(converter(self._get_text(item)))
    return values
|
||||
|
||||
|
||||
class XmpInformation(XmpInformationProtocol, PdfObject):
    """
    An object that represents Extensible Metadata Platform (XMP) metadata.
    Usually accessed by :py:attr:`xmp_metadata()<pypdf.PdfReader.xmp_metadata>`.

    Raises:
        PdfReadError: if XML is invalid

    """

    def __init__(self, stream: ContentStream) -> None:
        # Raw XMP packet; the DOM below is parsed from it and written back
        # via _update_stream() whenever a setter mutates the metadata.
        self.stream = stream
        try:
            data = self.stream.get_data()
            doc_root: Document = parseString(data)  # noqa: S318
        except (AttributeError, ExpatError) as e:
            raise PdfReadError(f"XML in XmpInformation was invalid: {e}")
        self.rdf_root: XmlElement = doc_root.getElementsByTagNameNS(
            RDF_NAMESPACE, "RDF"
        )[0]
        # Per-namespace cache of parsed values: {namespace: {name: value}}.
        self.cache: dict[Any, Any] = {}

    @classmethod
    def create(cls) -> "XmpInformation":
        """
        Create a new XmpInformation object with minimal structure.

        Returns:
            A new XmpInformation instance with empty metadata fields.

        """
        stream = ContentStream(None, None)
        stream.set_data(_MINIMAL_XMP.encode("utf-8"))
        return cls(stream)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        # Deprecated entry point; delegates to the underlying content stream.
        deprecate_with_replacement(
            "XmpInformation.write_to_stream",
            "PdfWriter.xmp_metadata",
            "6.0.0"
        )
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        self.stream.write_to_stream(stream)

    def get_element(self, about_uri: str, namespace: str, name: str) -> Iterator[Any]:
        # Yields both attribute nodes and child elements matching
        # namespace/name on the rdf:Description whose rdf:about matches.
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                yield from desc.getElementsByTagNameNS(namespace, name)

    def get_nodes_in_namespace(self, about_uri: str, namespace: str) -> Iterator[Any]:
        # Yields every attribute and direct child element of the matching
        # rdf:Description that belongs to *namespace*.
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr and attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _get_text(self, element: XmlElement) -> str:
        # Concatenate all direct text-node children of *element*.
        text = ""
        for child in element.childNodes:
            if child.nodeType == child.TEXT_NODE:
                text += child.data
        return text

    def _get_single_value(
        self,
        namespace: str,
        name: str,
        converter: Callable[[str], Any] = _identity,
    ) -> Optional[Any]:
        # Fetch a single (first matching) value, with caching.
        # NOTE: the truthiness check means falsy cached values (e.g. "")
        # are recomputed on every access.
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        value = None
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                value = element.nodeValue
            else:
                value = self._get_text(element)
            break  # only the first match is used
        if value is not None:
            value = converter(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = value
        return value

    def _getter_bag(self, namespace: str, name: str) -> Optional[list[str]]:
        # Fetch an unordered array (rdf:Bag) of values, with caching.
        # Elements without a Bag fall back to their plain text content.
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: list[str] = []
        for element in self.get_element("", namespace, name):
            if (bags := _generic_get(element, self, list_type="Bag")) is not None:
                retval.extend(bags)
            else:
                value = self._get_text(element)
                retval.append(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    def _get_seq_values(
        self,
        namespace: str,
        name: str,
        converter: Callable[[Any], Any] = _identity,
    ) -> Optional[list[Any]]:
        # Fetch an ordered array (rdf:Seq) of values, with caching and a
        # Bag fallback for non-conforming producers.
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: list[Any] = []
        for element in self.get_element("", namespace, name):
            if (seqs := _generic_get(element, self, list_type="Seq", converter=converter)) is not None:
                retval.extend(seqs)
            elif (bags := _generic_get(element, self, list_type="Bag")) is not None:
                # See issue at https://github.com/py-pdf/pypdf/issues/3324
                # Some applications violate the XMP metadata standard regarding `dc:creator` which should
                # be an "ordered array" and thus a sequence, but use an unordered array (bag) instead.
                # This seems to stem from the fact that the original Dublin Core specification does indeed
                # use bags or direct values, while PDFs are expected to follow the XMP standard and ignore
                # the plain Dublin Core variant. For this reason, add a fallback here to deal with such
                # issues accordingly.
                retval.extend(bags)
            else:
                value = converter(self._get_text(element))
                retval.append(value)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    def _get_langalt_values(self, namespace: str, name: str) -> Optional[dict[Any, Any]]:
        # Fetch a language-alternatives mapping (rdf:Alt), with caching.
        # Elements without an Alt fall back to an "x-default" entry.
        cached = self.cache.get(namespace, {}).get(name)
        if cached:
            return cached
        retval: dict[Any, Any] = {}
        for element in self.get_element("", namespace, name):
            alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
            if len(alts):
                for alt in alts:
                    for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                        value = self._get_text(item)
                        retval[item.getAttribute("xml:lang")] = value
            else:
                retval["x-default"] = self._get_text(element)
        ns_cache = self.cache.setdefault(namespace, {})
        ns_cache[name] = retval
        return retval

    @property
    def dc_contributor(self) -> Optional[list[str]]:
        """Contributors to the resource (other than the authors)."""
        return self._getter_bag(DC_NAMESPACE, "contributor")

    @dc_contributor.setter
    def dc_contributor(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "contributor", values)

    @property
    def dc_coverage(self) -> Optional[str]:
        """Text describing the extent or scope of the resource."""
        return self._get_single_value(DC_NAMESPACE, "coverage")

    @dc_coverage.setter
    def dc_coverage(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "coverage", value)

    @property
    def dc_creator(self) -> Optional[list[str]]:
        """A sorted array of names of the authors of the resource, listed in order of precedence."""
        return self._get_seq_values(DC_NAMESPACE, "creator")

    @dc_creator.setter
    def dc_creator(self, values: Optional[list[str]]) -> None:
        self._set_seq_values(DC_NAMESPACE, "creator", values)

    @property
    def dc_date(self) -> Optional[list[datetime.datetime]]:
        """A sorted array of dates of significance to the resource. The dates and times are in UTC."""
        return self._get_seq_values(DC_NAMESPACE, "date", _converter_date)

    @dc_date.setter
    def dc_date(self, values: Optional[list[Union[str, datetime.datetime]]]) -> None:
        # Accepts both datetime objects (formatted as UTC) and raw strings.
        if values is None:
            self._set_seq_values(DC_NAMESPACE, "date", None)
        else:
            date_strings = []
            for value in values:
                if isinstance(value, datetime.datetime):
                    date_strings.append(_format_datetime_utc(value))
                else:
                    date_strings.append(str(value))
            self._set_seq_values(DC_NAMESPACE, "date", date_strings)

    @property
    def dc_description(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the content of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "description")

    @dc_description.setter
    def dc_description(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "description", values)

    @property
    def dc_format(self) -> Optional[str]:
        """The mime-type of the resource."""
        return self._get_single_value(DC_NAMESPACE, "format")

    @dc_format.setter
    def dc_format(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "format", value)

    @property
    def dc_identifier(self) -> Optional[str]:
        """Unique identifier of the resource."""
        return self._get_single_value(DC_NAMESPACE, "identifier")

    @dc_identifier.setter
    def dc_identifier(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "identifier", value)

    @property
    def dc_language(self) -> Optional[list[str]]:
        """An unordered array specifying the languages used in the resource."""
        return self._getter_bag(DC_NAMESPACE, "language")

    @dc_language.setter
    def dc_language(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "language", values)

    @property
    def dc_publisher(self) -> Optional[list[str]]:
        """An unordered array of publisher names."""
        return self._getter_bag(DC_NAMESPACE, "publisher")

    @dc_publisher.setter
    def dc_publisher(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "publisher", values)

    @property
    def dc_relation(self) -> Optional[list[str]]:
        """An unordered array of text descriptions of relationships to other documents."""
        return self._getter_bag(DC_NAMESPACE, "relation")

    @dc_relation.setter
    def dc_relation(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "relation", values)

    @property
    def dc_rights(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the rights the user has to this resource."""
        return self._get_langalt_values(DC_NAMESPACE, "rights")

    @dc_rights.setter
    def dc_rights(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "rights", values)

    @property
    def dc_source(self) -> Optional[str]:
        """Unique identifier of the work from which this resource was derived."""
        return self._get_single_value(DC_NAMESPACE, "source")

    @dc_source.setter
    def dc_source(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "source", value)

    @property
    def dc_subject(self) -> Optional[list[str]]:
        """An unordered array of descriptive phrases or keywords that specify the topic of the content."""
        return self._getter_bag(DC_NAMESPACE, "subject")

    @dc_subject.setter
    def dc_subject(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "subject", values)

    @property
    def dc_title(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of the title of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "title")

    @dc_title.setter
    def dc_title(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "title", values)

    @property
    def dc_type(self) -> Optional[list[str]]:
        """An unordered array of textual descriptions of the document type."""
        return self._getter_bag(DC_NAMESPACE, "type")

    @dc_type.setter
    def dc_type(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "type", values)

    @property
    def pdf_keywords(self) -> Optional[str]:
        """An unformatted text string representing document keywords."""
        return self._get_single_value(PDF_NAMESPACE, "Keywords")

    @pdf_keywords.setter
    def pdf_keywords(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Keywords", value)

    @property
    def pdf_pdfversion(self) -> Optional[str]:
        """The PDF file version, for example 1.0 or 1.3."""
        return self._get_single_value(PDF_NAMESPACE, "PDFVersion")

    @pdf_pdfversion.setter
    def pdf_pdfversion(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "PDFVersion", value)

    @property
    def pdf_producer(self) -> Optional[str]:
        """The name of the tool that saved the document as a PDF."""
        return self._get_single_value(PDF_NAMESPACE, "Producer")

    @pdf_producer.setter
    def pdf_producer(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Producer", value)

    @property
    def xmp_create_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was originally created. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "CreateDate", _converter_date)

    @xmp_create_date.setter
    def xmp_create_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "CreateDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "CreateDate", None)

    @property
    def xmp_modify_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was last modified. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "ModifyDate", _converter_date)

    @xmp_modify_date.setter
    def xmp_modify_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "ModifyDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "ModifyDate", None)

    @property
    def xmp_metadata_date(self) -> Optional[datetime.datetime]:
        """The date and time that any metadata for this resource was last changed. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "MetadataDate", _converter_date)

    @xmp_metadata_date.setter
    def xmp_metadata_date(self, value: Optional[datetime.datetime]) -> None:
        if value:
            date_str = _format_datetime_utc(value)
            self._set_single_value(XMP_NAMESPACE, "MetadataDate", date_str)
        else:
            self._set_single_value(XMP_NAMESPACE, "MetadataDate", None)

    @property
    def xmp_creator_tool(self) -> Optional[str]:
        """The name of the first known tool used to create the resource."""
        return self._get_single_value(XMP_NAMESPACE, "CreatorTool")

    @xmp_creator_tool.setter
    def xmp_creator_tool(self, value: Optional[str]) -> None:
        self._set_single_value(XMP_NAMESPACE, "CreatorTool", value)

    @property
    def xmpmm_document_id(self) -> Optional[str]:
        """The common identifier for all versions and renditions of this resource."""
        return self._get_single_value(XMPMM_NAMESPACE, "DocumentID")

    @xmpmm_document_id.setter
    def xmpmm_document_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "DocumentID", value)

    @property
    def xmpmm_instance_id(self) -> Optional[str]:
        """An identifier for a specific incarnation of a document, updated each time a file is saved."""
        return self._get_single_value(XMPMM_NAMESPACE, "InstanceID")

    @xmpmm_instance_id.setter
    def xmpmm_instance_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "InstanceID", value)

    @property
    def pdfaid_part(self) -> Optional[str]:
        """The part of the PDF/A standard that the document conforms to (e.g., 1, 2, 3)."""
        return self._get_single_value(PDFAID_NAMESPACE, "part")

    @pdfaid_part.setter
    def pdfaid_part(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "part", value)

    @property
    def pdfaid_conformance(self) -> Optional[str]:
        """The conformance level within the PDF/A standard (e.g., 'A', 'B', 'U')."""
        return self._get_single_value(PDFAID_NAMESPACE, "conformance")

    @pdfaid_conformance.setter
    def pdfaid_conformance(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "conformance", value)

    @property
    def custom_properties(self) -> dict[Any, Any]:
        """
        Retrieve custom metadata properties defined in the undocumented pdfx
        metadata schema.

        Returns:
            A dictionary of key/value items for custom metadata properties.

        """
        # Lazily built and memoized on first access.
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.get_nodes_in_namespace("", PDFX_NAMESPACE):
                key = node.localName
                while True:
                    # see documentation about PDFX_NAMESPACE earlier in file:
                    # "\u2182XXXX" escapes are decoded back to the original
                    # character, four hex digits at a time.
                    idx = key.find("\u2182")
                    if idx == -1:
                        break
                    key = (
                        key[:idx]
                        + chr(int(key[idx + 1 : idx + 5], base=16))
                        + key[idx + 5 :]
                    )
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._get_text(node)
                self._custom_properties[key] = value
        return self._custom_properties

    def _get_or_create_description(self, about_uri: str = "") -> XmlElement:
        """Get or create an rdf:Description element with the given about URI."""
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                return desc

        doc = self.rdf_root.ownerDocument
        if doc is None:
            raise XmpDocumentError("XMP Document is None")
        desc = doc.createElementNS(RDF_NAMESPACE, "rdf:Description")
        desc.setAttributeNS(RDF_NAMESPACE, "rdf:about", about_uri)
        self.rdf_root.appendChild(desc)
        return desc

    def _clear_cache_entry(self, namespace: str, name: str) -> None:
        """Remove a cached value for a given namespace/name if present."""
        ns_cache = self.cache.get(namespace)
        if ns_cache and name in ns_cache:
            del ns_cache[name]

    def _set_single_value(self, namespace: str, name: str, value: Optional[str]) -> None:
        """Set or remove a single metadata value."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        # Remove any existing element or attribute representation first.
        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if existing_attr := desc.getAttributeNodeNS(namespace, name):
            desc.removeAttributeNode(existing_attr)

        if value is not None:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            text_node = doc.createTextNode(str(value))
            elem.appendChild(text_node)
            desc.appendChild(elem)

        self._update_stream()

    def _set_bag_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove bag values (unordered array)."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            bag = doc.createElementNS(RDF_NAMESPACE, "rdf:Bag")

            for value in values:
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                bag.appendChild(li)

            elem.appendChild(bag)
            desc.appendChild(elem)

        self._update_stream()

    def _set_seq_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove sequence values (ordered array)."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            seq = doc.createElementNS(RDF_NAMESPACE, "rdf:Seq")

            for value in values:
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                seq.appendChild(li)

            elem.appendChild(seq)
            desc.appendChild(elem)

        self._update_stream()

    def _set_langalt_values(self, namespace: str, name: str, values: Optional[dict[str, str]]) -> None:
        """Set or remove language alternative values."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()

        existing_elements = list(desc.getElementsByTagNameNS(namespace, name))
        for elem in existing_elements:
            desc.removeChild(elem)

        if values:
            doc = self.rdf_root.ownerDocument
            if doc is None:
                raise XmpDocumentError("XMP Document is None")
            prefix = self._get_namespace_prefix(namespace)
            elem = doc.createElementNS(namespace, f"{prefix}:{name}")
            alt = doc.createElementNS(RDF_NAMESPACE, "rdf:Alt")

            for lang, value in values.items():
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                li.setAttribute("xml:lang", lang)
                text_node = doc.createTextNode(str(value))
                li.appendChild(text_node)
                alt.appendChild(li)

            elem.appendChild(alt)
            desc.appendChild(elem)

        self._update_stream()

    def _get_namespace_prefix(self, namespace: str) -> str:
        """Get the appropriate namespace prefix for a given namespace URI."""
        return _NAMESPACE_PREFIX_MAP.get(namespace, "unknown")

    def _update_stream(self) -> None:
        """Update the stream with the current XML content."""
        doc = self.rdf_root.ownerDocument
        if doc is None:
            raise XmpDocumentError("XMP Document is None")

        xml_data = doc.toxml(encoding="utf-8")
        self.stream.set_data(xml_data)
|
||||
Reference in New Issue
Block a user