Update cron, dashboard, root +3 more (+1 ~11)
This commit is contained in:
@@ -812,6 +812,51 @@ def _tts_synthesize(text: str, voice: str) -> dict:
|
||||
return {"ok": False, "error": str(e)}
|
||||
|
||||
|
||||
def split_text_chunks(text: str, max_chars: int = 1500) -> list[str]:
    """Split *text* into chunks of at most *max_chars* characters.

    Paragraphs (separated by a blank line) are packed greedily into chunks
    and re-joined with a blank line. A paragraph that is itself longer than
    the limit is broken on sentence boundaries, and a sentence that is still
    too long is wrapped on word boundaries (over-long words are cut), so no
    returned chunk exceeds the limit.

    Args:
        text: The text to split.
        max_chars: Maximum length of each chunk. Values below 1 are treated
            as 1 when packing.

    Returns:
        The list of chunks in reading order. When *text* contains no
        non-whitespace content, a single-element list holding
        ``text[:max_chars]`` is returned.
    """
    limit = max(1, max_chars)
    paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]

    chunks: list[str] = []
    current_parts: list[str] = []
    # Exact length of "\n\n".join(current_parts).
    current_len = 0

    for para in paragraphs:
        if len(para) > limit:
            # Close the chunk in progress before breaking the long paragraph.
            if current_parts:
                chunks.append("\n\n".join(current_parts))
            pieces = _chunk_long_paragraph(para, limit)
            chunks.extend(pieces[:-1])
            # The tail piece stays open as ONE part, so following paragraphs
            # can still be appended to it while its own sentences remain
            # joined by spaces rather than paragraph breaks.
            current_parts = [pieces[-1]]
            current_len = len(pieces[-1])
        elif current_parts and current_len + 2 + len(para) > limit:
            chunks.append("\n\n".join(current_parts))
            current_parts = [para]
            current_len = len(para)
        else:
            if current_parts:
                current_len += 2  # the "\n\n" separator
            current_len += len(para)
            current_parts.append(para)

    if current_parts:
        chunks.append("\n\n".join(current_parts))

    return chunks if chunks else [text[:max_chars]]


def _chunk_long_paragraph(para: str, limit: int) -> list[str]:
    """Break one oversized paragraph into pieces of at most *limit* chars.

    Sentences are packed greedily and joined with a single space. A sentence
    longer than *limit* is first wrapped on whitespace with ``textwrap.wrap``,
    which also cuts words that are longer than *limit*.
    """
    import re as _re
    import textwrap as _textwrap

    pieces: list[str] = []
    current = ""
    for sent in _re.split(r'(?<=[.!?])\s+', para):
        if len(sent) > limit:
            # Hyphen breaks are disabled so words are only cut when a single
            # word cannot fit on its own.
            units = _textwrap.wrap(sent, width=limit, break_on_hyphens=False)
        else:
            units = [sent]
        for unit in units:
            if not unit:
                continue
            if current and len(current) + 1 + len(unit) > limit:
                pieces.append(current)
                current = unit
            elif current:
                current = f"{current} {unit}"
            else:
                current = unit
    if current:
        pieces.append(current)
    return pieces
|
||||
|
||||
|
||||
def extract_url_text(url: str) -> str | None:
    """Extract the main text from a URL (public entry point).

    Delegates to the private ``_extract_url_text`` helper and returns its
    result unchanged.
    """
    extracted = _extract_url_text(url)
    return extracted
|
||||
|
||||
|
||||
def _extract_url_text(url: str) -> str | None:
|
||||
"""Extrage textul principal dintr-un URL cu trafilatura."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user