130 lines
3.4 KiB
Bash
Executable File
130 lines
3.4 KiB
Bash
Executable File
#!/usr/bin/env bash
# Download a video (Facebook, YouTube, etc.), extract its audio and transcribe
# it with Whisper.
#
# Usage: ./transcribe_video.sh <URL> [language]
#        ./transcribe_video.sh <URL> [language] --save-kb
#
# Examples:
#   ./transcribe_video.sh "https://www.facebook.com/share/v/1EdPt3q2sq/"
#   ./transcribe_video.sh "https://www.facebook.com/share/r/1akfPJYvTw/" ro --save-kb
#   ./transcribe_video.sh "https://youtu.be/xyz" ro
#
# Globals set here and read further down the script:
#   URL, SAVE_KB, PROJECT_ROOT, KB_DIR, TITLE, CREATOR, DURATION, TRANSCRIPT

set -euo pipefail

# --- Argument parsing --------------------------------------------------------
# --save-kb may appear at any position. The first remaining argument is the
# URL and the second is the transcription language; extra arguments are
# ignored. The language is deliberately NOT stored in LANG: that name is the
# locale variable and overwriting it changes the locale of every child process.
URL=""
WHISPER_LANG=""
SAVE_KB=0
positional_count=0
for arg in "$@"; do
  if [[ "$arg" == "--save-kb" ]]; then
    SAVE_KB=1
    continue
  fi
  positional_count=$((positional_count + 1))
  case "$positional_count" in
    1) URL="$arg" ;;
    2) WHISPER_LANG="$arg" ;;
    *) ;;
  esac
done
WHISPER_LANG="${WHISPER_LANG:-en}"

if [[ -z "$URL" ]]; then
  echo "Usage: $0 <URL> [language (default: en)] [--save-kb]" >&2
  exit 1
fi

PROJECT_ROOT="$(cd -- "$(dirname -- "$0")/.." && pwd)"
KB_DIR="$PROJECT_ROOT/memory/kb"

export PATH="/home/moltbot/bin:$PATH"

# --- Scratch directory -------------------------------------------------------
# mktemp gives an unpredictable, freshly created directory; it is removed on
# every exit path by the EXIT trap.
WORKDIR="$(mktemp -d "${TMPDIR:-/tmp}/transcribe_XXXXXX")"
trap 'rm -rf -- "$WORKDIR"' EXIT

# --- Metadata ----------------------------------------------------------------
# Metadata is best-effort: if yt-dlp or the JSON parsing fails, fall back to
# placeholder values instead of aborting.
echo "→ Obțin informații video..."
INFO_JSON=$(yt-dlp "$URL" --dump-json --no-download --no-playlist -q 2>/dev/null || echo "{}")

TITLE=$(python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('title') or 'Unknown')" \
  <<<"$INFO_JSON" 2>/dev/null || echo "Unknown")
CREATOR=$(python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('uploader') or d.get('channel') or '')" \
  <<<"$INFO_JSON" 2>/dev/null || echo "")
# int() accepts float durations; a null duration raises and falls back to "?".
DURATION=$(python3 -c "import json,sys; d=json.load(sys.stdin); s=int(d.get('duration',0)); print(f'{s//60}:{s%60:02d}')" \
  <<<"$INFO_JSON" 2>/dev/null || echo "?")

# --- Download ----------------------------------------------------------------
echo "→ Descarc video: $TITLE..."
yt-dlp "$URL" -o "$WORKDIR/video.%(ext)s" --no-playlist -q

# Pick the first downloaded file with a glob instead of parsing `ls`.
VIDEO_FILE=""
for candidate in "$WORKDIR"/video.*; do
  if [[ -f "$candidate" ]]; then
    VIDEO_FILE="$candidate"
    break
  fi
done
if [[ -z "$VIDEO_FILE" ]]; then
  echo "Eroare: descărcarea a eșuat." >&2
  exit 1
fi

# --- Audio extraction --------------------------------------------------------
# 16 kHz, mono, 16-bit PCM WAV.
echo "→ Extrag audio..."
ffmpeg -i "$VIDEO_FILE" -vn -acodec pcm_s16le -ar 16000 -ac 1 "$WORKDIR/audio.wav" -y -loglevel error

# --- Transcription -----------------------------------------------------------
echo "→ Transcriu cu Whisper (model: small, limbă: $WHISPER_LANG)..."

# The audio path and language are passed as argv rather than interpolated into
# the Python source, so their content can never be executed as code.
WHISPER_CODE='
import sys
import whisper

audio_path, language = sys.argv[1], sys.argv[2]
model = whisper.load_model("small")
result = model.transcribe(audio_path, language=language)
print(result["text"])
'

# Whisper's stderr is captured rather than discarded: it stays hidden on
# success, but its tail is shown when transcription fails.
if ! TRANSCRIPT=$(python3 -c "$WHISPER_CODE" "$WORKDIR/audio.wav" "$WHISPER_LANG" 2>"$WORKDIR/whisper.err"); then
  echo "Eroare: transcrierea a eșuat." >&2
  tail -n 20 "$WORKDIR/whisper.err" >&2 || true
  exit 1
fi

echo ""
echo "=== $TITLE ==="
printf '%s\n' "$TRANSCRIPT"
echo ""
echo "✓ Transcriere completă."

# Build a filename slug from a title: lowercase, diacritics stripped, runs of
# non-alphanumerics collapsed to single hyphens, at most 50 characters, no
# leading or trailing hyphen.
# Arguments: $1 - title text
# Outputs:   the slug on stdout; "video" when nothing usable is left
# Returns:   non-zero if python3 fails
slugify_title() {
  local slug
  slug=$(printf '%s\n' "$1" | python3 -c "
import sys, re, unicodedata
s = sys.stdin.read().strip()
s = unicodedata.normalize('NFD', s)
s = ''.join(c for c in s if unicodedata.category(c) != 'Mn')
s = s.lower()
s = re.sub(r'[^a-z0-9]+', '-', s)
# Strip again after truncating so the cut cannot leave a trailing hyphen.
s = s.strip('-')[:50].strip('-')
print(s)
") || return 1
  # A title with no ASCII letters or digits would otherwise yield "DATE_.md".
  printf '%s\n' "${slug:-video}"
}

# Map a video URL to its KB category (case-insensitive match).
# Arguments: $1 - URL
# Outputs:   "facebook", "youtube" or "media" on stdout
kb_category_for_url() {
  # Here-strings avoid `echo | grep -q`, which can fail falsely under pipefail
  # when grep exits before echo has finished writing.
  if grep -qi 'facebook\.com' <<<"$1"; then
    printf 'facebook\n'
  elif grep -qiE 'youtube\.com|youtu\.be' <<<"$1"; then
    printf 'youtube\n'
  else
    printf 'media\n'
  fi
}

# With --save-kb: write the transcript as a Markdown note under
# $KB_DIR/<category>/ and run the notes index updater.
if [[ "$SAVE_KB" == "1" ]]; then
  DATE=$(date +%Y-%m-%d)
  SLUG=$(slugify_title "$TITLE")
  CATEGORY=$(kb_category_for_url "$URL")

  # Facebook videos are labelled as reels; everything else as plain video.
  if [[ "$CATEGORY" == "facebook" ]]; then
    FORMAT="Reel (~${DURATION} min)"
  else
    FORMAT="Video (~${DURATION} min)"
  fi

  NOTE_DIR="$KB_DIR/$CATEGORY"
  mkdir -p "$NOTE_DIR"
  NOTE_FILE="$NOTE_DIR/${DATE}_${SLUG}.md"

  # Unquoted delimiter: the variables below are expanded once; their contents
  # are not re-evaluated by the shell.
  cat > "$NOTE_FILE" << NOTEEOF
# $TITLE

**Sursa:** $URL
**Data:** $DATE
**Creator:** $CREATOR
**Format:** $FORMAT
**Tags:** @coaching

---

## TL;DR

<!-- Completează un rezumat de 2-3 rânduri -->

---

## Transcrierea

$TRANSCRIPT
NOTEEOF

  echo ""
  echo "→ Notiță salvată: $NOTE_FILE"

  echo "→ Reindexez KB..."
  python3 "$PROJECT_ROOT/tools/update_notes_index.py"

  echo "✓ KB actualizat. Link: /echo/files.html#memory/kb/$CATEGORY/${DATE}_${SLUG}.md"
fi