#!/usr/bin/env bash
# Download a video (Facebook, YouTube, etc.), extract its audio and transcribe
# it with Whisper.
#
# Usage: ./transcribe_video.sh <url> [language]
#        ./transcribe_video.sh <url> [language] --save-kb
#
# Arguments:
#   url        Video URL handed to yt-dlp (required).
#   language   Language code passed to Whisper (default: en).
#   --save-kb  Also write the transcript as a Markdown note under
#              <project root>/memory/kb/<category>/ and run
#              tools/update_notes_index.py. May appear at any position.
#
# Examples:
#   ./transcribe_video.sh "https://www.facebook.com/share/v/1EdPt3q2sq/"
#   ./transcribe_video.sh "https://www.facebook.com/share/r/1akfPJYvTw/" ro --save-kb
#   ./transcribe_video.sh "https://youtu.be/xyz" ro
#
# Requires on PATH: yt-dlp, ffmpeg, python3 (with the `whisper` module).
# Progress messages and the transcript go to stdout; errors go to stderr.
# Exits 1 when the URL is missing or any step fails.

set -euo pipefail

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print one metadata value extracted from the yt-dlp JSON held in $INFO_JSON.
# Arguments: $1 - field to extract: title | creator | duration
#            $2 - fallback text printed when the JSON cannot be parsed
# Outputs:   the value (or the fallback) on stdout
video_meta() {
  local field=$1 fallback=$2
  # Metadata is best effort, so Python errors are deliberately hidden and
  # replaced with the fallback.
  printf '%s' "$INFO_JSON" | python3 -c '
import json
import sys

d = json.load(sys.stdin)
field = sys.argv[1]
if field == "title":
    print(d.get("title") or "Unknown")
elif field == "creator":
    print(d.get("uploader") or d.get("channel") or "")
elif field == "duration":
    s = d.get("duration")
    if s is None:
        print("?")
    else:
        # Truncate to int: a float duration cannot be formatted with ":02d".
        s = int(s)
        print(f"{s // 60}:{s % 60:02d}")
' "$field" 2>/dev/null || printf '%s\n' "$fallback"
}

# Turn a title into a file-name slug: diacritics stripped, lowercase, only
# [a-z0-9] runs joined by single hyphens, at most 50 characters.
# Arguments: $1 - title text
# Outputs:   the slug on stdout (may be empty if nothing survives filtering)
slugify() {
  printf '%s' "$1" | python3 -c '
import re
import sys
import unicodedata

s = sys.stdin.read().strip()
s = unicodedata.normalize("NFD", s)
s = "".join(c for c in s if unicodedata.category(c) != "Mn")
s = re.sub(r"[^a-z0-9]+", "-", s.lower())
# rstrip again: truncating at 50 characters can leave a trailing hyphen.
print(s.strip("-")[:50].rstrip("-"))
'
}

# --- Argument parsing --------------------------------------------------------
# Pull --save-kb out first so it is never mistaken for the language when the
# language is omitted (e.g. `<url> --save-kb`).
SAVE_KB=0
POSITIONAL=()
for arg in "$@"; do
  if [[ "$arg" == "--save-kb" ]]; then
    SAVE_KB=1
  else
    POSITIONAL+=("$arg")
  fi
done

URL="${POSITIONAL[0]:-}"
# Deliberately NOT named LANG: LANG is the locale environment variable, and
# overwriting it would change the locale seen by every child process.
WHISPER_LANG="${POSITIONAL[1]:-en}"

PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)"
KB_DIR="$PROJECT_ROOT/memory/kb"

if [[ -z "$URL" ]]; then
  die "Usage: $0 <url> [language (default: en)] [--save-kb]"
fi

export PATH="/home/moltbot/bin:$PATH"

# Private, unpredictable scratch directory; removed on every exit path.
WORKDIR="$(mktemp -d "${TMPDIR:-/tmp}/transcribe_XXXXXX")"
trap 'rm -rf -- "$WORKDIR"' EXIT

# --- Metadata ----------------------------------------------------------------
echo "→ Obțin informații video..."
# Best effort: if yt-dlp cannot fetch metadata, continue with placeholders.
INFO_JSON=$(yt-dlp --dump-json --no-download -q -- "$URL" 2>/dev/null || echo "{}")
TITLE=$(video_meta title "Unknown")
CREATOR=$(video_meta creator "")
DURATION=$(video_meta duration "?")

# --- Download ----------------------------------------------------------------
echo "→ Descarc video: $TITLE..."
yt-dlp -o "$WORKDIR/video.%(ext)s" --no-playlist -q -- "$URL"

# Glob instead of parsing `ls`; nullglob yields an empty array on no match so
# the explicit error below is actually reachable under `set -e`/pipefail.
shopt -s nullglob
VIDEO_CANDIDATES=("$WORKDIR"/video.*)
shopt -u nullglob
if (( ${#VIDEO_CANDIDATES[@]} == 0 )); then
  die "Eroare: descărcarea a eșuat."
fi
VIDEO_FILE="${VIDEO_CANDIDATES[0]}"

# --- Audio extraction --------------------------------------------------------
echo "→ Extrag audio..."
# 16 kHz mono 16-bit PCM WAV. Global options are placed before the input so
# ffmpeg does not treat them as trailing options; -nostdin keeps ffmpeg from
# reading the script's stdin.
ffmpeg -nostdin -y -loglevel error -i "$VIDEO_FILE" \
  -vn -acodec pcm_s16le -ar 16000 -ac 1 "$WORKDIR/audio.wav"

# --- Transcription -----------------------------------------------------------
echo "→ Transcriu cu Whisper (model: small, limbă: $WHISPER_LANG)..."
# The audio path and language are passed as argv, never spliced into the
# Python source, so a crafted language argument cannot inject code.
# stderr is captured rather than discarded so a failure can be reported.
if ! TRANSCRIPT=$(python3 -c '
import sys
import whisper

model = whisper.load_model("small")
result = model.transcribe(sys.argv[1], language=sys.argv[2])
print(result["text"])
' "$WORKDIR/audio.wav" "$WHISPER_LANG" 2>"$WORKDIR/whisper.log"); then
  cat -- "$WORKDIR/whisper.log" >&2
  die "Eroare: transcrierea a eșuat."
fi

echo ""
echo "=== $TITLE ==="
printf '%s\n' "$TRANSCRIPT"
echo ""
echo "✓ Transcriere completă."

# --- Optional: save a knowledge-base note ------------------------------------
if [[ "$SAVE_KB" == "1" ]]; then
  DATE=$(date +%Y-%m-%d)

  # Slug from the title: lowercase, no diacritics, alphanumerics and hyphens.
  SLUG=$(slugify "$TITLE")
  # A title with no ASCII letters or digits would otherwise give "DATE_.md".
  if [[ -z "$SLUG" ]]; then
    SLUG="untitled"
  fi

  # Pick the note category from the URL's host (case-insensitive match).
  if grep -qi 'facebook\.com' <<<"$URL"; then
    CATEGORY="facebook"
    FORMAT="Reel (~${DURATION} min)"
  elif grep -qiE 'youtube\.com|youtu\.be' <<<"$URL"; then
    CATEGORY="youtube"
    FORMAT="Video (~${DURATION} min)"
  else
    CATEGORY="media"
    FORMAT="Video (~${DURATION} min)"
  fi

  NOTE_DIR="$KB_DIR/$CATEGORY"
  mkdir -p -- "$NOTE_DIR"
  NOTE_FILE="$NOTE_DIR/${DATE}_${SLUG}.md"

  # Unquoted delimiter: the variables below are expanded once; their contents
  # are not re-evaluated by the shell.
  cat > "$NOTE_FILE" << NOTEEOF
# $TITLE

**Sursa:** $URL
**Data:** $DATE
**Creator:** $CREATOR
**Format:** $FORMAT
**Tags:** @coaching

---

## TL;DR

---

## Transcrierea

$TRANSCRIPT
NOTEEOF

  echo ""
  echo "→ Notiță salvată: $NOTE_FILE"

  echo "→ Reindexez KB..."
  python3 "$PROJECT_ROOT/tools/update_notes_index.py"
  echo "✓ KB actualizat. Link: /echo/files.html#memory/kb/$CATEGORY/${DATE}_${SLUG}.md"
fi