#!/bin/bash
# Reel Villarroel v4
# Estructura: Roger on-camera 4s → voiceover → calle Barcelona → terraza cierre
# Audio: voz real Roger (0-15s) + ElevenLabs "Centro de Barcelona" (15-17s) + música 20%
# Total objetivo: ~21s

# Locations: final render, raw camera footage, and the intermediate-file
# directory (also acts as a cache for generated audio between runs).
OUT="/root/instagram-insights/reels/villarroel/villarroel_v4.mp4"
RAW="/root/instagram-insights/reels/villarroel/raw"
TMP="/tmp/reel_v4"

# Encoder flags shared by every video segment. Kept as a plain string because
# the ffmpeg calls expand it unquoted so it splits into separate arguments.
VIDEO_OPTS="-c:v libx264"
VIDEO_OPTS+=" -preset slow -crf 23"
VIDEO_OPTS+=" -pix_fmt yuv420p"

# Fit each clip inside a 1080x1920 frame without distortion, centre it on a
# black canvas, force square pixels and a constant 30 fps.
SCALE_FILTER="scale=1080:1920:force_original_aspect_ratio=decrease"
SCALE_FILTER+=",pad=1080:1920:(ow-iw)/2:(oh-ih)/2:color=black"
SCALE_FILTER+=",setsar=1,fps=30"

mkdir -p "$TMP"

# Banner followed by one blank line.
printf '%s\n\n' "=== Reel Villarroel v4 ==="

# ─── STEP 1: Roger's voice (first 15 s of IMG_2324) ─────────────────────────
# Drops the video stream (-vn) and re-encodes the audio as 128 kb/s AAC.
echo "[1/8] Extrayendo voz Roger (IMG_2324 t=0-15s)..."
# -loglevel error keeps the console quiet but still surfaces real failures;
# the exit status is checked so "OK" is only printed on success.
if ! ffmpeg -y -hide_banner -loglevel error \
  -i "$RAW/IMG_2324 2.MOV" \
  -ss 0 -t 15 \
  -vn -c:a aac -b:a 128k \
  "$TMP/voz_roger.aac"; then
  echo "  ERROR: no se pudo extraer la voz de Roger" >&2
  exit 1
fi
echo "  OK (15s)"

# ─── STEP 2: ElevenLabs line "Centro de Barcelona" ──────────────────────────
# Resolution order for $TMP/barcelona_line.mp3:
#   1. a pre-generated /tmp/barcelona_line.mp3 (always wins when present),
#   2. the non-empty copy already cached in $TMP,
#   3. a fresh ElevenLabs text-to-speech request.
# The override is copied BEFORE any API call so no request is spent on a file
# that would be overwritten anyway.
# NOTE(review): the API key used to be embedded in this script; it should be
# treated as leaked and rotated. It is now read from ELEVENLABS_API_KEY.
echo "[2/8] Línea Barcelona ElevenLabs..."
if [ -f /tmp/barcelona_line.mp3 ]; then
  # Best-effort on purpose: a failed copy falls through to cache / API below.
  cp /tmp/barcelona_line.mp3 "$TMP/barcelona_line.mp3" 2>/dev/null || true
fi
if [ ! -s "$TMP/barcelona_line.mp3" ]; then
  if [ -z "${ELEVENLABS_API_KEY:-}" ]; then
    echo "  ERROR: define ELEVENLABS_API_KEY para generar la línea de voz" >&2
    exit 1
  fi
  # --fail makes HTTP errors return non-zero instead of saving the JSON error
  # body as if it were audio; the download lands in a .part file so a failed
  # request never leaves something that looks like a valid cache entry.
  if ! curl -sS --fail \
    -H "xi-api-key: ${ELEVENLABS_API_KEY}" \
    -H "Content-Type: application/json" \
    -d '{
      "text": "Ático en el centro de Barcelona.",
      "model_id": "eleven_multilingual_v2",
      "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}
    }' \
    "https://api.elevenlabs.io/v1/text-to-speech/uYPRVzK9mOV2Vt16MamC" \
    -o "$TMP/barcelona_line.mp3.part"; then
    rm -f "$TMP/barcelona_line.mp3.part"
    echo "  ERROR: la petición a ElevenLabs falló" >&2
    exit 1
  fi
  mv "$TMP/barcelona_line.mp3.part" "$TMP/barcelona_line.mp3"
fi
if [ ! -s "$TMP/barcelona_line.mp3" ]; then
  echo "  ERROR: $TMP/barcelona_line.mp3 no existe o está vacío" >&2
  exit 1
fi
echo "  OK"

# ─── STEP 3: Ambient music ──────────────────────────────────────────────────
# Synthesises a 30 s mono 16-bit pad at 44.1 kHz: four three-note chords of
# 7.5 s each, every note a sine plus a quieter octave overtone, with a 2 s
# fade-in and a 3 s fade-out. The WAV is then encoded to 128 kb/s MP3.
# The MP3 is cached in $TMP and reused on later runs.
if [ ! -f "$TMP/musica_bg.mp3" ]; then
  echo "[3/8] Generando música ambient..."
  # The WAV path is passed as argv[1] so the generator follows $TMP instead of
  # carrying its own hard-coded copy of the directory.
  if ! python3 - "$TMP/musica_bg.wav" <<'PYEOF'
import array
import math
import sys
import wave

out_path = sys.argv[1]

sample_rate = 44100
duration = 30
num_samples = sample_rate * duration

chords = [
    [220.0, 261.63, 329.63],
    [196.0, 246.94, 293.66],
    [174.61, 220.0, 261.63],
    [196.0, 246.94, 293.66],
]

samples = array.array('h')
for i in range(num_samples):
    t = i / sample_rate
    # Each chord occupies an equal slice of the total duration.
    chord_idx = int(t / (duration / len(chords))) % len(chords)
    chord = chords[chord_idx]
    # Linear fade-in over the first 2 s and fade-out over the last 3 s.
    fade = 1.0
    if t < 2.0:
        fade = t / 2.0
    elif t > (duration - 3.0):
        fade = (duration - t) / 3.0
    val = 0
    for freq in chord:
        val += math.sin(2 * math.pi * freq * t) * 0.25      # fundamental
        val += math.sin(2 * math.pi * freq * 2 * t) * 0.08  # octave overtone
    # Peak |val| is 3 * (0.25 + 0.08) = 0.99, so 16000 stays inside int16.
    samples.append(int(val * 16000 * fade))

with wave.open(out_path, 'w') as f:
    f.setnchannels(1)
    f.setsampwidth(2)
    f.setframerate(sample_rate)
    f.writeframes(samples.tobytes())
PYEOF
  then
    echo "  ERROR: no se pudo generar la música ambient" >&2
    exit 1
  fi
  if ! ffmpeg -y -hide_banner -loglevel error \
    -i "$TMP/musica_bg.wav" -c:a libmp3lame -b:a 128k "$TMP/musica_bg.mp3"; then
    # Remove any partial output so the next run does not reuse a broken file.
    rm -f "$TMP/musica_bg.mp3"
    echo "  ERROR: no se pudo codificar la música a MP3" >&2
    exit 1
  fi
  echo "  OK"
else
  echo "[3/8] Música reutilizada"
fi

# ─── STEP 4: Video segments ─────────────────────────────────────────────────
echo "[4/8] Codificando segmentos de vídeo..."

#######################################
# Cuts one clip, normalises it with SCALE_FILTER, strips its audio and encodes
# it with VIDEO_OPTS. Aborts the script if ffmpeg fails.
# Globals:   RAW, TMP, SCALE_FILTER, VIDEO_OPTS (read)
# Arguments: $1 - progress label (printed followed by "...")
#            $2 - source file name inside $RAW
#            $3 - start offset in seconds
#            $4 - length in seconds
#            $5 - output file name inside $TMP
#######################################
encode_segment() {
  local label=$1 clip=$2 start=$3 length=$4 out=$5
  echo "  ${label}..."
  # shellcheck disable=SC2086 — VIDEO_OPTS is a flag string that must word-split
  if ! ffmpeg -y -hide_banner -loglevel error \
    -i "$RAW/$clip" -ss "$start" -t "$length" \
    -vf "$SCALE_FILTER" -an $VIDEO_OPTS "$TMP/$out"; then
    echo "  ERROR: falló la codificación de $out ($clip)" >&2
    exit 1
  fi
}

# Seg 1: Roger talking to camera (4 s)
encode_segment "Seg 1 — Roger on-camera (IMG_2324 t=0-4s)" "IMG_2324 2.MOV" 0 4 seg1.mp4
# Seg 2: awning retracting, revealing blue sky (4 s)
encode_segment "Seg 2 — Toldo retractándose (IMG_2358 t=1-5s)" "IMG_2358 2.MOV" 1 4 seg2.mp4
# Seg 3: fabric close-up (3 s)
encode_segment "Seg 3 — Detalle tejido (IMG_2344 t=0-3s)" "IMG_2344 2.MOV" 0 3 seg3.mp4
# Seg 4: awning extending (4 s)
encode_segment "Seg 4 — Toldo extendiéndose (IMG_2351 t=3-7s)" "IMG_2351 2.MOV" 3 4 seg4.mp4
# Seg 5: tree-lined street seen from the penthouse, under the
# "Centro de Barcelona" line (3 s)
encode_segment "Seg 5 — Calle Barcelona desde ático (IMG_2320 t=1-4s)" "IMG_2320 2.MOV" 1 3 seg5.mp4
# Seg 6: terrace lifestyle shot, closing (3 s)
encode_segment "Seg 6 — Terraza final (IMG_2373 t=2-5s)" "IMG_2373 2.MOV" 2 3 seg6.mp4
echo "  OK — 6 segmentos listos (4+4+3+4+3+3 = 21s)"

# ─── STEP 5: Concatenate video ──────────────────────────────────────────────
# Joins seg1..seg6 with the concat demuxer and stream copy (no re-encode).
# The list uses bare file names and lives next to the segments in $TMP.
echo "[5/8] Concatenando vídeo..."
printf "file '%s'\n" seg{1..6}.mp4 > "$TMP/concat.txt"

# Exit status is checked so "OK" is only reported for a successful join.
if ! ffmpeg -y -hide_banner -loglevel error \
  -f concat -safe 0 -i "$TMP/concat.txt" -c:v copy "$TMP/video_only.mp4"; then
  echo "  ERROR: no se pudieron concatenar los segmentos" >&2
  exit 1
fi
echo "  OK"

# ─── STEP 6: Build the narration track ──────────────────────────────────────
# Roger's voice (15 s) followed by the "Barcelona" line, joined end to end
# with the concat filter (audio only) and encoded as 128 kb/s AAC.
echo "[6/8] Construyendo pista de narración (Roger + Barcelona)..."
# Exit status is checked so a missing or unreadable input stops the build
# instead of being reported as "OK".
if ! ffmpeg -y -hide_banner -loglevel error \
  -i "$TMP/voz_roger.aac" \
  -i "$TMP/barcelona_line.mp3" \
  -filter_complex "[0:a][1:a]concat=n=2:v=0:a=1[narracion]" \
  -map "[narracion]" \
  -c:a aac -b:a 128k \
  "$TMP/narracion.aac"; then
  echo "  ERROR: no se pudo construir la narración" >&2
  exit 1
fi
echo "  OK"

# ─── STEP 7: Mix narration + background music ───────────────────────────────
# The music is attenuated with volume=0.20 before mixing.
# The narration is shorter than the video, so it is padded with silence
# (apad) and the mix is ended by the music instead (duration=shortest).
# Ending the mix with the narration (duration=first) would make the final
# "-shortest" mux cut the reel at the end of the voice, losing the closing
# shots and missing the ~21 s target.
echo "[7/8] Mezclando narración + música (20%)..."
if ! ffmpeg -y -hide_banner -loglevel error \
  -i "$TMP/narracion.aac" \
  -i "$TMP/musica_bg.mp3" \
  -filter_complex "[0:a]volume=1.0,apad[voz];[1:a]volume=0.20[musica];[voz][musica]amix=inputs=2:duration=shortest:dropout_transition=2[audio_mix]" \
  -map "[audio_mix]" \
  -c:a aac -b:a 128k \
  "$TMP/audio_mix.aac"; then
  echo "  ERROR: no se pudo mezclar narración y música" >&2
  exit 1
fi
echo "  OK"

# ─── STEP 8: Mux video + audio ──────────────────────────────────────────────
# Video is stream-copied, audio is encoded as AAC, and -shortest ends the
# output when the shorter of the two inputs ends.
echo "[8/8] Render final..."
if ! ffmpeg -y -hide_banner -loglevel error \
  -i "$TMP/video_only.mp4" \
  -i "$TMP/audio_mix.aac" \
  -c:v copy -c:a aac -shortest \
  "$OUT"; then
  # Do not print the success banner for a render that did not happen.
  echo "  ERROR: falló el render final" >&2
  exit 1
fi

# Duration in seconds, one decimal. Formatting runs under the C locale so the
# dot-decimal value printed by ffprobe parses regardless of the user's locale;
# an empty probe result is shown as 0.0.
DURACION_RAW=$(ffprobe -v quiet -show_entries format=duration -of csv=p=0 "$OUT" 2>/dev/null)
DURACION=$(LC_ALL=C printf '%.1f' "${DURACION_RAW:-0}" 2>/dev/null)
# Human-readable file size (fifth column of ls -lh).
SIZE=$(ls -lh "$OUT" | awk '{print $5}')
echo ""
echo "========================================="
echo "  REEL V4 LISTO"
echo "  Archivo : $OUT"
echo "  Duracion: ${DURACION}s"
echo "  Tamano  : ${SIZE}"
echo "========================================="