// kaisetsu-app/src/components/DownloadModal.tsx
'use client';
import { useState } from 'react';
import {
X,
Download,
FileText,
Check,
Mic,
BookOpen,
Volume2,
Headphones,
Loader2
} from 'lucide-react';
/** One entry on the programme timeline, as consumed by the download/export code in this file. */
interface TimelineBlock {
/** Identifier of the block; used in this file only for error log messages. */
id: string;
/** Kind of block. The exports in this file pick out 'narration' and 'dialogue' blocks. */
type: 'dialogue' | 'narration' | 'silence';
/** Start position in seconds; passed to `formatTC` and used as the audio placement time when rendering. */
startTime: number;
/** NOTE(review): presumably the end position in seconds; not read by the code visible here. */
endTime?: number;
/** Speaker label written to the text exports; a default label is substituted when missing. */
speaker?: string;
/** Spoken text. Blocks without text are left out of the text exports and of narration rendering. */
text?: string;
/** Voice identifier forwarded to `/api/tts` as `voiceType`; a built-in default id is used when missing. */
voiceType?: string;
/** Ready-made timecode string; when present it is used in place of a timecode computed from `startTime`. */
timecode?: string;
/** URL of already available audio for the block; when missing, audio is requested from `/api/tts`. */
audioUrl?: string;
}
/** Describes one selectable entry in the list of downloadable artefacts. */
interface DownloadOption {
/** Key stored in the selection set and checked by the download handler (e.g. 'script'). */
id: string;
/** Short title of the option (presumably shown in the UI; rendering is not visible here). */
label: string;
/** One-line explanation of what the option contains. */
description: string;
/** Icon element associated with the option. */
icon: React.ReactNode;
/** Output format tag; the options defined in this file use 'TXT', 'WAV' and 'JSON'. */
format: string;
}
/**
 * Optional raw/intermediate data handed to the modal.
 * NOTE(review): the fields presumably back the 'whisper_json', 'pre_generated_meta',
 * 'gemini_audio_meta' and 'gemini_visual_meta' download options; the code that reads
 * them is not visible in this part of the file, so confirm before relying on this.
 */
interface DebugData {
/** Segment list; element shape is intentionally left as `unknown`. */
whisperSegments?: unknown[];
/** Text blob; exact format not visible here. */
preGeneratedMeta?: string;
/** Text blob; exact format not visible here. */
audioMetaRaw?: string;
/** Text blob; exact format not visible here. */
visualMetaRaw?: string;
}
/** Props accepted by the `DownloadModal` component. */
interface DownloadModalProps {
/** NOTE(review): presumably controls whether the modal is shown; not read in the code visible here. */
isOpen: boolean;
/** NOTE(review): presumably invoked to dismiss the modal; not called in the code visible here. */
onClose: () => void;
/** Timeline blocks to export; the component defaults this to an empty array. */
blocks?: TimelineBlock[];
/** NOTE(review): presumably the source video whose audio feeds the full mix; not read in the code visible here. */
videoFile?: File | null;
/** Seconds added to a block's start time when a timecode is generated; the component defaults this to 35985. */
tcOffsetSeconds?: number;
/** Optional debug payload (see `DebugData`). */
debugData?: DebugData | null;
}
// --- WAV encoding helpers ---

/**
 * Writes `str` into `view` one byte per UTF-16 code unit, starting at `offset`.
 * Intended for the four-character ASCII tags of a RIFF header.
 */
function writeString(view: DataView, offset: number, str: string) {
  str.split('').forEach((unit, index) => {
    view.setUint8(offset + index, unit.charCodeAt(0));
  });
}

/**
 * Serialises an AudioBuffer as a 16-bit PCM WAV file.
 *
 * The result is a 44-byte RIFF/WAVE header followed by the samples of all
 * channels interleaved frame by frame, little-endian.
 *
 * @param audioBuffer decoded audio to serialise
 * @returns the complete WAV file contents
 */
function encodeWAV(audioBuffer: AudioBuffer): ArrayBuffer {
  const HEADER_BYTES = 44;
  const BITS_PER_SAMPLE = 16;
  const BYTES_PER_SAMPLE = BITS_PER_SAMPLE / 8;

  const { numberOfChannels: channelCount, sampleRate, length: frameCount } = audioBuffer;
  const blockAlign = channelCount * BYTES_PER_SAMPLE;
  const payloadBytes = frameCount * blockAlign;

  const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const out = new DataView(wav);

  // RIFF container header
  writeString(out, 0, 'RIFF');
  out.setUint32(4, 36 + payloadBytes, true); // file size minus the 8-byte RIFF preamble
  writeString(out, 8, 'WAVE');

  // "fmt " chunk
  writeString(out, 12, 'fmt ');
  out.setUint32(16, 16, true); // size of the fmt chunk body
  out.setUint16(20, 1, true); // PCM
  out.setUint16(22, channelCount, true);
  out.setUint32(24, sampleRate, true);
  out.setUint32(28, sampleRate * blockAlign, true); // byte rate
  out.setUint16(32, blockAlign, true);
  out.setUint16(34, BITS_PER_SAMPLE, true);

  // "data" chunk
  writeString(out, 36, 'data');
  out.setUint32(40, payloadBytes, true);

  const planes: Float32Array[] = Array.from({ length: channelCount }, (_, ch) =>
    audioBuffer.getChannelData(ch)
  );

  let cursor = HEADER_BYTES;
  for (let frame = 0; frame < frameCount; frame++) {
    for (const plane of planes) {
      // Clamp to [-1, 1], then scale asymmetrically so that -1 -> -32768 and +1 -> 32767
      const clamped = Math.max(-1, Math.min(1, plane[frame]));
      const scale = clamped < 0 ? 0x8000 : 0x7fff;
      out.setInt16(cursor, clamped * scale, true);
      cursor += BYTES_PER_SAMPLE;
    }
  }

  return wav;
}
/**
 * Starts a browser download of `blob` under the suggested name `filename`.
 *
 * A temporary object URL is created for the blob and "clicked" through a
 * throw-away anchor element that is attached to the document only for the
 * duration of the click.
 *
 * @param blob     data to download
 * @param filename file name suggested to the browser
 */
function downloadBlob(blob: Blob, filename: string) {
  // How long the object URL is kept alive after the click before it is released.
  const revokeDelayMs = 10000;

  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  try {
    a.click();
  } finally {
    // Always detach the helper anchor, even if click() throws.
    document.body.removeChild(a);
    // Revoking synchronously can cancel the download when the browser reads the
    // blob URL asynchronously after click(), so the release is deferred.
    setTimeout(() => URL.revokeObjectURL(url), revokeDelayMs);
  }
}
// --- Component ---
export default function DownloadModal({
isOpen,
onClose,
blocks = [],
videoFile,
tcOffsetSeconds = 35985,
debugData
}: DownloadModalProps) {
// True when at least one narration block carries text (the check looks at text, not at audioUrl).
const hasNarrationAudio = blocks.some(({ type, text }) => type === 'narration' && Boolean(text));
// Catalogue of downloadable artefacts, in display order.
// Built from compact rows inside an IIFE so the helper names stay out of the component scope.
const downloadOptions: DownloadOption[] = (() => {
  const iconClass = 'w-5 h-5';
  type OptionRow = [
    id: string,
    label: string,
    description: string,
    icon: React.ReactNode,
    format: string
  ];
  const rows: OptionRow[] = [
    ['script', '原稿データ', '解説音声の原稿のみ(TC + テキスト)', <Mic className={iconClass} />, 'TXT'],
    ['all_script', 'オールの台本', 'セリフ・解説音声を含む全ての台本', <BookOpen className={iconClass} />, 'TXT'],
    ['transcript', '番組の文字起こし', '出演者のセリフのみの文字起こし', <FileText className={iconClass} />, 'TXT'],
    ['narration_audio', '解説放送単体', 'ナレーション音声のみをTC位置に配置した1本のWAV', <Volume2 className={iconClass} />, 'WAV'],
    ['full_mix', 'フルミックス', '元番組音声 + ナレーション音声をミックス', <Headphones className={iconClass} />, 'WAV'],
    ['whisper_json', 'Whisper出力', 'Whisper音声認識の生データ(セグメント一覧)', <FileText className={iconClass} />, 'JSON'],
    ['pre_generated_meta', 'Whisper事前生成メタ', 'Whisperから生成した音声メタ(silence区間含む)', <FileText className={iconClass} />, 'TXT'],
    ['gemini_audio_meta', 'Gemini音声メタ', 'Geminiが解析した音声メタデータ(生出力)', <FileText className={iconClass} />, 'TXT'],
    ['gemini_visual_meta', 'Gemini映像メタ', 'Geminiが解析した映像メタデータ(生出力)', <FileText className={iconClass} />, 'TXT'],
  ];
  return rows.map(([id, label, description, icon, format]) => ({
    id,
    label,
    description,
    icon,
    format,
  }));
})();
// Component state. Hook calls must stay in this order on every render.

// Ids of the currently selected download options; starts with only 'script' selected.
const [selectedOptions, setSelectedOptions] = useState<Set<string>>(
new Set(['script'])
);
// Set to true when handleDownload starts.
const [isDownloading, setIsDownloading] = useState(false);
// Error message of the last download attempt; reset to null when handleDownload starts.
const [downloadError, setDownloadError] = useState<string | null>(null);
// Status text updated while narration audio is generated and decoded; reset to '' when handleDownload starts.
const [downloadProgress, setDownloadProgress] = useState('');
// Flips the selection state of one download option, identified by its id.
const toggleOption = (id: string) => {
  const next = new Set(selectedOptions);
  // Set.delete reports whether the id was present; add it only when it was not.
  const wasSelected = next.delete(id);
  if (!wasSelected) {
    next.add(id);
  }
  setSelectedOptions(next);
};
// Selects every option listed in downloadOptions.
const selectAll = () => {
  const everyId = downloadOptions.map(({ id }) => id);
  setSelectedOptions(new Set(everyId));
};
/**
 * Formats a position on the timeline as an `HH;MM;SS;FF` timecode at 30 frames
 * per second, after shifting it by `tcOffsetSeconds`.
 *
 * All four fields are derived from a single total frame count of the shifted
 * time, so they stay consistent with each other even when the offset is
 * fractional, and floating-point noise cannot push a value that sits exactly
 * on a frame boundary down to the previous frame.
 *
 * @param seconds position relative to the start of the material, in seconds
 * @returns the timecode string; hours are not wrapped at 24
 */
const formatTC = (seconds: number): string => {
  const fps = 30;
  // Tolerance (in frames) that absorbs binary floating-point error such as 2.3 * 30 -> 68.99999…
  const frameEpsilon = 1e-6;

  // Negative results (position before the offset origin) are clamped to 00;00;00;00.
  const totalFrames = Math.max(
    0,
    Math.floor((seconds + tcOffsetSeconds) * fps + frameEpsilon)
  );
  const totalSeconds = Math.floor(totalFrames / fps);

  const fields = [
    Math.floor(totalSeconds / 3600),
    Math.floor(totalSeconds / 60) % 60,
    totalSeconds % 60,
    totalFrames % fps,
  ];
  return fields.map((field) => field.toString().padStart(2, '0')).join(';');
};
// Downloads `data` as a UTF-8 plain-text file named `filename`.
const downloadTextFile = (data: string, filename: string) => {
  const textBlob = new Blob([data], { type: 'text/plain;charset=utf-8' });
  downloadBlob(textBlob, filename);
};
/**
 * Produces decoded audio for every narration block that has text.
 *
 * Blocks that already carry an `audioUrl` are fetched from it; for the others
 * speech is requested from `/api/tts`. Blocks are processed one after another
 * and `downloadProgress` is updated along the way.
 *
 * A block is skipped (and the problem logged) when the TTS endpoint answers
 * with a non-OK status or when its audio cannot be loaded or decoded. A
 * rejected TTS request (e.g. network failure) is not caught here and
 * propagates to the caller.
 *
 * @param audioCtx context used for `decodeAudioData`
 * @returns the decoded buffers with the start time of the block they belong to
 */
async function prepareNarrationAudio(
  audioCtx: AudioContext
): Promise<{ buffer: AudioBuffer; startTime: number }[]> {
  const narrationBlocks = blocks.filter((b) => b.type === 'narration' && b.text);
  const decoded: { buffer: AudioBuffer; startTime: number }[] = [];

  // The user dictionary is the same for every block: read it at most once, and only
  // if a TTS request is actually needed. A malformed stored value falls back to an
  // empty dictionary instead of aborting the whole export.
  let dictionary: unknown = [];
  let dictionaryLoaded = false;
  const loadDictionary = (): unknown => {
    if (!dictionaryLoaded) {
      dictionaryLoaded = true;
      const savedDict = localStorage.getItem('kaisetsu-user-dictionary');
      if (savedDict) {
        try {
          dictionary = JSON.parse(savedDict);
        } catch (err) {
          console.error('Ignoring malformed user dictionary:', err);
        }
      }
    }
    return dictionary;
  };

  for (const [i, block] of narrationBlocks.entries()) {
    const existingUrl = block.audioUrl;
    let readAudioBytes: () => Promise<ArrayBuffer>;

    if (existingUrl) {
      readAudioBytes = async () => (await fetch(existingUrl)).arrayBuffer();
    } else {
      // Generate TTS if no audio URL
      setDownloadProgress(`TTS生成中... (${i + 1}/${narrationBlocks.length})`);
      const ttsResponse = await fetch('/api/tts', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          text: block.text,
          voiceType: block.voiceType || '7nO7lVCISGqz9Dhm3AAx',
          dictionary: loadDictionary(),
        }),
      });
      if (!ttsResponse.ok) {
        console.error(`TTS failed for block ${block.id}`);
        continue;
      }
      // Read the bytes straight from the response: no object URL is created,
      // so there is nothing to revoke afterwards.
      readAudioBytes = () => ttsResponse.arrayBuffer();
    }

    // Decode audio
    setDownloadProgress(`音声デコード中... (${i + 1}/${narrationBlocks.length})`);
    try {
      const buffer = await audioCtx.decodeAudioData(await readAudioBytes());
      decoded.push({ buffer, startTime: block.startTime });
    } catch (err) {
      console.error(`Failed to decode audio for block ${block.id}:`, err);
    }
  }
  return decoded;
}
/**
 * Renders all narration clips into a single mono 44.1 kHz WAV, each clip
 * placed at its start time. The output runs one second past the end of the
 * last clip.
 *
 * @param decodedNarrations decoded clips with their start times in seconds
 * @returns the encoded WAV file contents
 */
async function renderNarrationOnly(
  decodedNarrations: { buffer: AudioBuffer; startTime: number }[]
): Promise<ArrayBuffer> {
  const outputSampleRate = 44100;
  const tailSeconds = 1; // 1s buffer

  const lastClipEnd = decodedNarrations.reduce(
    (latest, clip) => Math.max(latest, clip.startTime + clip.buffer.duration),
    0
  );
  const frameCount = Math.ceil((lastClipEnd + tailSeconds) * outputSampleRate);
  const renderCtx = new OfflineAudioContext(1, frameCount, outputSampleRate);

  decodedNarrations.forEach((clip) => {
    const player = renderCtx.createBufferSource();
    player.buffer = clip.buffer;
    player.connect(renderCtx.destination);
    player.start(clip.startTime);
  });

  return encodeWAV(await renderCtx.startRendering());
}
/**
 * Renders the programme audio mixed with the narration clips into one WAV.
 *
 * The output has the duration, sample rate and channel count of the programme
 * audio. While narration plays, the programme audio is ducked to 15 % gain,
 * with 0.3 s linear fades on either side; narration clips closer than one
 * second to each other share a single ducked region.
 *
 * @param videoAudioBuffer  decoded audio of the programme
 * @param decodedNarrations decoded narration clips with their start times in seconds
 * @returns the encoded WAV file contents
 */
async function renderFullMix(
  videoAudioBuffer: AudioBuffer,
  decodedNarrations: { buffer: AudioBuffer; startTime: number }[]
): Promise<ArrayBuffer> {
  const fullGain = 1.0;
  const duckedGain = 0.15;
  const fadeSeconds = 0.3;
  const mergeGapSeconds = 1.0;

  const { duration, sampleRate, numberOfChannels } = videoAudioBuffer;
  const renderCtx = new OfflineAudioContext(
    numberOfChannels,
    Math.ceil(duration * sampleRate),
    sampleRate
  );

  // Programme audio runs through a gain node that carries the ducking automation
  const programme = renderCtx.createBufferSource();
  programme.buffer = videoAudioBuffer;
  const ducker = renderCtx.createGain();
  ducker.gain.setValueAtTime(fullGain, 0);

  // Narration spans in start order, with spans less than mergeGapSeconds apart fused together
  const duckWindows = decodedNarrations
    .map((clip) => ({ start: clip.startTime, end: clip.startTime + clip.buffer.duration }))
    .sort((a, b) => a.start - b.start)
    .reduce<{ start: number; end: number }[]>((windows, span) => {
      const previous = windows[windows.length - 1];
      if (previous && span.start - previous.end < mergeGapSeconds) {
        previous.end = Math.max(previous.end, span.end);
      } else {
        windows.push({ start: span.start, end: span.end });
      }
      return windows;
    }, []);

  // Fade down just before each window, hold, then fade back up just after it
  duckWindows.forEach(({ start, end }) => {
    const fadeOutFrom = Math.max(0, start - fadeSeconds);
    ducker.gain.setValueAtTime(fullGain, fadeOutFrom);
    ducker.gain.linearRampToValueAtTime(duckedGain, start);
    ducker.gain.setValueAtTime(duckedGain, end);
    ducker.gain.linearRampToValueAtTime(fullGain, end + fadeSeconds);
  });

  programme.connect(ducker);
  ducker.connect(renderCtx.destination);
  programme.start(0);

  // Narration goes straight to the output, unaffected by the ducking gain
  decodedNarrations.forEach((clip) => {
    const player = renderCtx.createBufferSource();
    player.buffer = clip.buffer;
    player.connect(renderCtx.destination);
    player.start(clip.startTime);
  });

  return encodeWAV(await renderCtx.startRendering());
}
const handleDownload = async () => {
setIsDownloading(true);
setDownloadError(null);
setDownloadProgress('');
try {
const narrationBlocks = blocks.filter((b) => b.type === 'narration' && b.text);
const dialogueBlocks = blocks.filter((b) => b.type === 'dialogue' && b.text);
// --- Text downloads ---
if (selectedOptions.has('script')) {
const lines = narrationBlocks.map((b) => {
const tc = b.timecode || formatTC(b.startTime);
return `${tc}\t${b.speaker || '解説音声'}\t${b.text}`;
});
const header = 'TC\t話者\t発話内容';
downloadTextFile([header, ...lines].join('\n'), 'narration_script.txt');
}
if (selectedOptions.has('all_script')) {
const allBlocks = blocks
.filter((b) => (b.type === 'dialogue' || b.type === 'narration') && b.text)
.sort((a, b) => a.startTime - b.startTime);
const lines = allBlocks.map((b) => {
const tc = b.timecode || formatTC(b.startTime);
const speaker = b.type === 'narration'
? (b.speaker || '解説音声')
: (b.speaker || '話者');
return `${tc}\t${speaker}\t${b.text}`;
});
const header = 'TC\t話者\t発話内容';
downloadTextFile([header, ...lines].join('\n'), 'full_script.txt');
}
if (selectedOptions.has('transcript')) {
const lines = dialogueBlocks.map((b) => {
const tc = b.timecode || formatTC(b.startTime);
return `${tc}\t${b.speaker || '話者'}\t${b.text}`;
});
const header = 'TC\t話者\t発話内容';
downloadTextFile([header, ...lines].join('\n'), 'transcript.txt');
}