
/*

Purpose:
This file handles streaming speech-to-text transcription using AWS Transcribe
and translation of the transcribed text using AWS Translate.

*/

import { TranscribeStreamingClient } from "@aws-sdk/client-transcribe-streaming";
import { StartStreamTranscriptionCommand } from "@aws-sdk/client-transcribe-streaming";
import { TranslateClient, TranslateTextCommand } from "@aws-sdk/client-translate";


const MicrophoneStream = require('microphone-stream').default;
const { Buffer } = require('node:buffer');

// AWS region passed to every Transcribe/Translate client constructed in this file.
const REGION = "us-east-1";
// Sent to Transcribe as MediaSampleRateHertz; getAudioStream also uses it as the
// maximum chunk length it will forward.
const SAMPLE_RATE = 44100;
// Active microphone-stream wrapper; assigned when a recording is set up and
// cleared again by stopRecording.
/** @type {MicrophoneStream | undefined} */
let microphoneStream = undefined;
// Active Transcribe streaming client; assigned by createTranscribeClient and
// cleared again by stopRecording.
/** @type {TranscribeStreamingClient | undefined} */
let transcribeClient = undefined;

// When true, startStreaming reports non-final transcripts as `Partial` (and
// translates them once enough new text has accumulated); when false they are
// reported as `Begin` and never translated.
const isPartial = true;


// Lookup table used by getLanguageCode: short language code -> locale-qualified
// language code.
const CovertList = {
    "zh": "zh-CN",
    "ja": "ja-JP",
    "en": "en-US",
    "fr-CA": "fr-CA",
    "fr": "fr-FR",
    "de": "de-DE",
    "hi": "hi-IN",
    "it": "it-IT",
    "ko": "ko-KR",
    "pt": "pt-BR",
    // ISO 639-1 code for Spanish. Previously Spanish was only reachable through
    // the key "sl" below.
    "es": "es-US",
    // NOTE(review): "sl" is the ISO 639-1 code for Slovenian, not Spanish. The
    // entry is kept so existing callers that pass "sl" keep working - confirm
    // whether it can be removed.
    "sl": "es-US",
    "th": "th-TH"
}

// Language codes that startStreaming consults when deciding whether transcript
// items should be concatenated with a separating space.
const LanguageWithoutSpace = ["ja-JP", "zh-CN", "th-TH"];

/**
 * Looks up the locale-qualified language code for a short language code.
 *
 * @param {string} language - A key of `CovertList`, e.g. "en" or "fr-CA".
 * @returns {string | undefined} The mapped code (e.g. "en-US"), or `undefined`
 *     when `language` is not a key of `CovertList`.
 */
export function getLanguageCode(language) {
    // Only consult the table's own keys: a bare `CovertList[language]` would
    // return inherited members such as `constructor` or `toString`.
    if (Object.prototype.hasOwnProperty.call(CovertList, language)) {
        return CovertList[language];
    }
    return undefined;
}

/**
 * Tags passed as the second argument of the streaming callback. Each member's
 * value is its own name.
 *
 * - Finished:          a final transcript.
 * - TranslateFinished: the translation of a final transcript.
 * - Partial:           an interim transcript.
 * - Begin:             an interim transcript when partial reporting is disabled.
 * - TranslatePartial:  the translation of an interim transcript.
 */
export const TranscribeResultType = Object.freeze(
    ['Finished', 'TranslateFinished', 'Partial', 'Begin', 'TranslatePartial']
        .reduce((types, typeName) => {
            types[typeName] = typeName;
            return types;
        }, {})
);

// Number of not-yet-translated characters a partial transcript must accumulate
// before startStreaming sends it for an intermediate translation.
// ZhCNGapSize is used when the result language is "ja-JP" or "zh-CN";
// EnglishGapSize is used for every other language.
const EnglishGapSize = 108;
const ZhCNGapSize = 24;

/**
 * Best-effort detection of the operating system the browser runs on.
 *
 * `navigator.platform` is inspected first and `navigator.userAgent` second.
 * Within each source the rules run in order and the last matching rule wins;
 * a user-agent match overrides a platform match (so a platform string that
 * contains "linux" combined with an Android user agent yields 'Android').
 *
 * @returns {string} 'Windows', 'MacOS', 'Linux', 'UNIX', 'iOS', 'Android' or
 *     'Unknown' when nothing matched.
 */
export function detectOS() {
    let os = 'Unknown';

    // First try with navigator.platform. It may be missing or empty, in which
    // case nothing matches here and the user agent decides.
    const platform = (navigator.platform || '').toLowerCase();
    if (platform.includes('win')) os = 'Windows';
    if (platform.includes('mac')) os = 'MacOS';
    if (platform.includes('linux')) os = 'Linux';
    if (platform.includes('iphone') || platform.includes('ipad')) os = 'iOS';
    if (platform.includes('android')) os = 'Android';

    // Then refine with navigator.userAgent. This pass used to be skipped exactly
    // when the platform check had found nothing, so it could never act as the
    // fallback it was meant to be. Running it unconditionally keeps every
    // previously detected result unchanged and adds the missing fallback.
    const userAgent = navigator.userAgent || '';
    if (userAgent.includes("Win")) os = "Windows";
    if (userAgent.includes("Mac")) os = "MacOS";
    if (userAgent.includes("X11")) os = "UNIX";
    if (userAgent.includes("Linux")) os = "Linux";
    if (userAgent.includes("Android")) os = "Android";
    if (userAgent.includes("like Mac")) os = "iOS";

    return os;
}

/**
 * Identifies the browser by substring-matching `navigator.userAgent`.
 *
 * The checks are ordered and the first match wins; the order matters because
 * several user agents contain each other's tokens (the Chrome check must come
 * after Samsung/Opera/Edge, and Safari after Chrome).
 *
 * @returns {string} 'Mozilla Firefox', 'Samsung Internet', 'Opera',
 *     'Microsoft Internet Explorer', 'Microsoft Edge', 'Google Chrome',
 *     'Apple Safari' or 'Unknown'.
 */
export function detectBrowser() {
    const userAgent = navigator.userAgent;

    if (userAgent.includes('Firefox')) {
        return 'Mozilla Firefox';
    }
    if (userAgent.includes('SamsungBrowser')) {
        return 'Samsung Internet';
    }
    if (userAgent.includes('Opera') || userAgent.includes('OPR')) {
        return 'Opera';
    }
    if (userAgent.includes('Trident')) {
        return 'Microsoft Internet Explorer';
    }
    // Legacy Edge sends "Edge/", Chromium-based Edge sends "Edg/" ("EdgA/" on
    // Android, "EdgiOS/" on iOS). Matching only "Edge" reported Chromium Edge as
    // Google Chrome; the common prefix "Edg" covers all variants.
    if (userAgent.includes('Edg')) {
        return 'Microsoft Edge';
    }
    if (userAgent.includes('Chrome')) {
        return 'Google Chrome';
    }
    if (userAgent.includes('Safari')) {
        return 'Apple Safari';
    }
    return 'Unknown';
}

/**
 * Tears down the current recording session, if any.
 *
 * The microphone stream is stopped and destroyed first, then the Transcribe
 * client is destroyed. Both module-level references are reset to `undefined`
 * so a later call is a no-op.
 *
 * @returns {void}
 */
export const stopRecording = function () {
    const hasMicrophone = Boolean(microphoneStream);
    if (hasMicrophone) {
        microphoneStream.stop();
        microphoneStream.destroy();
        microphoneStream = undefined;
    }

    const hasClient = Boolean(transcribeClient);
    if (hasClient) {
        transcribeClient.destroy();
        transcribeClient = undefined;
    }
};

/**
 * Creates a new Transcribe streaming client for `REGION` and stores it in the
 * module-level `transcribeClient`, replacing whatever was stored before.
 *
 * @param {*} creds - Passed through unchanged as the client's `credentials`.
 * @returns {void}
 */
export const createTranscribeClient = (creds) => {
    const clientConfig = {
        region: REGION,
        credentials: creds,
    };
    transcribeClient = new TranscribeStreamingClient(clientConfig);
};

/**
 * Streams the current microphone audio to Transcribe and reports transcripts
 * and their translations through `callback` until the result stream ends.
 *
 * For every transcription result the text is rebuilt from the first
 * alternative's items, then:
 *  - final result:   callback(text, Finished), followed by
 *                    callback(translation, TranslateFinished);
 *  - partial result: callback(text, Partial) and, once at least `gapSize`
 *                    characters have been added since the last intermediate
 *                    translation, callback(translation, TranslatePartial).
 *                    When `isPartial` is false, callback(text, Begin) instead.
 *
 * @param {*} creds - Credentials forwarded to the translation helper.
 * @param {string} fromLanguage - A single language code, or a comma-separated
 *     list of codes, which switches Transcribe to language identification.
 * @param {string} toLanguage - Target language code, or 'auto'. With a
 *     comma-separated `fromLanguage`, 'auto' means: translate into every
 *     listed language other than the detected one.
 * @param {(text: string, type: string) => void} callback - Receives each text
 *     together with a `TranscribeResultType` value.
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while sending the command, reading the
 *     stream or translating (after logging its message).
 */
const startStreaming = async (creds, fromLanguage, toLanguage, callback) => {
    console.log("=================> will start real streaming!!")

    // A comma-separated list asks Transcribe to identify the spoken language.
    const autoFrom = fromLanguage.indexOf(",") >= 0;
    const autoTo = toLanguage === 'auto';
    // Length of the partial transcript that has already been translated.
    let partialTranslatedLength = 0;

    const command = autoFrom
        ? new StartStreamTranscriptionCommand({
            MediaEncoding: "pcm",
            MediaSampleRateHertz: SAMPLE_RATE,
            AudioStream: getAudioStream(),
            IdentifyMultipleLanguages: true,
            LanguageOptions: fromLanguage
        })
        : new StartStreamTranscriptionCommand({
            LanguageCode: fromLanguage,
            MediaEncoding: "pcm",
            MediaSampleRateHertz: SAMPLE_RATE,
            AudioStream: getAudioStream()
        });

    // Translates `text` for both final and partial results. The partial path
    // used to pass `toLanguage` straight through, which sent the literal 'auto'
    // as the translation target; routing both paths through this helper keeps
    // them consistent.
    const translateResult = async (text, sourceLanguageCode) => {
        if (!(autoFrom && autoTo)) {
            return translateTextFromLanguageToLanguage(creds, text, sourceLanguageCode, toLanguage);
        }
        // One line per candidate language: the original text for the detected
        // language, a translation for every other one.
        let combined = "";
        for (const candidate of fromLanguage.split(",")) {
            if (candidate !== sourceLanguageCode) {
                combined = combined + "\n" + await translateTextFromLanguageToLanguage(creds, text, sourceLanguageCode, candidate);
            } else {
                combined = combined + "\n" + text;
            }
        }
        return combined;
    };

    try {
        const data = await transcribeClient.send(command);
        for await (const event of data.TranscriptResultStream) {
            for (const result of event.TranscriptEvent.Transcript.Results || []) {
                const resultLanguageCode = autoFrom ? result.LanguageCode : fromLanguage;

                // Languages listed in LanguageWithoutSpace are joined without a
                // separator. The previous `indexOf(...) <= 0` test treated the
                // first entry (index 0) as "needs spaces", and it looked at
                // `fromLanguage`, which never matches in identification mode.
                const space = LanguageWithoutSpace.includes(resultLanguageCode) ? "" : " ";

                let resultString = "";
                for (const item of result.Alternatives[0].Items) {
                    resultString = resultString + item.Content + space;
                }

                if (result.IsPartial === false) {
                    partialTranslatedLength = 0;
                    callback(resultString, TranscribeResultType.Finished);
                    const TranslatedText = await translateResult(resultString, resultLanguageCode);
                    callback(TranslatedText, TranscribeResultType.TranslateFinished);
                } else if (isPartial) {
                    callback(resultString, TranscribeResultType.Partial);
                    const currentTranscribedSize = resultString.length - partialTranslatedLength;

                    let gapSize = EnglishGapSize;
                    if (resultLanguageCode === "ja-JP" || resultLanguageCode === "zh-CN") {
                        gapSize = ZhCNGapSize;
                    }
                    // Check how much text has not been translated yet. If it is
                    // large enough, send back an intermediate translation first.
                    if (currentTranscribedSize >= gapSize) {
                        const TranslatedTextTmp = await translateResult(resultString, resultLanguageCode);
                        callback(TranslatedTextTmp, TranscribeResultType.TranslatePartial);
                        partialTranslatedLength = resultString.length;
                    }
                } else {
                    callback(resultString, TranscribeResultType.Begin);
                }
            }
        }
    } catch (error) {
        console.error("Error during streaming transcription: ", error.message);
        throw error;
    }
};

// `kind` value that setAllMicStreams compares device entries against.
const inputLabel = 'audioinput';
// Lower-case substring searched for in device labels by setAllMicStreams.
const blackHole = "blackhole";

// Cache of cloned audio tracks from the last capture. When it is non-empty the
// stream-setup functions clone these tracks again instead of capturing anew.
let combinedAudioTracks = new MediaStream();

/**
 * Points the module-level `microphoneStream` at the combined audio of all
 * microphone inputs.
 *
 * On the first call (empty `combinedAudioTracks`) the audio is captured with
 * `captureAndCombineMicrophoneAudio` and clones of its tracks are cached in
 * `combinedAudioTracks`; later calls build a fresh stream from clones of the
 * cached tracks instead of capturing again.
 *
 * @returns {Promise<void>}
 * @throws {Error} When no microphone audio could be captured. In that case
 *     `microphoneStream` is left untouched.
 */
const setAllMicStreams = async () => {
    // Request audio access before listing devices.
    // NOTE(review): presumably so that enumerateDevices() returns labels - confirm.
    await navigator.mediaDevices.getUserMedia({audio: true, video: false});

    // Diagnostic listing only; a failure here is reported but does not abort.
    try {
        const devices = await navigator.mediaDevices.enumerateDevices();
        for (const device of devices) {
            console.log(device.kind + ": " + device.label +
                " id = " + device.deviceId);
            const tmpLabel = device.label.toLowerCase();
            if (device.kind === inputLabel && tmpLabel.indexOf(blackHole) >= 0) {
                // NOTE(review): the matching device is only logged; nothing
                // below restricts capture to it.
                console.log("will use " + device.deviceId + " as the input device");
            }
        }
    } catch (err) {
        console.log("=======> meet error when enumerate the audio devices " + err.message);
        alert("meet error when check the audio devices " + err.message + ". Please restart the audio translation")
    }

    let sourceStream;
    if (combinedAudioTracks.getTracks().length === 0) {
        sourceStream = await captureAndCombineMicrophoneAudio();
        // The capture helper yields a falsy value on failure. Fail with a clear
        // message before touching module state instead of handing an invalid
        // stream to MicrophoneStream.
        if (!sourceStream) {
            throw new Error("Cannot start audio translation: failed to capture microphone audio.");
        }
        sourceStream.getAudioTracks().forEach(track => {
            combinedAudioTracks.addTrack(track.clone());
        });
    } else {
        sourceStream = new MediaStream();
        combinedAudioTracks.getAudioTracks().forEach(track => {
            sourceStream.addTrack(track.clone());
        });
    }

    microphoneStream = new MicrophoneStream();
    microphoneStream.setStream(sourceStream);
};

/**
 * Opens every 'audioinput' device and mixes them into a single stream through
 * one AudioContext (each source -> its own gain node -> shared destination).
 *
 * @returns {Promise<MediaStream | null>} The mixed stream, or `null` when
 *     listing or opening a device failed. On failure the streams opened so far
 *     are stopped and the AudioContext is closed so nothing keeps recording.
 */
async function captureAndCombineMicrophoneAudio() {
    const audioContext = new AudioContext();
    const destination = audioContext.createMediaStreamDestination();
    // Remembered so they can be released if a later device fails.
    const openedStreams = [];
    try {
        const devices = await navigator.mediaDevices.enumerateDevices();
        const micDevices = devices.filter(device => device.kind === 'audioinput');

        for (const mic of micDevices) {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: { deviceId: mic.deviceId }});
            openedStreams.push(stream);
            const source = audioContext.createMediaStreamSource(stream);
            const gain = audioContext.createGain();
            gain.gain.value = 1.0;  // Adjust volume
            source.connect(gain);
            gain.connect(destination);
        }

        return destination.stream;
    } catch (error) {
        console.error("Error capturing and combining audio:", error);

        // Release what was acquired before the failure.
        for (const stream of openedStreams) {
            stream.getTracks().forEach(track => track.stop());
        }
        try {
            await audioContext.close();
        } catch (closeError) {
            console.error("Error closing the audio context:", closeError);
        }
        return null;
    }
}

/**
 * Collects the audio tracks of every 'audioinput' device into one MediaStream.
 *
 * Audio access is requested once up front, then each audio input is opened by
 * its device id and its audio tracks are added (not cloned) to the result.
 *
 * @returns {Promise<MediaStream | null>} The stream holding all tracks, or
 *     `null` if anything failed (the error is logged).
 */
async function getAllAudioInputs() {
    try {
        const mediaDevices = navigator.mediaDevices;
        await mediaDevices.getUserMedia({audio: true, video: false});

        const everyDevice = await mediaDevices.enumerateDevices();
        const inputs = everyDevice.filter(({ kind }) => kind === 'audioinput');
        const collected = new MediaStream();

        for (const { deviceId } of inputs) {
            const deviceStream = await mediaDevices.getUserMedia({ audio: { deviceId } });
            for (const track of deviceStream.getAudioTracks()) {
                collected.addTrack(track);
            }
        }

        return collected;
    } catch (error) {
        console.error("Error capturing audio from multiple devices:", error);
        return null; // Callers treat null as "no microphone audio"
    }
}

/**
 * Prepares the module-level `microphoneStream` for a new recording.
 *
 * - When detectOS() reports "MacOS" or "iOS", the work is delegated to
 *   setAllMicStreams().
 * - Otherwise, if `combinedAudioTracks` is empty, the user is asked to share a
 *   display (getDisplayMedia); the shared audio is mixed with all microphone
 *   inputs through an AudioContext, clones of the mixed tracks are cached in
 *   `combinedAudioTracks`, and the mixed stream is handed to a new
 *   MicrophoneStream.
 * - Otherwise the cached tracks are cloned into a new stream and re-used.
 *
 * @returns {Promise<void>}
 */
const createDisplayMediaStream = async () => {

    // Ask for audio plus video of a whole monitor.
    const options = {audio: true, video: true,};
    const displaySurface = 'monitor'
    options.video = {displaySurface};
    const currentOS = detectOS();

    if (currentOS === "MacOS" || currentOS === "iOS") {
        await setAllMicStreams();
    } else {
        if (combinedAudioTracks.getTracks().length === 0) {
        const UserMediaStream = await window.navigator.mediaDevices.getDisplayMedia(options);
        // Listen for the 'inactive' event on the MediaStream
        UserMediaStream.oninactive = () => {
            console.log('The user has stopped sharing the screen.');
        };

        // Alternatively, listen for the 'ended' event on audio track
        UserMediaStream.getTracks().forEach(track => {
            if (track.kind === 'audio') {
                track.onended = () => {
                        // Sharing ended: stop and drop every cached track and start
                        // with an empty cache, so the next recording captures afresh.
                        if (combinedAudioTracks.getTracks().length > 0) {
                            combinedAudioTracks.getTracks().forEach(track => {
                                track.stop();
                                combinedAudioTracks.removeTrack(track);
                            });

                            combinedAudioTracks = new MediaStream();
                        }
                    console.log('A track has ended, possibly because the user stopped sharing.');
                    alert("You have stopped the sharing. If you accidentally did it, please restart the voice translation if needed.")

                };
            }
        });

        const micAudioStream = await getAllAudioInputs(); // Get all microphone inputs

            const composedStream = new MediaStream();

        // NOTE(review): when the shared display stream carries no audio track,
        // nothing in this branch assigns `microphoneStream`.
        if (UserMediaStream.getAudioTracks().length > 0) {
            //merge the system audio with the mic audio
            let context = new AudioContext();
            let audioDestination = context.createMediaStreamDestination();

            const systemSource = context.createMediaStreamSource(UserMediaStream);
            // NOTE(review): systemGain is created and configured but never
            // connected; the source is wired straight to the destination.
            const systemGain = context.createGain();
            systemGain.gain.value = 1.0;
            systemSource.connect(audioDestination);
            console.log("added system audio");

            // getAllAudioInputs() returns null on failure, hence the guard.
            if (micAudioStream && micAudioStream.getAudioTracks().length > 0) {
                const micSource = context.createMediaStreamSource(micAudioStream);
                // NOTE(review): micGain is likewise never connected.
                const micGain = context.createGain();
                micGain.gain.value = 1.0;
                micSource.connect(audioDestination);
                console.log("added mic audio");
            }

            // Cache a clone of each mixed track for later re-use and feed the
            // original track to the stream used for this recording.
            audioDestination.stream.getAudioTracks().forEach(function(audioTrack) {
                    combinedAudioTracks.addTrack(audioTrack.clone());
                composedStream.addTrack(audioTrack);
            });
                microphoneStream = new MicrophoneStream();
                microphoneStream.setStream(composedStream);

        }

        } else {
            // Cached tracks exist: build this recording's stream from clones of them.
            console.log("=============> will re-use the audio");
            microphoneStream = new MicrophoneStream();
            const micAudioStream = new MediaStream();
            combinedAudioTracks.getAudioTracks().forEach(track => {
                micAudioStream.addTrack(track.clone());
            });

            microphoneStream.setStream(micAudioStream);
    }

    }
};

/**
 * Entry point for a recording session: validates the languages, tears down any
 * session that is still active, creates a fresh Transcribe client, prepares
 * the audio input and then streams until the transcription ends.
 *
 * @param {*} creds - Credentials for the Transcribe client and for translation.
 * @param {string} inputlanguage - Source language code(s); required.
 * @param {string} toLanguage - Target language code; required.
 * @param {(text: string, type: string) => void} callback - Receives the
 *     transcripts and translations produced while streaming.
 * @returns {Promise<false | undefined>} `false` when either language is
 *     missing (nothing is started); otherwise resolves with `undefined` once
 *     streaming has finished.
 */
export const startRecordingSpeaker = async (creds, inputlanguage, toLanguage, callback) => {
    const languagesProvided = Boolean(inputlanguage) && Boolean(toLanguage);
    if (!languagesProvided) {
        return false;
    }

    // Never run two sessions at once.
    const sessionActive = Boolean(microphoneStream || transcribeClient);
    if (sessionActive) {
        stopRecording();
    }

    createTranscribeClient(creds);
    await createDisplayMediaStream();
    await startStreaming(creds, inputlanguage, toLanguage, callback);
};

/**
 * Async generator that turns the chunks of the module-level `microphoneStream`
 * into the audio events sent to Transcribe.
 *
 * Chunks whose `length` is not `<= SAMPLE_RATE` are dropped; every other chunk
 * is PCM-encoded and wrapped as `{ AudioEvent: { AudioChunk } }`.
 *
 * @throws {Error} On first iteration when `microphoneStream` is not set.
 */
const getAudioStream = async function* () {
    if (!microphoneStream) {
        throw new Error(
            "Cannot get audio stream. microphoneStream is not initialized.",
        );
    }

    for await (const chunk of /** @type {[][]} */ (microphoneStream)) {
        const withinLimit = chunk.length <= SAMPLE_RATE;
        if (!withinLimit) {
            continue;
        }
        const AudioChunk = encodePCMChunk(chunk);
        yield { AudioEvent: { AudioChunk } };
    }
};

/**
 * Converts one microphone chunk into 16-bit little-endian PCM.
 *
 * Each raw sample is clamped to [-1, 1] and scaled to the signed 16-bit range
 * (negative values by 0x8000, the rest by 0x7fff).
 *
 * @param {*} chunk - A chunk as produced by the microphone stream.
 * @returns {Buffer} Two bytes per sample, in sample order.
 */
const encodePCMChunk = (chunk) => {
    /** @type {Float32Array} */
    const samples = MicrophoneStream.toRaw(chunk);
    const bytesPerSample = 2;
    const pcm = new DataView(new ArrayBuffer(samples.length * bytesPerSample));

    samples.forEach((sample, index) => {
        const clamped = Math.max(-1, Math.min(1, sample));
        const scale = clamped < 0 ? 0x8000 : 0x7fff;
        pcm.setInt16(index * bytesPerSample, clamped * scale, true);
    });

    return Buffer.from(pcm.buffer);
};

// Translate client used by translateTextFromLanguageToLanguage; assigned by
// createTranslateClientforTranscribe.
let translateClientforTranscribe = undefined;

/**
 * Translates `text` with the module-level Translate client.
 *
 * @param {*} creds - Credentials used to create the Translate client when none
 *     has been created yet via createTranslateClientforTranscribe.
 * @param {string} text - Text to translate.
 * @param {string} sourceLanguage - Sent as `SourceLanguageCode`.
 * @param {string} targetLanguage - Sent as `TargetLanguageCode`.
 * @returns {Promise<string>} The `TranslatedText` of the response.
 */
const translateTextFromLanguageToLanguage = async (creds, text, sourceLanguage, targetLanguage) => {
    // `creds` used to be ignored, so calling this before
    // createTranslateClientforTranscribe() failed with a TypeError on
    // `undefined.send`. Create the client on demand instead.
    if (!translateClientforTranscribe) {
        createTranslateClientforTranscribe(creds);
    }

    const translateParams = {
        Text: text,
        SourceLanguageCode: sourceLanguage,
        TargetLanguageCode: targetLanguage,
    };
    const data = await translateClientforTranscribe.send(
        new TranslateTextCommand(translateParams)
    );
    return data.TranslatedText;
};
/**
 * Builds a new Translate client for `REGION`.
 *
 * @param {*} creds - Passed through unchanged as the client's `credentials`.
 * @returns {TranslateClient} A new client; nothing is stored at module level.
 */
const createTranslateClient = (creds) => {
    const clientConfig = {
        region: REGION,
        credentials: creds,
    };
    return new TranslateClient(clientConfig);
};

/**
 * Creates the Translate client used for translating transcripts and stores it
 * in the module-level `translateClientforTranscribe`, replacing any earlier one.
 *
 * @param {*} creds - Passed through unchanged as the client's `credentials`.
 * @returns {void}
 */
export function createTranslateClientforTranscribe(creds) {
    const clientConfig = {
        region: REGION,
        credentials: creds,
    };
    translateClientforTranscribe = new TranslateClient(clientConfig);
}
