diff --git a/index.html b/index.html
index 5e010b9..ae4a6b9 100644
--- a/index.html
+++ b/index.html
@@ -13,8 +13,6 @@
-
-
@@ -28,11 +26,9 @@
import { DagloAPI } from 'https://actionpower.github.io/dagloapi-js-beta/lib/daglo-api.module.js';
document.getElementById('enableButton').addEventListener('click', async (event) => {
- const baseURL = document?.getElementById('host').value?.trim();
const dagloToken = document?.getElementById('token').value?.trim();
let client = new DagloAPI({
- host: baseURL,
apiToken: dagloToken
});
let transcriber = client.stream.transcriber();
@@ -46,8 +42,6 @@
document.getElementById('transcripts').append(span);
}
})
- // transcriber.start();
- // or
let stream;
try {
diff --git a/lib/daglo-api.module.js b/lib/daglo-api.module.js
index 8dea94a..5885447 100644
--- a/lib/daglo-api.module.js
+++ b/lib/daglo-api.module.js
@@ -16,10 +16,17 @@
import { AudioClassifier, FilesetResolver } from "https://actionpower.github.io/dagloapi-js-beta/lib/mediapipe-audio.module.js";
export class DagloAPI {
- constructor({apiToken = '', host = 'https://apis.daglo.ai'}) {
- host = host.replace(/\/+$/gim, '');
+ // Declare static class variables for common configuration
+ static DEFAULT_CONFIG = {
+ threshold: 0.65,
+ concatLevel: 3,
+ bufferSize: 8192,
+ frameSize: 3,
+ maxDuration : 20,
+ sampleRate: 44100,
+ };
+ constructor({apiToken = ''}) {
this.apiToken = apiToken;
- this.host = host;
}
stream = {
@@ -27,8 +34,8 @@ export class DagloAPI {
let lib = {
model: {
audioClassifier: undefined,
- audioCtx: undefined,
- audio: undefined,
+ audioCtx: undefined,
+ audio: undefined,
},
utils: {
__callback: {
@@ -71,25 +78,25 @@ export class DagloAPI {
view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
}
}
-
+
const numOfChannels = 1; // 단일 채널 (모노)
const numOfFrames = buffer.length;
const bytesPerSample = 2; // 16비트 PCM
-
+
// WAV 파일의 헤더 크기와 데이터 크기 계산
const headerSize = 44;
const dataSize = numOfFrames * numOfChannels * bytesPerSample;
const totalSize = headerSize + dataSize;
-
+
// WAV 파일을 위한 Uint8Array 생성
const wavBuffer = new ArrayBuffer(totalSize);
const view = new DataView(wavBuffer);
-
+
// RIFF 헤더 작성
writeString(view, 0, 'RIFF');
view.setUint32(4, totalSize - 8, true);
writeString(view, 8, 'WAVE');
-
+
// fmt 서브체크 작성
writeString(view, 12, 'fmt ');
view.setUint32(16, 16, true); // 서브체크 크기 (16바이트)
@@ -99,14 +106,14 @@ export class DagloAPI {
view.setUint32(28, sampleRate * numOfChannels * bytesPerSample, true); // 초당 바이트 수
view.setUint16(32, numOfChannels * bytesPerSample, true); // 블록 정렬
view.setUint16(34, bytesPerSample * 8, true); // 비트 수 (16비트)
-
+
// data 서브체크 작성
writeString(view, 36, 'data');
view.setUint32(40, dataSize, true);
-
+
// Float32Array 데이터를 16비트 PCM으로 변환하여 삽입
floatTo16BitPCM(view, 44, buffer);
-
+
return new Blob([view], { type: 'audio/wav' });
},
},
@@ -129,63 +136,87 @@ export class DagloAPI {
modelAssetPath: 'https://actionpower.github.io/dagloapi-js-beta/lib/audio.tflite'
}
})
-
+
if (typeof(lib.utils.__callback['open']) == 'function') {
lib.utils.__callback['open'](lib.utils.nanoid());
}
},
- start: async ({threshold = 0.65, concatLevel = 3, sttConfig = undefined} = {}) => {
- const constraints = { audio: true };
- let stream;
-
- const bufferSize = 8192;
- const frameSize = 3;
-
+ connect: async (stream) => {
+ const threshold = DagloAPI.DEFAULT_CONFIG.threshold;
+ const concatLevel = DagloAPI.DEFAULT_CONFIG.concatLevel;
+ const bufferSize = DagloAPI.DEFAULT_CONFIG.bufferSize;
+ const frameSize = DagloAPI.DEFAULT_CONFIG.frameSize;
+
let audioBuffer = new Float32Array(bufferSize * frameSize);
let SpeechBuffer = [];
let buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
-
+
let bufferIdx = 0;
let nonSpeechTime = -1;
-
- if (sttConfig && sttConfig?.keywordBoost.enable == undefined && !sttConfig?.keywordBoost.keywords?.length) {
- sttConfig.keywordBoost.enable = true;
- }
- if (sttConfig?.keywordBoost?.boost) {
- sttConfig.keywordBoost.boost = Math.max(Math.min(15, Math.floor(sttConfig.keywordBoost.boost * 15)), 1);
- }
-
- try {
- stream = await navigator.mediaDevices.getUserMedia(constraints);
- }
- catch (err) {
- console.log("The following error occured: " + err);
- return alert("getUserMedia not supported on your browser");
- }
-
+
if (!lib.model.audioCtx) {
- lib.model.audioCtx = new AudioContext({ sampleRate: 44100 });
-
+ lib.model.audioCtx = new AudioContext({ sampleRate: DagloAPI.DEFAULT_CONFIG.sampleRate });
+
const source = lib.model.audioCtx.createMediaStreamSource(stream);
const scriptNode = lib.model.audioCtx.createScriptProcessor(bufferSize, 1, 1);
-
+
+ const processAudioBuffer = () => {
+ if (typeof(lib.utils.__callback['transcript']) == 'function' ||
+ typeof(lib.utils.__callback['vad']) == 'function') {
+
+ let audioBuffer = new Float32Array(bufferSize * (SpeechBuffer.length - Math.floor(concatLevel / 2)));
+
+ for (let i = 0; i < SpeechBuffer.length - Math.floor(concatLevel / 2); i++) {
+ audioBuffer.set(SpeechBuffer[i], i * bufferSize);
+ }
+
+ const wavBlob = lib.utils.float32ToWav(audioBuffer, DagloAPI.DEFAULT_CONFIG.sampleRate);
+
+ if (typeof(lib.utils.__callback['vad']) == 'function') {
+ lib.utils.__callback['vad'](audioBuffer, wavBlob);
+ }
+
+ const formData = new FormData();
+ formData.append('file', wavBlob, 'recording.wav');
+
+ fetch(`https://apis.daglo.ai/stt/v1/sync/transcripts`, {
+ method: 'POST',
+ headers: {
+ 'Authorization': 'Bearer ' + this.apiToken
+ },
+ body: formData,
+ })
+ .then(response => response.json())
+ .then(data => {
+ if (typeof(lib.utils.__callback['transcript']) == 'function') {
+ lib.utils.__callback['transcript']({text: data?.sttResult?.transcript});
+ }
+ })
+ .catch(error => {
+ console.error('Error:', error);
+ });
+ }
+ };
+
scriptNode.onaudioprocess = (audioProcessingEvent) => {
const inputBuffer = audioProcessingEvent.inputBuffer;
let inputData = inputBuffer.getChannelData(0);
let _inputData = Object.assign([], inputData);
buffers[bufferIdx % frameSize] = _inputData;
-
+
+ const MAX_SAMPLES = DagloAPI.DEFAULT_CONFIG.maxDuration * DagloAPI.DEFAULT_CONFIG.sampleRate;
+
for (let i = 0; i < frameSize; i++) {
audioBuffer.set(buffers[(i + bufferIdx + 1) % frameSize], i * bufferSize);
}
-
+
bufferIdx++;
-
+
const result = lib.model.audioClassifier.classify(audioBuffer);
const categories = result[0].classifications[0].categories;
const output = categories.filter((item) => item?.score >= threshold);
const speechIdx = output.findIndex((item)=>item.categoryName == 'Speech');
-
+
if (speechIdx != -1) {
SpeechBuffer.push(_inputData);
nonSpeechTime = 0;
@@ -193,47 +224,7 @@ export class DagloAPI {
if (++nonSpeechTime < concatLevel) {
SpeechBuffer.push(_inputData);
} else {
- if (typeof(lib.utils.__callback['transcript']) == 'function' || typeof(lib.utils.__callback['vad']) == 'function') {
- let audioBuffer = new Float32Array(bufferSize * (SpeechBuffer.length - Math.floor(concatLevel / 2)));
-
- for (let i = 0; i < SpeechBuffer.length - Math.floor(concatLevel / 2); i++) {
- audioBuffer.set(SpeechBuffer[i], i * bufferSize);
- }
-
- const wavBlob = lib.utils.float32ToWav(audioBuffer, 44100);
-
- if (typeof(lib.utils.__callback['vad']) == 'function') {
- lib.utils.__callback['vad'](audioBuffer, wavBlob);
- }
-
- // FormData 객체 생성
- const formData = new FormData();
- formData.append('file', wavBlob, 'record.wav');
-
- if (sttConfig) {
- formData.append('sttConfig', JSON.stringify(sttConfig));
- }
-
- // Daglo transcript API
- fetch(`${this.host}/stt/v1/sync/transcripts`, {
- method: 'POST',
- headers: {
- 'Authorization': 'Bearer ' + this.apiToken
- },
- body: formData,
- })
- .then(response => response.json())
- .then(data => {
- if (typeof(lib.utils.__callback['transcript']) == 'function') {
- lib.utils.__callback['transcript']({text: data?.sttResult?.transcript}); //, audioBuffer, wavBlob
- }
- })
- .catch(error => {
- console.error('Error:', error);
- });
- }
-
- // buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
+ processAudioBuffer();
nonSpeechTime = -1;
SpeechBuffer = [];
}
@@ -242,125 +233,11 @@ export class DagloAPI {
SpeechBuffer = [];
SpeechBuffer.push(_inputData);
}
-
- if (typeof(lib.utils.__callback['debug']) == 'function') {
- lib.utils.__callback['debug'](output);
- }
- };
- source.connect(scriptNode);
- scriptNode.connect(lib.model.audioCtx.destination);
- }
- else if (lib.model.audioCtx.state === "running") {
- await lib.model.audioCtx.suspend();
-
- if (typeof(lib.utils.__callback['stop']) == 'function') {
- lib.utils.__callback['stop']();
- }
-
- return;
- }
-
- audioBuffer = new Float32Array(bufferSize * frameSize);
- SpeechBuffer = [];
- buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
-
- bufferIdx = 0;
- nonSpeechTime = -1;
-
- await lib.model.audioCtx.resume();
-
- if (typeof(lib.utils.__callback['start']) == 'function') {
- lib.utils.__callback['start']();
- }
- },
- connect: async (stream) => {
- const threshold = 0.65;
- const concatLevel = 3;
- const bufferSize = 8192;
- const frameSize = 3;
-
- let audioBuffer = new Float32Array(bufferSize * frameSize);
- let SpeechBuffer = [];
- let buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
-
- let bufferIdx = 0;
- let nonSpeechTime = -1;
-
- if (!lib.model.audioCtx) {
- lib.model.audioCtx = new AudioContext({ sampleRate: 44100 });
-
- const source = lib.model.audioCtx.createMediaStreamSource(stream);
- const scriptNode = lib.model.audioCtx.createScriptProcessor(bufferSize, 1, 1);
-
- scriptNode.onaudioprocess = (audioProcessingEvent) => {
- const inputBuffer = audioProcessingEvent.inputBuffer;
- let inputData = inputBuffer.getChannelData(0);
- let _inputData = Object.assign([], inputData);
- buffers[bufferIdx % frameSize] = _inputData;
-
- for (let i = 0; i < frameSize; i++) {
- audioBuffer.set(buffers[(i + bufferIdx + 1) % frameSize], i * bufferSize);
- }
-
- bufferIdx++;
-
- const result = lib.model.audioClassifier.classify(audioBuffer);
- const categories = result[0].classifications[0].categories;
- const output = categories.filter((item) => item?.score >= threshold);
- const speechIdx = output.findIndex((item) => item.categoryName == 'Speech');
-
- if (speechIdx != -1) {
- SpeechBuffer.push(_inputData);
- nonSpeechTime = 0;
- } else if (nonSpeechTime != -1) {
- if (++nonSpeechTime < concatLevel) {
- SpeechBuffer.push(_inputData);
- } else {
- if (typeof(lib.utils.__callback['transcript']) == 'function' || typeof(lib.utils.__callback['vad']) == 'function') {
- let audioBuffer = new Float32Array(bufferSize * (SpeechBuffer.length - Math.floor(concatLevel / 2)));
-
- for (let i = 0; i < SpeechBuffer.length - Math.floor(concatLevel / 2); i++) {
- audioBuffer.set(SpeechBuffer[i], i * bufferSize);
- }
-
- const wavBlob = lib.utils.float32ToWav(audioBuffer, 44100);
-
- if (typeof(lib.utils.__callback['vad']) == 'function') {
- lib.utils.__callback['vad'](audioBuffer, wavBlob);
- }
-
- // FormData 객체 생성
- const formData = new FormData();
- formData.append('file', wavBlob, 'recording.wav'); // 서버로 전송할 때 파일 이름 지정
-
- // API 호출 (예시: fetch 사용)
- fetch(`${this.host}/stt/v1/sync/transcripts`, {
- method: 'POST',
- headers: {
- 'Authorization': 'Bearer ' + this.apiToken
- },
- body: formData,
- })
- .then(response => response.json())
- .then(data => {
- console.log(data);
- if (typeof(lib.utils.__callback['transcript']) == 'function') {
- lib.utils.__callback['transcript']({text: data?.sttResult?.transcript}); //, audioBuffer, wavBlob
- }
- })
- .catch(error => {
- console.error('Error:', error);
- });
- }
-
- // buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
- nonSpeechTime = -1;
- SpeechBuffer = [];
- }
- } else {
+
+ if (SpeechBuffer.length * bufferSize >= MAX_SAMPLES) {
+ processAudioBuffer();
nonSpeechTime = -1;
SpeechBuffer = [];
- SpeechBuffer.push(_inputData);
}
};
source.connect(scriptNode);
@@ -368,20 +245,19 @@ export class DagloAPI {
}
else if (lib.model.audioCtx.state === "running") {
await lib.model.audioCtx.suspend();
-
return;
}
-
+
audioBuffer = new Float32Array(bufferSize * frameSize);
SpeechBuffer = [];
buffers = Array(frameSize).fill(null).map(() => Array(bufferSize).fill(0));
-
+
bufferIdx = 0;
nonSpeechTime = -1;
-
+
await lib.model.audioCtx.resume();
}
- }
+ };
lib.init();