Skip to content

Commit

Permalink
misc changes
Browse files Browse the repository at this point in the history
  • Loading branch information
pathnirvana committed Jul 25, 2023
1 parent 49a55dc commit a86de0c
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion dev-prompts/combine-lists.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import fs from 'fs'
import {loadPrompts} from '../common_functions.js'
import lodash from 'lodash'

const rootFolder = 'sinhala-prompts',
const rootFolder = 'dev-prompts',
textLengthToTimeRatio = 0.1035, // median from 11 test recordings from Ven mettananada
maxPromptLength = 13 / textLengthToTimeRatio

Expand Down
2 changes: 1 addition & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ <h1>Sinhala Text to Speech Demo</h1>
The training took about 24 hours.</p>
<p>The trained model is included in the releases section of this repository <a href="https://github.com/pathnirvana/coqui-tts/releases">here</a></p>
<p>Example texts for the following voice synthesis come from the <a href="https://tipitaka.lk/">Buddha Jayanthi Tipiṭaka</a> and <a href="https://pitaka.lk/books/app-index.html">Buddhist Books App</a></p>
<p>TODO: Currently even thought there are two speakers in the dataset only the male voice was used for the training since when trained with both speakers' voices the synthesis quality was not good.
<p>TODO: Currently even though there are two speakers in the dataset only the male voice was used for the training since when trained with both speakers' voices the synthesis quality was not good.
</div>
<div class="audio-group">
<h2>Buddhist Text</h2>
Expand Down
10 changes: 5 additions & 5 deletions prompt-creator-sinhala.js → prompt-creator.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ const minSplitPromptLength = 10, maxTimeNeeded = 3600 * 20,
console.log(`max prompt length ${maxPromptLength}, dataset name ${datasetName}`)
const selected = {}, selectedFiles = {}, selectedTypes = {}
let countSelected = 0, timeSelected = 0, filesUsed = 0, promptsConsidered = 0, entriesConsidered = 0
const allSyls = JSON.parse(fs.readFileSync(`sinhala-prompts/all-syls.json`, 'utf-8'))
const syls = JSON.parse(fs.readFileSync(`sinhala-prompts/syls-syl-1.json`, 'utf-8')) //{select: {}, need: {}} //
const allSyls = JSON.parse(fs.readFileSync(`dev-prompts/all-syls.json`, 'utf-8'))
const syls = JSON.parse(fs.readFileSync(`dev-prompts/syls-syl-1.json`, 'utf-8')) //{select: {}, need: {}} //

const files = fs.readdirSync(textInputFolder).filter(f => f.endsWith('json') && !f.startsWith('atta') && !f.startsWith('anya'))
lodash.shuffle(files).forEach(file => {
Expand All @@ -94,12 +94,12 @@ lodash.shuffle(files).forEach(file => {
})

syls.need = Object.fromEntries(Object.entries(allSyls).filter(([s, c]) => !syls.select[s] && c >= 2))
fs.writeFileSync(`sinhala-prompts/syls-${datasetName}.json`, jsb(syls, null, '\t', 100), 'utf-8')
fs.writeFileSync(`sinhala-prompts/prompts-${datasetName}.txt`, Object.entries(selected)
fs.writeFileSync(`dev-prompts/syls-${datasetName}.json`, jsb(syls, null, '\t', 100), 'utf-8')
fs.writeFileSync(`dev-prompts/prompts-${datasetName}.txt`, Object.entries(selected)
.sort((a, b) => a[1].type.localeCompare(b[1].type))
.map(([text, {type, file, length}], i) => `${i + 1}\t${type}\t${file.slice(0, -5)}\t${length}\n${text.replace(/ x /g, '\n')}`).join('\n\n'), 'utf-8')

// fs.writeFileSync(`sinhala-prompts/prompts-common.txt`, Object.entries(allPrompts)
// fs.writeFileSync(`dev-prompts/prompts-common.txt`, Object.entries(allPrompts)
// .sort((a, b) => sortLength(b) - sortLength(a)).slice(0, 1000).sort((a, b) => a[0].localeCompare(b[0]))
// .map(([text, count], i) => [i, count, sortLength([text, count]), text].join('\t')).join('\n'), 'utf-8')

Expand Down

0 comments on commit a86de0c

Please sign in to comment.