Skip to content

Commit

Permalink
Merge pull request #2 from kumarks1122/main
Browse files Browse the repository at this point in the history
Audio file and config file uploading feature and invalid option audio playing functionality added
  • Loading branch information
maheshkumargangula authored Feb 8, 2024
2 parents 17c8ac4 + 9e2c541 commit d38d155
Show file tree
Hide file tree
Showing 16 changed files with 1,772 additions and 130 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/nodejs-docker-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Builds the IVRS Docker image on every push / pull request against main and
# publishes it to the hyd.ocir.io registry for pushes only.
name: Docker Image CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      # Build on every trigger so pull requests still verify the Dockerfile.
      - name: Build the Docker image
        run: cd nodejs/voice-streaming && docker build . --file Dockerfile --tag hyd.ocir.io/ax2cel5zyviy/ekstep-djp/ivrs:latest
      # Only log in and publish for pushes: registry secrets are not available
      # to pull requests from forks, and a pull-request build must not
      # overwrite the published `latest` tag with unmerged code.
      - name: Log in to the container registry (hyd.ocir.io)
        if: github.event_name == 'push'
        uses: docker/login-action@v1
        with:
          registry: hyd.ocir.io
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}
      - name: Push the docker image
        if: github.event_name == 'push'
        run: docker push hyd.ocir.io/ax2cel5zyviy/ekstep-djp/ivrs:latest
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
**/node_modules
**/bruno
venv/**
**/.DS_Store
output.wav
.vscode
**/__pycache__
temp_audio_files/**
drive-files/**
temp_converted_files/**
!temp_audio_files/.keep
!temp_converted_files/.keep
!temp_converted_files/.keep
!drive-files/.keep
51 changes: 50 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,50 @@
# ivrs-bot
# MyJP IVRS Bot

## Prerequisites

### Step 1
Install Python 3.11.

### Step 2
Install the required libraries with the command below:
```
pip install -r requirements.txt
```

## Steps to upload audio files to the cloud

### Step 1

Place the audio files you want to upload in the `drive-files` folder.

### Step 2

Update the config file `config.yaml` with your cloud storage credentials in the format below:

```
region_name: ""
aws_secret_access_key: ""
aws_access_key_id: ""
endpoint_url: ""
```

### Step 3

Create a CSV file that lists the audio files in the format below:
```
filename,category,language
"32-Track 1 Bangla PoemPoem Rhymes for eJadui Pitara.mp3",song,bengali
"33-Track 2 Bangla Poem Rhymes for eJadui Pitara.mp3",song,bengali
```

### Step 4

Run the command below to upload the audio files and update the `ivrs_config.json` file:

```
python generate-config.py csv_file_name
# Example
python generate-config.py audios.csv
```
1 change: 0 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

app = Flask(__name__)
sockets = Sockets(app)
connection_chunks = json.loads('{}')

from api import api
app.register_blueprint(api)
Expand Down
53 changes: 33 additions & 20 deletions audio_socket.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pydub.utils import make_chunks
from flask import Blueprint
from datetime import datetime
from urllib.parse import parse_qs
from urllib import parse
from telemetry import Telemetry

import os
Expand All @@ -17,42 +17,36 @@

audio_socket = Blueprint('audio_socket', __name__)
duration = 20 # no of milliseconds of each base64 string from audio file
audio_types = ["story", "rhyme", "riddle"]
audio_types = ["story", "song", "riddle"]

AUDIO_CACHE = {}

def get_audio(input_selector, language):

if input_selector > 2:
return None
def get_audio(audio_key):

urlData = os.environ['IVRS_CONFIG_URL']
webURL = downloader.urlopen(urlData)
data = webURL.read()
encoding = webURL.info().get_content_charset('utf-8')
config = json.loads(data.decode(encoding))

audio_type = audio_types[input_selector]
no_of_audios = len(config[f"{audio_type}:{language}"])
if audio_key not in config:
return None

no_of_audios = len(config[audio_key])

day_of_year = datetime.today().timetuple().tm_yday
mod_day_no = int(day_of_year % no_of_audios)

audio_index = no_of_audios if mod_day_no == 0 else mod_day_no

return config[f"{audio_type}:{language}"][audio_index - 1]
return config[audio_key][audio_index - 1]

def get_chunks(input_selector, language, file_path):
audio_type = audio_types[input_selector]
def get_chunks(audio_key, file_path):

day_of_year = datetime.today().timetuple().tm_yday
path = Path(file_path)
filename = path.stem + path.suffix.split("?")[0]

cache_key = f"{audio_type}:{language}"

if cache_key in AUDIO_CACHE:
chunk_detail = AUDIO_CACHE[cache_key]
if audio_key in AUDIO_CACHE:
chunk_detail = AUDIO_CACHE[audio_key]

if chunk_detail['cached_on'] == day_of_year:
return chunk_detail['chunks']
Expand Down Expand Up @@ -84,7 +78,7 @@ def get_chunks(input_selector, language, file_path):
}
})

AUDIO_CACHE[cache_key] = {'cached_on': day_of_year, 'chunks': chunks_array}
AUDIO_CACHE[audio_key] = {'cached_on': day_of_year, 'chunks': chunks_array}

remove_temp_file(local_file_path)
remove_temp_file(local_converted_file_path)
Expand All @@ -111,6 +105,7 @@ def echo(ws, language):
event = request_payload['event']

if event == 'start':
print("inside start")
did = hashlib.md5(request_payload['start']['from'].encode()).hexdigest()
telemetry = Telemetry(request_payload['stream_sid'], did)
request_payload['start']['from'] = did
Expand All @@ -127,11 +122,26 @@ def echo(ws, language):

input_selector = int(request_payload["dtmf"]["digit"]) - 1

audio_url = get_audio(input_selector, language)
selected_audio_type = audio_types[input_selector] if input_selector < len(audio_types) else None

audio_key = f"{selected_audio_type}:{language}"
audio_url = None
if selected_audio_type:
audio_url = get_audio(audio_key)

if not audio_url:
audio_key = f"{selected_audio_type}:{language}:empty"
audio_url = get_audio(audio_key)

if not audio_url:
audio_key = f"invalid_option:{language}"
audio_url = get_audio(audio_key)

telemetry.interact(input=input_selector, language=language, audio_type=selected_audio_type,audio_name=audio_url)

if audio_url:
chunks = get_chunks(input_selector, language, audio_url)
audio_url = audio_url.replace(" ", "%20");
chunks = get_chunks(audio_key, audio_url)
counter = 1
for chunk in chunks:
chunk["stream_sid"] = session_id
Expand All @@ -146,6 +156,9 @@ def echo(ws, language):
ws.send(json.dumps(mark_event))
push_telemetry_events(telemetry)
is_audio_sent = True
else:
pass

elif event == "mark":
# mark_event = {"event":"stop", "sequence_number": len(chunks) + 1, "stream_sid": session_id,"mark":{"name":"audio_complete"}}
# ws.send(json.dumps(mark_event))
Expand Down
4 changes: 4 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
region_name: ""
aws_secret_access_key: ""
aws_access_key_id: ""
endpoint_url: ""
Empty file added drive-files/.keep
Empty file.
93 changes: 93 additions & 0 deletions generate-config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import sys
import csv
import re
import json
# import gdown
import boto3
import yaml


from pathlib import Path

def get_upload_path(file_name, category, language):
    """Return the object-storage key under which *file_name* is uploaded.

    A file that has both a category and a language is stored at
    ``audio/<category>/<language>/<file_name>``. If either value is the
    empty string, the file is stored directly under ``audio/``.
    """
    # Guard clause: an empty category or language means "no sub-folder".
    if category == "" or language == "":
        return f'audio/{file_name}'

    return f'audio/{category}/{language}/{file_name}'

def upload_to_oci_storage(s3, file_name, category, language):
    """Upload ``drive-files/<file_name>`` to the ``sbdjp-ivrs`` bucket.

    The destination key is computed by ``get_upload_path``. Failures are
    reported rather than raised, so one bad file does not abort a batch of
    uploads.

    Args:
        s3: Object exposing ``meta.client.upload_file`` (the script passes
            a boto3 S3 resource).
        file_name: Name of the file inside the local ``drive-files`` folder.
        category: Audio category, or ``""`` for none.
        language: Audio language, or ``""`` for none.

    Returns:
        ``True`` if the upload succeeded, ``False`` otherwise.
    """
    source_path = f"drive-files/{file_name}"
    try:
        upload_path = get_upload_path(file_name, category, language)
        s3.meta.client.upload_file(source_path, 'sbdjp-ivrs', upload_path)
    except Exception as e:  # deliberate best-effort: report and carry on
        # Name the failing file and use stderr so errors are not lost among
        # the "uploaded successfully" lines on stdout.
        print(f"Failed to upload {source_path}: {e}", file=sys.stderr)
        return False

    print(upload_path, "uploaded successfully")
    return True

def create_config(s3, *, bucket_name='sbdjp-ivrs',
                  output_path='drive-files/ivrs_config.json'):
    """Build the IVRS audio config from the bucket contents and write it as JSON.

    Every object whose key looks like ``audio/<category>/<language>/...`` is
    listed under the key ``"<category>:<language>"`` (both lower-cased). For
    every language seen, ``"<category>:<language>:empty"`` (one per category
    seen) and ``"invalid_option:<language>"`` entries are added that point at
    the invalid-option audio file.

    Args:
        s3: Object exposing ``Bucket(name).objects.all()`` and
            ``meta.client.generate_presigned_url`` (the script passes a
            boto3 S3 resource).
        bucket_name: Bucket to scan.
        output_path: Path of the JSON file to write.

    Returns:
        The config dict that was written.
    """
    invalid_option_link = "https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/invalid_option_english.wav"

    config = {}
    categories = set()
    languages = set()

    for obj in s3.Bucket(bucket_name).objects.all():
        parts = Path(obj.key).parts

        # Only keys of the form audio/<category>/<language>/<file...> count.
        if len(parts) < 4 or parts[0] != "audio":
            continue

        category = parts[1].strip().lower()
        language = parts[2].strip().lower()
        categories.add(category)
        languages.add(language)

        url = s3.meta.client.generate_presigned_url(
            ClientMethod='get_object',
            Params={'Bucket': bucket_name, 'Key': obj.key},
        )
        # Keep only the part before "?", dropping the query string.
        config.setdefault(f"{category}:{language}", []).append(url.split("?")[0])

    # Sorted iteration keeps the generated file stable between runs (set
    # order of strings is not); each key gets its own list so entries are
    # never aliased to one shared mutable object.
    for lang in sorted(languages):
        for cat in sorted(categories):
            config[f"{cat}:{lang}:empty"] = [invalid_option_link]

        config[f"invalid_option:{lang}"] = [invalid_option_link]

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(config, f)

    return config

if __name__ == '__main__':
    # Script entry point: upload every file listed in the CSV given on the
    # command line, then regenerate and upload ivrs_config.json.

    # sys.argv[0] is the script name, so a CSV argument needs at least two
    # entries; comparing against 1 could never trigger the usage message.
    if len(sys.argv) < 2:
        print('Usage: generate-config.py <csv file>', file=sys.stderr)
        sys.exit(1)

    # Storage credentials and endpoint come from config.yaml.
    with open("config.yaml", "r") as stream:
        config = yaml.safe_load(stream)

    s3 = boto3.resource(
        's3',
        region_name=config["region_name"],
        aws_secret_access_key=config["aws_secret_access_key"],
        aws_access_key_id=config["aws_access_key_id"],
        endpoint_url=config["endpoint_url"]
    )

    # newline="" lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(sys.argv[1], newline="") as csv_file:
        files_list = list(csv.DictReader(csv_file))

    for row in files_list:
        category = row["category"].strip().lower()
        language = row["language"].strip().lower()
        # upload_to_oci_storage reports upload failures itself, so a failed
        # file does not stop the remaining uploads.
        upload_to_oci_storage(s3, row["filename"], category, language)

    # Rebuild the config from what is now in the bucket, then publish it.
    create_config(s3)
    upload_to_oci_storage(s3, "ivrs_config.json", "", "")

30 changes: 13 additions & 17 deletions ivrs_config.json
Original file line number Diff line number Diff line change
@@ -1,25 +1,21 @@
{
"story:english":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/story/English/Fur-Feather-Africa.mp3",
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/story/English/Windswept.mp3"
"song:english": [
"https://ax2cel5zyviy.compat.objectstorage.ap-hyderabad-1.oraclecloud.com/sbdjp-ivrs/audio/song/English/100-The%20Swing.mp3"
],
"rhyme:english":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/rhyme/English/rabbit.m4a"
"song:hindi": [
"https://ax2cel5zyviy.compat.objectstorage.ap-hyderabad-1.oraclecloud.com/sbdjp-ivrs/audio/song/Hindi/39-Track%208%20Hindi%20Poem%20Rhymes%20for%20eJadui%20Pitara%20Pd.mp3"
],
"riddle:english":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/riddle/riddle.mp3"
"story:english": [
"https://ax2cel5zyviy.compat.objectstorage.ap-hyderabad-1.oraclecloud.com/sbdjp-ivrs/audio/story/English/1-Birbal%20finds%20the%20thief.mp3",
"https://ax2cel5zyviy.compat.objectstorage.ap-hyderabad-1.oraclecloud.com/sbdjp-ivrs/audio/story/English/10-Goose%20with%20the%20golden%20egg.mp3"
],
"story:hindi":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/story/Hindi/bhediyabakri.mp3",
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/story/Hindi/pakshiparakh.mp3",
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/story/Hindi/darpokkharghosh.mp3"
"story:hindi:empty": [
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/invalid_option_english.wav"
],
"rhyme:hindi":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/rhyme/Hindi/hathiraja.m4a",
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/rhyme/Hindi/janmashtami.m4a",
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/rhyme/Hindi/words.m4a"
"invalid_option:english": [
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/invalid_option_english.wav"
],
"riddle:hindi":[
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/riddle/riddle.mp3"
"invalid_option:hindi": [
"https://objectstorage.ap-hyderabad-1.oraclecloud.com/n/ax2cel5zyviy/b/sbdjp-ivrs/o/audio/invalid_option_english.wav"
]
}
Loading

0 comments on commit d38d155

Please sign in to comment.