From e4085f2518bd96ba71260d48400cc3c19849cbab Mon Sep 17 00:00:00 2001 From: vasiliadi <9432426+vasiliadi@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:35:34 -0500 Subject: [PATCH] yt-dlp for youtube (#66) * pytube replaced with yt-dlp * yt-dlp docs link added * yt-dlp added --- README.md | 2 +- requirements.txt | 2 +- streamlit_app.py | 30 +++++++++++++++++++++++------- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index cc569b3..d7b3f3a 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ You need to replace the path to the env_file in `compose.yaml` | | Links | | ---|--- | -| Libraries | [streamlit](https://docs.streamlit.io)
[replicate](https://replicate.com/docs/get-started/python)
[google-generativeai](https://ai.google.dev/gemini-api/docs/get-started/python)
[pytube](https://pytube.io/en/latest/) | +| Libraries | [streamlit](https://docs.streamlit.io)
[replicate](https://replicate.com/docs/get-started/python)
[google-generativeai](https://ai.google.dev/gemini-api/docs/get-started/python)
~~[pytube](https://pytube.io/en/latest/)~~
[yt-dlp](https://github.com/yt-dlp/yt-dlp) | | Docker | [Docker Best Practices](https://testdriven.io/blog/docker-best-practices/)

[Docker](https://docs.docker.com/language/python/)
[Dockerfile reference](https://docs.docker.com/reference/dockerfile/)
[Dockerfile Linter](https://hadolint.github.io/hadolint/)

[.dockerignore](https://docs.docker.com/build/building/context/#dockerignore-files)

[Docker Compose](https://docs.docker.com/compose/)
[Syntax for environment files in Docker Compose](https://docs.docker.com/compose/environment-variables/env-file/)
[Ways to set environment variables with Compose](https://docs.docker.com/compose/environment-variables/set-environment-variables/)
[Compose file version 3 reference](https://docs.docker.com/compose/compose-file/compose-file-v3/)| | GitHub Actions | [Workflow syntax for GitHub Actions](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions)
[Publishing images to Docker Hub and GitHub Packages](https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-docker-hub-and-github-packages) | | Dev Containers | [An open specification for enriching containers with development specific content and settings](https://containers.dev/)
[Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) | diff --git a/requirements.txt b/requirements.txt index 308476a..bffbe35 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ streamlit==1.37.1 google-generativeai==0.7.2 -pytube==15.0.0 requests==2.32.3 replicate==0.32.0 +yt-dlp==2024.8.6 diff --git a/streamlit_app.py b/streamlit_app.py index 435605e..d95fe45 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -6,7 +6,7 @@ import google.generativeai as genai import replicate import requests -from pytube import YouTube +from yt_dlp import YoutubeDL # Google Gemini config gemini_api_key = os.environ["GEMINI_API_KEY"] @@ -54,9 +54,18 @@ def download(mode=st.session_state.mode): with open(audio_file_name, "wb") as f: f.write(uploaded_file.getbuffer()) case "YouTube link": - YouTube(yt_url).streams.filter(only_audio=True).order_by( - "abr" - ).asc().first().download(filename=audio_file_name) + ydl_opts = { + "format": "worstaudio", + "outtmpl": "audio", + "postprocessors": [ + { # Extract audio using ffmpeg + "key": "FFmpegExtractAudio", + "preferredcodec": "mp3", + } + ], + } + with YoutubeDL(ydl_opts) as ydl: + ydl.download(yt_url) case "Audio file link": downloaded_file = requests.get(audio_link) with open(audio_file_name, "wb") as f: @@ -101,7 +110,11 @@ def summarize(audio_file_name=audio_file_name): def transcribe(model_name=st.session_state.model_name): match model_name: case "whisper-diarization": - latest_model_version = replicate_client.models.get("thomasmol/whisper-diarization").versions.list()[0].id + latest_model_version = ( + replicate_client.models.get("thomasmol/whisper-diarization") + .versions.list()[0] + .id + ) with open(converted_file_name, "rb") as audio: transcription = replicate_client.run( f"thomasmol/whisper-diarization:{latest_model_version}", @@ -109,7 +122,11 @@ def transcribe(model_name=st.session_state.model_name): ) return transcription case "incredibly-fast-whisper": - latest_model_version = replicate_client.models.get("vaibhavs10/incredibly-fast-whisper").versions.list()[0].id + latest_model_version = ( + replicate_client.models.get("vaibhavs10/incredibly-fast-whisper") + .versions.list()[0] + .id + ) with open(converted_file_name, "rb") as audio: try: transcription = replicate.run( @@ -124,7 +141,6 @@ def transcribe(model_name=st.session_state.model_name): st.error("Model error 😫 Try to switch model 👍", icon="🚨") st.stop() - def detected_num_speakers(transcription): speakers = [i["speaker"] for i in transcription[0:-1]] return len(set(speakers))