Skip to content

Commit

Permalink
Add 'verify' parameter for SSL control in transcript methods
Browse files Browse the repository at this point in the history
  • Loading branch information
marcelodiaz558 committed Dec 30, 2024
1 parent 97522b7 commit 495b402
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 7 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

<h1 align="center">
✨ YouTube Transcript API ✨
</h1>
Expand Down Expand Up @@ -365,6 +364,30 @@ Using the CLI:
youtube_transcript_api <first_video_id> <second_video_id> --cookies /path/to/your/cookies.txt
```

## SSL Verification

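You can customize SSL certificate verification by providing a path to a custom certificate bundle or by disabling verification entirely. If you do not pass `verify`, the default certificate verification of the underlying `requests` session is used: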

```python
from youtube_transcript_api import YouTubeTranscriptApi

# Using custom certificate bundle
YouTubeTranscriptApi.get_transcript(video_id, verify='/path/to/cacert.pem')

# Disabling SSL verification (not recommended for production)
YouTubeTranscriptApi.get_transcript(video_id, verify=False)
```

Using the CLI:

```
# Using custom certificate bundle
youtube_transcript_api <first_video_id> <second_video_id> --verify /path/to/cacert.pem
# Disabling SSL verification
youtube_transcript_api <first_video_id> <second_video_id> --verify False
```

## Warning

This code uses an undocumented part of the YouTube API, which is called by the YouTube web-client. So there is no guarantee that it won't stop working tomorrow if they change how things work. I will, however, do my best to get things working again as soon as possible if that happens. So if it stops working, let me know!
Expand Down
16 changes: 13 additions & 3 deletions youtube_transcript_api/_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

class YouTubeTranscriptApi(object):
@classmethod
def list_transcripts(cls, video_id, proxies=None, cookies=None):
def list_transcripts(cls, video_id, proxies=None, cookies=None, verify=None):
"""
Retrieves the list of transcripts which are available for a given video. It returns a `TranscriptList` object
which is iterable and provides methods to filter the list of transcripts for specific languages. While iterating
Expand Down Expand Up @@ -61,13 +61,17 @@ def list_transcripts(cls, video_id, proxies=None, cookies=None):
:type proxies: {'http': str, 'https': str} - http://docs.python-requests.org/en/master/user/advanced/#proxies
:param cookies: a string of the path to a text file containing youtube authorization cookies
:type cookies: str
:param verify: custom SSL verification path or boolean
:type verify: str|bool|None
:return: the list of available transcripts
:rtype TranscriptList:
"""
with requests.Session() as http_client:
if cookies:
http_client.cookies = cls._load_cookies(cookies, video_id)
http_client.proxies = proxies if proxies else {}
if verify is not None:
http_client.verify = verify
return TranscriptListFetcher(http_client).fetch(video_id)

@classmethod
Expand All @@ -79,6 +83,7 @@ def get_transcripts(
proxies=None,
cookies=None,
preserve_formatting=False,
verify=None,
):
"""
Retrieves the transcripts for a list of videos.
Expand All @@ -98,6 +103,8 @@ def get_transcripts(
:type cookies: str
:param preserve_formatting: whether to keep select HTML text formatting
:type preserve_formatting: bool
:param verify: custom SSL verification path or boolean
:type verify: str|bool|None
:return: a tuple containing a dictionary mapping video ids onto their corresponding transcripts, and a list of
video ids, which could not be retrieved
:rtype ({str: [{'text': str, 'start': float, 'end': float}]}, [str]}):
Expand All @@ -110,7 +117,7 @@ def get_transcripts(
for video_id in video_ids:
try:
data[video_id] = cls.get_transcript(
video_id, languages, proxies, cookies, preserve_formatting
video_id, languages, proxies, cookies, preserve_formatting, verify
)
except Exception as exception:
if not continue_after_error:
Expand All @@ -128,6 +135,7 @@ def get_transcript(
proxies=None,
cookies=None,
preserve_formatting=False,
verify=None,
):
"""
Retrieves the transcript for a single video. This is just a shortcut for calling::
Expand All @@ -146,12 +154,14 @@ def get_transcript(
:type cookies: str
:param preserve_formatting: whether to keep select HTML text formatting
:type preserve_formatting: bool
:param verify: custom SSL verification path or boolean
:type verify: str|bool|None
:return: a list of dictionaries containing the 'text', 'start' and 'duration' keys
:rtype [{'text': str, 'start': float, 'end': float}]:
"""
assert isinstance(video_id, str), "`video_id` must be a string"
return (
cls.list_transcripts(video_id, proxies, cookies)
cls.list_transcripts(video_id, proxies, cookies, verify)
.find_transcript(languages)
.fetch(preserve_formatting=preserve_formatting)
)
Expand Down
16 changes: 15 additions & 1 deletion youtube_transcript_api/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def run(self):

def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
transcript_list = YouTubeTranscriptApi.list_transcripts(
video_id, proxies=proxies, cookies=cookies
video_id, proxies=proxies, cookies=cookies, verify=parsed_args.verify
)

if parsed_args.list_transcripts:
Expand Down Expand Up @@ -147,9 +147,23 @@ def _parse_args(self):
default=None,
help="The cookie file that will be used for authorization with youtube.",
)
parser.add_argument(
"--verify",
default=None,
type=self._parse_verify,
help="Path to a custom SSL certificate bundle or False to disable verification.",
)

return self._sanitize_video_ids(parser.parse_args(self._args))

def _parse_verify(self, value):
    """
    Converts the raw `--verify` command line value into the value handed on as `verify`.

    :param value: the string given on the command line
    :type value: str
    :return: `False`/`True` if the value spells "false"/"true" (compared case-insensitively), otherwise the
        string exactly as it was given (e.g. the path to a certificate bundle)
    :rtype bool|str:
    """
    # Anything that is not a spelled-out boolean falls through unchanged.
    boolean_literals = {"false": False, "true": True}
    return boolean_literals.get(value.lower(), value)

def _sanitize_video_ids(self, args):
    """
    Strips all backslash characters from every entry of `args.video_ids`.

    :param args: the parsed arguments; its `video_ids` attribute is replaced with the cleaned list
    :return: the same `args` object that was passed in
    """
    cleaned_ids = []
    for raw_id in args.video_ids:
        cleaned_ids.append(raw_id.replace("\\", ""))
    args.video_ids = cleaned_ids
    return args
25 changes: 23 additions & 2 deletions youtube_transcript_api/test/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,29 @@ def test_run__cookies(self):
("v1 v2 --languages de en " "--cookies blahblah.txt").split()
).run()
YouTubeTranscriptApi.list_transcripts.assert_any_call(
"v1", proxies=None, cookies="blahblah.txt"
"v1", proxies=None, cookies="blahblah.txt", verify=None
)
YouTubeTranscriptApi.list_transcripts.assert_any_call(
"v2", proxies=None, cookies="blahblah.txt"
"v2", proxies=None, cookies="blahblah.txt", verify=None
)

def test_run__verify(self):
    """
    Checks that the `--verify` option is forwarded to `YouTubeTranscriptApi.list_transcripts` for every
    video id: a certificate bundle path is passed through as a string, while the literal `False` reaches
    the API as the boolean `False`.
    """
    YouTubeTranscriptCli(
        ("v1 v2 --languages de en " "--verify /path/to/cert.pem").split()
    ).run()
    YouTubeTranscriptApi.list_transcripts.assert_any_call(
        "v1", proxies=None, cookies=None, verify="/path/to/cert.pem"
    )
    YouTubeTranscriptApi.list_transcripts.assert_any_call(
        "v2", proxies=None, cookies=None, verify="/path/to/cert.pem"
    )

    YouTubeTranscriptCli(
        ("v1 v2 --languages de en " "--verify False").split()
    ).run()
    # `--verify` is declared with `type=self._parse_verify`, which maps the text "False" to the
    # boolean False, so the API must be called with the boolean rather than the string "False".
    YouTubeTranscriptApi.list_transcripts.assert_any_call(
        "v1", proxies=None, cookies=None, verify=False
    )
    YouTubeTranscriptApi.list_transcripts.assert_any_call(
        "v2", proxies=None, cookies=None, verify=False
    )

0 comments on commit 495b402

Please sign in to comment.