Skip to content

Commit

Permalink
Update formatters.py
Browse files Browse the repository at this point in the history
  • Loading branch information
vinay-jose authored May 16, 2024
1 parent f5c9e16 commit c91f34d
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions youtube_transcript_api/formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,23 +61,29 @@ def format_transcripts(self, transcripts, **kwargs):


class TextFormatter(Formatter):
def format_transcript(self, transcript, **kwargs):
def format_transcript(self, transcript, sep="\n", **kwargs):
"""Converts a transcript into plain text with no timestamps.
:param transcript:
:return: all transcript text lines separated by newline breaks.'
:param sep: A separator for each line in the transcript with default set as newline break ('\n').
:type str:
:return: all transcript text lines separated by `sep`.'
:rtype str
"""
return '\n'.join(line['text'] for line in transcript)
return sep.join(line['text'] for line in transcript)

def format_transcripts(self, transcripts, **kwargs):
def format_transcripts(self, transcripts, sep="\n", end="\n\n\n", **kwargs):
"""Converts a list of transcripts into plain text with no timestamps.
:param transcripts:
:return: all transcript text lines separated by newline breaks.'
:param sep: A separator for each line in the transcript with default set as newline break ('\n').
:type str:
:param end: Each individual transcript to be followed by `end` with default set as 3 newline breaks ('\n\n\n').
:type str:
:return: list of all transcript with text lines separated by `sep` and each transcript followed by `end`.
:rtype str
"""
return '\n\n\n'.join([self.format_transcript(transcript, **kwargs) for transcript in transcripts])
return end.join([self.format_transcript(transcript, sep=sep, **kwargs) for transcript in transcripts])

class _TextBasedFormatter(TextFormatter):
def _format_timestamp(self, hours, mins, secs, ms):
Expand Down

1 comment on commit c91f34d

@vinay-jose
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added optional arguments `sep` and `end`, whose default values are the existing strings ('\n' and '\n\n\n' respectively). This was useful to me while building a RAG app that uses YouTube tutorials' transcripts as a knowledge base.

Please sign in to comment.