From f68458558b1119890e0f96a356181fe50786cf1a Mon Sep 17 00:00:00 2001 From: John Warburton Date: Wed, 6 Sep 2023 17:02:57 +0100 Subject: [PATCH] Refresh --- checkplaylist.py | 5 ++++- getCBS.py | 37 +++++++++++++++++++++++++++++++++++++ handle_duplicates.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 getCBS.py create mode 100644 handle_duplicates.py diff --git a/checkplaylist.py b/checkplaylist.py index 6c7521f..913a660 100644 --- a/checkplaylist.py +++ b/checkplaylist.py @@ -26,6 +26,7 @@ def filenamesfromm3u8(playlist: str): filenames.append(line.strip()) else: filenames.append(line[lastcolon + 1:].strip()) + #print("Filenames are %s" % filenames) return filenames def findhash(inputfilename: str): @@ -36,7 +37,7 @@ def findhash(inputfilename: str): def playlistedfilesmissingfromdirectory(playlist, directory): # Need to build a list of files expected from a playlist files = filenamesfromm3u8(playlist) - # print(files) + #print(files) errors = [] for item in files: if not os.path.isfile(item): @@ -69,6 +70,7 @@ def weedplaylist(playlist: str, filelist: list): removedlist = [] with open(playlist, 'r', encoding='utf-8') as pl: for line in pl: + # print("Checking playlist" % pl) workingentry = ['', ''] # We do a try / except because the first line is an "#EXTM3U" try: @@ -78,6 +80,7 @@ def weedplaylist(playlist: str, filelist: list): except: continue workingdictionary[findhash(workingentry[1])] = workingentry + # print("workingdictionary is" % workingdictionary) #print(workingdirectory) # Now to step through the list of files. # It doesn't matter if these are full pathnames or just filenames diff --git a/getCBS.py b/getCBS.py new file mode 100644 index 0000000..1d0a6c9 --- /dev/null +++ b/getCBS.py @@ -0,0 +1,37 @@ +#!/usr/bin/python3 +# Program to retrieve the latest CBS radio news as a file. +# The news URL changes hourly, but it is easy to work out: +# 27May2019 - changed timezone to Central. Since May 20th, +# bulletins have stepped back by an hour, and I don't +# know why. + + +import datetime, pytz +#, wget + +PREFIX = "http://audio.cbsradionewsfeed.com/" +#RAW_DL = "CBS_raw.mp3" +#PART1_AUDIO = "CBS_part1.wav" +#PART2_AUDIO = "CBS_part2.wav" +#PART2_AUDIO_CUT = "CBS_part2-cut.wav" +#CBS_EDITED = "CBS_news.mka" + +# Time, in seconds, where we start to look for the silence marking the end of bulletin. + +#SPLIT = 240 + + +est = datetime.datetime.now(pytz.timezone('US/Eastern')) + +year = str(est.year) +month = "{:02}".format(est.month) +date = "{:02}".format(est.day) +hour = "{:02}".format(est.hour) + +URL = year + '/' + month + '/' + date + '/' + hour + '/Hourly-' + hour + '.mp3' + +# We need to get this URL, divide it into two, and search for the first silence in the second part. + +print(PREFIX + URL) + + diff --git a/handle_duplicates.py b/handle_duplicates.py new file mode 100644 index 0000000..c593b13 --- /dev/null +++ b/handle_duplicates.py @@ -0,0 +1,32 @@ +#!/usr/bin/python3 + +# Takes duplicates CSV file, +# plus original playlist m3u8 file, +# then: +# sorts CSV file by similarity (field 0) +# make a holding directory for audio +# open a new m3u8 file for possible duplicates +# for each line: +# move file in field 1 to holding directory +# move file in field 2 to holding directory + +import csv +import argparse + +parser = argparse.ArgumentParser(description='Separate out possible duplicates for manual checking.') +parser.add_argument('-d', '--duplicates', required=True, type=str, help='Filename of CSV with possible duplicates') +args = parser.parse_args() + +duplicates = args.duplicates + +# listofDupes will contain the duplicates, sorted with the best matches at the top +with open(duplicates, 'r') as csvfile: + reader = csv.reader(csvfile) + listofDupes = list(reader) + +listofDupes.sort(key=lambda value: int(value[0]), reverse=True) +# print(listofDupes) + +# We'd + +