From f68458558b1119890e0f96a356181fe50786cf1a Mon Sep 17 00:00:00 2001
From: John Warburton <john@johnwarburton.net>
Date: Wed, 6 Sep 2023 17:02:57 +0100
Subject: [PATCH] Refresh

---
 checkplaylist.py     |  5 ++++-
 getCBS.py            | 37 +++++++++++++++++++++++++++++++++++++
 handle_duplicates.py | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100644 getCBS.py
 create mode 100644 handle_duplicates.py

diff --git a/checkplaylist.py b/checkplaylist.py
index 6c7521f..913a660 100644
--- a/checkplaylist.py
+++ b/checkplaylist.py
@@ -26,6 +26,7 @@ def filenamesfromm3u8(playlist: str):
                 filenames.append(line.strip())
             else:
                 filenames.append(line[lastcolon + 1:].strip())
+    #print("Filenames are %s" % filenames)
     return filenames
 
 def findhash(inputfilename: str):
@@ -36,7 +37,7 @@ def findhash(inputfilename: str):
 def playlistedfilesmissingfromdirectory(playlist, directory):
     # Need to build a list of files expected from a playlist
     files = filenamesfromm3u8(playlist)
-    # print(files)
+    #print(files)
     errors = []
     for item in files:
         if not os.path.isfile(item):
@@ -69,6 +70,7 @@ def weedplaylist(playlist: str, filelist: list):
     removedlist = []
     with open(playlist, 'r', encoding='utf-8') as pl:
         for line in pl:
+            # print("Checking playlist %s" % pl)
             workingentry = ['', '']
             # We do a try / except because the first line is an "#EXTM3U"
             try:
@@ -78,6 +80,7 @@ def weedplaylist(playlist: str, filelist: list):
             except:
                 continue
             workingdictionary[findhash(workingentry[1])] = workingentry
+            # print("workingdictionary is %s" % workingdictionary)
     #print(workingdirectory)
     # Now to step through the list of files.
     # It doesn't matter if these are full pathnames or just filenames
diff --git a/getCBS.py b/getCBS.py
new file mode 100644
index 0000000..1d0a6c9
--- /dev/null
+++ b/getCBS.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python3
+# Program to retrieve the latest CBS radio news as a file.
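+# The news URL changes hourly, but it is easy to work out from the date and hour.
+# 27May2019 - changed timezone to Central: since May 20th, bulletins have
+# stepped back by an hour, and I don't know why.
+# NOTE(review): the code below uses 'US/Eastern', not Central - confirm which is intended.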
+
+
+import datetime, pytz
+#, wget
+
+# Base address of the CBS audio feed; the dated path built below is appended to it.
+PREFIX = "http://audio.cbsradionewsfeed.com/"
+
+# Disabled constants retained for the audio split step described further down.
+#RAW_DL = "CBS_raw.mp3"
+#PART1_AUDIO = "CBS_part1.wav"
+#PART2_AUDIO = "CBS_part2.wav"
+#PART2_AUDIO_CUT = "CBS_part2-cut.wav"
+#CBS_EDITED = "CBS_news.mka"
+# Time, in seconds, where we start to look for the silence marking the end of bulletin.
+#SPLIT = 240
+
+# Current wall-clock time in the US/Eastern zone.
+est = datetime.datetime.now(pytz.timezone('US/Eastern'))
+
+# Zero-padded date and hour components used in the feed path.
+year, month, date, hour = est.strftime('%Y %m %d %H').split()
+
+# Path layout: YYYY/MM/DD/HH/Hourly-HH.mp3
+URL = '/'.join((year, month, date, hour, 'Hourly-' + hour + '.mp3'))
+
+# We need to get this URL, divide it into two, and search for the first silence in the second part.
+
+print(PREFIX + URL)
+
+
diff --git a/handle_duplicates.py b/handle_duplicates.py
new file mode 100644
index 0000000..c593b13
--- /dev/null
+++ b/handle_duplicates.py
@@ -0,0 +1,32 @@
+#!/usr/bin/python3
+
+# Takes duplicates CSV file,
+# plus original playlist m3u8 file,
+# then:
+# sorts CSV file by similarity (field 0)
+# make a holding directory for audio
+# open a new m3u8 file for possible duplicates
+# for each line:
+#   move file in field 1 to holding directory
+#   move file in field 2 to holding directory
+
+import csv
+import argparse
+
+# Command line: -d/--duplicates names the CSV file of candidate duplicate pairs.
+parser = argparse.ArgumentParser(description='Separate out possible duplicates for manual checking.')
+parser.add_argument('-d', '--duplicates', required=True, type=str, help='Filename of CSV with possible duplicates')
+args = parser.parse_args()
+duplicates = args.duplicates
+
+# listofDupes will contain the duplicates, sorted with the best matches at the top.
+# newline='' is what the csv module requires so quoted fields containing newlines parse
+# correctly; blank lines come back as empty rows and are dropped so the sort key is safe.
+with open(duplicates, 'r', newline='') as csvfile:
+    listofDupes = [row for row in csv.reader(csvfile) if row]
+
+# Field 0 is the similarity score; it must parse as an integer.
+listofDupes.sort(key=lambda value: int(value[0]), reverse=True)
+# TODO: the remaining header steps (holding directory, new m3u8, moving files) are not implemented yet.
+
+