Skip to content

Commit

Permalink
Refresh
Browse files Browse the repository at this point in the history
  • Loading branch information
Warblefly authored Sep 6, 2023
1 parent a2c3dab commit f684585
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 1 deletion.
5 changes: 4 additions & 1 deletion checkplaylist.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def filenamesfromm3u8(playlist: str):
filenames.append(line.strip())
else:
filenames.append(line[lastcolon + 1:].strip())
#print("Filenames are %s" % filenames)
return filenames

def findhash(inputfilename: str):
Expand All @@ -36,7 +37,7 @@ def findhash(inputfilename: str):
def playlistedfilesmissingfromdirectory(playlist, directory):
# Need to build a list of files expected from a playlist
files = filenamesfromm3u8(playlist)
# print(files)
#print(files)
errors = []
for item in files:
if not os.path.isfile(item):
Expand Down Expand Up @@ -69,6 +70,7 @@ def weedplaylist(playlist: str, filelist: list):
removedlist = []
with open(playlist, 'r', encoding='utf-8') as pl:
for line in pl:
# print("Checking playlist" % pl)
workingentry = ['', '']
# We do a try / except because the first line is an "#EXTM3U"
try:
Expand All @@ -78,6 +80,7 @@ def weedplaylist(playlist: str, filelist: list):
except:
continue
workingdictionary[findhash(workingentry[1])] = workingentry
# print("workingdictionary is" % workingdictionary)
#print(workingdirectory)
# Now to step through the list of files.
# It doesn't matter if these are full pathnames or just filenames
Expand Down
37 changes: 37 additions & 0 deletions getCBS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/python3
# Program to retrieve the latest CBS radio news as a file.
# The news URL changes hourly, but it is easy to work out:
# 27May2019 - changed timezone to Central. Since May 20th,
# bulletins have stepped back by an hour, and I don't
# know why.
# NOTE(review): the note above says "Central", but the code has always
# used US/Eastern here -- confirm which zone the feed actually keys on.


import datetime
from zoneinfo import ZoneInfo  # stdlib (3.9+) replacement for pytz

PREFIX = "http://audio.cbsradionewsfeed.com/"

# The feed's bulletin timestamps follow US/Eastern wall-clock time.
FEED_TZ = ZoneInfo("US/Eastern")


def cbs_news_url(when=None):
    """Return the feed-relative path of the hourly CBS news bulletin.

    The feed publishes one file per hour at
    ``YYYY/MM/DD/HH/Hourly-HH.mp3`` (month/day/hour zero-padded),
    keyed on US/Eastern wall-clock time.

    Args:
        when: an aware ``datetime.datetime`` to build the path for;
            defaults to the current time in US/Eastern.

    Returns:
        The URL path string, to be appended to ``PREFIX``.
    """
    if when is None:
        when = datetime.datetime.now(FEED_TZ)
    return (f"{when.year}/{when.month:02}/{when.day:02}/"
            f"{when.hour:02}/Hourly-{when.hour:02}.mp3")


if __name__ == "__main__":
    # We need to get this URL, divide it into two, and search for the
    # first silence in the second part.
    print(PREFIX + cbs_news_url())
32 changes: 32 additions & 0 deletions handle_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/python3

# Takes duplicates CSV file,
# plus original playlist m3u8 file,
# then:
# sorts CSV file by similarity (field 0)
# make a holding directory for audio
# open a new m3u8 file for possible duplicates
# for each line:
# move file in field 1 to holding directory
# move file in field 2 to holding directory
#
# NOTE(review): only the CSV read/sort step is implemented so far; the
# holding-directory and playlist steps described above are still TODO.

import csv
import argparse


def read_duplicates(csv_path):
    """Read the duplicates CSV and return its rows, best matches first.

    Each row is expected to start with an integer similarity score in
    field 0; rows are sorted on that score, descending.

    Args:
        csv_path: path of the CSV file with possible duplicates.

    Returns:
        A list of rows (each a list of strings), sorted by similarity.
    """
    # newline='' is required by the csv module; encoding made explicit.
    with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
        rows = list(csv.reader(csvfile))
    rows.sort(key=lambda row: int(row[0]), reverse=True)
    return rows


def main(argv=None):
    """Parse command-line arguments and return the sorted duplicate list.

    Args:
        argv: argument list for testing; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description='Separate out possible duplicates for manual checking.')
    parser.add_argument('-d', '--duplicates', required=True, type=str,
                        help='Filename of CSV with possible duplicates')
    args = parser.parse_args(argv)
    return read_duplicates(args.duplicates)


if __name__ == "__main__":
    main()

0 comments on commit f684585

Please sign in to comment.