Skip to content

Commit

Permalink
fixes for set, passed parameter
Browse files (browse the repository at this point in the history)
  • Loading branch information
Barbara Miller committed May 30, 2024
1 parent 6fc409f commit 2e4a19e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
8 changes: 5 additions & 3 deletions brozzler/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,10 +545,12 @@ def dump_state(signum, frame):
signal.signal(signal.SIGQUIT, dump_state)

def get_skip_av_seeds():
# TODO: develop UI and refactor
SKIP_AV_SEEDS_FILE = "/opt/local/brozzler/skip_av_seeds.txt"
try:
with open(skip_av_seeds_file) as skips:
skip_av_seeds = set(skips.readlines())
# make set from seed IDs in SKIP_AV_SEEDS_FILE
with open(SKIP_AV_SEEDS_FILE) as skips:
skip_av_seeds = {int(l) for l in skips.readlines()}
logging.info("running with skip_av_seeds file %s" % SKIP_AV_SEEDS_FILE)
except Exception as e:
skip_av_seeds = set()
Expand All @@ -562,7 +564,7 @@ def get_skip_av_seeds():
worker = brozzler.worker.BrozzlerWorker(
frontier,
service_registry,
skip_av_seeds,
skip_av_seeds=skip_av_seeds,
max_browsers=int(args.max_browsers),
chrome_exe=args.chrome_exe,
proxy=args.proxy,
Expand Down
2 changes: 1 addition & 1 deletion brozzler/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def brozzle_page(
except brozzler.PageInterstitialShown:
self.logger.info("page interstitial shown (http auth): %s", page)

if enable_youtube_dl and ydl.should_ytdlp(self, site, page):
if enable_youtube_dl and ydl.should_ytdlp(site, page, self.skip_av_seeds):
try:
ydl_outlinks = ydl.do_youtube_dl(self, site, page)
outlinks.update(ydl_outlinks)
Expand Down
15 changes: 8 additions & 7 deletions brozzler/ydl.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
thread_local = threading.local()


def should_ytdlp(worker, site, page):
def should_ytdlp(site, page, skip_av_seeds):
# called only after we've passed needs_browsing() check
if page.status_code != 200:
logging.info("skipping ytdlp: non-200 page status")
Expand All @@ -41,22 +41,23 @@ def should_ytdlp(worker, site, page):
logging.info("skipping ytdlp: site marked skip_ytdlp")
return False

ytdlp_url = page.redirect_url if page.redirect_url else page.url

if "chrome-error:" in ytdlp_url:
return False

ytdlp_seed = (
site["metadata"]["ait_seed_id"]
if "metadata" in site and "ait_seed_id" in site["metadata"]
else None
)

if ytdlp_seed and ytdlp_seed in worker.skip_av_seeds:
# TODO: develop UI and refactor
if ytdlp_seed and ytdlp_seed in skip_av_seeds:
logging.info("skipping ytdlp: site in skip_av_seeds")
site.skip_ytdlp = True
return False

ytdlp_url = page.redirect_url if page.redirect_url else page.url

if "chrome-error:" in ytdlp_url:
return False

return True


Expand Down

0 comments on commit 2e4a19e

Please sign in to comment.