From 4f7cf874cd557a996e9bab91cc6f060215732175 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Debora=20W=C3=B6pcke?= Date: Sun, 12 Jan 2025 12:32:01 +0100 Subject: [PATCH 1/5] Clean out leo/node-comments --- fcp3/sitemgr.py | 190 +++++++++++++++++++++++------------------------- 1 file changed, 90 insertions(+), 100 deletions(-) diff --git a/fcp3/sitemgr.py b/fcp3/sitemgr.py index 4cf86f7..039db57 100644 --- a/fcp3/sitemgr.py +++ b/fcp3/sitemgr.py @@ -1,5 +1,3 @@ -#@+leo-ver=4 -#@+node:@file sitemgr.py """ new persistent SiteMgr class """ @@ -22,8 +20,8 @@ # --only-external-files -#@+others -#@+node:imports + + import fnmatch import io import json @@ -40,8 +38,8 @@ from fcp3 import CRITICAL, ERROR, INFO, DETAIL, DEBUG #, NOISY from fcp3.node import hashFile -#@-node:imports -#@+node:globals + + defaultBaseDir = os.path.join(os.path.expanduser('~'), ".freesitemgr") maxretries = -1 @@ -64,14 +62,13 @@ class Hell(Exception): """Something smells wrong here...""" -#@-node:globals -#@+node:class SiteMgr + + class SiteMgr: """ New nuclear-war-resistant Freesite insertion class """ - #@ @+others - #@+node:__init__ + def __init__(self, *args, **kw): """ Creates a new SiteMgr object @@ -114,8 +111,8 @@ def __init__(self, *args, **kw): self.load() - #@-node:__init__ - #@+node:load + + def load(self): """ Loads all site records @@ -188,8 +185,8 @@ def load(self): ) self.sites.append(site) - #@-node:load - #@+node:create + + def create(self): """ Creates a sites config @@ -205,8 +202,8 @@ def create(self): self.save() - #@-node:create - #@+node:save + + def save(self): # now write out some boilerplate @@ -231,8 +228,8 @@ def save(self): for site in self.sites: site.save() - #@-node:save - #@+node:addSite + + def addSite(self, **kw): """ adds a new site @@ -264,8 +261,8 @@ def addSite(self, **kw): return site - #@-node:addSite - #@+node:hasSite + + def hasSite(self, name): """ Returns True if site 'name' already exists @@ -276,8 +273,8 @@ def hasSite(self, name): except: return False - #@-node:hasSite - #@+node:getSite + + def getSite(self, name): """ Returns a ref to the SiteState object for site 'name', or @@ -288,16 +285,16 @@ def getSite(self, name): except: raise Exception("No such site '%s'" % name) - #@-node:getSite - #@+node:getSiteNames + + def getSiteNames(self): """ Returns a list of names of known sites """ return [site.name for site in self.sites] - #@-node:getSiteNames - #@+node:removeSite + + def removeSite(self, name): """ Removes given site @@ -306,8 +303,8 @@ def removeSite(self, name): self.sites.remove(site) os.unlink(site.path) - #@-node:removeSite - #@+node:cancelUpdate + + def cancelUpdate(self, name): """ Removes given site @@ -315,8 +312,8 @@ def cancelUpdate(self, name): site = self.getSite(name) site.cancelUpdate() - #@-node:cancelUpdate - #@+node:insert + + def insert(self, *sites, **kw): """ Inserts either named site, or all sites if no name given @@ -336,8 +333,8 @@ def insert(self, *sites, **kw): print("freesitemgr: updating site '%s' on %s" % (site.name, time.asctime())) site.insert() - #@-node:insert - #@+node:reinsert + + def reinsert(self, *sites, **kw): """ Mark sites for reinsert: set all external files as needsupload @@ -358,8 +355,8 @@ def reinsert(self, *sites, **kw): site.mark_for_reinsert() site.insert() - #@-node:reinsert - #@+node:cleanup + + def cleanup(self, *sites, **kw): """ Cleans up node queue in respect of completed inserts for given sites @@ -372,8 +369,8 @@ def cleanup(self, *sites, **kw): for site in sites: site.cleanup() - #@-node:cleanup - 
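The TODO block near the top of sitemgr.py (kept as context in this patch) outlines the planned check-get-before-upload flow: estimate a retrieval timeout at roughly 5 MiB per minute, probe the key with a data-less get, and queue the upload only if the probe times out. A minimal sketch of that idea is below; upload_if_missing and queue_upload are hypothetical names, and the get() keywords and the FCPSendTimeout exception are taken from the TODO comment itself rather than from a finished implementation.

import fcp3 as fcp

def upload_if_missing(node, rec, queue_upload):
    # Sketch only: timeout estimated at ~5 MiB/minute, as the TODO suggests.
    minutes = max(1.0, rec['sizebytes'] / (5 * 1024 * 1024))
    timeout = int(minutes * 60)
    try:
        # Probe the CHK without fetching the data; if somebody else already
        # inserted this file, our own upload can be skipped.
        node.get(rec['uri'], nodata=True, realtime=True, timeout=timeout)
        rec['state'] = 'idle'
    except fcp.node.FCPSendTimeout:
        # Top block not reachable in time: hand the record to the uploader.
        # (Other failure modes are ignored in this sketch.)
        queue_upload(rec)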
#@+node:securityCheck + + def securityCheck(self): # a nice little tangent for the entertainment of those who @@ -426,19 +423,19 @@ def wln(delay, s): print() time.sleep(0.5) - #@-node:securityCheck - #@+node:fallbackLogger + + def fallbackLogger(self, level, msg): """ This logger is used if no node FCP port is available """ print(msg) - #@-node:fallbackLogger - #@-others -#@-node:class SiteMgr -#@+node:class SiteState + + + + class SiteState: """ Stores the current state of a single freesite's insertion, in a way @@ -446,8 +443,7 @@ class SiteState: The state is saved as a pretty-printed python dict, in ~/.freesitemgr/ """ - #@ @+others - #@+node:__init__ + def __init__(self, **kw): """ Create a sitemgr object @@ -522,8 +518,8 @@ def __init__(self, **kw): # raise Exception("Site %s, directory %s, no %s present" % ( # self.name, self.dir, self.index)) - #@-node:__init__ - #@+node:load + + def load(self): """ Attempt to load a freesite @@ -601,8 +597,8 @@ def load(self): finally: self.fileLock.release() - #@-node:load - #@+node:create + + def create(self): """ Creates initial site config @@ -622,8 +618,8 @@ def create(self): # now can save self.save() - #@-node:create - #@+node:mark_for_reinsert + + def mark_for_reinsert(self): """ mark all files as changed @@ -633,8 +629,8 @@ def mark_for_reinsert(self): self.needToUpdate = True self.save() - #@-node:mark_for_reinsert - #@+node:save + + def save(self): """ Saves the node state @@ -724,8 +720,8 @@ def writeVars(comment="", tail="", **kw): finally: self.fileLock.release() - #@-node:save - #@+node:getFile + + def getFile(self, name): """ returns the control record for file 'name' @@ -735,8 +731,8 @@ def getFile(self, name): return f return None - #@-node:getFile - #@+node:cancelUpdate + + def cancelUpdate(self): """ Cancels an insert that was happening @@ -755,8 +751,8 @@ def cancelUpdate(self): self.log(INFO, "cancel:%s:update cancelled" % self.name) - #@-node:cancelUpdate - #@+node:insert + + def insert(self): """ Performs insertion of this site, or gets as far as @@ -975,8 +971,8 @@ def insert(self): self.save() - #@-node:insert - #@+node:cleanup + + def cleanup(self): """ Cleans up node queue in respect of currently-inserting freesite, @@ -989,8 +985,8 @@ def cleanup(self): else: self.clearNodeQueue() - #@-node:cleanup - #@+node:managePendingInsert + + def managePendingInsert(self): """ Check on the status of the currently running insert @@ -1100,8 +1096,8 @@ def updateEdition(uri, ed): self.save() - #@-node:managePendingInsert - #@+node:scan + + def scan(self): """ Scans all files in the site's filesystem directory, marking @@ -1208,8 +1204,8 @@ def scan(self): else: self.log(INFO, "scan: site %s has not changed" % self.name) - #@-node:scan - #@+node:clearNodeQueue + + def clearNodeQueue(self): """ remove all node queue records relating to this site @@ -1222,8 +1218,8 @@ def clearNodeQueue(self): if idparts[0] == 'freesitemgr' and idparts[1] == self.name: self.node.clearGlobalJob(id) - #@-node:clearNodeQueue - #@+node:readNodeQueue + + def readNodeQueue(self): """ Refreshes the node global queue, and reads from the queue a dict of @@ -1241,8 +1237,8 @@ def readNodeQueue(self): jobs[name] = job return jobs - #@-node:readNodeQueue - #@+node:createIndexAndSitemapIfNeeded + + def createIndexAndSitemapIfNeeded(self): """ generate and insert an index.html if none exists @@ -1412,16 +1408,16 @@ def createsitemap(): self.files.append(self.sitemapRec) - #@-node:createIndexAndSitemapIfNeeded - #@+node:allocId + + def allocId(self, name): """ 
Allocates a unique ID for a given file """ return "freesitemgr|%s|%s" % (self.name, name) - #@-node:allocId - #@+node:markManifestFiles + + def markManifestFiles(self): """ Selects the files which should directly be put in the manifest and @@ -1547,8 +1543,8 @@ def markManifestFiles(self): totalsize += rec['sizebytes'] - #@-node:markManifestFiles - #@+node:makeManifest + + def makeManifest(self): """ Create a site manifest insertion command buffer from our @@ -1697,31 +1693,31 @@ def fileMsgLines(n, rec): self.log(ERROR, "The datalength of %s to be uploaded does not match the length reported to the node of %s. This is a bug, please report it to the pyFreenet maintainer." % (datalength, reportedlength)) - #@-node:makeManifest - #@+node:fallbackLogger + + def fallbackLogger(self, level, msg): """ This logger is used if no node FCP port is available """ print(msg) - #@-node:fallbackLogger - #@-others -#@-node:class SiteState -#@+node:funcs + + + + # utility funcs -#@+others -#@+node:getFileSize + + def getFileSize(filepath): """ Get the size of the file in bytes. """ return os.stat(filepath)[stat.ST_SIZE] -#@-node:getFileSize -#@+node:fixUri + + def fixUri(uri, name, version=0): """ Conditions a URI to be suitable for freesitemgr @@ -1740,30 +1736,24 @@ def fixUri(uri, name, version=0): return uri -#@-node:fixUri -#@+node:targetFilename + + def ChkTargetFilename(name): """ Make the name suitable for a ChkTargetFilename """ return os.path.basename(name) -#@-node:targetFilename -#@+node:runTest + + def runTest(): mgr = SiteMgr(verbosity=DEBUG) mgr.insert() -#@-node:runTest -#@-others -#@-node:funcs -#@+node:mainline -if __name__ == '__main__': - runTest() -#@-node:mainline -#@-others -#@-node:@file sitemgr.py -#@-leo + + +if __name__ == '__main__': + runTest() From 07a91722c71a2c80f831f246281d13e1e9647974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Debora=20W=C3=B6pcke?= Date: Sun, 12 Jan 2025 13:16:06 +0100 Subject: [PATCH 2/5] Fix all trivial formatting violations --- fcp3/sitemgr.py | 854 ++++++++++++++++++++++++------------------------ 1 file changed, 429 insertions(+), 425 deletions(-) diff --git a/fcp3/sitemgr.py b/fcp3/sitemgr.py index 039db57..3d0a296 100644 --- a/fcp3/sitemgr.py +++ b/fcp3/sitemgr.py @@ -4,10 +4,15 @@ # TODO for collaborative huge site support: # - new file state: needupload (but has CHK) -# - only upload --max-size-per-call per update run (but at least 1 file). Default: None -# - when --check-get-before-upload is set, before trying to upload an external file, try to get(key, nodata=True, realtime=True, timeout=(estimated)) -# - estimate timeout: 5MiB/minute. -> catch exception FCPSendTimeout -> queue upload. -# - when --only-external-files is set, construct but do not upload the manifest. +# - only upload --max-size-per-call per update run (but at least 1 file). +# Default: None +# - when --check-get-before-upload is set, before trying to upload +# an external file, try to +# get(key, nodata=True, realtime=True, timeout=(estimated)) +# - estimate timeout: 5MiB/minute. -> catch exception FCPSendTimeout -> +# queue upload. +# - when --only-external-files is set, construct but do not upload +# the manifest. 
# - this gives us reinsert for free: mark all external files as needupload # # Controller: @@ -19,9 +24,6 @@ # --check-get-before-upload # --only-external-files - - - import fnmatch import io import json @@ -35,11 +37,9 @@ import traceback import fcp3 as fcp -from fcp3 import CRITICAL, ERROR, INFO, DETAIL, DEBUG #, NOISY +from fcp3 import CRITICAL, ERROR, INFO, DETAIL, DEBUG # , NOISY from fcp3.node import hashFile - - defaultBaseDir = os.path.join(os.path.expanduser('~'), ".freesitemgr") maxretries = -1 @@ -47,23 +47,30 @@ defaultMaxConcurrent = 10 testMode = False -#testMode = True +# testMode = True defaultPriority = 3 -defaultMaxManifestSizeBytes = 1024*1024*2 # 2.0 MiB: As used by the freenet default dir inserter. Reduced by 512 bytes per redirect. TODO: Add a larger side-container for additional medium-size files like images. Doing this here, because here we know what is linked in the index file. -defaultMaxNumberSeparateFiles = 1024 - 128 # ad hoq - my node sometimes dies at 500 simultaneous uploads. This is below 90% of the space in the estimated size of the manifest. +defaultMaxManifestSizeBytes = 1024*1024*2 # 2.0 MiB: +# As used by the freenet default dir inserter. Reduced by 512 bytes +# per redirect. TODO: Add a larger side-container for additional +# medium-size files like images. Doing this here, because here we +# know what is linked in the index file. + +defaultMaxNumberSeparateFiles = 1024 - 128 +# ad hoq - my node sometimes dies at 500 simultaneous uploads. +# This is below 90% of the space in the estimated size of the manifest. version = 1 minVersion = 0 + class Hell(Exception): """Something smells wrong here...""" - class SiteMgr: """ New nuclear-war-resistant Freesite insertion class @@ -72,19 +79,20 @@ class SiteMgr: def __init__(self, *args, **kw): """ Creates a new SiteMgr object - + Keywords: - - basedir - directory where site records are stored, default ~/.freesitemgr + - basedir - directory where site records are stored, + default ~/.freesitemgr """ self.kw = kw self.basedir = kw.get('basedir', defaultBaseDir) - + self.conffile = os.path.join(self.basedir, ".config") self.logfile = kw.get('logfile', None) - + # set defaults - #print "SiteMgr: kw=%s" % kw - + # print "SiteMgr: kw=%s" % kw + self.fcpHost = kw.get('host', fcp.node.defaultFCPHost) self.fcpPort = kw.get('port', fcp.node.defaultFCPPort) self.verbosity = kw.get('verbosity', fcp.node.DETAIL) @@ -92,26 +100,23 @@ def __init__(self, *args, **kw): self.noInsert = kw.get('noInsert', False) self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent) self.priority = kw.get('priority', defaultPriority) - + self.chkCalcNode = kw.get('chkCalcNode', None) - self.maxManifestSizeBytes = kw.get("maxManifestSizeBytes", + self.maxManifestSizeBytes = kw.get("maxManifestSizeBytes", defaultMaxManifestSizeBytes) - self.maxNumberSeparateFiles = kw.get("maxNumberSeparateFiles", + self.maxNumberSeparateFiles = kw.get("maxNumberSeparateFiles", defaultMaxNumberSeparateFiles) - self.index = kw.get('index', 'index.html') self.sitemap = kw.get('index', 'sitemap.html') self.mtype = kw.get('mtype', 'text/html') - self.mimeTypeMatch= kw.get('mimeTypeMatch', []) + self.mimeTypeMatch = kw.get('mimeTypeMatch', []) self.name = "freesitemgr-" + "--".join(args) # To decide whether to upload index and activelink as part of # the manifest, we need to remember their record. 
- - self.load() - + self.load() def load(self): """ @@ -124,18 +129,18 @@ def load(self): # load existing config parser = fcp.pseudopythonparser.Parser() d = parser.parse(open(self.conffile).read()) - for k,v in list(d.items()): + for k, v in list(d.items()): setattr(self, k, v) - + # barf if configs are too old if getattr(self, 'version', 0) < minVersion: raise Exception( - "Your config files at %s are too old, please delete them" \ - % self.basedir) - + "Your config files at %s are too old, please delete them" + % self.basedir) + # get a node object - #print "load: verbosity=%s" % self.verbosity - + # print "load: verbosity=%s" % self.verbosity + nodeopts = dict(host=self.fcpHost, port=self.fcpPort, verbosity=self.verbosity, @@ -143,33 +148,34 @@ def load(self): ) if self.logfile: nodeopts['logfile'] = self.logfile - + try: # create node, if we can self.node = fcp.FCPNode(**nodeopts) if not self.chkCalcNode: self.chkCalcNode = self.node - + self.node.listenGlobal() - + # borrow the node's logger self.log = self.node._log except Exception as e: # limited functionality - no node self.node = None self.log = self.fallbackLogger - self.log(ERROR, "Could not create an FCPNode, functionality will be limited. Reason: %s" % str(e)) - - log = self.log - + self.log(ERROR, + "Could not create an FCPNode, " + + "functionality will be limited. Reason: %s" % str(e)) + self.sites = [] - + # load up site records for f in os.listdir(self.basedir): - # skip the main config file, or emacs leftovers, or anything starting with '.' + # skip the main config file, or emacs leftovers, + # or anything starting with '.' if f.startswith(".") or f.endswith("~"): continue - + # else it's a site, load it site = SiteState( sitemgr=self, @@ -184,8 +190,6 @@ def load(self): mimeTypeMatch=self.mimeTypeMatch, ) self.sites.append(site) - - def create(self): """ @@ -194,46 +198,43 @@ def create(self): # ensure directory exists if not os.path.isdir(self.basedir): if os.path.exists(self.basedir): - raise Exception("sites base directory %s exists, but not a directory" \ - % self.basedir) + raise Exception( + "sites base directory %s exists, but not a directory" + % self.basedir) os.makedirs(self.basedir) - + self.sites = [] - - self.save() - + self.save() def save(self): - - # now write out some boilerplate + + # now write out some boilerplate f = open(self.conffile, "w") w = f.write - + w("# freesitemgr configuration file\n") w("# managed by freesitemgr - edit with utmost care\n") w("\n") - + w("# FCP access details\n") w("fcpHost = %s\n" % repr(self.fcpHost)) w("fcpPort = %s\n" % repr(self.fcpPort)) w("\n") - - #w("# verbosity of FCP commands\n") - #w("verbosity = %s\n" % repr(self.verbosity)) - #w("\n") - + + # w("# verbosity of FCP commands\n") + # w("verbosity = %s\n" % repr(self.verbosity)) + # w("\n") + f.close() - + for site in self.sites: site.save() - - def addSite(self, **kw): """ adds a new site - + Keywords: - name - site name - mandatory - uriPub - site's URI pubkey - defaults to inverted uriPriv @@ -244,7 +245,7 @@ def addSite(self, **kw): name = kw['name'] if self.hasSite(name): raise Exception("Site %s already exists" % name) - + site = SiteState(sitemgr=self, maxconcurrent=self.maxConcurrent, verbosity=self.verbosity, @@ -256,24 +257,20 @@ def addSite(self, **kw): mimeTypeMatch=self.mimeTypeMatch, **kw) self.sites.append(site) - + self.save() - - return site - + return site def hasSite(self, name): """ Returns True if site 'name' already exists """ try: - site = self.getSite(name) + self.getSite(name) return 
True - except: + except Exception: return False - - def getSite(self, name): """ @@ -281,19 +278,15 @@ def getSite(self, name): raises an exception if it doesn't exist """ try: - return list(filter(lambda s:s.name==name, self.sites))[0] - except: + return list(filter(lambda s: s.name == name, self.sites))[0] + except Exception: raise Exception("No such site '%s'" % name) - - def getSiteNames(self): """ Returns a list of names of known sites """ return [site.name for site in self.sites] - - def removeSite(self, name): """ @@ -302,8 +295,6 @@ def removeSite(self, name): site = self.getSite(name) self.sites.remove(site) os.unlink(site.path) - - def cancelUpdate(self, name): """ @@ -311,8 +302,6 @@ def cancelUpdate(self, name): """ site = self.getSite(name) site.cancelUpdate() - - def insert(self, *sites, **kw): """ @@ -321,19 +310,19 @@ def insert(self, *sites, **kw): cron = kw.get('cron', False) if not cron: self.securityCheck() - + if sites: sites = [self.getSite(name) for name in sites] else: sites = self.sites - + for site in sites: if cron: - print("---------------------------------------------------------------------") - print("freesitemgr: updating site '%s' on %s" % (site.name, time.asctime())) + print("--------------------------------------" + + "-------------------------------") + print("freesitemgr: updating site '%s' on %s" % ( + site.name, time.asctime())) site.insert() - - def reinsert(self, *sites, **kw): """ @@ -342,20 +331,20 @@ def reinsert(self, *sites, **kw): cron = kw.get('cron', False) if not cron: self.securityCheck() - + if sites: sites = [self.getSite(name) for name in sites] else: sites = self.sites - + for site in sites: if cron: - print("---------------------------------------------------------------------") - print("freesitemgr: reinserting site '%s' on %s" % (site.name, time.asctime())) + print("--------------------------------------" + + "-------------------------------") + print("freesitemgr: reinserting site '%s' on %s" % ( + site.name, time.asctime())) site.mark_for_reinsert() site.insert() - - def cleanup(self, *sites, **kw): """ @@ -365,26 +354,26 @@ def cleanup(self, *sites, **kw): sites = [self.getSite(name) for name in sites] else: sites = self.sites - + for site in sites: site.cleanup() - - def securityCheck(self): - + # a nice little tangent for the entertainment of those who # never bother to read the source code - + now = time.localtime() + def w(delay, s): time.sleep(delay) sys.stdout.write(s) sys.stdout.flush() + def wln(delay, s): w(delay, s) print() - + if now[1] == 4 and now[2] == 1 and now[3] >= 6 and now[3] < 12: while 1: try: @@ -399,19 +388,24 @@ def wln(delay, s): wln(0.5, "successful") wln(0.1, "[hdscan] found 247 images with NSA watermark...") wln(0.5, "[hdscan] child pornography found on hard disk!") - wln(3, "[hdscan] extracting identity information of system's users...") + wln(3, + "[hdscan] extracting identity information of " + + "system's users...") wln(1.4, "[hdscan] ... found social security number!") wln(0.2, "[hdscan] ... 
scanning user's email archive") wln(3, "Preparing report...") w(2, "Uploading report to FBI server...") wln(5, "uploaded!") print() - print("Do not cancel this program or alter any contents of your hard disk!") - print("Also, do not unplug this computer, or you will be charged with") + print("Do not cancel this program or alter any contents " + + "of your hard disk!") + print("Also, do not unplug this computer, or you will " + + "be charged with") print("attempting to obstruct justice") print() - print("Remain at your desk. An agent will arrive at your door shortly") - print() + print("Remain at your desk. An agent will arrive " + + "at your door shortly") + print() time.sleep(10) print("Happy April 1 !") break @@ -422,18 +416,12 @@ def wln(delay, s): print("Attempted program cancellation, restarting...") print() time.sleep(0.5) - - def fallbackLogger(self, level, msg): """ This logger is used if no node FCP port is available """ print(msg) - - - - class SiteState: @@ -441,22 +429,23 @@ class SiteState: Stores the current state of a single freesite's insertion, in a way that can recover from cancellations, node crashes etc - The state is saved as a pretty-printed python dict, in ~/.freesitemgr/ + The state is saved as a pretty-printed python dict, + in ~/.freesitemgr/ """ def __init__(self, **kw): """ Create a sitemgr object - + Keywords: - sitemgr - a SiteMgr object, mandatory - basedir - directory where sitemgr files are stored, default is ~/.freesitemgr - name - name of freesite - mandatory - dir - directory of site on filesystem, mandatory - - If freesite doesn't exist, then a new state file will be created, from the - optional keywords 'uriPub' and 'uriPriv' + + If freesite doesn't exist, then a new state file will be created, + from the optional keywords 'uriPub' and 'uriPriv' """ # set a couple of defaults self.updateInProgress = False @@ -469,19 +458,19 @@ def __init__(self, **kw): self.generatedTextData = {} self.kw = kw - + self.sitemgr = kw['sitemgr'] self.node = self.sitemgr.node # TODO: at some point this should be configurable per site self.maxManifestSizeBytes = self.sitemgr.maxManifestSizeBytes self.noInsert = self.sitemgr.noInsert - + # borrow the node's logger try: self.log = self.node._log - except: + except Exception: self.log = self.fallbackLogger - + self.name = kw['name'] self.dir = kw.get('dir', '') self.uriPub = kw.get('uriPub', '') @@ -499,15 +488,15 @@ def __init__(self, **kw): self.sitemap = kw.get('sitemap', 'sitemap.html') self.mtype = kw.get('mtype', 'text/html') self.mimeTypeMatch = kw.get('mimeTypeMatch', []) - - #print "Verbosity=%s" % self.Verbosity - + + # print "Verbosity=%s" % self.Verbosity + self.fileLock = threading.Lock() - + # get existing record, or create new one self.load() self.save() - + # barf if directory is invalid if not (os.path.isdir(self.dir)): raise Exception("Site %s, directory %s nonexistent" % ( @@ -517,8 +506,6 @@ def __init__(self, **kw): # and not self.insertingIndex): # raise Exception("Site %s, directory %s, no %s present" % ( # self.name, self.dir, self.index)) - - def load(self): """ @@ -528,46 +515,46 @@ def load(self): if not os.path.isfile(self.path): self.create() return - + try: self.fileLock.acquire() - + # load the file raw = open(self.path).read() try: parser = fcp.pseudopythonparser.Parser() d = parser.parse(raw) - except: + except Exception: traceback.print_exc() print("Error loading state file for site '%s' (%s)" % ( self.name, self.path)) sys.exit(1) - + # execution succeeded, extract the data items - for k,v in 
list(d.items()): + for k, v in list(d.items()): setattr(self, k, v) - + # a hack here - replace keys if missing if not self.uriPriv: self.uriPub, self.uriPriv = self.node.genkey() self.uriPriv = fixUri(self.uriPriv, self.name) self.uriPub = fixUri(self.uriPub, self.name) - self.updateInProgress = True # have to reinsert + self.updateInProgress = True # have to reinsert self.fileLock.release() self.save() self.fileLock.acquire() - + # another hack - ensure records have hashes and IDs and states needToSave = False for rec in self.files: if not rec.get('hash', ''): needToSave = True try: - #rec['hash'] = hashFile(rec['path']) + # rec['hash'] = hashFile(rec['path']) rec['hash'] = '' - except: - #traceback.print_exc() - #raise + except Exception: + # traceback.print_exc() + # raise rec['hash'] = '' if 'id' not in rec: needToSave = True @@ -581,23 +568,21 @@ def load(self): rec['state'] = 'idle' else: rec['state'] = 'changed' - + if needToSave: self.fileLock.release() self.save() self.fileLock.acquire() - - #print "load: files=%s" % self.files - + + # print "load: files=%s" % self.files + # now gotta create lookup table, by name self.filesDict = {} for rec in self.files: self.filesDict[rec['name']] = rec - + finally: self.fileLock.release() - - def create(self): """ @@ -608,17 +593,15 @@ def create(self): self.uriPub, self.uriPriv = self.node.genkey() else: self.uriPub = self.node.invertprivate(self.uriPriv) - + # condition the URIs as needed self.uriPriv = fixUri(self.uriPriv, self.name) self.uriPub = fixUri(self.uriPub, self.name) - + self.files = [] - + # now can save self.save() - - def mark_for_reinsert(self): """ @@ -628,33 +611,29 @@ def mark_for_reinsert(self): rec['state'] = 'changed' self.needToUpdate = True self.save() - - def save(self): """ Saves the node state """ self.log(DETAIL, "save: saving site config to %s" % self.path) - + try: self.log(DEBUG, "save: waiting for lock") - + self.fileLock.acquire() - + self.log(DEBUG, "save: got lock") - - confDir = os.path.split(self.path)[0] - + tmpFile = os.path.join(self.basedir, ".tmp-%s" % self.name) f = open(tmpFile, "w") self.log(DETAIL, "save: writing to temp file %s" % tmpFile) - + pp = pprint.PrettyPrinter(width=72, indent=2, stream=f) js = json.JSONEncoder(indent=2) - + w = f.write - + def writeVars(comment="", tail="", **kw): """ Pretty-print a 'name=value' line, with optional tail string @@ -676,14 +655,14 @@ def writeVars(comment="", tail="", **kw): w("\n") w(tail) f.flush() - + w("# freesitemgr state file for freesite '%s'\n" % self.name) w("# managed by freesitemgr - edit only with the utmost care\n") w("\n") - + w("# general site config items\n") w("\n") - + writeVars(name=self.name) writeVars(dir=self.dir) writeVars(uriPriv=self.uriPriv) @@ -695,32 +674,31 @@ def writeVars(comment="", tail="", **kw): writeVars(sitemap=self.sitemap) writeVars(mtype=self.mtype) writeVars(mimeTypeMatch=self.mimeTypeMatch) - + w("\n") # we should not save generated files. 
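For reference, the writeVars() calls above produce a per-site state file under ~/.freesitemgr/ that reads roughly like the sketch below. The field names, the comments, and the "freesitemgr|site|file" id format come from this file; the concrete values and the exact pretty-printed layout (produced by code outside this hunk) are only illustrative.

# freesitemgr state file for freesite 'mysite'
# managed by freesitemgr - edit only with the utmost care

# general site config items

name = 'mysite'
dir = '/home/user/sites/mysite'
uriPriv = 'USK@<private-key>/mysite/3'
uriPub = 'USK@<public-key>/mysite/3/'
updateInProgress = False
insertingManifest = False
insertingIndex = False
index = 'index.html'
sitemap = 'sitemap.html'
mtype = 'text/html'
mimeTypeMatch = []

# Detailed site contents

files = [
  {'hash': '',
   'id': 'freesitemgr|mysite|index.html',
   'mimetype': 'text/html',
   'name': 'index.html',
   'path': '/home/user/sites/mysite/index.html',
   'sizebytes': 2048,
   'state': 'idle'},
  ]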
- physicalfiles = [rec for rec in self.files - if 'path' in rec] + physicalfiles = [rec + for rec in self.files + if 'path' in rec] writeVars("Detailed site contents", files=physicalfiles) - + f.close() - + try: if os.path.exists(self.path): os.unlink(self.path) - #print "tmpFile=%s path=%s" % (tmpFile, self.path) + # print "tmpFile=%s path=%s" % (tmpFile, self.path) self.log(DETAIL, "save: %s -> %s" % (tmpFile, self.path)) os.rename(tmpFile, self.path) except KeyboardInterrupt: try: f.close() - except: + except Exception: pass if os.path.exists(tmpFile): os.unlink(tmpFile) finally: self.fileLock.release() - - def getFile(self, name): """ @@ -730,28 +708,24 @@ def getFile(self, name): if f['name'] == name: return f return None - - def cancelUpdate(self): """ Cancels an insert that was happening """ self.log(INFO, "cancel:%s:cancelling existing update job" % self.name) - + self.clearNodeQueue() self.updateInProgress = False self.insertingIndex = False self.insertingManifest = False - + for rec in self.files: if rec['state'] == 'inserting': rec['state'] = 'waiting' self.save() - - self.log(INFO, "cancel:%s:update cancelled" % self.name) - + self.log(INFO, "cancel:%s:update cancelled" % self.name) def insert(self): """ @@ -760,26 +734,30 @@ def insert(self): """ log = self.log - chkSaveInterval = 10; - + chkSaveInterval = 10 + self.log(INFO, "Processing freesite '%s'..." % self.name) if self.updateInProgress: # a prior insert is still running self.managePendingInsert() - + # bail if still in 'updating' state if self.updateInProgress: if not self.needToUpdate: # bail cos we're still updating self.log( ERROR, - "insert:%s: site is still inserting from before. If this is wrong, please cancel the insert and try again." % self.name) + ("insert:%s: site is still inserting from before." + % self.name) + + "If this is wrong, please cancel the insert " + + "and try again.") return else: self.log( ERROR, - "insert:%s: some failures from last update attempt -> retry" \ - % self.name) + ("insert:%s: some failures from last update attempt" + % self.name) + + "-> retry") else: # update completed, but we might need to update again self.log( @@ -787,19 +765,20 @@ def insert(self): "insert:%s: site insert has completed" % self.name) self.log( ERROR, - "insert:%s: checking if a new insert is needed" % self.name) - + "insert:%s: checking if a new insert is needed" + % self.name) + # compare our representation to what's on disk self.scan() - + # ------------------------------------------------ # check which files should be part of the manifest - # we have to do this after creating the index and - # sitemap, because we have to know the size of the - # index and the sitemap. This will lead to some - # temporary errors in the sitemap. They will + # we have to do this after creating the index and + # sitemap, because we have to know the size of the + # index and the sitemap. This will lead to some + # temporary errors in the sitemap. They will # disappear at the next insert. - + self.markManifestFiles() # bail if site is already up to date @@ -811,29 +790,33 @@ def insert(self): if self.noInsert: log(ERROR, "insert:%s: No update desired" % self.name) return - + log(ERROR, "insert:%s: Changes detected - updating..." 
% self.name) - + # not currently updating, so anything on the queue is crap self.clearNodeQueue() - + # ------------------------------------------------ # may need to auto-generate an index.html self.createIndexAndSitemapIfNeeded() - + # ------------------------------------------------ # select which files to insert, and get their CHKs - - # get records of files to insert + + # get records of files to insert # TODO: Check whether the CHK top block is retrievable - filesToInsert = [r for r in self.files if (r['state'] in ('changed', 'waiting') - and not r.get('target', 'separate') == 'manifest')] + filesToInsert = [r + for r in self.files + if (r['state'] + in ('changed', 'waiting') + and not r.get('target', 'separate') == 'manifest') + ] # sort by size: smallest first, so that the node queue is # cleared more quickly. filesToInsert.sort(key=lambda x: x['sizebytes']) - - # compute CHKs for all these files, synchronously, and at the same time, - # submit the inserts, asynchronously + + # compute CHKs for all these files, synchronously, + # and at the same time, submit the inserts, asynchronously chkCounter = 0 for rec in filesToInsert: if rec['state'] == 'waiting': @@ -841,29 +824,30 @@ def insert(self): log(INFO, "Pre-computing CHK for file %s" % rec['name']) # get the data if 'path' in rec: - raw = open(rec['path'],"rb").read() + raw = open(rec['path'], "rb").read() elif rec['name'] in self.generatedTextData: raw = self.generatedTextData[rec['name']].encode("utf-8") else: - raise Exception("File %s, has neither path nor generated Text. rec: %s" % ( - rec['name'], rec)) + raise Exception( + "File %s, has neither path nor generated Text. rec: %s" % ( + rec['name'], rec)) # precompute the CHK name = rec['name'] try: uri = self.chkCalcNode.genchk( - data=raw, - mimetype=rec['mimetype'], + data=raw, + mimetype=rec['mimetype'], TargetFilename=ChkTargetFilename(name)) - except fcp.node.FCPProtocolError: # likely unsupported mime type + except fcp.node.FCPProtocolError: # likely unsupported mime type uri = self.chkCalcNode.genchk( - data=raw, + data=raw, TargetFilename=ChkTargetFilename(name)) rec['uri'] = uri rec['state'] = 'waiting' - + # get a unique id for the queue id = self.allocId(name) - + # and queue it up for insert, possibly on a different node # TODO: First check whether the CHK top block is # retrievable (=someone else inserted it). @@ -884,23 +868,23 @@ def insert(self): ) rec['state'] = 'inserting' rec['chkname'] = ChkTargetFilename(name) - + chkCounter += 1 - if( 0 == ( chkCounter % chkSaveInterval )): + if (0 == (chkCounter % chkSaveInterval)): self.save() - + self.save() - - log(INFO, - "insert:%s: All CHK calculations for new/changed files complete" \ - % self.name) - + + log(INFO, + "insert:%s: All CHK calculations for new/changed files complete" + % self.name) + # save here, in case user pulls the plug self.save() - + # ----------------------------------- # create/insert manifest - + self.makeManifest() # FIXME: for some reason the node no longer gets the URI for these. 
self.node._submitCmd( @@ -910,82 +894,87 @@ def insert(self): keep=True, persistence="forever", Global="true", - Codecs=", ".join([name for name, num in self.node.compressionCodecs]), + Codecs=", ".join([name + for name, num in self.node.compressionCodecs]), **{"async": True} ) - + self.updateInProgress = True self.insertingManifest = True self.save() - - self.log(INFO, "insert:%s: waiting for all inserts to appear on queue" \ - % self.name) - + + self.log(INFO, + "insert:%s: waiting for all inserts to appear on queue" + % self.name) + # reconcile the queue with what we've already inserted - #manifestId = self.allocId("__manifest") - #raw_input("manifestId=%s " % manifestId) - #from IPython.Shell import IPShellEmbed + # manifestId = self.allocId("__manifest") + # raw_input("manifestId=%s " % manifestId) + # from IPython.Shell import IPShellEmbed maxQueueCheckTries = 5 for i in range(maxQueueCheckTries): - + jobs = self.readNodeQueue() - - #print "jobs:" - #print jobs.keys() - #sys.argv = sys.argv[:1] - #ipshell = IPShellEmbed() - #ipshell() # this call anywhere in your program will start IPython - + + # print "jobs:" + # print jobs.keys() + # sys.argv = sys.argv[:1] + # ipshell = IPShellEmbed() + # ipshell() # this call anywhere in your program will start IPython + # stick all current inserts into a 'missing' list missing = [] if "__manifest" not in jobs: missing.append('__manifest') - if (self.insertingIndex - and self.index not in jobs - and self.indexRec - and not self.indexRec.get("target", "separate") == "manifest"): + if (self.insertingIndex + and self.index not in jobs + and self.indexRec + and not (self.indexRec.get("target", "separate") == + "manifest")): missing.append(self.index) if (self.sitemap not in jobs - and self.sitemapRec - and not self.sitemapRec.get("target", "separate") == "manifest"): + and self.sitemapRec + and not (self.sitemapRec.get("target", "separate") == + "manifest")): missing.append(self.sitemap) for rec in self.files: if rec['state'] == 'waiting' and rec['name'] not in jobs: missing.append(rec['name']) - + if not missing: - self.log(INFO, "insert:%s: All insert jobs are now on queue, ok" \ - % self.name) + self.log(INFO, + "insert:%s: All insert jobs are now on queue, ok" + % self.name) break - - self.log(INFO, "insert:%s: %s jobs still missing from queue" \ - % (self.name, len(missing))) + + self.log(INFO, + "insert:%s: %s jobs still missing from queue" + % (self.name, len(missing))) self.log(INFO, "insert:%s: missing=%s" % (self.name, missing)) time.sleep(1) - + if i >= maxQueueCheckTries-1: - self.log(CRITICAL, "insert:%s: node lost several queue jobs: %s" \ - % (self.name, " ".join(missing))) - + self.log(CRITICAL, + "insert:%s: node lost several queue jobs: %s" + % (self.name, " ".join(missing))) + self.log(INFO, "Site %s inserting now on global queue" % self.name) - - self.save() - + self.save() def cleanup(self): """ Cleans up node queue in respect of currently-inserting freesite, removing completed queue items and updating our local records """ - self.log(INFO, "Cleaning up node queue for freesite '%s'..." % self.name) + self.log(INFO, + "Cleaning up node queue for freesite '%s'..." 
+ % self.name) if self.updateInProgress: # a prior insert is still running self.managePendingInsert() else: self.clearNodeQueue() - - def managePendingInsert(self): """ @@ -993,22 +982,22 @@ def managePendingInsert(self): """ # -------------------------------------------- # check global queue, and update insert status - + self.log(INFO, "insert:%s: still updating" % self.name) - self.log(INFO, "insert:%s: fetching progress reports from global queue..." % - self.name) - + self.log(INFO, + "insert:%s: fetching progress reports from global queue..." % + self.name) + self.node.refreshPersistentRequests() - + needToInsertManifest = self.insertingManifest - needToInsertIndex = self.insertingIndex - + queuedJobs = {} - + # for each job on queue that we know, clear it globalJobs = self.node.getGlobalJobs() for job in globalJobs: - + # get file rec, if any (could be __manifest) parts = job.id.split("|") if parts[0] != 'freesitemgr': @@ -1017,25 +1006,24 @@ def managePendingInsert(self): if parts[1] != self.name: # not our site - ignore it continue - + name = parts[2] # bab: huh? duplicated info? queuedJobs[name] = name - + if not job.isComplete(): continue - + # queued job either finished or failed rec = self.filesDict.get(name, None) - + # kick the job off the global queue self.node.clearGlobalJob(job.id) - + # was the job successful? result = job.result - + # yes, got a uri result - id = job.id if name == "__manifest": if isinstance(result, Exception): self.needToUpdate = True @@ -1043,8 +1031,9 @@ def managePendingInsert(self): # manifest inserted successfully self.insertingManifest = False needToInsertManifest = False - - # uplift the new URI, extract the edition number, update our record + + # uplift the new URI, extract the edition number, + # update our record def updateEdition(uri, ed): return "/".join(uri.split("/")[:2] + [ed]) manifestUri = job.result @@ -1052,14 +1041,13 @@ def updateEdition(uri, ed): self.uriPub = updateEdition(self.uriPub, edition) + "/" self.uriPriv = updateEdition(self.uriPriv, edition) self.save() - + elif name == self.index: if isinstance(result, Exception): self.needToUpdate = True else: # index inserted ok insert self.insertingIndex = False - needToInsertIndex = False elif name == self.sitemap: if isinstance(result, Exception): self.needToUpdate = True @@ -1071,16 +1059,18 @@ def updateEdition(uri, ed): self.log(ERROR, "insert:%s: Don't have a record for file %s" % ( self.name, name)) - - # now, make sure that all currently inserting files have a job on the queue + + # now, make sure that all currently inserting files + # have a job on the queue for rec in self.files: if rec['state'] != 'inserting': continue if rec['name'] not in queuedJobs: - self.log(CRITICAL, "insert: node has forgotten job %s" % rec['name']) + self.log(CRITICAL, + "insert: node has forgotten job %s" % rec['name']) rec['state'] = 'waiting' self.needToUpdate = True - + # check for any uninserted files or manifests stillInserting = False for rec in self.files: @@ -1088,15 +1078,13 @@ def updateEdition(uri, ed): stillInserting = True if needToInsertManifest: stillInserting = True - + # is insert finally complete? if not stillInserting: # yes, finally done self.updateInProgress = False - - self.save() - + self.save() def scan(self): """ @@ -1104,20 +1092,20 @@ def scan(self): the ones which need updating or new inserting """ log = self.log - + structureChanged = False - - self.log(INFO, "scan: analysing freesite '%s' for changes..." 
% self.name) - + + self.log(INFO, + "scan: analysing freesite '%s' for changes..." % self.name) + # scan the directory, pass it as bytestring to avoid unicode problems try: lst = fcp.node.readdir(self.dir.encode("utf-8"), prefix=b"") - except UnicodeDecodeError: # FIXME: guesswork? If you use - # wget, these names might be - # anything, but we just need to do - # the same for encode and decode. + except UnicodeDecodeError: + # FIXME: guesswork? If you use wget, these names might be + # anything, but we just need to do the same for encode and decode. lst = fcp.node.readdir(self.dir.encode("ISO-8859-15"), prefix=b"") - + # convert records to the format we use physFiles = [] physDict = {} @@ -1142,11 +1130,12 @@ def scan(self): rec['id'] = '' physFiles.append(rec) physDict[rec['name']] = rec - + # now, analyse both sets of records, and determine if update is needed - + # firstly, purge deleted files - # also, pick up records without URIs, or which are already marked as changed + # also, pick up records without URIs, or which are already + # marked as changed for name, rec in list(self.filesDict.items()): # generated files never trigger a reupload. if name in self.generatedTextData: @@ -1160,12 +1149,12 @@ def scan(self): elif rec['state'] in ('changed', 'waiting'): # already known to be changed structureChanged = True - elif (not rec.get('uri', None) and + elif (not rec.get('uri', None) and rec.get('target', 'separate') == 'separate'): # file has no URI but was not part of a container structureChanged = True rec['state'] = 'changed' - + # secondly, add new/changed files we just checked on disk for name, rec in list(physDict.items()): if name not in self.filesDict: @@ -1180,8 +1169,8 @@ def scan(self): # known file - see if changed knownrec = self.filesDict[name] if (knownrec['state'] in ('changed', 'waiting') - or knownrec['hash'] != rec['hash'] - or knownrec['mimetype'] != rec['mimetype']): + or knownrec['hash'] != rec['hash'] + or knownrec['mimetype'] != rec['mimetype']): # flag an update log(DETAIL, "scan: file %s has changed" % name) knownrec['hash'] = rec['hash'] @@ -1194,7 +1183,6 @@ def scan(self): if 'sizebytes' not in knownrec: knownrec['sizebytes'] = rec['sizebytes'] - # if structure has changed, gotta sort and save if structureChanged: self.needToUpdate = True @@ -1203,8 +1191,6 @@ def scan(self): self.log(INFO, "scan: site %s has changed" % self.name) else: self.log(INFO, "scan: site %s has not changed" % self.name) - - def clearNodeQueue(self): """ @@ -1217,14 +1203,12 @@ def clearNodeQueue(self): idparts = id.split("|") if idparts[0] == 'freesitemgr' and idparts[1] == self.name: self.node.clearGlobalJob(id) - - def readNodeQueue(self): """ Refreshes the node global queue, and reads from the queue a dict of all jobs which are related to this freesite - + Keys in the dict are filenames (rel paths), or __manifest """ jobs = {} @@ -1236,8 +1220,6 @@ def readNodeQueue(self): name = idparts[2] jobs[name] = job return jobs - - def createIndexAndSitemapIfNeeded(self): """ @@ -1247,9 +1229,9 @@ def genindexuri(): # dumb hack - calculate uri if missing if not self.indexRec.get('uri', None): self.indexRec['uri'] = self.chkCalcNode.genchk( - data=open(self.indexRec['path'], "rb").read(), - mimetype=self.mtype, - TargetFilename=ChkTargetFilename(self.index)) + data=open(self.indexRec['path'], "rb").read(), + mimetype=self.mtype, + TargetFilename=ChkTargetFilename(self.index)) # yes, remember its uri for the manifest self.indexUri = self.indexRec['uri'] # flag if being inserted @@ -1261,12 
+1243,11 @@ def gensitemapuri(): # dumb hack - calculate uri if missing if not self.sitemapRec.get('uri', None): self.sitemapRec['uri'] = self.chkCalcNode.genchk( - data=open(self.sitemapRec['path'], "rb").read(), - mimetype=self.mtype, - TargetFilename=ChkTargetFilename(self.sitemap)) + data=open(self.sitemapRec['path'], "rb").read(), + mimetype=self.mtype, + TargetFilename=ChkTargetFilename(self.sitemap)) # yes, remember its uri for the manifest self.sitemapUri = self.sitemapRec['uri'] - def createindex(): # create an index.html with a directory listing @@ -1279,9 +1260,10 @@ def createindex(): "", "", "

%s

" % title, - "This listing was automatically generated and inserted by freesitemgr", + "This listing was automatically generated and " + + "inserted by freesitemgr", "

", - #"
    ", + # "
      ", "", "", "", @@ -1289,7 +1271,7 @@ def createindex(): "", "", ] - + for rec in self.files: size = getFileSize(rec['path']) mimetype = rec['mimetype'] @@ -1301,16 +1283,19 @@ def createindex(): "" % (name, name), "", ]) - + indexlines.append("
      SizeName
      %s
      \n") - + self.indexRec = {'name': self.index, 'state': 'changed'} - self.generatedTextData[self.indexRec['name']] = "\n".join(indexlines) + self.generatedTextData[self.indexRec['name']] = ( + "\n".join(indexlines)) try: self.indexRec['sizebytes'] = len( - self.generatedTextData[self.indexRec['name']].encode("utf-8")) + self.generatedTextData[self.indexRec['name']]. + encode("utf-8")) except UnicodeDecodeError: - print("generated data:", self.generatedTextData[self.indexRec['name']]) + print("generated data:", + self.generatedTextData[self.indexRec['name']]) raise # needs no URI: is always in manifest. @@ -1325,9 +1310,10 @@ def createsitemap(): "", "", "

      %s

      " % title, - "This listing was automatically generated and inserted by freesitemgr", + "This listing was automatically generated and " + + "inserted by freesitemgr", "

      ", - #"
        ", + # "
          ", "", "", "", @@ -1335,7 +1321,7 @@ def createsitemap(): "", "", ] - + for rec in self.files: size = getFileSize(rec['path']) mimetype = rec['mimetype'] @@ -1347,10 +1333,10 @@ def createsitemap(): "" % (name, name), "", ]) - + lines.append("
          SizeName
          %s
          ") - - # and add all keys + + # and add all keys lines.extend([ "

          Keys of large, separately inserted files

          ", "
          "
          @@ -1363,25 +1349,27 @@ def createsitemap():
                                   uri = rec['uri']
                               except (KeyError, TypeError):
                                   if 'path' in rec:
          -                            raw = open(rec['path'],"rb").read()
          +                            raw = open(rec['path'], "rb").read()
                                       uri = self.chkCalcNode.genchk(
          -                                data=raw, 
          +                                data=raw,
                                           mimetype=rec['mimetype'],
                                           TargetFilename=ChkTargetFilename(rec['name']))
                                       rec['uri'] = uri
                               lines.append(uri)
                       lines.append("
          \n") - - self.sitemapRec = {'name': self.sitemap, 'state': 'changed', 'mimetype': 'text/html'} + + self.sitemapRec = {'name': self.sitemap, + 'state': 'changed', + 'mimetype': 'text/html'} self.generatedTextData[self.sitemapRec['name']] = "\n".join(lines) - raw = self.generatedTextData[self.sitemapRec['name']].encode("utf-8") + raw = self.generatedTextData[self.sitemapRec['name']]. \ + encode("utf-8") self.sitemapRec['sizebytes'] = len(raw) self.sitemapRec['uri'] = self.chkCalcNode.genchk( - data=raw, - mimetype=self.sitemapRec['mimetype'], + data=raw, + mimetype=self.sitemapRec['mimetype'], TargetFilename=ChkTargetFilename(self.sitemap)) - # got an actual index and sitemap file? self.indexRec = self.filesDict.get(self.index, None) self.sitemapRec = self.filesDict.get(self.sitemap, None) @@ -1406,43 +1394,41 @@ def createsitemap(): createsitemap() # register the sitemap for upload. self.files.append(self.sitemapRec) - - - def allocId(self, name): """ Allocates a unique ID for a given file """ return "freesitemgr|%s|%s" % (self.name, name) - - def markManifestFiles(self): """ Selects the files which should directly be put in the manifest and marks them with rec['target'] = 'manifest'. All other files are marked with 'separate'. - - Files are selected for the manifest until the manifest reaches + + Files are selected for the manifest until the manifest reaches maxManifestSizeBytes based on the following rules: - index and activelink.png are always included - - the first to include are CSS files referenced in the index, smallest first + - the first to include are CSS files referenced in the index, + smallest first - then follow all other files referenced in the index, smallest first - then follow html files not referenced in the index, smallest first - then follow all other files, smallest first - - The manifest goes above the max size if that is necessary to avoid having more - than maxNumberSeparateFiles redirects. + + The manifest goes above the max size if that is necessary to + avoid having more than maxNumberSeparateFiles redirects. """ # TODO: This needs to avoid spots which break freenet. If we # have very many small files, they should all be put into the # container. Maybe add a maximum number of files to insert # separately. - - #: The size of a redirect. See src/freenet/support/ContainerSizeEstimator.java + + #: The size of a redirect. + # See src/freenet/support/ContainerSizeEstimator.java redirectSize = 512 - #: The estimated size of the .metadata object. See src/freenet/support/ContainerSizeEstimator.java + #: The estimated size of the .metadata object. + # See src/freenet/support/ContainerSizeEstimator.java metadataSize = 128 # check whether we have an activelink. 
@@ -1459,13 +1445,13 @@ def markManifestFiles(self): if self.indexRec: self.indexRec['target'] = "manifest" totalsize += self.indexRec['sizebytes'] - maxsize += redirectSize # no redirect needed for this file + maxsize += redirectSize # no redirect needed for this file # also we always add the activelink if self.activelinkRec and (self.activelinkRec['sizebytes'] + totalsize <= maxsize + redirectSize): self.activelinkRec['target'] = "manifest" totalsize = self.activelinkRec['sizebytes'] - maxsize += redirectSize # no redirect needed for this file + maxsize += redirectSize # no redirect needed for this file # sort the files by filesize recBySize = sorted(self.files, key=lambda rec: rec['sizebytes']) # now we parse the index to see which files are directly @@ -1476,7 +1462,8 @@ def markManifestFiles(self): indexText = self.generatedTextData[self.indexRec['name']] except (KeyError, TypeError): try: - indexText = io.open(self.indexRec['path'], "r", encoding="utf-8").read() + indexText = io.open(self.indexRec['path'], "r", + encoding="utf-8").read() except UnicodeDecodeError: # no unicode file? Let io.open guess. try: @@ -1484,31 +1471,39 @@ def markManifestFiles(self): except UnicodeDecodeError: # almost final chance: replace errors. try: - indexText = io.open(self.indexRec['path'], "r", encoding="utf-8", errors="xmlcharrefreplace").read() + indexText = io.open(self.indexRec['path'], "r", + encoding="utf-8", + errors="xmlcharrefreplace"). \ + read() except (TypeError, UnicodeDecodeError): - # truly final chance: just throw out errors. TODO: Use chardet: https://pypi.python.org/pypi/chardet - indexText = io.open(self.indexRec['path'], "r", encoding="utf-8", errors="ignore").read() + # truly final chance: just throw out errors. + # TODO: Use chardet: + # https://pypi.python.org/pypi/chardet + indexText = io.open(self.indexRec['path'], "r", + encoding="utf-8", + errors="ignore"). \ + read() else: indexText = "" # now resort the recBySize to have the recs which are # referenced in index first - with additional preference to CSS files. # For files outside the index, prefer html files before others. - fileNamesInIndex = set([rec['name'] for rec in recBySize + fileNamesInIndex = set([rec['name'] for rec in recBySize if rec['name'] in indexText]) - fileNamesInIndexCSS = set([rec['name'] for rec in recBySize - if rec['name'] in fileNamesInIndex + fileNamesInIndexCSS = set([rec['name'] for rec in recBySize + if rec['name'] in fileNamesInIndex and rec['name'].lower().endswith('.css')]) fileNamesInManifest = set() recByIndexAndSize = [] - recByIndexAndSize.extend(rec for rec in recBySize + recByIndexAndSize.extend(rec for rec in recBySize if rec['name'] in fileNamesInIndexCSS) - recByIndexAndSize.extend(rec for rec in recBySize + recByIndexAndSize.extend(rec for rec in recBySize if rec['name'] in fileNamesInIndex and rec['name'] not in fileNamesInIndexCSS) - recByIndexAndSize.extend(rec for rec in recBySize + recByIndexAndSize.extend(rec for rec in recBySize if rec['name'] not in fileNamesInIndex and rec['name'].lower().endswith(".html")) - recByIndexAndSize.extend(rec for rec in recBySize + recByIndexAndSize.extend(rec for rec in recBySize if rec['name'] not in fileNamesInIndex and not rec['name'].lower().endswith(".html")) for rec in recByIndexAndSize: @@ -1516,16 +1511,17 @@ def markManifestFiles(self): rec['target'] = 'manifest' # remember this fileNamesInManifest.add(rec['name']) - continue # we already added the size. + continue # we already added the size. 
if rec['sizebytes'] + totalsize <= maxsize + redirectSize: rec['target'] = 'manifest' totalsize += rec['sizebytes'] - maxsize += redirectSize # no redirect needed for this file + maxsize += redirectSize # no redirect needed for this file # remember this fileNamesInManifest.add(rec['name']) else: if rec.get('target', 'separate') == 'manifest': - # if files moved out of the manifest, they have to be uploaded again + # if files moved out of the manifest, + # they have to be uploaded again if not rec['uri']: rec['state'] = 'changed' self.needToUpdate = True @@ -1541,9 +1537,6 @@ def markManifestFiles(self): rec = separateRecBySize[i] rec['target'] = 'manifest' totalsize += rec['sizebytes'] - - - def makeManifest(self): """ @@ -1552,32 +1545,41 @@ def makeManifest(self): """ # build up a command buffer to insert the manifest self.manifestCmdId = self.allocId("__manifest") - + msgLines = ["ClientPutComplexDir", "Identifier=%s" % self.manifestCmdId, "Verbosity=%s" % self.Verbosity, "MaxRetries=%s" % maxretries, # increase by one so the site finishes after its resources - "PriorityClass=%s" % max(0, int(self.priority) + 1), + "PriorityClass=%s" % max(0, int(self.priority) + 1), "URI=%s" % self.uriPriv, "Persistence=forever", "Global=true", "DefaultName=%s" % self.index, ] - + # add each file's entry to the command buffer n = 0 - default = None # cache DDA requests to avoid stalling for ages on big sites hasDDAtested = {} datatoappend = [] def fileMsgLines(n, rec): - if rec.get('target', 'separate') == 'separate' and rec.get('uri', None): - return [ # FIXME: rec['name'] can be str or bytes. Find out which one and why. - "Files.%d.Name=%s" % (n, (rec['name'] if isinstance(rec['name'], str) else rec['name'].decode("utf-8"))), + if (rec.get('target', 'separate') == 'separate' and + rec.get('uri', None)): + return [ + # FIXME: rec['name'] can be str or bytes. + # Find out which one and why. + "Files.%d.Name=%s" % (n, + (rec['name'] + if isinstance(rec['name'], str) + else rec['name'].decode("utf-8"))), "Files.%d.UploadFrom=redirect" % n, - "Files.%d.TargetURI=%s" % (n, (rec['uri'] if isinstance(rec['uri'], str) else rec['uri'].decode("utf-8"))), + "Files.%d.TargetURI=%s" % (n, + (rec['uri'] + if isinstance(rec['uri'], str) + else (rec['uri']. + decode("utf-8")))), ] # if the site should be part of the manifest, check for DDA if 'path' not in rec: @@ -1588,13 +1590,14 @@ def fileMsgLines(n, rec): hasDDA = hasDDAtested[DDAdir] except (KeyError, TypeError): hasDDA = self.node.testDDA(Directory=DDAdir, - WantReadDirectory=True, + WantReadDirectory=True, WantWriteDirectory=False) hasDDAtested[DDAdir] = hasDDA if hasDDA: if rec['name'] in self.generatedTextData: - sizebytes = len(self.generatedTextData[rec['name']].encode("utf-8")) + sizebytes = len(self.generatedTextData[rec['name']]. 
+ encode("utf-8")) else: sizebytes = os.path.getsize(rec['path']) rec['sizebytes'] = sizebytes @@ -1604,7 +1607,8 @@ def fileMsgLines(n, rec): "Files.%d.Name=%s" % (n, rec['name']), "Files.%d.UploadFrom=disk" % n, "Files.%d.Filename=%s" % (n, rec['path']), - "Files.%d.Metadata.ContentType=%s" % (n, rec['mimetype']), + "Files.%d.Metadata.ContentType=%s" % (n, + rec['mimetype']), ] else: return [ @@ -1625,7 +1629,8 @@ def fileMsgLines(n, rec): "Files.%d.Name=%s" % (n, rec['name']), "Files.%d.UploadFrom=direct" % n, "Files.%d.DataLength=%s" % (n, rec['sizebytes']), - "Files.%d.Metadata.ContentType=%s" % (n, rec['mimetype']), + "Files.%d.Metadata.ContentType=%s" % (n, + rec['mimetype']), ] else: return [ @@ -1634,17 +1639,20 @@ def fileMsgLines(n, rec): "Files.%d.DataLength=%s" % (n, rec['sizebytes']), ] - # start with index.html's uri and the sitemap msgLines.extend(fileMsgLines(n, self.indexRec)) n += 1 msgLines.extend(fileMsgLines(n, self.sitemapRec)) n += 1 - + # now add the rest of the files, but not index.html # put files first which should be part of the manifest. - manifestfiles = [r for r in self.files if r.get('target', 'separate') == 'manifest'] - separatefiles = [r for r in self.files if not r.get('target', 'separate') == 'manifest'] + manifestfiles = [r + for r in self.files + if r.get('target', 'separate') == 'manifest'] + separatefiles = [r + for r in self.files + if not r.get('target', 'separate') == 'manifest'] # sort the manifestfiles by size manifestfiles = sorted(manifestfiles, key=lambda rec: rec['sizebytes']) for rec in manifestfiles + separatefiles: @@ -1661,55 +1669,57 @@ def fileMsgLines(n, rec): # don't add if the file failed to insert if not rec['uri']: if not rec['target'] == 'manifest': - self.log(ERROR, "File %s has not been inserted" % rec['name']) - # raise Hell :) # bab: we don't actually want to do that. We want to continue. + self.log(ERROR, + "File %s has not been inserted" % rec['name']) + # raise Hell :) + # bab: we don't actually want to do that. + # We want to continue. continue # otherwise, ok to add msgLines.extend(fileMsgLines(n, rec)) # note that the file does not need additional actions. rec['state'] = 'idle' - # TODO: sum up sizes here to find the error due to which the files get truncated. - + # TODO: sum up sizes here to find the error due to which + # the files get truncated. + # don't forget to up the count n += 1 - + # finish the command buffer if datatoappend: msgLines.append("Data") else: msgLines.append("EndMessage") - + # and save - self.manifestCmdBuf = b"\n".join(i.encode("utf-8") for i in msgLines) + b"\n" + self.manifestCmdBuf = b"\n".join(i.encode("utf-8") + for i in msgLines) + b"\n" self.manifestCmdBuf += b"".join(datatoappend) datalength = len(b"".join(datatoappend)) # FIXME: Reports an erroneous Error when no physical index is present. reportedlength = sum(rec['sizebytes'] for rec in self.files if rec.get('target', 'separate') == 'manifest' - and rec.get('dda', False) == False) + and rec.get('dda', False) is False) if self.indexRec not in self.files: reportedlength += self.indexRec['sizebytes'] if datalength != reportedlength: - self.log(ERROR, "The datalength of %s to be uploaded does not match the length reported to the node of %s. This is a bug, please report it to the pyFreenet maintainer." % (datalength, reportedlength)) - - - + self.log(ERROR, + ("The datalength of %s to be uploaded " % datalength) + + "does not match the length " + + ("reported to the node of %s. 
" % reportedlength) + + "This is a bug, please report it to " + + "the pyFreenet maintainer.") def fallbackLogger(self, level, msg): """ This logger is used if no node FCP port is available """ print(msg) - - - - # utility funcs - def getFileSize(filepath): """ Get the size of the file in bytes. @@ -1717,25 +1727,23 @@ def getFileSize(filepath): return os.stat(filepath)[stat.ST_SIZE] - def fixUri(uri, name, version=0): """ Conditions a URI to be suitable for freesitemgr """ # step 1 - lose any 'freenet:' uri = uri.split("freenet:")[-1] - + # step 2 - convert SSK@ to USK@ uri = uri.replace("SSK@", "USK@") - + # step 3 - lose the path info uri = uri.split("/")[0] - + # step 4 - attach the name and version uri = "%s/%s/%s" % (uri, name, version) - - return uri + return uri def ChkTargetFilename(name): @@ -1745,15 +1753,11 @@ def ChkTargetFilename(name): return os.path.basename(name) - def runTest(): - + mgr = SiteMgr(verbosity=DEBUG) mgr.insert() - - - if __name__ == '__main__': runTest() From 91c37fd79bd57172dfbbe1f3a5084f375802da99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Debora=20W=C3=B6pcke?= Date: Sun, 12 Jan 2025 14:03:53 +0100 Subject: [PATCH 3/5] Add the obvious type hints to functions and methods Also fix some probless found by this change --- fcp3/sitemgr.py | 93 +++++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/fcp3/sitemgr.py b/fcp3/sitemgr.py index 3d0a296..62c2b86 100644 --- a/fcp3/sitemgr.py +++ b/fcp3/sitemgr.py @@ -35,6 +35,7 @@ import threading import time import traceback +from typing import List import fcp3 as fcp from fcp3 import CRITICAL, ERROR, INFO, DETAIL, DEBUG # , NOISY @@ -76,7 +77,7 @@ class SiteMgr: New nuclear-war-resistant Freesite insertion class """ - def __init__(self, *args, **kw): + def __init__(self, *args, **kw) -> None: """ Creates a new SiteMgr object @@ -118,7 +119,7 @@ def __init__(self, *args, **kw): self.load() - def load(self): + def load(self) -> None: """ Loads all site records """ @@ -151,7 +152,7 @@ def load(self): try: # create node, if we can - self.node = fcp.FCPNode(**nodeopts) + self.node: fcp.FCPNode | None = fcp.FCPNode(**nodeopts) if not self.chkCalcNode: self.chkCalcNode = self.node @@ -191,7 +192,7 @@ def load(self): ) self.sites.append(site) - def create(self): + def create(self) -> None: """ Creates a sites config """ @@ -207,7 +208,7 @@ def create(self): self.save() - def save(self): + def save(self) -> None: # now write out some boilerplate f = open(self.conffile, "w") @@ -231,7 +232,7 @@ def save(self): for site in self.sites: site.save() - def addSite(self, **kw): + def addSite(self, **kw) -> 'SiteState': """ adds a new site @@ -262,7 +263,7 @@ def addSite(self, **kw): return site - def hasSite(self, name): + def hasSite(self, name: str) -> bool: """ Returns True if site 'name' already exists """ @@ -272,7 +273,7 @@ def hasSite(self, name): except Exception: return False - def getSite(self, name): + def getSite(self, name: str) -> 'SiteState': """ Returns a ref to the SiteState object for site 'name', or raises an exception if it doesn't exist @@ -282,13 +283,13 @@ def getSite(self, name): except Exception: raise Exception("No such site '%s'" % name) - def getSiteNames(self): + def getSiteNames(self) -> List[str]: """ Returns a list of names of known sites """ return [site.name for site in self.sites] - def removeSite(self, name): + def removeSite(self, name: str) -> None: """ Removes given site """ @@ -296,14 +297,14 @@ def removeSite(self, name): 
self.sites.remove(site) os.unlink(site.path) - def cancelUpdate(self, name): + def cancelUpdate(self, name: str) -> None: """ Removes given site """ site = self.getSite(name) site.cancelUpdate() - def insert(self, *sites, **kw): + def insert(self, *sites, **kw) -> None: """ Inserts either named site, or all sites if no name given """ @@ -312,11 +313,11 @@ def insert(self, *sites, **kw): self.securityCheck() if sites: - sites = [self.getSite(name) for name in sites] + sites2 = [self.getSite(name) for name in sites] else: - sites = self.sites + sites2 = self.sites - for site in sites: + for site in sites2: if cron: print("--------------------------------------" + "-------------------------------") @@ -324,7 +325,7 @@ def insert(self, *sites, **kw): site.name, time.asctime())) site.insert() - def reinsert(self, *sites, **kw): + def reinsert(self, *sites, **kw) -> None: """ Mark sites for reinsert: set all external files as needsupload """ @@ -333,11 +334,11 @@ def reinsert(self, *sites, **kw): self.securityCheck() if sites: - sites = [self.getSite(name) for name in sites] + sites2 = [self.getSite(name) for name in sites] else: - sites = self.sites + sites2 = self.sites - for site in sites: + for site in sites2: if cron: print("--------------------------------------" + "-------------------------------") @@ -346,19 +347,19 @@ def reinsert(self, *sites, **kw): site.mark_for_reinsert() site.insert() - def cleanup(self, *sites, **kw): + def cleanup(self, *sites, **kw) -> None: """ Cleans up node queue in respect of completed inserts for given sites """ if sites: - sites = [self.getSite(name) for name in sites] + sites2 = [self.getSite(name) for name in sites] else: - sites = self.sites + sites2 = self.sites - for site in sites: + for site in sites2: site.cleanup() - def securityCheck(self): + def securityCheck(self) -> None: # a nice little tangent for the entertainment of those who # never bother to read the source code @@ -417,7 +418,7 @@ def wln(delay, s): print() time.sleep(0.5) - def fallbackLogger(self, level, msg): + def fallbackLogger(self, level: int, msg: str) -> None: """ This logger is used if no node FCP port is available """ @@ -507,7 +508,7 @@ def __init__(self, **kw): # raise Exception("Site %s, directory %s, no %s present" % ( # self.name, self.dir, self.index)) - def load(self): + def load(self) -> None: """ Attempt to load a freesite """ @@ -584,7 +585,7 @@ def load(self): finally: self.fileLock.release() - def create(self): + def create(self) -> None: """ Creates initial site config """ @@ -603,7 +604,7 @@ def create(self): # now can save self.save() - def mark_for_reinsert(self): + def mark_for_reinsert(self) -> None: """ mark all files as changed """ @@ -612,7 +613,7 @@ def mark_for_reinsert(self): self.needToUpdate = True self.save() - def save(self): + def save(self) -> None: """ Saves the node state """ @@ -709,7 +710,7 @@ def getFile(self, name): return f return None - def cancelUpdate(self): + def cancelUpdate(self) -> None: """ Cancels an insert that was happening """ @@ -727,7 +728,7 @@ def cancelUpdate(self): self.log(INFO, "cancel:%s:update cancelled" % self.name) - def insert(self): + def insert(self) -> None: """ Performs insertion of this site, or gets as far as we can, saving along the way so we can later resume @@ -962,7 +963,7 @@ def insert(self): self.save() - def cleanup(self): + def cleanup(self) -> None: """ Cleans up node queue in respect of currently-inserting freesite, removing completed queue items and updating our local records @@ -976,7 +977,7 @@ def 
cleanup(self): else: self.clearNodeQueue() - def managePendingInsert(self): + def managePendingInsert(self) -> None: """ Check on the status of the currently running insert """ @@ -1086,7 +1087,7 @@ def updateEdition(uri, ed): self.save() - def scan(self): + def scan(self) -> None: """ Scans all files in the site's filesystem directory, marking the ones which need updating or new inserting @@ -1192,7 +1193,7 @@ def scan(self): else: self.log(INFO, "scan: site %s has not changed" % self.name) - def clearNodeQueue(self): + def clearNodeQueue(self) -> None: """ remove all node queue records relating to this site """ @@ -1221,11 +1222,11 @@ def readNodeQueue(self): jobs[name] = job return jobs - def createIndexAndSitemapIfNeeded(self): + def createIndexAndSitemapIfNeeded(self) -> None: """ generate and insert an index.html if none exists """ - def genindexuri(): + def genindexuri() -> None: # dumb hack - calculate uri if missing if not self.indexRec.get('uri', None): self.indexRec['uri'] = self.chkCalcNode.genchk( @@ -1239,7 +1240,7 @@ def genindexuri(): self.insertingIndex = True self.save() - def gensitemapuri(): + def gensitemapuri() -> None: # dumb hack - calculate uri if missing if not self.sitemapRec.get('uri', None): self.sitemapRec['uri'] = self.chkCalcNode.genchk( @@ -1249,7 +1250,7 @@ def gensitemapuri(): # yes, remember its uri for the manifest self.sitemapUri = self.sitemapRec['uri'] - def createindex(): + def createindex() -> None: # create an index.html with a directory listing title = "Freesite %s directory listing" % self.name, indexlines = [ @@ -1299,7 +1300,7 @@ def createindex(): raise # needs no URI: is always in manifest. - def createsitemap(): + def createsitemap() -> None: # create a sitemap.html with a directory listing title = "Sitemap for %s" % self.name, lines = [ @@ -1395,7 +1396,7 @@ def createsitemap(): # register the sitemap for upload. self.files.append(self.sitemapRec) - def allocId(self, name): + def allocId(self, name: str) -> str: """ Allocates a unique ID for a given file """ @@ -1710,7 +1711,7 @@ def fileMsgLines(n, rec): "This is a bug, please report it to " + "the pyFreenet maintainer.") - def fallbackLogger(self, level, msg): + def fallbackLogger(self, level: int, msg: str) -> None: """ This logger is used if no node FCP port is available """ @@ -1720,14 +1721,14 @@ def fallbackLogger(self, level, msg): # utility funcs -def getFileSize(filepath): +def getFileSize(filepath: str) -> int: """ Get the size of the file in bytes. 
""" return os.stat(filepath)[stat.ST_SIZE] -def fixUri(uri, name, version=0): +def fixUri(uri: str, name: str, version: int = 0) -> str: """ Conditions a URI to be suitable for freesitemgr """ @@ -1746,14 +1747,14 @@ def fixUri(uri, name, version=0): return uri -def ChkTargetFilename(name): +def ChkTargetFilename(name: str) -> str: """ Make the name suitable for a ChkTargetFilename """ return os.path.basename(name) -def runTest(): +def runTest() -> None: mgr = SiteMgr(verbosity=DEBUG) mgr.insert() From 29ba583dfbaed30c9a2fb2a688d1f0e8c0ba68cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Debora=20W=C3=B6pcke?= Date: Mon, 13 Jan 2025 12:34:03 +0100 Subject: [PATCH 4/5] Add a few more obvious type annotations --- fcp3/sitemgr.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/fcp3/sitemgr.py b/fcp3/sitemgr.py index 62c2b86..d338d4b 100644 --- a/fcp3/sitemgr.py +++ b/fcp3/sitemgr.py @@ -117,6 +117,8 @@ def __init__(self, *args, **kw) -> None: # To decide whether to upload index and activelink as part of # the manifest, we need to remember their record. + self.sites: List['SiteState'] + self.load() def load(self) -> None: @@ -347,7 +349,7 @@ def reinsert(self, *sites, **kw) -> None: site.mark_for_reinsert() site.insert() - def cleanup(self, *sites, **kw) -> None: + def cleanup(self, *sites: str, **kw) -> None: """ Cleans up node queue in respect of completed inserts for given sites """ @@ -366,12 +368,12 @@ def securityCheck(self) -> None: now = time.localtime() - def w(delay, s): + def w(delay: float, s: str) -> None: time.sleep(delay) sys.stdout.write(s) sys.stdout.flush() - def wln(delay, s): + def wln(delay: float, s: str) -> None: w(delay, s) print() @@ -635,7 +637,7 @@ def save(self) -> None: w = f.write - def writeVars(comment="", tail="", **kw): + def writeVars(comment: str = "", tail: str = "", **kw): """ Pretty-print a 'name=value' line, with optional tail string """ @@ -701,7 +703,7 @@ def writeVars(comment="", tail="", **kw): finally: self.fileLock.release() - def getFile(self, name): + def getFile(self, name: str): """ returns the control record for file 'name' """ @@ -1035,7 +1037,7 @@ def managePendingInsert(self) -> None: # uplift the new URI, extract the edition number, # update our record - def updateEdition(uri, ed): + def updateEdition(uri: str, ed: str) -> str: return "/".join(uri.split("/")[:2] + [ed]) manifestUri = job.result edition = manifestUri.split("/")[-1] From 6672682e9366325ba9e79573c6daa8ae47f6c6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Debora=20W=C3=B6pcke?= Date: Mon, 13 Jan 2025 14:25:52 +0100 Subject: [PATCH 5/5] Add the File type and some more methods to fulfil the type annotation. This created two new warnings for the case where the code assumes that rec['name'] is bytes and not str. I didn't address those since I want this to be a low-risk change. 
--- fcp3/sitemgr.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/fcp3/sitemgr.py b/fcp3/sitemgr.py index d338d4b..9c0a144 100644 --- a/fcp3/sitemgr.py +++ b/fcp3/sitemgr.py @@ -35,7 +35,7 @@ import threading import time import traceback -from typing import List +from typing import List, Dict, TypedDict import fcp3 as fcp from fcp3 import CRITICAL, ERROR, INFO, DETAIL, DEBUG # , NOISY @@ -427,6 +427,20 @@ def fallbackLogger(self, level: int, msg: str) -> None: print(msg) +class File(TypedDict, total=False): + mimetype: str + hash: str + name: str + uri: str + sizebytes: int + state: str + path: str + dda: bool + id: str | None + target: str + chkname: str + + class SiteState: """ Stores the current state of a single freesite's insertion, in a way @@ -479,7 +493,8 @@ def __init__(self, **kw): self.uriPub = kw.get('uriPub', '') self.uriPriv = kw.get('uriPriv', '') self.updateInProgress = True - self.files = [] + self.files: List[File] = [] + self.filesDict: Dict[str, File] self.maxConcurrent = kw.get('maxconcurrent', defaultMaxConcurrent) self.priority = kw.get('priority', defaultPriority) self.basedir = kw.get('basedir', defaultBaseDir) @@ -703,7 +718,7 @@ def writeVars(comment: str = "", tail: str = "", **kw): finally: self.fileLock.release() - def getFile(self, name: str): + def getFile(self, name: str) -> File | None: """ returns the control record for file 'name' """ @@ -1113,7 +1128,7 @@ def scan(self) -> None: physFiles = [] physDict = {} for f in lst: - rec = {} + rec: File = {} try: enc = "utf-8" f['fullpath'].decode(enc) @@ -1404,7 +1419,7 @@ def allocId(self, name: str) -> str: """ return "freesitemgr|%s|%s" % (self.name, name) - def markManifestFiles(self): + def markManifestFiles(self) -> None: """ Selects the files which should directly be put in the manifest and marks them with rec['target'] = 'manifest'. All other files @@ -1497,7 +1512,7 @@ def markManifestFiles(self): if rec['name'] in fileNamesInIndex and rec['name'].lower().endswith('.css')]) fileNamesInManifest = set() - recByIndexAndSize = [] + recByIndexAndSize: List[File] = [] recByIndexAndSize.extend(rec for rec in recBySize if rec['name'] in fileNamesInIndexCSS) recByIndexAndSize.extend(rec for rec in recBySize @@ -1541,7 +1556,7 @@ def markManifestFiles(self): rec['target'] = 'manifest' totalsize += rec['sizebytes'] - def makeManifest(self): + def makeManifest(self) -> None: """ Create a site manifest insertion command buffer from our current inventory @@ -1564,10 +1579,10 @@ def makeManifest(self): # add each file's entry to the command buffer n = 0 # cache DDA requests to avoid stalling for ages on big sites - hasDDAtested = {} + hasDDAtested: Dict[str, bool] = {} datatoappend = [] - def fileMsgLines(n, rec): + def fileMsgLines(n: int, rec: File) -> List[str]: if (rec.get('target', 'separate') == 'separate' and rec.get('uri', None)): return [