From 2351904867eea98db823f56d50ae70e84854bc00 Mon Sep 17 00:00:00 2001
From: Christian Panse <cp@fgcz.ethz.ch>
Date: Thu, 25 Jan 2018 16:37:55 +0100
Subject: [PATCH] cosmetics on hostname and some minor items

---
 fgcz/fcc.py                          | 39 +++++++++++++++++++++++-----
 fgcz/scripts/fgcz_fcc_run_windows.py |  8 +++---
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/fgcz/fcc.py b/fgcz/fcc.py
index 4d5e972..e95ea12 100644
--- a/fgcz/fcc.py
+++ b/fgcz/fcc.py
@@ -100,7 +100,7 @@
 import hashlib
 
 
-def create_logger(name="fcc", address=("130.60.81.148", 514)):
+def create_logger(name="fcc", address=("fgcz-ms.uzh.ch", 514)):
     """
     create a logger object
     """
@@ -253,7 +253,7 @@ def parseConfig(xml):
         rule = dict()
         try:
             converter = converterDict[i.attributes['converterID'].value]
-            for a in ("converterDir", "converterCmd", "converterOptions", "toFileExt", "fromFileExt", "hostname"):
+            for a in ("converterID", "converterDir", "converterCmd", "converterOptions", "toFileExt", "fromFileExt", "hostname"):
                 rule[a] = converter[a]
 
             # hard constraints
@@ -358,7 +358,7 @@ def usage():
 class Fcc:
     """
     """
-    parameters = {'config_url': "http://fgcz-s-021.uzh.ch/config/fcc_config.xml", 'readme_url': "http://fgcz-s-021.uzh.ch/config/fcc_readme.txt",
+    parameters = {'config_url': "http://fgcz-ms.uzh.ch/config/fcc_config.xml", 'readme_url': "http://fgcz-r-021.uzh.ch/config/fcc_readme.txt",
                  'crawl_pattern': ['/srv/www/htdocs/Data2San/',
                         'p[0-9]{2,4}', 'Metabolomics',
                         '(GCT)_[0-9]',
@@ -451,13 +451,34 @@ def process(self, file):
                 """
                 create the directory in the python way,
                 """
-                if not os.path.exists(converterDir) and self.parameters['exec']:
+                # create the directory even when 'exec' is off: the README below is always written into it
+                if not os.path.exists(converterDir):
                     try:
                         os.mkdir(converterDir)
                     except:
                         logger.error(
                             "mkdir {0} failed.".format(converterDir))
-                        sys.exit(1)
+                        raise
+
+                readme_filename = os.path.normpath("{0}/README.txt".format(converterDir))
+                readme_content = """
+the files contained in this directory have been generated using fcc applying rule #{0}.
+
+more information can be found using the following url:
+
+http://fgcz-data.uzh.ch/config/fcc_config.xml#converterID-{0}
+
+or by contacting Christian Panse <cp@fgcz.ethz.ch>
+
+if you use these files in your publication please cite:
+http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3614436/
+
+
+""".format(mrule["converterID"])
+
+                if os.path.isfile(readme_filename) is False:
+                    with open(readme_filename, "w") as readmef:
+                        readmef.write(readme_content)
 
                 toFileName = os.path.normpath(
                     "{0}/{1}{2}".format(converterDir,
@@ -479,18 +500,24 @@ def process(self, file):
                     candCmdLineMD5 = checksum.hexdigest()
 
                     if not candCmdLineMD5 in self.processedCmdMD5Dict:
+
                         self.processedCmdMD5Dict[candCmdLineMD5] = candCmdLine
                         if self.parameters['exec']:
                             self.pool.map_async(myExecWorker0, [ candCmdLine ],
                                 callback=lambda i: logger.info("callback {0}".format(i)))
                             logger.info("added|cmd='{}' to pool".format(candCmdLine))
-
+                    else:
+                        # TODO(cp): make this log path generic instead of hard-coding a Windows location
+                        with open("C:\\FGCZ\\fcc\Cmds-problems.txt", "a") as cmdf:
+                            cmdf.write("{0}\t{1}\n".format(candCmdLineMD5, candCmdLine))
 
     def run(self):
         """
 
         :return:
         """
+
+
         crawler = FgczCrawl(pattern=self.parameters['crawl_pattern'], max_time_diff=self.parameters['max_time_diff'])
 
         if not os.path.exists(os.path.normpath(self.parameters['crawl_pattern'][0])):
diff --git a/fgcz/scripts/fgcz_fcc_run_windows.py b/fgcz/scripts/fgcz_fcc_run_windows.py
index 785fa5e..8db1a2e 100755
--- a/fgcz/scripts/fgcz_fcc_run_windows.py
+++ b/fgcz/scripts/fgcz_fcc_run_windows.py
@@ -1,7 +1,7 @@
 
 import getopt
 import sys
-from fgcz import fcc
+import fcc
 
 if __name__ == "__main__":
     try:
@@ -36,10 +36,10 @@
 
     crawl_pattern = ['S:', 'p[0-9]+',
                      'Proteomics',
-                     '(EXTERNAL|FUSION|G2HD|GCT|ORBI|QEXACTIVE|QEXACTIVEHF|QTOF|QTRAP|T100|TOFTOF|TRIPLETOF|TSQ|VELOS)_[0-9]',
+                     '(EXTERNAL|FUSION|G2HD|GCT|ORBI|QEXACTIVE|QEXACTIVEHF|QEXACTIVEHFX|QTOF|QTRAP|T100|TOFTOF|TRIPLETOF|TSQ|VELOS)_[0-9]',
                      '[a-z]{3,18}_[0-9]{8}(_[-a-zA-Z0-9_]+){0,1}',
-                     '[-a-zA-Z0-9_]+.(RAW|raw)$']
+                     '[-a-zA-Z0-9_]+.(raw)$']
 
     fcc.set_para('crawl_pattern', crawl_pattern)
-    fcc.set_para('max_time_diff', 60 * 60 * 24 * 7 * 10) 
+    fcc.set_para('max_time_diff', 60 * 60 * 24 * 7 * 2) 
     fcc.run()