diff --git a/scripts/README.md b/scripts/README.md
index a44656d51f..e9538def88 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -43,7 +43,9 @@ Two commands exist for downloading logs.
 |-l, --logtype|Glob matcher for type of log file to download| None (match all)|
 |-S, --skip-s3|Don't search/download s3 logs|false|
 |-L, --skip-live|Don't search/download live logs|false|
-|-V, --verbose|More verbose output||
+|-U, --use-cache|Don't redownload live logs, prefer the cached version|false|
+|--search|Run logsearch on the cache of local files (no downloading)|false|
+|-V, --verbose|More verbose output|false|

 ##Grep and Log Files
 When the `-g` option is set, the log fetcher will grep the downloaded files for the provided regex.
@@ -109,3 +111,30 @@ You can also provide the `-g` option which will provide the grep string to the s
 |-l, --logfile|Log file path to tail (ie logs/access.log)|Must be set!|
 |-v, --verbose|Extra output about the task id associated with logs in the output|False|

+#Logsearch
+
+An offline version of `logfetch` that will aid in searching through your directory of cached files. The syntax is the same as for `logfetch` with a smaller list of options, shown below:
+
+##Options
+|Flags|Description|Default|
+|:---:|:---------|:-----:|
+|-f , --conf-folder|Folder to look for configuration files|`~/.logfetch`|
+|-c , --conf-file|Configuration file to use (path relative to conf_folder)|default|
+|-t , --task-id|Task Id to fetch logs for||
+|-r , --request-id|Request Id to fetch logs for||
+|-d , --deploy-id|Deploy Id to fetch logs for (Must also specify requestId when using this option)||
+|-o, --dest|Cache folder to search|`~/.logfetch_cache`|
+|-s , --start|Search for logs no older than this, can be an integer number of days or a date in format "%Y-%m-%d %H:%M:%S" or "%Y-%m-%d", leaving off h-m-s will be inclusive for the current day (00:00:00)|7 days ago|
+|-e , --end|Search for logs no newer than this, can be an integer number of days or a date in format "%Y-%m-%d %H:%M:%S" or "%Y-%m-%d", leaving off h-m-s will be inclusive for the current day (23:59:59)|None (now)|
+|-z , --local-zone|Specify times for `-s` and `-e` in your local time zone. If this is not set, times are assumed to be in UTC|unset/false|
+|-p, --file-pattern|Should match the executor.s3.uploader.pattern setting, determines if we can match on file name for s3 logs|`%requestId/%Y/%m/%taskId_%index-%s-%filename`|
+|-g, --grep|Grep string for searching log files||
+|-l, --logtype|Glob matcher for type of log file to search| None (match all)|
+|-V, --verbose|More verbose output|false|
+
+example:
+
+- grep for a regex in logs matching `*.out` from request `My_Request_Id`
+
+`logsearch -r 'My_Request_Id' -l '*.out' -g 'Regex_here'`
+
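The `-s`/`-e` rows in the table above accept either an integer number of days or a timestamp string. As a rough illustration of that behavior only: the real conversion is done by the `convert_to_date` helper that the entry points call, which this patch does not touch, and the real tool also pushes a date-only `-e` value to the end of the day. The helper below is a hypothetical sketch, not logfetch code.

```python
# Hypothetical sketch of the '-s'/'-e' value handling described in the options table.
from datetime import datetime, timedelta

def to_datetime(value):
    if value.isdigit():  # a plain integer means "this many days ago"
        return datetime.utcnow() - timedelta(days=int(value))
    try:
        return datetime.strptime(value, '%Y-%m-%d %H:%M:%S')  # full timestamp
    except ValueError:
        return datetime.strptime(value, '%Y-%m-%d')  # date only: midnight of that day

print(to_datetime('7'))           # seven days ago
print(to_datetime('2015-06-04'))  # 2015-06-04 00:00:00
```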
diff --git a/scripts/logfetch/entrypoint.py b/scripts/logfetch/entrypoint.py
index c83def3c03..c5e3efd94d 100644
--- a/scripts/logfetch/entrypoint.py
+++ b/scripts/logfetch/entrypoint.py
@@ -9,6 +9,7 @@
 from fake_section_head import FakeSectionHead
 from live_logs import download_live_logs
 from s3_logs import download_s3_logs
+from search import find_cached_logs
 from tail import start_tail
 from grep import grep_files
 from cat import cat_files
@@ -34,6 +35,13 @@ def tail_logs(args):
   except KeyboardInterrupt:
     exit('Stopping logtail...', 'magenta')

+def search_logs(args):
+  try:
+    all_logs = find_cached_logs(args)
+    grep_files(args, all_logs)
+  except KeyboardInterrupt:
+    exit('Stopping logfetch...', 'magenta')
+
 def fetch_logs(args):
   try:
     check_dest(args)
@@ -119,7 +127,7 @@ def fetch():
   parser.set_defaults(**defaults)
   parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
   parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
-  parser.add_argument("-T","--task-count", dest="task_count", help="Number of recent tasks per request to fetch logs from", type=int)
+  parser.add_argument("-T", "--task-count", dest="task_count", help="Number of recent tasks per request to fetch logs from", type=int)
   parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of task to fetch logs for (can be a glob)")
   parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
   parser.add_argument("-n", "--num-parallel-fetches", dest="num_parallel_fetches", help="Number of fetches to make at once", type=int)
@@ -134,6 +142,8 @@
   parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
   parser.add_argument("-S", "--skip-s3", dest="skip_s3", help="Don't download/search s3 logs", action='store_true')
   parser.add_argument("-L", "--skip-live", dest="skip_live", help="Don't download/search live logs", action='store_true')
+  parser.add_argument("-U", "--use-cache", dest="use_cache", help="Use cache for live logs, don't re-download them", action='store_true')
+  parser.add_argument("--search", dest="search", help="Run logsearch on the local cache of downloaded files", action='store_true')
   parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')

   args = parser.parse_args(remaining_argv)
@@ -149,7 +159,62 @@
     sys.stderr.write('No additional request headers found\n')
     setattr(args, 'headers', {})

-  fetch_logs(args)
+  if args.search:
+    search_logs(args)
+  else:
+    fetch_logs(args)
+
+def search():
+  conf_parser = argparse.ArgumentParser(version=VERSION, description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False)
+  conf_parser.add_argument("-f", "--conf-folder", dest='conf_folder', help="specify a folder for config files to live")
+  conf_parser.add_argument("-c", "--conf-file", dest='conf_file', help="Specify config file within the conf folder", metavar="FILE")
+  args, remaining_argv = conf_parser.parse_known_args()
+  conf_dir = args.conf_folder if args.conf_folder else DEFAULT_CONF_DIR
+  conf_file = os.path.expanduser(conf_dir + '/' + args.conf_file) if args.conf_file else os.path.expanduser(conf_dir + '/' + DEFAULT_CONF_FILE)
+  config = ConfigParser.SafeConfigParser()
+  config.optionxform = str
+
+  defaults = {
+    "dest" : DEFAULT_DEST,
+    "start" : datetime.strptime('{0} 00:00:00'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S") - timedelta(days=DEFAULT_DAYS),
+    "file_pattern" : DEFAULT_S3_PATTERN,
+    "end" : datetime.strptime('{0} 23:59:59'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S")
+  }
+
+  try:
+    config.readfp(FakeSectionHead(open(os.path.expanduser(conf_file))))
+    defaults.update(dict(config.items("Defaults")))
+  except Exception, err:
+    sys.stderr.write(CONF_READ_ERR_FORMAT.format(conf_file, err) + '\n')
+
+  parser = argparse.ArgumentParser(parents=[conf_parser], description="Search log files in the cache directory",
+    prog="logsearch")
+
+  parser.set_defaults(**defaults)
+  parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
+  parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
+  parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of task to fetch logs for (can be a glob)")
+  parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
+  parser.add_argument("-s", "--start", dest="start", help="Search for logs no older than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d'")
+  parser.add_argument("-e", "--end", dest="end", help="Search for logs no newer than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d' (defaults to None/now)")
+  parser.add_argument("-l", "--log-type", dest="logtype", help="Logfile type to search for (ie 'access.log'), can be a glob (ie *.log)")
+  parser.add_argument("-p", "--file-pattern", dest="file_pattern", help="S3 uploader file pattern")
+  parser.add_argument("-g", "--grep", dest="grep", help="Regex to grep for (normal grep syntax) or a full grep command")
+  parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
+  parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')
+
+  args, unknown = parser.parse_known_args(remaining_argv)
+
+  if args.verbose and unknown:
+    sys.stderr.write(colored('Found unknown args {0}'.format(unknown), 'magenta'))
+
+  check_args(args)
+  args.start = convert_to_date(args, args.start)
+  args.end = convert_to_date(args, args.end)
+
+  args.dest = os.path.expanduser(args.dest)
+
+  search_logs(args)

 def cat():
   conf_parser = argparse.ArgumentParser(version=VERSION, description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False)
@@ -183,7 +248,7 @@
   parser.set_defaults(**defaults)
   parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
   parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
-  parser.add_argument("-T","--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int)
+  parser.add_argument("-T", "--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int)
   parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of tasks to fetch logs for (can be a glob)")
   parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
   parser.add_argument("-n", "--num-parallel-fetches", dest="num_parallel_fetches", help="Number of fetches to make at once", type=int)
@@ -197,6 +262,7 @@
   parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
   parser.add_argument("-S", "--skip-s3", dest="skip_s3", help="Don't download/search s3 logs", action='store_true')
   parser.add_argument("-L", "--skip-live", dest="skip_live", help="Don't download/search live logs", action='store_true')
+  parser.add_argument("-U", "--use-cache", dest="use_cache", help="Use cache for live logs, don't re-download them", action='store_true')
   parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')

   args = parser.parse_args(remaining_argv)
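In the new `search()` entry point above, the default `start`/`end` values are built by formatting today's date into a string and parsing it back. Shown only to clarify what those defaults evaluate to (not the code the patch uses): an equivalent formulation with `datetime.combine`, assuming `DEFAULT_DAYS` is the 7-day window the README documents.

```python
# Equivalent of the 'start'/'end' defaults computed in search(), for clarity only.
from datetime import datetime, date, time, timedelta

DEFAULT_DAYS = 7  # per the README, the default search window starts 7 days ago

start_default = datetime.combine(date.today(), time.min) - timedelta(days=DEFAULT_DAYS)  # midnight, 7 days ago
end_default = datetime.combine(date.today(), time.max).replace(microsecond=0)            # today at 23:59:59

print(start_default)
print(end_default)
```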
help="specify a folder for config files to live") + conf_parser.add_argument("-c", "--conf-file", dest='conf_file', help="Specify config file within the conf folder", metavar="FILE") + args, remaining_argv = conf_parser.parse_known_args() + conf_dir = args.conf_folder if args.conf_folder else DEFAULT_CONF_DIR + conf_file = os.path.expanduser(conf_dir + '/' + args.conf_file) if args.conf_file else os.path.expanduser(conf_dir + '/' + DEFAULT_CONF_FILE) + config = ConfigParser.SafeConfigParser() + config.optionxform = str + + defaults = { + "dest" : DEFAULT_DEST, + "start" : datetime.strptime('{0} 00:00:00'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S") - timedelta(days=DEFAULT_DAYS), + "file_pattern" : DEFAULT_S3_PATTERN, + "end" : datetime.strptime('{0} 23:59:59'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S") + } + + try: + config.readfp(FakeSectionHead(open(os.path.expanduser(conf_file)))) + defaults.update(dict(config.items("Defaults"))) + except Exception, err: + sys.stderr.write(CONF_READ_ERR_FORMAT.format(conf_file, err) + '\n') + + parser = argparse.ArgumentParser(parents=[conf_parser], description="Search log files in the cache directory", + prog="logsearch") + + parser.set_defaults(**defaults) + parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for") + parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)") + parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of task to fetch logs for (can be a glob)") + parser.add_argument("-o", "--dest", dest="dest", help="Destination directory") + parser.add_argument("-s", "--start", dest="start", help="Search for logs no older than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d'") + parser.add_argument("-e", "--end", dest="end", help="Search for logs no newer than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d' (defaults to None/now)") + parser.add_argument("-l", "--log-type", dest="logtype", help="Logfile type to downlaod (ie 'access.log'), can be a glob (ie *.log)") + parser.add_argument("-p", "--file-pattern", dest="file_pattern", help="S3 uploader file pattern") + parser.add_argument("-g", "--grep", dest="grep", help="Regex to grep for (normal grep syntax) or a full grep command") + parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true") + parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true') + + args, unknown = parser.parse_known_args(remaining_argv) + + if args.verbose and unknown: + sys.stderr.write(colored('Found unknown args {0}'.format(unknown), 'magenta')) + + check_args(args) + args.start = convert_to_date(args, args.start) + args.end = convert_to_date(args, args.end) + + args.dest = os.path.expanduser(args.dest) + + search_logs(args) def cat(): conf_parser = argparse.ArgumentParser(version=VERSION, description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False) @@ -183,7 +248,7 @@ def cat(): parser.set_defaults(**defaults) parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for") parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)") - 
parser.add_argument("-T","--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int) + parser.add_argument("-T", "--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int) parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of tasks to fetch logs for (can be a glob)") parser.add_argument("-o", "--dest", dest="dest", help="Destination directory") parser.add_argument("-n", "--num-parallel-fetches", dest="num_parallel_fetches", help="Number of fetches to make at once", type=int) @@ -197,6 +262,7 @@ def cat(): parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true") parser.add_argument("-S", "--skip-s3", dest="skip_s3", help="Don't download/search s3 logs", action='store_true') parser.add_argument("-L", "--skip-live", dest="skip_live", help="Don't download/search live logs", action='store_true') + parser.add_argument("-U", "--use-cache", dest="use_cache", help="Use cache for live logs, don't re-download them", action='store_true') parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true') args = parser.parse_args(remaining_argv) diff --git a/scripts/logfetch/live_logs.py b/scripts/logfetch/live_logs.py index 857744e178..25d88d5dd0 100644 --- a/scripts/logfetch/live_logs.py +++ b/scripts/logfetch/live_logs.py @@ -1,3 +1,4 @@ +import os import sys import fnmatch import grequests @@ -8,6 +9,7 @@ DOWNLOAD_FILE_FORMAT = 'http://{0}:5051/files/download.json' BROWSE_FOLDER_FORMAT = '{0}/sandbox/{1}/browse' +TASK_HISTORY_FORMAT = '{0}/history/task/{1}' def download_live_logs(args): tasks = tasks_to_check(args) @@ -24,13 +26,14 @@ def download_live_logs(args): for log_file in base_directory_files(args, task, metadata): logfile_name = '{0}-{1}'.format(task, log_file) if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))): - async_requests.append( - grequests.AsyncRequest('GET',uri , - callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose), - params={'path' : '{0}/{1}/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)}, - headers=args.headers + if should_download(args, logfile_name, task): + async_requests.append( + grequests.AsyncRequest('GET',uri , + callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose), + params={'path' : '{0}/{1}/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)}, + headers=args.headers + ) ) - ) if logfile_name.endswith('.gz'): zipped_files.append('{0}/{1}'.format(args.dest, logfile_name)) else: @@ -43,13 +46,14 @@ def download_live_logs(args): for log_file in logs_folder_files(args, task): logfile_name = '{0}-{1}'.format(task, log_file) if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))): - async_requests.append( - grequests.AsyncRequest('GET',uri , - callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose), - params={'path' : '{0}/{1}/logs/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)}, - headers=args.headers + if should_download(args, logfile_name, task): + async_requests.append( + grequests.AsyncRequest('GET',uri , + callback=generate_callback(uri, args.dest, logfile_name, 
diff --git a/scripts/logfetch/search.py b/scripts/logfetch/search.py
new file mode 100644
index 0000000000..e4ebf93a73
--- /dev/null
+++ b/scripts/logfetch/search.py
@@ -0,0 +1,44 @@
+import os
+import re
+import sys
+import fnmatch
+import logfetch_base
+from termcolor import colored
+
+def find_cached_logs(args):
+  matching_logs = []
+  log_fn_match = get_matcher(args)
+  for filename in os.listdir(args.dest):
+    if fnmatch.fnmatch(filename, log_fn_match) and in_date_range(args, filename):
+      if args.verbose:
+        sys.stderr.write(colored('Including log {0}\n'.format(filename), 'magenta'))
+      matching_logs.append('{0}/{1}'.format(args.dest, filename))
+    else:
+      if args.verbose:
+        sys.stderr.write(colored('Excluding log {0}, not in date range\n'.format(filename), 'magenta'))
+  return matching_logs
+
+
+def in_date_range(args, filename):
+  timestamps = re.findall(r"\d{13}", filename)
+  if timestamps:
+    return logfetch_base.is_in_date_range(args, int(str(timestamps[-1])[0:-3]))
+  else:
+    return True
+
+def get_matcher(args):
+  if args.taskId:
+    if 'filename' in args.file_pattern and args.logtype:
+      return '{0}*{1}*'.format(args.taskId, args.logtype)
+    else:
+      return '{0}*'.format(args.taskId)
+  elif args.deployId and args.requestId:
+    if 'filename' in args.file_pattern and args.logtype:
+      return '{0}-{1}*{2}*'.format(args.requestId, args.deployId, args.logtype)
+    else:
+      return '{0}-{1}*'.format(args.requestId, args.deployId)
+  else:
+    if 'filename' in args.file_pattern and args.logtype:
+      return '{0}*{1}*'.format(args.requestId, args.logtype)
+    else:
+      return '{0}*'.format(args.requestId)
\ No newline at end of file
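To make the matching in `find_cached_logs()` concrete: the glob produced by `get_matcher()` is applied to each cached file name, and any 13-digit (millisecond) timestamp embedded in the name feeds the date-range check. The file name below is made up, shaped roughly like the `{taskId}-{logfile}` names the live-log downloader writes; it is only a worked example, not part of the patch.

```python
# Worked example of the matching done by find_cached_logs(); the file name is hypothetical.
import fnmatch
import re
from datetime import datetime

filename = 'My_Request_Id-mytask_1-1433376000000-service.log'
matcher = '{0}*{1}*'.format('My_Request_Id', '*.log')  # what get_matcher() builds for a request id plus -l '*.log'

print(fnmatch.fnmatch(filename, matcher))  # True

timestamps = re.findall(r"\d{13}", filename)
if timestamps:
    seconds = int(timestamps[-1][0:-3])        # milliseconds -> seconds, as in in_date_range()
    print(datetime.utcfromtimestamp(seconds))  # 2015-06-04 00:00:00
```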
diff --git a/scripts/setup.py b/scripts/setup.py
index 7b4c9a0e32..29c6ba63d1 100644
--- a/scripts/setup.py
+++ b/scripts/setup.py
@@ -11,7 +11,7 @@

 setup(
   name='singularity-logfetch',
-  version='0.19.0',
+  version='0.20.0',
   description='Singularity log fetching and searching',
   author="HubSpot",
   author_email='singularity-users@googlegroups.com',
@@ -24,7 +24,8 @@
     'console_scripts':[
       'logfetch=logfetch.entrypoint:fetch',
       'logtail=logfetch.entrypoint:tail',
-      'logcat=logfetch.entrypoint:cat'
+      'logcat=logfetch.entrypoint:cat',
+      'logsearch=logfetch.entrypoint:search'
     ],
   }
 )
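With the new `console_scripts` entry, installing the package exposes a `logsearch` command mapped to `logfetch.entrypoint:search`. For a quick smoke test without the console script on the PATH, the entry point can also be driven directly; the request id and grep pattern below are placeholders, and a config file plus an existing cache directory are assumed.

```python
# Hedged smoke-test sketch: drive the new logsearch entry point programmatically.
# Assumes singularity-logfetch is installed; argparse reads the arguments from sys.argv.
import sys
from logfetch import entrypoint

sys.argv = ['logsearch', '-r', 'My_Request_Id', '-l', '*.out', '-g', 'ERROR']
entrypoint.search()
```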