Merge pull request #667 from HubSpot/logfetch_cache
add logsearch and use-cache option for live logs
ssalinas committed Aug 25, 2015
2 parents 6bd8546 + 7e8c857 commit d5dd777
Showing 5 changed files with 201 additions and 18 deletions.
31 changes: 30 additions & 1 deletion scripts/README.md
@@ -43,7 +43,9 @@ Two commands exist for downloading logs.
|-l, --logtype|Glob matcher for type of log file to download| None (match all)|
|-S, --skip-s3|Don't search/download s3 logs|false|
|-L, --skip-live|Don't search/download live logs|false|
|-V, --verbose|More verbose output||
|-U, --use-cache|Don't redownload live logs, prefer the cached version|false|
|--search|Run logsearch on the cache of local files (no downloading)|false|
|-V, --verbose|More verbose output|false|

##Grep and Log Files
When the `-g` option is set, the log fetcher will grep the downloaded files for the provided regex.
@@ -109,3 +111,30 @@ You can also provide the `-g` option which will provide the grep string to the s
|-l, --logfile|Log file path to tail (ie logs/access.log)|Must be set!|
|-v, --verbose|Extra output about the task id associated with logs in the output|False|

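Example (the request id is illustrative; flags other than `-l` and `-g` follow the same conventions as the `logfetch` options above):

`logtail -r 'My_Request_Id' -l 'logs/access.log' -g 'ERROR'`
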
#Logsearch

An offline version of `logfetch` that will aid in searching through your directory of cached files. The syntax is the same as for `logfetch` with a smaller list of options, shown below:

##Options
|Flags|Description|Default|
|:---:|:---------|:-----:|
|-f, --conf-folder|Folder to look for configuration files|`~/.logfetch`|
|-c, --conf-file|Configuration file to use (path relative to conf_folder)|default|
|-t, --task-id|Task Id to fetch logs for||
|-r, --request-id|Request Id to fetch logs for||
|-d, --deploy-id|Deploy Id to fetch logs for (Must also specify requestId when using this option)||
|-o, --dest|Cache folder to search|`~/.logfetch_cache`|
|-s, --start|Search for logs no older than this, can be an integer number of days or a date in format "%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"; leaving off h-m-s is inclusive for that day (00:00:00)|7 days ago|
|-e, --end|Search for logs no newer than this, can be an integer number of days or a date in format "%Y-%m-%d %H:%M:%S" or "%Y-%m-%d"; leaving off h-m-s is inclusive for that day (23:59:59)|None (now)|
|-z, --local-zone|Specify times for `-s` and `-e` in your local time zone. If this is not set, times are assumed to be in UTC|unset/false|
|-p, --file-pattern|Should match the executor.s3.uploader.pattern setting; determines if we can match on file name for s3 logs|`%requestId/%Y/%m/%taskId_%index-%s-%filename`|
|-g, --grep|Grep string for searching log files||
|-l, --logtype|Glob matcher for type of log file to search|None (match all)|
|-V, --verbose|More verbose output|false|

Example:

- grep logs matching `*.out` from request `My_Request_Id`

`logsearch -r 'My_Request_Id' -l '*.out' -g 'Regex_here'`
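
- the same search limited to a single deploy of that request over the last two days of cached logs (the deploy id below is illustrative)

`logsearch -r 'My_Request_Id' -d 'My_Deploy_Id' -s 2 -g 'Regex_here'`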

72 changes: 69 additions & 3 deletions scripts/logfetch/entrypoint.py
@@ -9,6 +9,7 @@
from fake_section_head import FakeSectionHead
from live_logs import download_live_logs
from s3_logs import download_s3_logs
from search import find_cached_logs
from tail import start_tail
from grep import grep_files
from cat import cat_files
@@ -34,6 +35,13 @@ def tail_logs(args):
except KeyboardInterrupt:
exit('Stopping logtail...', 'magenta')

def search_logs(args):
try:
all_logs = find_cached_logs(args)
grep_files(args, all_logs)
except KeyboardInterrupt:
exit('Stopping logfetch...', 'magenta')

def fetch_logs(args):
try:
check_dest(args)
@@ -119,7 +127,7 @@ def fetch():
parser.set_defaults(**defaults)
parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
parser.add_argument("-T","--task-count", dest="task_count", help="Number of recent tasks per request to fetch logs from", type=int)
parser.add_argument("-T", "--task-count", dest="task_count", help="Number of recent tasks per request to fetch logs from", type=int)
parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of task to fetch logs for (can be a glob)")
parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
parser.add_argument("-n", "--num-parallel-fetches", dest="num_parallel_fetches", help="Number of fetches to make at once", type=int)
@@ -134,6 +142,8 @@ def fetch():
parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
parser.add_argument("-S", "--skip-s3", dest="skip_s3", help="Don't download/search s3 logs", action='store_true')
parser.add_argument("-L", "--skip-live", dest="skip_live", help="Don't download/search live logs", action='store_true')
parser.add_argument("-U", "--use-cache", dest="use_cache", help="Use cache for live logs, don't re-download them", action='store_true')
parser.add_argument("--search", dest="search", help="run logsearch on the local cache of downloaded files", action='store_true')
parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')

args = parser.parse_args(remaining_argv)
@@ -149,7 +159,62 @@ def fetch():
sys.stderr.write('No additional request headers found\n')
setattr(args, 'headers', {})

fetch_logs(args)
if args.search:
search_logs(args)
else:
fetch_logs(args)

def search():
conf_parser = argparse.ArgumentParser(version=VERSION, description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False)
conf_parser.add_argument("-f", "--conf-folder", dest='conf_folder', help="specify a folder for config files to live")
conf_parser.add_argument("-c", "--conf-file", dest='conf_file', help="Specify config file within the conf folder", metavar="FILE")
args, remaining_argv = conf_parser.parse_known_args()
conf_dir = args.conf_folder if args.conf_folder else DEFAULT_CONF_DIR
conf_file = os.path.expanduser(conf_dir + '/' + args.conf_file) if args.conf_file else os.path.expanduser(conf_dir + '/' + DEFAULT_CONF_FILE)
config = ConfigParser.SafeConfigParser()
config.optionxform = str

defaults = {
"dest" : DEFAULT_DEST,
"start" : datetime.strptime('{0} 00:00:00'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S") - timedelta(days=DEFAULT_DAYS),
"file_pattern" : DEFAULT_S3_PATTERN,
"end" : datetime.strptime('{0} 23:59:59'.format(datetime.now().strftime("%Y-%m-%d")), "%Y-%m-%d %H:%M:%S")
}

try:
config.readfp(FakeSectionHead(open(os.path.expanduser(conf_file))))
defaults.update(dict(config.items("Defaults")))
except Exception, err:
sys.stderr.write(CONF_READ_ERR_FORMAT.format(conf_file, err) + '\n')

parser = argparse.ArgumentParser(parents=[conf_parser], description="Search log files in the cache directory",
prog="logsearch")

parser.set_defaults(**defaults)
parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of task to fetch logs for (can be a glob)")
parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
parser.add_argument("-s", "--start", dest="start", help="Search for logs no older than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d'")
parser.add_argument("-e", "--end", dest="end", help="Search for logs no newer than this, can be an integer number of days or date in format '%%Y-%%m-%%d %%H:%%M:%%S' or '%%Y-%%m-%%d' (defaults to None/now)")
parser.add_argument("-l", "--log-type", dest="logtype", help="Logfile type to downlaod (ie 'access.log'), can be a glob (ie *.log)")
parser.add_argument("-p", "--file-pattern", dest="file_pattern", help="S3 uploader file pattern")
parser.add_argument("-g", "--grep", dest="grep", help="Regex to grep for (normal grep syntax) or a full grep command")
parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')

args, unknown = parser.parse_known_args(remaining_argv)

if args.verbose and unknown:
sys.stderr.write(colored('Found unknown args {0}'.format(unknown), 'magenta'))

check_args(args)
args.start = convert_to_date(args, args.start)
args.end = convert_to_date(args, args.end)

args.dest = os.path.expanduser(args.dest)

search_logs(args)

def cat():
conf_parser = argparse.ArgumentParser(version=VERSION, description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, add_help=False)
@@ -183,7 +248,7 @@ def cat():
parser.set_defaults(**defaults)
parser.add_argument("-t", "--task-id", dest="taskId", help="TaskId of task to fetch logs for")
parser.add_argument("-r", "--request-id", dest="requestId", help="RequestId of request to fetch logs for (can be a glob)")
parser.add_argument("-T","--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int)
parser.add_argument("-T", "--task-count", dest="taskCount", help="Number of recent tasks per request to fetch logs from", type=int)
parser.add_argument("-d", "--deploy-id", dest="deployId", help="DeployId of tasks to fetch logs for (can be a glob)")
parser.add_argument("-o", "--dest", dest="dest", help="Destination directory")
parser.add_argument("-n", "--num-parallel-fetches", dest="num_parallel_fetches", help="Number of fetches to make at once", type=int)
@@ -197,6 +262,7 @@ def cat():
parser.add_argument("-z", "--local-zone", dest="zone", help="If specified, input times in the local time zone and convert to UTC, if not specified inputs are assumed to be UTC", action="store_true")
parser.add_argument("-S", "--skip-s3", dest="skip_s3", help="Don't download/search s3 logs", action='store_true')
parser.add_argument("-L", "--skip-live", dest="skip_live", help="Don't download/search live logs", action='store_true')
parser.add_argument("-U", "--use-cache", dest="use_cache", help="Use cache for live logs, don't re-download them", action='store_true')
parser.add_argument("-V", "--verbose", dest="verbose", help="Print more verbose output", action='store_true')

args = parser.parse_args(remaining_argv)
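
With the entrypoint changes above, the cached-only search is reachable in two ways: through the new `logsearch` console script, or via `logfetch --search`, which skips `fetch_logs` and runs `search_logs` (i.e. `find_cached_logs` followed by `grep_files`) against the local cache. For example (request id and grep string are illustrative):

`logfetch --search -r 'My_Request_Id' -g 'ERROR'`
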
67 changes: 55 additions & 12 deletions scripts/logfetch/live_logs.py
@@ -1,3 +1,4 @@
import os
import sys
import fnmatch
import grequests
@@ -8,6 +9,7 @@

DOWNLOAD_FILE_FORMAT = 'http://{0}:5051/files/download.json'
BROWSE_FOLDER_FORMAT = '{0}/sandbox/{1}/browse'
TASK_HISTORY_FORMAT = '{0}/history/task/{1}'

def download_live_logs(args):
tasks = tasks_to_check(args)
@@ -24,13 +26,14 @@ def download_live_logs(args):
for log_file in base_directory_files(args, task, metadata):
logfile_name = '{0}-{1}'.format(task, log_file)
if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))):
async_requests.append(
grequests.AsyncRequest('GET',uri ,
callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose),
params={'path' : '{0}/{1}/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)},
headers=args.headers
if should_download(args, logfile_name, task):
async_requests.append(
grequests.AsyncRequest('GET',uri ,
callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose),
params={'path' : '{0}/{1}/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)},
headers=args.headers
)
)
)
if logfile_name.endswith('.gz'):
zipped_files.append('{0}/{1}'.format(args.dest, logfile_name))
else:
Expand All @@ -43,13 +46,14 @@ def download_live_logs(args):
for log_file in logs_folder_files(args, task):
logfile_name = '{0}-{1}'.format(task, log_file)
if not args.logtype or (args.logtype and logfetch_base.log_matches(log_file, args.logtype.replace('logs/', ''))):
async_requests.append(
grequests.AsyncRequest('GET',uri ,
callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose),
params={'path' : '{0}/{1}/logs/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)},
headers=args.headers
if should_download(args, logfile_name, task):
async_requests.append(
grequests.AsyncRequest('GET',uri ,
callback=generate_callback(uri, args.dest, logfile_name, args.chunk_size, args.verbose),
params={'path' : '{0}/{1}/logs/{2}'.format(metadata['fullPathToRoot'], metadata['currentDirectory'], log_file)},
headers=args.headers
)
)
)
if logfile_name.endswith('.gz'):
zipped_files.append('{0}/{1}'.format(args.dest, logfile_name))
else:
@@ -71,6 +75,17 @@ def tasks_to_check(args):
else:
return logfetch_base.tasks_for_requests(args)

def task_history(args, task):
uri = TASK_HISTORY_FORMAT.format(logfetch_base.base_uri(args), task)
return get_json_response(uri, args)

def task_still_running(args, task, history):
try:
last_state = history['taskUpdates'][-1]['taskState']
return last_state in ['TASK_RUNNING', 'TASK_STARTING', 'TASK_LAUNCHED', 'TASK_CLEANING']
except:
return True

def files_json(args, task):
uri = BROWSE_FOLDER_FORMAT.format(logfetch_base.base_uri(args), task)
return get_json_response(uri, args)
@@ -97,3 +112,31 @@ def valid_logfile(args, fileData):
is_a_logfile = fnmatch.fnmatch(fileData['name'], '*.log') or fnmatch.fnmatch(fileData['name'], '*.out') or fnmatch.fnmatch(fileData['name'], '*.err')
return is_in_range and not_a_directory and is_a_logfile

def should_download(args, filename, task):
if args.use_cache and already_downloaded(args, filename):
if args.verbose:
sys.stderr.write(colored('Using cached version of file {0}\n'.format(filename), 'magenta'))
return False
if filename.endswith('.gz') and already_downloaded(args, filename):
if args.verbose:
sys.stderr.write(colored('Using cached version of file {0}, zipped file has not changed\n'.format(filename), 'magenta'))
return False
history = task_history(args, task)
if not task_still_running(args, task, history) and already_downloaded(args, filename) and file_not_too_old(args, history, filename):
if args.verbose:
sys.stderr.write(colored('Using cached version of file {0}, {1}, file has not changed\n'.format(filename, history['taskUpdates'][-1]['taskState']), 'magenta'))
else:
if args.verbose:
sys.stderr.write(colored('Will download file {0}, version on the server is newer than cached version\n'.format(filename), 'magenta'))

return True

def file_not_too_old(args, history, filename):
state_updated_at = int(str(history['taskUpdates'][-1]['timestamp'])[0:-3])
return int(os.path.getmtime('{0}/{1}'.format(args.dest, filename))) > state_updated_at

def already_downloaded(args, filename):
have_file = (os.path.isfile('{0}/{1}'.format(args.dest, filename.replace('.gz', '.log'))) or os.path.isfile('{0}/{1}'.format(args.dest, filename)))
return have_file


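The freshness check in `file_not_too_old` compares the task's last state-change timestamp against the cached file's modification time. Singularity reports `taskUpdates` timestamps in epoch milliseconds, so the code drops the last three digits to get whole seconds before comparing against the file's mtime. A minimal sketch of that comparison (the helper name and values below are hypothetical):

```python
import os

def cached_copy_is_current(path, last_update_ms):
    # taskUpdates timestamps are epoch milliseconds; dropping the last three
    # digits converts them to whole seconds, the same resolution as mtime
    last_update_s = int(str(last_update_ms)[0:-3])
    return int(os.path.getmtime(path)) > last_update_s

# e.g. cached_copy_is_current('/home/user/.logfetch_cache/task-access.log', 1440529123456)
```
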
44 changes: 44 additions & 0 deletions scripts/logfetch/search.py
@@ -0,0 +1,44 @@
import os
import re
import sys
import fnmatch
import logfetch_base
from termcolor import colored

def find_cached_logs(args):
  matching_logs = []
  log_fn_match = get_matcher(args)
  for filename in os.listdir(args.dest):
    if fnmatch.fnmatch(filename, log_fn_match) and in_date_range(args, filename):
      if args.verbose:
        sys.stderr.write(colored('Including log {0}\n'.format(filename), 'magenta'))
      matching_logs.append('{0}/{1}'.format(args.dest, filename))
    else:
      if args.verbose:
        sys.stderr.write(colored('Excluding log {0}, not in date range\n'.format(filename), 'magenta'))
  return matching_logs


def in_date_range(args, filename):
  timestamps = re.findall(r"\d{13}", filename)
  if timestamps:
    return logfetch_base.is_in_date_range(args, int(str(timestamps[-1])[0:-3]))
  else:
    return True

def get_matcher(args):
  if args.taskId:
    if 'filename' in args.file_pattern and args.logtype:
      return '{0}*{1}*'.format(args.taskId, args.logtype)
    else:
      return '{0}*'.format(args.taskId)
  elif args.deployId and args.requestId:
    if 'filename' in args.file_pattern and args.logtype:
      return '{0}-{1}*{2}*'.format(args.requestId, args.deployId, args.logtype)
    else:
      return '{0}-{1}*'.format(args.requestId, args.deployId)
  else:
    if 'filename' in args.file_pattern and args.logtype:
      return '{0}*{1}*'.format(args.requestId, args.logtype)
    else:
      return '{0}*'.format(args.requestId)
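
`find_cached_logs` matches cached filenames with an `fnmatch` glob built by `get_matcher`, and `in_date_range` keys off the last 13-digit (epoch-millisecond) run in the filename, truncated to seconds for the `-s`/`-e` comparison. A small sketch of both steps against a hypothetical cached filename (the name and ids are illustrative; real names follow the configured file pattern):

```python
import re
import fnmatch

# Hypothetical cached filename
filename = 'My_Request_Id-My_Deploy_Id-1440529123456-1-access.log'

# For -r/-d with no --logtype, get_matcher would build a glob like this
matcher = 'My_Request_Id-My_Deploy_Id*'
print(fnmatch.fnmatch(filename, matcher))     # True

# in_date_range extracts the last 13-digit run and truncates to seconds
timestamps = re.findall(r"\d{13}", filename)  # ['1440529123456']
print(int(str(timestamps[-1])[0:-3]))         # 1440529123
```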
5 changes: 3 additions & 2 deletions scripts/setup.py
@@ -11,7 +11,7 @@

setup(
name='singularity-logfetch',
version='0.19.0',
version='0.20.0',
description='Singularity log fetching and searching',
author="HubSpot",
author_email='[email protected]',
@@ -24,7 +24,8 @@
'console_scripts':[
'logfetch=logfetch.entrypoint:fetch',
'logtail=logfetch.entrypoint:tail',
'logcat=logfetch.entrypoint:cat'
'logcat=logfetch.entrypoint:cat',
'logsearch=logfetch.entrypoint:search'
],
}
)
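
Since `logsearch` is registered as an additional console script, reinstalling the package (for example `pip install --upgrade singularity-logfetch`, assuming it remains published under that name as before) puts the new command on the PATH alongside `logfetch`, `logtail`, and `logcat`.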
