From bac4abbda6e79fd6c0da58e427ade1cd9629ac22 Mon Sep 17 00:00:00 2001 From: Rob Kaufman Date: Mon, 18 Sep 2023 13:43:00 -0700 Subject: [PATCH] clear tmp before start --- awslambda/handler.rb | 13 +++++++++++++ awslambda/serverless.yml | 5 ++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/awslambda/handler.rb b/awslambda/handler.rb index 98943f7..36c44b9 100644 --- a/awslambda/handler.rb +++ b/awslambda/handler.rb @@ -33,6 +33,7 @@ # @return [Hash] from {#response_body_for} # @todo TODO: Refactor to maybe use #handle method? def copy(event:, context:) + before_run jobs = get_event_body(event: event) output_uris = [] jobs.each do |job| @@ -57,6 +58,7 @@ def copy(event:, context:) # @param context [Object] # @return [Hash] from {#response_body_for} def split_ocr_thumbnail(event:, context:, env: ENV) + before_run # {"s3://space-stone-dev-preprocessedbucketf21466dd-bxjjlz4251re.s3.us-west-1.amazonaws.com/20121820/20121820.ARCHIVAL.pdf":["s3://space-stone-dev-preprocessedbucketf21466dd-bxjjlz4251re.s3.us-west-1.amazonaws.com/{{dir_parts[-1..-1]}}/{{ filename }}"]} # split in to pages handle(generator: DerivativeRodeo::Generators::PdfSplitGenerator, event: event, context: context) do |output_uris| @@ -70,6 +72,7 @@ def split_ocr_thumbnail(event:, context:, env: ENV) end def ocr(event:, context:, env: ENV) + before_run DerivativeRodeo::Generators::HocrGenerator.command_environment_variables = "OMP_THREAD_LIMIT=1 TESSDATA_PREFIX=/opt/share/tessdata LD_LIBRARY_PATH=/opt/lib PATH=/opt/bin:$PATH" handle(generator: DerivativeRodeo::Generators::HocrGenerator, event: event, context: context) do |output_uris| s3_url = s3_name_to_url(bucket_name: env['S3_BUCKET_NAME']) @@ -83,18 +86,22 @@ def ocr(event:, context:, env: ENV) end def thumbnail(event:, context:) + before_run handle(generator: DerivativeRodeo::Generators::ThumbnailGenerator, event: event, context: context) end def word_coordinates(event:, context:) + before_run handle(generator: 
DerivativeRodeo::Generators::WordCoordinatesGenerator, event: event, context: context) end def plain_text(event:, context:) + before_run handle(generator: DerivativeRodeo::Generators::PlainTextGenerator, event: event, context: context) end def alto_xml(event:, context:) + before_run handle(generator: DerivativeRodeo::Generators::AltoGenerator, event: event, context: context) end @@ -123,6 +130,12 @@ def handle(generator:, event:, context:) response_body_for(output_uris) end +## +# We delete ahead of time so that it's empty on the first run, and we do not bother on the last run since Lambda will destroy it +def before_run + puts %x{find /tmp -type f -delete -print} +end + ## # @api private # diff --git a/awslambda/serverless.yml b/awslambda/serverless.yml index df54bef..32f20b2 100644 --- a/awslambda/serverless.yml +++ b/awslambda/serverless.yml @@ -42,7 +42,10 @@ provider: Resource: "*" # you can define service wide environment variables here -# environment: +environment: + MAGICK_TEMPORARY_PATH: /tmp + TMPDIR: /tmp + # variable1: value1 # you can add packaging information here