Skip to content

Commit

Permalink
clear tmp before start
Browse files Browse the repository at this point in the history
  • Loading branch information
orangewolf committed Sep 18, 2023
1 parent 83bdc31 commit bac4abb
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
13 changes: 13 additions & 0 deletions awslambda/handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# @return [Hash<Symbol, Object>] from {#response_body_for}
# @todo TODO: Refactor to maybe use #handle method?
def copy(event:, context:)
before_run
jobs = get_event_body(event: event)
output_uris = []
jobs.each do |job|
Expand All @@ -57,6 +58,7 @@ def copy(event:, context:)
# @param context [Object]
# @return [Hash<Symbol, Object>] from {#response_body_for}
def split_ocr_thumbnail(event:, context:, env: ENV)
before_run
# {"s3://space-stone-dev-preprocessedbucketf21466dd-bxjjlz4251re.s3.us-west-1.amazonaws.com/20121820/20121820.ARCHIVAL.pdf":["s3://space-stone-dev-preprocessedbucketf21466dd-bxjjlz4251re.s3.us-west-1.amazonaws.com/{{dir_parts[-1..-1]}}/{{ filename }}"]}
# split in to pages
handle(generator: DerivativeRodeo::Generators::PdfSplitGenerator, event: event, context: context) do |output_uris|
Expand All @@ -70,6 +72,7 @@ def split_ocr_thumbnail(event:, context:, env: ENV)
end

def ocr(event:, context:, env: ENV)
before_run
DerivativeRodeo::Generators::HocrGenerator.command_environment_variables = "OMP_THREAD_LIMIT=1 TESSDATA_PREFIX=/opt/share/tessdata LD_LIBRARY_PATH=/opt/lib PATH=/opt/bin:$PATH"
handle(generator: DerivativeRodeo::Generators::HocrGenerator, event: event, context: context) do |output_uris|
s3_url = s3_name_to_url(bucket_name: env['S3_BUCKET_NAME'])
Expand All @@ -83,18 +86,22 @@ def ocr(event:, context:, env: ENV)
end

##
# Clears temporary files via {#before_run}, then runs
# DerivativeRodeo::Generators::ThumbnailGenerator through {#handle}.
#
# @param event [Object] forwarded unchanged to {#handle}
# @param context [Object] forwarded unchanged to {#handle}
# @return [Object] whatever {#handle} returns
def thumbnail(event:, context:)
  before_run
  generator_class = DerivativeRodeo::Generators::ThumbnailGenerator
  handle(generator: generator_class, event: event, context: context)
end

##
# Clears temporary files via {#before_run}, then runs
# DerivativeRodeo::Generators::WordCoordinatesGenerator through {#handle}.
#
# @param event [Object] forwarded unchanged to {#handle}
# @param context [Object] forwarded unchanged to {#handle}
# @return [Object] whatever {#handle} returns
def word_coordinates(event:, context:)
  before_run
  generator_class = DerivativeRodeo::Generators::WordCoordinatesGenerator
  handle(generator: generator_class, event: event, context: context)
end

##
# Clears temporary files via {#before_run}, then runs
# DerivativeRodeo::Generators::PlainTextGenerator through {#handle}.
#
# @param event [Object] forwarded unchanged to {#handle}
# @param context [Object] forwarded unchanged to {#handle}
# @return [Object] whatever {#handle} returns
def plain_text(event:, context:)
  before_run
  generator_class = DerivativeRodeo::Generators::PlainTextGenerator
  handle(generator: generator_class, event: event, context: context)
end

##
# Clears temporary files via {#before_run}, then runs
# DerivativeRodeo::Generators::AltoGenerator through {#handle}.
#
# @param event [Object] forwarded unchanged to {#handle}
# @param context [Object] forwarded unchanged to {#handle}
# @return [Object] whatever {#handle} returns
def alto_xml(event:, context:)
  before_run
  generator_class = DerivativeRodeo::Generators::AltoGenerator
  handle(generator: generator_class, event: event, context: context)
end

Expand Down Expand Up @@ -123,6 +130,12 @@ def handle(generator:, event:, context:)
response_body_for(output_uris)
end

##
# Delete every regular file beneath +tmp_dir+ before a handler starts its work.
#
# We delete ahead of time so that the directory is empty at the start of each
# run; we do not bother cleaning up after the last run since Lambda will
# destroy the environment anyway.
#
# The traversal is done in Ruby rather than by shelling out to `find`, so it
# does not depend on an external binary being present. Directories and
# symlinks are left in place; only regular files are removed. Each deleted
# path is printed to stdout.
#
# Deletion is best-effort: a file that cannot be inspected or removed is
# reported on stderr and skipped rather than aborting the handler.
#
# @param tmp_dir [String] directory to clear; defaults to "/tmp"
# @return [nil]
def before_run(tmp_dir: '/tmp')
  # FNM_DOTMATCH makes the glob include hidden files and descend into hidden
  # directories; `base:` keeps glob metacharacters in tmp_dir from being
  # interpreted as part of the pattern.
  Dir.glob('**/*', File::FNM_DOTMATCH, base: tmp_dir).each do |relative_path|
    path = File.join(tmp_dir, relative_path)
    begin
      # lstat (not stat) so that symlinks pointing at files are not deleted.
      next unless File.lstat(path).file?

      File.delete(path)
      puts path
    rescue SystemCallError => e
      warn "before_run: could not delete #{path}: #{e.message}"
    end
  end
  nil
end

##
# @api private
#
Expand Down
5 changes: 4 additions & 1 deletion awslambda/serverless.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,10 @@ provider:
Resource: "*"

# you can define service wide environment variables here
# environment:
environment:
MAGICK_TEMPORARY_PATH: /tmp
TMPDIR: /tmp

# variable1: value1

# you can add packaging information here
Expand Down

0 comments on commit bac4abb

Please sign in to comment.