Skip to content

Commit

Permalink
🎁 Add ability to pass search term to PDF.js
Browse files Browse the repository at this point in the history
This commit adds a PDF's text to the file set's Solr document so it
can be searched in the catalog. The search term is now also passed to the
viewer so that it is highlighted when the viewer loads.
  • Loading branch information
kirkkwang committed Nov 15, 2023
1 parent 02fd84e commit 8d21dde
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 2 deletions.
8 changes: 7 additions & 1 deletion app/helpers/pdf_js_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

module PdfJsHelper
# Builds the URL of the PDF.js viewer page for a given file.
#
# @param path [String] location of the PDF, interpolated as-is into the
#   viewer's `file` query parameter
# @return [String] the viewer URL; when `query_param` yields a value it is
#   appended as the URL fragment so the viewer can pick it up on load
def pdf_js_url(path)
  viewer_url = "/pdf.js/web/viewer.html?file=#{path}"
  fragment = query_param

  # Only add the '#' separator when there is something to put after it.
  fragment ? "#{viewer_url}##{fragment}" : viewer_url
end

def pdf_file_set_presenter(presenter)
Expand All @@ -13,4 +13,10 @@ def pdf_file_set_presenter(presenter)
# Looks up the file set presenter that represents the given work.
#
# @param presenter [#file_set_presenters, #representative_id]
# @return [Object, nil] the first file set presenter whose id equals the
#   work's representative_id, or nil when none matches
def representative_presenter(presenter)
  presenter.file_set_presenters.find do |candidate|
    candidate.id == presenter.representative_id
  end
end

# Builds the PDF.js hash parameters that make the viewer search for the
# current catalog query as an exact phrase.
#
# @return [String, nil] "search=<encoded term>&phrase=true", or nil when
#   params[:q] is missing or blank
def query_param
  # Deliberate local require so the helper does not rely on load order;
  # it is a no-op once ERB has been loaded.
  require 'erb'

  term = params[:q].to_s
  return if term.strip.empty?

  # Percent-encode the term so characters such as '&', '#', '%' or spaces
  # cannot break out of the `search` parameter inside the URL fragment.
  "search=#{ERB::Util.url_encode(term)}&phrase=true"
end
end
5 changes: 4 additions & 1 deletion app/helpers/shared_search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ def generate_work_url(model, request)
id = model["id"]
end
request_params = %i[protocol host port].map { |method| ["request_#{method}".to_sym, request.send(method)] }.to_h
get_url(id: id, request: request_params, account_cname: account_cname, has_model: has_model)
url = get_url(id: id, request: request_params, account_cname: account_cname, has_model: has_model)

# pass search query params to work show page
params[:q].present? ? "#{url}?q=#{params[:q]}" : url
end

private
Expand Down
34 changes: 34 additions & 0 deletions app/indexers/hyrax/file_set_indexer_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

# OVERRIDE Hyrax 3.5.0 to add PDF text to solr document when using the default PDF viewer (PDF.js)

module Hyrax
  # Adds the text of a PDF file set to its Solr document when the default
  # PDF viewer feature flag is on.
  module FileSetIndexerDecorator
    # @return [Hash] the Solr document built by `super`; when the feature
    #   flag is on and PDF text could be extracted, 'all_text_timv' and
    #   'all_text_tsimv' are set to that text
    def generate_solr_document
      return super unless Flipflop.default_pdf_viewer?

      super.tap do |solr_doc|
        text = pdf_text
        # Leave whatever `super` indexed untouched when there is no PDF text;
        # assigning nil/'' here would wipe an existing full-text value.
        next if text.nil? || text.empty?

        solr_doc['all_text_timv'] = solr_doc['all_text_tsimv'] = text
      end
    end

    private

    # Pipes the original file through `pdftotext` (stdin -> stdout).
    #
    # @return [String, nil] the extracted text on a single line, or nil when
    #   the object is not a PDF or its content is not available as a String
    def pdf_text
      return unless object.pdf?

      content = object.original_file&.content
      return unless content.is_a?(String)

      raw = IO.popen(['pdftotext', '-', '-'], 'r+b') do |pdftotext|
        pdftotext.write(content)
        pdftotext.close_write # signal EOF so pdftotext starts producing output
        pdftotext.read
      end

      normalize_pdf_text(raw)
    end

    # Re-tags the binary pipe output as UTF-8, drops only the byte sequences
    # that are not valid UTF-8, and collapses the text onto one line.
    # NOTE(review): assumes pdftotext writes UTF-8 (its documented default
    # output encoding) -- confirm no `-enc` override is configured.
    def normalize_pdf_text(raw)
      raw.force_encoding(Encoding::UTF_8)
         .scrub('')
         .tr("\n", ' ')
         .squeeze(' ')
    end
  end
end

# Prepend so the decorator's generate_solr_document runs first and reaches the stock implementation through `super`.
Hyrax::FileSetIndexer.prepend(Hyrax::FileSetIndexerDecorator)

0 comments on commit 8d21dde

Please sign in to comment.