Skip to content

Commit

Permalink
Merge branch 'main' into mark_during_bulk
Browse files Browse the repository at this point in the history
  • Loading branch information
laritakr authored Sep 25, 2023
2 parents 269a6ab + 4574ecc commit ac95953
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 8 deletions.
56 changes: 48 additions & 8 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,34 @@
Bundler.require(*groups)

module Hyku
# Provides a common method to ensure consistent UTF-8 encoding. It also removes the tricksy Byte
# Order Mark (BOM) character, which is an invisible zero-width character.
#
# @note In testing, we encountered errors with the file's character encoding
# (e.g. `Encoding::UndefinedConversionError`). The following will force the encoding to
# UTF-8 and replace any invalid or undefined characters from the original encoding with a
# "?".
#
# Given that we still have the original, and this is a derivative, the forced encoding
# should be acceptable.
#
# @param string [String] the text to convert
# @return [String] the text encoded as UTF-8, with Byte Order Mark characters removed
#
# @see https://sentry.io/organizations/scientist-inc/issues/3773392603/?project=6745020&query=is%3Aunresolved&referrer=issue-stream
# @see https://github.com/samvera-labs/bulkrax/pull/689
# @see https://github.com/samvera-labs/bulkrax/issues/688
# @see https://github.com/scientist-softserv/adventist-dl/issues/179
def self.utf_8_encode(string)
  # Bytes of the UTF-8 Byte Order Mark; every occurrence is stripped from the result.
  byte_order_mark = "\xEF\xBB\xBF"
  # Substitute "?" for anything that is invalid in, or cannot be converted from, the source encoding.
  replacement_options = { invalid: :replace, undef: :replace, replace: "?" }

  utf_8_text = string.encode(Encoding.find('UTF-8'), **replacement_options)
  utf_8_text.delete(byte_order_mark)
end

class Application < Rails::Application
# Add this line to load the lib folder first because we need
config.autoload_paths.unshift("#{Rails.root}/lib")

# Settings in config/environments/* take precedence over those specified here.
# Application configuration should go into files in config/initializers
# -- all .rb files in that directory are automatically loaded.
Expand All @@ -33,22 +60,33 @@ class Application < Rails::Application
end

config.to_prepare do
# Allows us to use decorator files in the app directory

# Add any extra services before IiifPrint::PluggableDerivativeService to enable processing
Hyrax::DerivativeService.services = [IiifPrint::PluggableDerivativeService]

# When you are ready to use the derivative rodeo instead of the pluggable uncomment the
# following and comment out the preceding Hyrax::DerivativeService.service
#
# Hyrax::DerivativeService.services = [
# Adventist::TextFileTextExtractionService,
# IiifPrint::DerivativeRodeoService,
# Hyrax::FileSetDerivativesService]

DerivativeRodeo::Generators::HocrGenerator.additional_tessearct_options = "-l eng_best"

# Allows us to use decorator files
Dir.glob(File.join(File.dirname(__FILE__), "../app/**/*_decorator*.rb")).sort.each do |c|
Rails.configuration.cache_classes ? require(c) : load(c)
end
end

config.to_prepare do
# Allows us to use decorator files in the app directory
Dir.glob(File.join(File.dirname(__FILE__), "../lib/**/*_decorator*.rb")).sort.each do |c|
Rails.configuration.cache_classes ? require(c) : load(c)
end
end

# OAI additions
Dir.glob(File.join(File.dirname(__FILE__), "../lib/oai/**/*.rb")).sort.each do |c|
Rails.configuration.cache_classes ? require(c) : load(c)
# OAI additions
Dir.glob(File.join(File.dirname(__FILE__), "../lib/oai/**/*.rb")).sort.each do |c|
Rails.configuration.cache_classes ? require(c) : load(c)
end
end

# resolve reloading issue in dev mode
Expand All @@ -67,6 +105,8 @@ class Application < Rails::Application
Object.include(AccountSwitch)
end

# copies tinymce assets directly into public/assets
config.tinymce.install = :copy
##
# Psych Allow YAML Classes
#
Expand Down
1 change: 1 addition & 0 deletions config/database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

login: &login
adapter: <%= ENV['DB_ADAPTER'] || 'postgresql' %>
schema_search_path: "public,shared_extensions"
host: <%= ENV['DB_HOST'] %>
username: <%= ENV['DB_USER'] %>
password: <%= ENV['DB_PASSWORD'] %>
Expand Down
1 change: 1 addition & 0 deletions config/fedora.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@ production:
password: fedoraAdmin
url: http://<%= ENV['FCREPO_HOST'] || 'localhost' %>:<%= ENV['FCREPO_PORT'] || 8080 %>/<%= ENV['FCREPO_REST_PATH'] || 'rest' %>
base_path: <%= ENV['FCREPO_BASE_PATH'] || '/prod' %>
request: { timeout: 600, open_timeout: 60}
11 changes: 11 additions & 0 deletions config/initializers/active_fedora_override.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Based on https://github.com/samvera/hyrax/issues/4581#issuecomment-843085122

# Monkey-patch to short circuit ActiveModel::Dirty which attempts to load the whole master files ordered list when calling nodes_will_change!
# This leads to a stack level too deep exception when attempting to delete a master file from a media object on the manage files step.
# See https://github.com/samvera/active_fedora/pull/1312/commits/7c8bbbefdacefd655a2ca653f5950c991e1dc999#diff-28356c4daa0d55cbaf97e4269869f510R100-R103
ActiveFedora::Aggregation::ListSource.class_eval do
  # Flags `nodes` as changed without going through the inherited dirty-tracking implementation;
  # every other attribute is handed to `super` unchanged.
  #
  # @param attr [String] name of the attribute about to change
  def attribute_will_change!(attr)
    if attr == 'nodes'
      # Record the change directly instead of calling super for this attribute.
      attributes_changed_by_setter[:nodes] = true
    else
      super
    end
  end
end
2 changes: 2 additions & 0 deletions config/initializers/apartment.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
# Any schemas added here will be available along with your selected Tenant.
#
# config.persistent_schemas = %w{ hstore }
config.persistent_schemas = ['shared_extensions']


# <== PostgreSQL only options
#
Expand Down
20 changes: 20 additions & 0 deletions lib/active_fedora/solr_service_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# frozen_string_literal: true

# OVERRIDE: class ActiveFedora::SolrService from Fedora 12.1.1
module ActiveFedora
  module SolrServiceDecorator
    # Count the records that match a Solr query.
    #
    # OVERRIDE: use `post` rather than `get` to handle larger query sizes
    #
    # @param [String] query a solr query
    # @param [Hash] args arguments passed through to SolrService.post
    #   (any :rows value is replaced with 0; the caller's hash is not modified)
    # @return [Integer] number of records matching
    def count(query, args = {})
      count_only_args = args.merge(rows: 0)
      solr_reply = SolrService.post(query, count_only_args)
      solr_reply['response']['numFound'].to_i
    end
  end
end

# Prepend onto the singleton class so the decorator's `count` takes precedence over SolrService.count.
ActiveFedora::SolrService.singleton_class.prepend(ActiveFedora::SolrServiceDecorator)
24 changes: 24 additions & 0 deletions lib/tasks/db_enhancements.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# frozen_string_literal: true

namespace :db do
  desc 'Also create shared_extensions Schema'
  task extensions: :environment do
    # Statements run in order: create the schema, enable the hstore, uuid-ossp and pgcrypto
    # extensions inside it, then grant usage on the schema to public.
    [
      'CREATE SCHEMA IF NOT EXISTS shared_extensions;',
      'CREATE EXTENSION IF NOT EXISTS HSTORE SCHEMA shared_extensions;',
      'CREATE EXTENSION IF NOT EXISTS "uuid-ossp" SCHEMA shared_extensions;',
      'CREATE EXTENSION IF NOT EXISTS "pgcrypto" SCHEMA shared_extensions;',
      'GRANT usage ON SCHEMA shared_extensions to public;'
    ].each do |statement|
      ActiveRecord::Base.connection.execute(statement)
    end
  end
end

# After either of these tasks has run, also run db:extensions.
["db:create", "db:test:purge"].each do |enhanced_task|
  Rake::Task[enhanced_task].enhance do
    Rake::Task["db:extensions"].invoke
  end
end
42 changes: 42 additions & 0 deletions lib/wings/services/custom_queries/find_ids_by_model_decorator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# frozen_string_literal: true

# OVERRIDE Hyrax 3.5 to use post instead of get for Solr requests

module Wings
  module CustomQueries
    ##
    # @see https://github.com/samvera/valkyrie/wiki/Queries#custom-queries
    # @see Hyrax::CustomQueries::FindIdsByModel
    module FindIdsByModelDecorator
      ##
      # Yields the id of every Solr document whose has_model_ssim matches the given model,
      # fetching results one page (`@query_rows` documents) at a time.
      #
      # @note uses solr to do the lookup (via `post`, not `get`)
      #
      # @param model [Class]
      # @param ids [Enumerable<#to_s>, Symbol] when not :all, only ids included in this
      #   collection are yielded
      #
      # @yieldparam id [String] the id of a matching Solr document
      # @return [Enumerator] when no block is given
      def find_ids_by_model(model:, ids: :all)
        return enum_for(:find_ids_by_model, model: model, ids: ids) unless block_given?
        model_name = ModelRegistry.lookup(model).model_name

        solr_query = "_query_:\"{!raw f=has_model_ssim}#{model_name}\""
        solr_response = ActiveFedora::SolrService.post(solr_query, fl: 'id', rows: @query_rows)['response']

        loop do
          fetched_docs = solr_response['docs']
          # Filter without mutating the response: the stop check below must count the documents
          # Solr returned for this page, not the ones that survived the ids filter. Filtering in
          # place made the last page look short and triggered an extra, empty request.
          wanted_docs = ids == :all ? fetched_docs : fetched_docs.select { |doc| ids.include?(doc['id']) }

          wanted_docs.each { |doc| yield doc['id'] }

          break if (solr_response['start'] + fetched_docs.count) >= solr_response['numFound']
          solr_response = ActiveFedora::SolrService.post(solr_query,
                                                         fl: 'id',
                                                         rows: @query_rows,
                                                         start: solr_response['start'] + @query_rows)['response']
        end
      end
    end
  end
end

# Prepend so the decorator's find_ids_by_model is found before the one defined on FindIdsByModel.
Wings::CustomQueries::FindIdsByModel.prepend(Wings::CustomQueries::FindIdsByModelDecorator)

0 comments on commit ac95953

Please sign in to comment.