Skip to content

Commit

Permalink
enable both big and limited query
Browse files Browse the repository at this point in the history
  • Loading branch information
ake123 committed Sep 26, 2024
1 parent 542e9a9 commit 013a071
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 103 deletions.
191 changes: 97 additions & 94 deletions R/search_finna.R
Original file line number Diff line number Diff line change
@@ -1,35 +1,35 @@
#' Finna Index Search with Pagination and Advanced Options
#' Finna Index Search with Total Limit Option
#'
#' This function performs a search on the Finna index with extended options, allowing for a wide range of search types, filters, facets, and sorting methods.
#' It retrieves all available data by paginating through the API results.
#' This function retrieves records from the Finna index with an option to limit the total number of records returned.
#' The function paginates through the results, fetching records until the specified total limit is reached.
#'
#' @name search_finna
#' @param lookfor A string containing the search terms. Boolean operators (AND, OR, NOT) can be included.
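#' @param query A string containing the search terms, sent to the Finna API as the `lookfor` parameter. Boolean operators (AND, OR, NOT) can be included.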
#' @param type A string specifying the type of search. Options include "AllFields", "Title", "Author", "Subject". Defaults to "AllFields".
#' @param fields A vector of fields to be returned in the search results. Defaults to NULL, which returns a standard set of fields.
#' @param filters A vector of filter queries to refine the search. Defaults to NULL.
#' @param facets A vector specifying which facets to return in the results. Defaults to NULL.
#' @param facetFilters A vector of regular expressions to filter facets. Defaults to NULL.
#' @param sort A string defining the sort order of the results. Options include:
#' \itemize{
#' \item "relevance,id asc" (default)
#' \item "main_date_str desc" (Year, newest first)
#' \item "main_date_str asc" (Year, oldest first)
#' \item "last_indexed desc" (Last modified)
#' \item "first_indexed desc" (Last added)
#' \item "callnumber,id asc" (Classmark)
#' \item "author,id asc" (Author)
#' \item "title,id asc" (Title)
#' }
#' @param limit An integer specifying the number of records to return per page. Defaults to 100 (maximum).
#' \itemize{
#' \item "relevance,id asc" (default)
#' \item "main_date_str desc" (Year, newest first)
#' \item "main_date_str asc" (Year, oldest first)
#' \item "last_indexed desc" (Last modified)
#' \item "first_indexed desc" (Last added)
#' \item "callnumber,id asc" (Classmark)
#' \item "author,id asc" (Author)
#' \item "title,id asc" (Title)
#' }
#' @param limit An integer specifying the total number of records to return across multiple pages.
#' @param lng A string for the language of returned translated strings. Options are "fi", "en-gb", "sv", "se". Defaults to "fi".
#' @param prettyPrint A logical value indicating whether to pretty-print the JSON response. Useful for debugging. Defaults to FALSE.
#' @return A tibble containing all search results with relevant fields extracted and provenance information.
#' @return A tibble containing the search results with relevant fields extracted and provenance information.
#' @examples
#' search_results <- search_finna("sibelius", sort = "main_date_str desc")
#' search_results <- search_finna("sibelius", sort = "main_date_str desc", limit = 100)
#' print(search_results)
#' @export
search_finna <- function(lookfor,
search_finna <- function(query = NULL,#lookfor
type = "AllFields",
fields = NULL,
filters = NULL,
Expand All @@ -41,30 +41,37 @@ search_finna <- function(lookfor,
prettyPrint = FALSE) {

# Handle empty search queries
if (lookfor == "" || is.null(lookfor)) {
warning("Error: Empty search query provided.")
return(NULL)
}
# if (query == "" || is.null(query)) {
# warning("Error: Empty search query provided.")
# return(NULL)
# }

# Define the base URL for the search API
base_url <- "https://api.finna.fi/v1/search"

# Initialize variables for pagination
all_data <- list() # Store all pages of data
page <- 1 # Start from the first page
# Initialize empty list to store all records
all_data <- list()

# Define the pagination parameters
total_fetched <- 0
page <- 1
records_per_page <- 100 # Fetch 100 records per page for efficiency

repeat {
# Construct the query parameters for each page
while (total_fetched < limit) {
# Calculate the remaining number of records to fetch
remaining_to_fetch <- min(records_per_page, limit - total_fetched)

# Construct the query parameters for the current page
query_params <- list(
lookfor = lookfor,
lookfor = query,
type = type,
`field[]` = fields,
`filter[]` = filters,
`facet[]` = facets,
`facetFilter[]` = facetFilters,
sort = sort,
page = page,
limit = limit,
limit = remaining_to_fetch, # Set the page limit to fetch only the remaining records
lng = lng,
prettyPrint = prettyPrint
)
Expand All @@ -78,87 +85,83 @@ search_finna <- function(lookfor,
}
)

# Check if the response is valid
if (is.null(response) || httr::status_code(response) != 200) {
error_message <- sprintf("Failed to perform the search. Status code: %d - Response: %s",
httr::status_code(response), httr::content(response, "text"))
warning(error_message)
break
}
# Process the response based on the status code
if (httr::status_code(response) == 200) {
# Parse the JSON content of the response
search_results <- httr::content(response, "parsed")

# Parse the JSON content of the response
search_results <- httr::content(response, "parsed")

# Extract and structure relevant data from the search results
records <- search_results$records
if (length(records) == 0) {
message("No more records found. Stopping pagination.")
break
}
# Extract and structure relevant data from the search results
records <- search_results$records
if (is.null(records) || length(records) == 0) {
break # No more records, stop fetching
}

data <- lapply(records, function(record) {
list(
Title = record$title %||% NA,
Author = if (!is.null(record$nonPresenterAuthors) && length(record$nonPresenterAuthors) > 0) {
paste(sapply(record$nonPresenterAuthors, function(author) author$name), collapse = ", ")
} else {
NA
},
Year = record$year %||% NA,
Language = if (!is.null(record$languages) && length(record$languages) > 0) record$languages[[1]] else NA,
Formats = if (!is.null(record$formats) && length(record$formats) > 0) {
paste(sapply(record$formats, function(format) format$translated), collapse = ", ")
} else {
NA
},
Subjects = if (!is.null(record$subjects) && length(record$subjects) > 0) {
paste(sapply(record$subjects, function(subject) paste(subject, collapse = ", ")), collapse = "; ")
} else {
NA
},
Library = if (!is.null(record$buildings) && length(record$buildings) > 0) {
paste(sapply(record$buildings, function(building) building$translated), collapse = ", ")
} else {
NA
},
Series = tryCatch({
if (!is.null(record$series)) {
if (is.list(record$series)) {
if (length(record$series) > 0) {
paste(sapply(record$series, function(series) series$name %||% NA), collapse = ", ")
# Add the records to the all_data list
data <- lapply(records, function(record) {
list(
Title = record$title %||% NA,
Author = if (!is.null(record$nonPresenterAuthors) && length(record$nonPresenterAuthors) > 0) {
paste(sapply(record$nonPresenterAuthors, function(author) author$name), collapse = ", ")
} else {
NA
},
Year = record$year %||% NA,
Language = if (!is.null(record$languages) && length(record$languages) > 0) record$languages[[1]] else NA,
Formats = if (!is.null(record$formats) && length(record$formats) > 0) {
paste(sapply(record$formats, function(format) format$translated), collapse = ", ")
} else {
NA
},
Subjects = if (!is.null(record$subjects) && length(record$subjects) > 0) {
paste(sapply(record$subjects, function(subject) paste(subject, collapse = ", ")), collapse = "; ")
} else {
NA
},
Library = if (!is.null(record$buildings) && length(record$buildings) > 0) {
paste(sapply(record$buildings, function(building) building$translated), collapse = ", ")
} else {
NA
},
Series = tryCatch({
if (!is.null(record$series)) {
if (is.list(record$series)) {
if (length(record$series) > 0) {
paste(sapply(record$series, function(series) series$name %||% NA), collapse = ", ")
} else {
NA
}
} else if (is.atomic(record$series)) {
as.character(record$series)
} else {
NA
}
} else if (is.atomic(record$series)) {
as.character(record$series)
} else {
NA
}
} else {
NA
}
}, error = function(e) NA)
)
})
}, error = function(e) NA)
)
})

# Append the current page's data to the list of all data
all_data <- c(all_data, data)
all_data <- c(all_data, data)

# Check if we've reached the last page
if (length(records) < limit) {
message("Retrieved last page of results.")
break
}
# Update the total number of fetched records
total_fetched <- total_fetched + length(records)
page <- page + 1

# Increment the page number for the next iteration
page <- page + 1
} else {
# Handle API errors with detailed messages
error_message <- sprintf("Failed to perform the search. Status code: %d - Response: %s",
httr::status_code(response), httr::content(response, "text"))
warning(error_message)
return(NULL)
}
}

# Convert the collected data into a tibble for easy analysis
# Convert the list of extracted data into a tibble for easy analysis
tibble_results <- tibble::as_tibble(do.call(rbind, lapply(all_data, function(x) unlist(x, recursive = FALSE))))

# Attach the language attribute to the tibble
attr(tibble_results, "language") <- lng
cat("Data retrieved from Finna API (https://www.finna.fi) - metadata licensed under CC0.\n")
#cat("Data retrieved from Finna API (https://www.finna.fi) - metadata licensed under CC0.\n")
return(tibble_results)
}
16 changes: 8 additions & 8 deletions man/search_finna.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions _pkgdown.yml → pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@ navbar:
href: news/index.html
right:
- text: "GitHub"
icon: fa-github-code-o
href: https://github.com/rOpenGov/finna

# Optional: Define the authors
authors:
- name: "Akewak Jeba"
url: https://github.com/rOpenGov/finna
- name: "Leo Lahti"
url: https://github.com/rOpenGov/finna

# Optional: Specify the output directory (if different from default)
output_dir: docs
Loading

0 comments on commit 013a071

Please sign in to comment.