From 0518147265051c5e6868ea842b4aaf85be7f52dd Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Tue, 18 Feb 2020 20:28:01 -0500 Subject: [PATCH 1/7] Update server package name --- server/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/package.json b/server/package.json index fb77ac5..c213ea7 100644 --- a/server/package.json +++ b/server/package.json @@ -1,5 +1,5 @@ { - "name": "backend", + "name": "server", "version": "1.0.0", "description": "", "main": "index.js", From 5f632a500a4b2b31a2b784c21f3c27ce727bcf00 Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Tue, 18 Feb 2020 20:28:48 -0500 Subject: [PATCH 2/7] Update gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3c3629e..97008e5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +yarn.lock \ No newline at end of file From 13cd812cf691a35c786fb99d952b3d1794f4d4cf Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Tue, 18 Feb 2020 20:31:36 -0500 Subject: [PATCH 3/7] Update client react-scripts to avoid TypeError --- client/package.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/package.json b/client/package.json index ccc2899..2f839e6 100644 --- a/client/package.json +++ b/client/package.json @@ -1,5 +1,5 @@ { - "name": "pubmed-search", + "name": "client", "version": "0.1.0", "private": true, "dependencies": { @@ -14,7 +14,7 @@ "react": "^16.12.0", "react-dom": "^16.12.0", "react-router-dom": "^5.1.2", - "react-scripts": "3.3.0", + "react-scripts": "^3.4.0", "react-spinners": "^0.8.0" }, "scripts": { From 7ebf75b212e3b12558cc97453ea87453040da1e6 Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Wed, 19 Feb 2020 14:00:50 -0500 Subject: [PATCH 4/7] Update search tips with boolean AND info --- client/src/SearchTips.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/src/SearchTips.js b/client/src/SearchTips.js index 42cd37a..4df1f44 
100755 --- a/client/src/SearchTips.js +++ b/client/src/SearchTips.js @@ -14,6 +14,7 @@ const SearchTips = () => {
  1. By default, words entered into the search box are treated as an OR boolean search. For example, typing aspirin tylenol will return a list of all article citations that include the term "aspirin" or the term "tylenol."
  2. +
  3. To perform a boolean AND search, enclose each search term individually in double quotes. Typing "aspirin" "tylenol" will return a list of all article citations that include the term "aspirin" and the term "tylenol." (Note, however, that this type of search will only match exact terms. "puppies" "kittens" will return matches only for the exact words 'puppies' and 'kittens', not 'puppy' and 'kitten'.)
  4. You can search for a phrase by entering a set of words in quotation marks. E.g., typing "patient care" will result in a search for the words patient and care together, not individually.
  5. You can exclude terms from your search by prefacing them with a hyphen (-). If you wanted to modify the previous search so that you only saw articles about patient care that did not involve cancer, you could type "patient care" -cancer.
  6. Clicking one of the highlighted keywords in an article citation from a previous search result will immediately perform a keyword search for that term. This type of search returns a list of article citations that include that term only as a subject keyword.
  7. @@ -21,7 +22,7 @@ const SearchTips = () => {

    Notes on search

    1. Searches are case-insensitive and diacritic-insensitive. Regular text searches (i.e., non-keyword searches) look for query terms in a citation's title, abstract, and keywords, and are ranked by relevance.
    2. -
    3. The search engine uses a stemmer. Thus, for example, a search for "puppies" will include results for "puppy" as well.
    4. +
    5. The search engine uses a stemmer. Thus, for example, a search for "puppies" will include results for "puppy" as well (unless performing a boolean AND search).
    6. Searches that are too broad (e.g., "cancer") will result in an error. You will see the message "Your search could not be completed. Your topic may be too broad; try narrowing your search terms." This is a limitation of the system due to computer memory constraints. If you encounter this error, redo your search with a more limited query, e.g. "pancreatic cancer."
    7. More complex searches may take longer to produce results.
    From 0c291bd3a97205c1a148a938a2b4e7050b7c76f4 Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Wed, 26 Feb 2020 09:26:25 -0500 Subject: [PATCH 5/7] Make things work in IE --- client/package.json | 11 +++++++++-- client/src/App.js | 3 +++ client/src/index.js | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/client/package.json b/client/package.json index ccc2899..1d3fcda 100644 --- a/client/package.json +++ b/client/package.json @@ -8,15 +8,22 @@ "@testing-library/user-event": "^7.1.2", "axios": "^0.19.0", "bootstrap": "^4.4.1", + "core-js": "^3.6.4", + "es6-promise": "^4.2.8", + "isomorphic-fetch": "^2.2.1", "mongodb": "^3.4.0", "mongodb-client-encryption": "^1.0.0", "mongoose": "^5.8.1", "react": "^16.12.0", "react-dom": "^16.12.0", "react-router-dom": "^5.1.2", - "react-scripts": "3.3.0", - "react-spinners": "^0.8.0" + "react-scripts": "^3.4.0", + "react-spinners": "^0.8.0", + "serve": "^11.3.0" }, + "bundledDependencies": [ + "serve" + ], "scripts": { "start": "react-scripts start", "build": "react-scripts build", diff --git a/client/src/App.js b/client/src/App.js index 4130b00..0113ca0 100755 --- a/client/src/App.js +++ b/client/src/App.js @@ -7,6 +7,9 @@ import Bookbag from './Bookbag'; import SearchPage from './SearchPage'; import SearchTips from './SearchTips'; +require('es6-promise').polyfill(); +require('isomorphic-fetch'); + class App extends Component { // initialize state state = { diff --git a/client/src/index.js b/client/src/index.js index f4729f8..341da82 100644 --- a/client/src/index.js +++ b/client/src/index.js @@ -1,3 +1,5 @@ +import "core-js/stable"; +import "regenerator-runtime/runtime"; import "bootstrap/dist/css/bootstrap.min.css"; import React from 'react'; import ReactDOM from 'react-dom'; From cb5cccda64c4ac662cb34864dbdd0a114ae99ad4 Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Sat, 29 Feb 2020 16:26:21 -0500 Subject: [PATCH 6/7] Fix merge conflict --- client/package.json | 4 ---- 1 file changed, 4 
deletions(-) diff --git a/client/package.json b/client/package.json index e388189..06671d8 100644 --- a/client/package.json +++ b/client/package.json @@ -18,12 +18,8 @@ "react-dom": "^16.12.0", "react-router-dom": "^5.1.2", "react-scripts": "^3.4.0", -<<<<<<< HEAD - "react-spinners": "^0.8.0" -======= "react-spinners": "^0.8.0", "serve": "^11.3.0" ->>>>>>> origin/dev }, "bundledDependencies": [ "serve" From e41075fddfe726e1b95db3a47d26e670400b5533 Mon Sep 17 00:00:00 2001 From: Matthew Connolly Date: Sat, 29 Feb 2020 19:56:57 -0500 Subject: [PATCH 7/7] Add script for analyzing date ranges in PubMed files --- scripts/date_range_from_pubmed_xml.rb | 100 ++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 scripts/date_range_from_pubmed_xml.rb diff --git a/scripts/date_range_from_pubmed_xml.rb b/scripts/date_range_from_pubmed_xml.rb new file mode 100644 index 0000000..e20f377 --- /dev/null +++ b/scripts/date_range_from_pubmed_xml.rb @@ -0,0 +1,100 @@ +# Given a PubMed abstract index XML file, get the range of years +# covered by the article citations +require 'nokogiri' + +class CitationParser < Nokogiri::XML::SAX::Document + + + def initialize(filename) + @filename = filename + end + + def start_document + @text = '' + @years = [] + @in_pub_date = false + @capture = false + end + + def start_element(element, attrs) + case element + when 'PubDate' + @in_pub_date = true + when 'Year' + @capture = @in_pub_date + end + end + + def characters(str) + @text += str if @capture + end + + def end_element(element) + case element + when 'Year' + if @capture + @years << Integer(@text) + @text = '' + @capture = false + end + when 'PubDate' + @in_pub_date = false + end + end + + def end_document + #puts "Range found: #{@years.min}-#{@years.max} for #{@filename} (avg: #{mean(@years)}, mode: #{mode(@years)}, median: #{median(@years)})" + year_report = " (" + categories(@years).each do |k, v| + year_report += "#{k}: #{v}, " + end + year_report += ")" + puts 
"Range found: #{@years.min}-#{@years.max} for #{@filename}" + year_report + end + + def mean(years) + @years.reduce(:+) / @years.count + end + + def median(years) + sorted = years.sort + if years.count % 2 == 0 + 0.5 * (sorted[years.length / 2 - 1] + sorted[years.length / 2]) + else + sorted[years.length / 2] + end + end + + def mode(years) + frequencies = years.reduce(Hash.new(0)) { |h,v| h[v] += 1; h } + years.max_by { |y| frequencies[y] } + end + + def categories(years) + { + :early => years.select { |y| y < 1960 }.count, + :_1960s => years.select { |y| y >= 1960 && y < 1970 }.count, + :_1970s => years.select { |y| y >= 1970 && y < 1980 }.count, + :_1980s => years.select { |y| y >= 1980 && y < 1990 }.count, + :_1990s => years.select { |y| y >= 1990 && y < 2000 }.count, + :_2000s => years.select { |y| y >= 2000 && y < 2010 }.count, + :_2010s => years.select { |y| y >= 2010 }.count, + } + end +end + + +# Main program +if ARGV[0] + # PRocess the single file specified + files = [ARGV[0]] +else + files = Dir['source_files/originals/*'] +end +puts "\n\nWorking..." +files.sort.each do |f| + parser = Nokogiri::XML::SAX::Parser.new(CitationParser.new(File.basename(f))) + #puts "Processing #{f}" + parser.parse_file f +end +puts "\nDone\n\n" \ No newline at end of file