Skip to content

Commit

Permalink
Merge pull request #1 from Baroquem/dev
Browse files Browse the repository at this point in the history
Merge changes for initial version
  • Loading branch information
Baroquem authored Mar 1, 2020
2 parents 7aa5f58 + e41075f commit cc687c7
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 5 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
node_modules
yarn.lock
.DS_Store
13 changes: 10 additions & 3 deletions client/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "pubmed-search",
"name": "client",
"version": "0.1.0",
"private": true,
"dependencies": {
Expand All @@ -8,15 +8,22 @@
"@testing-library/user-event": "^7.1.2",
"axios": "^0.19.0",
"bootstrap": "^4.4.1",
"core-js": "^3.6.4",
"es6-promise": "^4.2.8",
"isomorphic-fetch": "^2.2.1",
"mongodb": "^3.4.0",
"mongodb-client-encryption": "^1.0.0",
"mongoose": "^5.8.1",
"react": "^16.12.0",
"react-dom": "^16.12.0",
"react-router-dom": "^5.1.2",
"react-scripts": "3.3.0",
"react-spinners": "^0.8.0"
"react-scripts": "^3.4.0",
"react-spinners": "^0.8.0",
"serve": "^11.3.0"
},
"bundledDependencies": [
"serve"
],
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
Expand Down
3 changes: 3 additions & 0 deletions client/src/App.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import Bookbag from './Bookbag';
import SearchPage from './SearchPage';
import SearchTips from './SearchTips';

require('es6-promise').polyfill();
require('isomorphic-fetch');

class App extends Component {
// initialize state
state = {
Expand Down
3 changes: 2 additions & 1 deletion client/src/SearchTips.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@ const SearchTips = () => {
<ol>
<li>By default, words entered into the search box are treated as an OR boolean search. For example, typing <span className="example">aspirin tylenol</span> will return a list of all article citations that include the term "aspirin" <i>or</i> the term "tylenol."
</li>
<li>To perform a boolean AND search, enclose each search term individually in double quotes. Typing <span className="example">"aspirin" "tylenol"</span> will return a list of all article citations that include the term "aspirin" <i>and</i> the term "tylenol." (Note, however, that this type of search will only match exact terms. <span className="example">"puppies" "kittens"</span> will return matches only for the exact words 'puppies' and 'kittens', not 'puppy' and 'kitten'.)</li>
<li>You can search for a phrase by entering a set of words in quotation marks. E.g., typing <span className="example">"patient care"</span> will result in a search for the words <i>patient</i> and <i>care</i> together, not individually.</li>
<li>You can exclude terms from your search by prefacing them with a hyphen (-). If you wanted to modify the previous search so that you only saw articles about patient care that did not involve cancer, you could type <span className="example">"patient care" -cancer</span>.</li>
<li>Clicking one of the highlighted keywords in an article citation from a previous search result will immediately perform a keyword search for that term. This type of search returns a list of article citations that include that term <i>only</i> as a subject keyword.</li>
</ol>
<h4>Notes on search</h4>
<ol>
<li>Searches are case-insensitive and diacritic-insensitive. Regular text searches (i.e., non-keyword searches) look for query terms in a citation's title, abstract, and keywords, and are ranked by relevance.</li>
<li>The search engine uses a stemmer. Thus, for example, a search for "puppies" will include results for "puppy" as well.</li>
<li>The search engine uses a stemmer. Thus, for example, a search for "puppies" will include results for "puppy" as well (unless performing a boolean AND search).</li>
<li>Searches that are too broad (e.g., "cancer"), will result in an error. You will see the message <span className="example">Your search could not be completed. Your topic may be too broad; try narrowing your search terms.</span>. This is a limitation of the system due to computer memory constraints. If you encounter this error, redo your search with a more limited query, e.g. "pancreatic cancer."</li>
<li>More complex searches may take longer to produce results.</li>
</ol>
Expand Down
2 changes: 2 additions & 0 deletions client/src/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import "core-js/stable";
import "regenerator-runtime/runtime";
import "bootstrap/dist/css/bootstrap.min.css";
import React from 'react';
import ReactDOM from 'react-dom';
Expand Down
100 changes: 100 additions & 0 deletions scripts/date_range_from_pubmed_xml.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Given a PubMed abstract index XML file, get the range of years
# covered by the article citations
require 'nokogiri'

# SAX handler that collects the text of every <Year> element nested inside a
# <PubDate> element and, once the document ends, prints the range of years
# found together with a per-decade count.
class CitationParser < Nokogiri::XML::SAX::Document
  # filename - label used in the printed report (the caller passes a basename).
  def initialize(filename)
    super()
    @filename = filename
    # Initialise state here as well as in start_document so the helper
    # methods are usable on an instance that has not parsed anything yet.
    reset_state
  end

  # SAX callback: a new document begins, so discard any previous state.
  def start_document
    reset_state
  end

  # SAX callback: track whether we are inside <PubDate>, and start capturing
  # text only for a <Year> that appears within one.
  def start_element(element, attrs)
    case element
    when 'PubDate'
      @in_pub_date = true
    when 'Year'
      @capture = @in_pub_date
    end
  end

  # SAX callback: text may arrive in several chunks, so append in place
  # instead of allocating a new string for each chunk.
  def characters(str)
    @text << str if @capture
  end

  # SAX callback: on </Year> store the captured year and stop capturing;
  # on </PubDate> leave the publication-date context.
  def end_element(element)
    case element
    when 'Year'
      if @capture
        record_year(@text)
        @text = String.new
        @capture = false
      end
    when 'PubDate'
      @in_pub_date = false
    end
  end

  # SAX callback: print the one-line report for this file.
  def end_document
    if @years.empty?
      puts "No publication years found for #{@filename}"
      return
    end

    # join avoids the dangling ", " the breakdown would otherwise end with.
    breakdown = categories(@years).map { |label, count| "#{label}: #{count}" }.join(', ')
    puts "Range found: #{@years.min}-#{@years.max} for #{@filename} (#{breakdown})"
  end

  # Returns the arithmetic mean of +years+ using integer division, or nil
  # when +years+ is empty.
  def mean(years)
    return nil if years.empty?

    years.reduce(:+) / years.count
  end

  # Returns the median of +years+ (a Float when the count is even, the middle
  # element otherwise), or nil when +years+ is empty.
  def median(years)
    return nil if years.empty?

    sorted = years.sort
    middle = sorted.length / 2
    if sorted.length.even?
      0.5 * (sorted[middle - 1] + sorted[middle])
    else
      sorted[middle]
    end
  end

  # Returns the most frequent value in +years+, or nil when +years+ is empty.
  def mode(years)
    frequencies = years.each_with_object(Hash.new(0)) { |year, counts| counts[year] += 1 }
    years.max_by { |year| frequencies[year] }
  end

  # Returns a Hash mapping a bucket label to the number of +years+ that fall
  # in it. Note that :_2010s is open-ended: it counts every year >= 2010.
  def categories(years)
    {
      early: years.count { |y| y < 1960 },
      _1960s: years.count { |y| y >= 1960 && y < 1970 },
      _1970s: years.count { |y| y >= 1970 && y < 1980 },
      _1980s: years.count { |y| y >= 1980 && y < 1990 },
      _1990s: years.count { |y| y >= 1990 && y < 2000 },
      _2000s: years.count { |y| y >= 2000 && y < 2010 },
      _2010s: years.count { |y| y >= 2010 }
    }
  end

  private

  # Clears everything accumulated while parsing.
  def reset_state
    @text = String.new
    @years = []
    @in_pub_date = false
    @capture = false
  end

  # Converts the captured text to an Integer and stores it. Base 10 is forced
  # so a leading zero is never read as octal. An empty or malformed <Year> is
  # reported on stderr and skipped rather than aborting the whole run.
  def record_year(text)
    @years << Integer(text.strip, 10)
  rescue ArgumentError
    warn "Skipping unparseable year #{text.inspect} in #{@filename}"
  end
end


# Main program
# Process the single file named on the command line, or every file under
# source_files/originals/ when no argument is given.
files = ARGV[0] ? [ARGV[0]] : Dir['source_files/originals/*']

puts "\n\nWorking..."
files.sort.each do |path|
  # The handler is labelled with the basename so the report stays short.
  handler = CitationParser.new(File.basename(path))
  Nokogiri::XML::SAX::Parser.new(handler).parse_file(path)
end
puts "\nDone\n\n"
2 changes: 1 addition & 1 deletion server/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "backend",
"name": "server",
"version": "1.0.0",
"description": "",
"main": "index.js",
Expand Down

0 comments on commit cc687c7

Please sign in to comment.