Scoring integration #66

Open · wants to merge 4 commits into master
15 changes: 12 additions & 3 deletions README.md
@@ -34,19 +34,28 @@
npm start
```

4. Optionally enable PaNET ontology fetching from an external URL (if this step is skipped no PaNET ontology is used)
4. Optionally, scoring can be enabled by setting the environment variable PSS_ENABLE to true and providing the scoring service URL in PSS_BASE_URL

```bash
export PSS_ENABLE=1
export PSS_BASE_URL=<the URL of the deployed PaNOSC scoring service>
```

([PaNOSC Search Score](https://github.com/panosc-eu/panosc-search-scoring))
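
Once enabled, a free-text `query` can be added to a dataset filter and the returned items carry a `score` property. A minimal sketch, assuming the API runs on localhost:3000 with the default LoopBack REST root and the example data loaded:

```bash
curl -G 'http://localhost:3000/api/Datasets' \
  --data-urlencode 'filter={"query":"neutron scattering"}'
```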

5. Optionally enable PaNET ontology fetching from an external URL (if this step is skipped no PaNET ontology is used)

```bash
export PANET_BASE_URL=<the URL of the deployed pan-ontologies-api service>
```

([pan-ontologies-api source code and container](https://github.com/ExPaNDS-eu/pan-ontologies-api))

5. Try out the API using the example queries, either through http://localhost:3000/explorer or Curl.
6. Try out the API using the example queries, either through http://localhost:3000/explorer or Curl.

- [Dataset Example Queries](./doc/dataset-example-queries.md)
- [Document Example Queries](./doc/document-example-queries.md)
- [Instrument Example Queries](./doc/instrument-example-queries.md)


## Acceptance and integration tests

3 changes: 2 additions & 1 deletion common/mixins/panet.js
@@ -20,7 +20,7 @@ class Panet {
async panet(techniqueLoopbackWhere) {

console.log(">>> Panet.panet: panet requested");
console.log(" - where filter : ", techniqueLoopbackWhere);
console.log(" - original filter : ", techniqueLoopbackWhere);

const res = await superagent
.get(this.panetUrl)
@@ -31,6 +31,7 @@
() => true,
(obj) => (obj.panetId = obj.pid, delete obj.pid)
);
console.log(" - expanded filter : ", resJSON);
return resJSON
}
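
The map step above renames each technique's `pid` key to `panetId` in place; a small illustration of that comma-operator expression on a hypothetical response object:

```js
// hypothetical technique entry as returned by the ontology service
const obj = { pid: "http://purl.org/pan-science/PaNET/PaNET01189" };
(obj.panetId = obj.pid, delete obj.pid);
console.log(obj); // { panetId: "http://purl.org/pan-science/PaNET/PaNET01189" }
```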

55 changes: 50 additions & 5 deletions common/mixins/score.js
@@ -1,9 +1,54 @@

const PSSService = require("../pss-service");
const modelsWithScore = ["Dataset","Document"];

module.exports = (Model, options) => {

const pssScoreService = new PSSService.Score();
  // env vars are strings: accept "1"/"true" (so PSS_ENABLE=0 does not enable scoring)
  const pssScoreEnabled = ["1", "true"].includes((process.env.PSS_ENABLE || "").toLowerCase());

// Set score property
Model.afterRemote('find', (ctx, result, next) => {
ctx.result.forEach((instance) => {
instance.score = 0;
});
next();
Model.afterRemote('find', async (ctx, result, next) => {
// check if we received a query
console.log("Filter : " + JSON.stringify(ctx.args));
    // extract the free-text query from the loopback filter, if present
    const query = (ctx.args.filter && typeof ctx.args.filter === "object")
      ? ctx.args.filter.query || null
      : null;
console.log("Requested query : " + query);
// check if we are working with Datasets and Documents
    // check if we are working with a scored model (Dataset or Document)
    const [requestedModel, operation] = ctx.methodString.split(".");
    const modelWithScore = modelsWithScore.includes(requestedModel);
// check scoring is enabled and we are working with Datasets and Documents
    // score only when scoring is enabled, a query was given and the model supports it
    if (query && pssScoreEnabled && modelWithScore && operation === "find") {
      // extract the ids of the items returned by SciCat and request their scores
      const itemIds = ctx.result.map((i) => i.pid);
      const scores = await pssScoreService.score(query, itemIds, requestedModel);
      ctx.result.forEach((instance) => {
        // items unknown to the scoring service default to 0
        instance.score = scores[instance.pid] || 0;
      });
    }
    else {
      ctx.result.forEach((instance) => {
        instance.score = 0;
      });
    }
    // async hook: LoopBack accepts the returned promise, so next() is not called
});
};
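
A runnable sketch of the score-assignment step in isolation; the score value is hypothetical, the pids come from data/db.json:

```js
// scores as returned by pss-service: a map from item id to score
const scores = { "20.500.12269/panosc-dataset1": 0.83 };
const result = [
  { pid: "20.500.12269/panosc-dataset1" },
  { pid: "20.500.12269/panosc-dataset2" },
];
result.forEach((instance) => {
  // items unknown to the scoring service default to 0
  instance.score = scores[instance.pid] || 0;
});
console.log(result); // dataset1 gets 0.83, dataset2 falls back to 0
```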
45 changes: 45 additions & 0 deletions common/pss-service.js
@@ -0,0 +1,45 @@
"use strict";

const superagent = require("superagent");

const baseUrl = process.env.PSS_BASE_URL || "http://localhost:8000";

exports.Score = class {

constructor() {
this.pssScoreUrl = baseUrl + "/score";
}

  /**
   * Request scoring from the PSS subsystem
   * @param {string} query plain-English query used to score the entries
   * @param {string[]} itemIds ids of the items to be scored
   * @param {string} group type of items the scoring is requested for
   * @param {number} limit maximum number of items returned (-1 for no limit)
   * @returns {Object.<string, number>} map from item id to its score
   */
async score(query, itemIds, group = "default", limit = -1) {

console.log(">>> Score.score: score requested");
console.log(" - query : ", query);
console.log(" - number of items : ", itemIds.length);
console.log(" - group : ", group);
console.log(" - limit : ", limit);

const res = await superagent
.post(this.pssScoreUrl)
.send({
query: query,
itemIds: itemIds,
group: group,
limit: limit
});

const jsonRes = JSON.parse(res.text);

const scores = Object.assign({}, ...jsonRes.scores.map((i) => ({ [i.itemId]: i.score })));

return scores;
}

};
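
A usage sketch for the class above; the query and item id are illustrative, and PSS_BASE_URL must point at a running scoring service:

```js
const PSSService = require("./common/pss-service");

const scorer = new PSSService.Score();
scorer
  .score("neutron scattering", ["20.500.12269/panosc-dataset1"], "Dataset")
  .then((scores) => console.log(scores));
// expected shape: { "20.500.12269/panosc-dataset1": <score> }
```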
12 changes: 10 additions & 2 deletions data/db.json
@@ -41,7 +41,11 @@
"1": "{\"id\":1,\"datasetId\":\"20.500.12269/panosc-dataset1\",\"techniqueId\":\"20.500.12269/panosc-tech1\"}",
"2": "{\"id\":2,\"datasetId\":\"20.500.12269/panosc-dataset2\",\"techniqueId\":\"20.500.12269/panosc-tech1\"}",
"3": "{\"id\":3,\"datasetId\":\"20.500.12269/panosc-dataset3\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}",
"4": "{\"id\":4,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}"
"4": "{\"id\":4,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}",
"5": "{\"id\":5,\"datasetId\":\"20.500.12269/panosc-dataset1\",\"techniqueId\":\"20.500.12269/panosc-tech3\"}",
"6": "{\"id\":6,\"datasetId\":\"20.500.12269/panosc-dataset2\",\"techniqueId\":\"20.500.12269/panosc-tech4\"}",
"7": "{\"id\":7,\"datasetId\":\"20.500.12269/panosc-dataset3\",\"techniqueId\":\"20.500.12269/panosc-tech5\"}",
"8": "{\"id\":8,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech6\"}"
},
"Document": {
"10.5072/panosc-document1": "{\"pid\":\"10.5072/panosc-document1\",\"isPublic\":true,\"type\":\"publication\",\"title\":\"PaNOSC Test Publication\"}",
@@ -101,7 +105,11 @@
},
"Technique": {
"20.500.12269/panosc-tech1": "{\"pid\":\"20.500.12269/panosc-tech1\",\"name\":\"small-angle neutron scattering\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01189\"}",
"20.500.12269/panosc-tech2": "{\"pid\":\"20.500.12269/panosc-tech2\",\"name\":\"x-ray absorption\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01227\"}"
"20.500.12269/panosc-tech2": "{\"pid\":\"20.500.12269/panosc-tech2\",\"name\":\"x-ray absorption\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01227\"}",
"20.500.12269/panosc-tech3": "{\"pid\":\"20.500.12269/panosc-tech3\",\"name\":\"inelastic x-ray small angle scattering\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01281\"}",
"20.500.12269/panosc-tech4": "{\"pid\":\"20.500.12269/panosc-tech4\",\"name\":\"cold neutron spectroscopy\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01246\"}",
"20.500.12269/panosc-tech5": "{\"pid\":\"20.500.12269/panosc-tech5\",\"name\":\"thermal neutron scpectroscopy\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01247\"}",
"20.500.12269/panosc-tech6": "{\"pid\":\"20.500.12269/panosc-tech6\",\"name\":\"incoherent scattering\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01033\"}"
},
"User": {}
}
28 changes: 28 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,28 @@
version: '3.9'

services:
pss-db:
image: bitnami/mongodb:latest
networks:
- panosc-search-api
pss-scoring:
image: nitrosx71/panosc-search-scoring:v1.0-beta-4
depends_on:
- pss-db
environment:
PSS_DEBUG: 1
PSS_MONGODB_URL: mongodb://pss-db:27017
PSS_DATABASE: panosc_test
PSS_DEPLOYMENT: "PaNOSC Search Api - Reference Implementation"
ports:
- 8000:8000
networks:
- panosc-search-api
techniques:
image: ghcr.io/expands-eu/pan-ontologies-api:latest
ports:
- 8001:3000

networks:
panosc-search-api:
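
The stack can be brought up on its own before starting the API, which is what test_locally.bash below does first; PSS is then reachable on port 8000 and the pan-ontologies-api on port 8001:

```bash
docker-compose up -d --remove-orphans
```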

1 change: 1 addition & 0 deletions test/dataset.test.js
@@ -67,6 +67,7 @@ describe('Dataset', () => {
if (err) throw err;

expect(res.body).to.be.an('array');
console.log(res.body);
expect(res.body.length).to.equal(2);
res.body.forEach((dataset) => {
expect(dataset).to.have.property('pid');
123 changes: 123 additions & 0 deletions test_locally.bash
@@ -0,0 +1,123 @@
#!/bin/bash
#
# Script to ingest the data/db.json file and populate the scoring service
# and test the reference implementation locally with scoring and techniques
#

DATA_FILE="data/db.json"

PSS_BASE_URL="http://localhost:8000"
DATASET_URL="${PSS_BASE_URL}/items"
COMPUTE_URL="${PSS_BASE_URL}/compute"
TERMS_URL="${PSS_BASE_URL}/terms"
WEIGHTS_URL="${PSS_BASE_URL}/weights"

PANET_BASE_URL="http://localhost:8001"

IFS=$'\n';


clear
echo "Starting docker containers..."
docker-compose up -d --remove-orphans

echo -e "\n\n"
echo "Waiting for services to be available..."
res=`curl -s -o /dev/null -w"%{http_code}" -X GET -I ${PSS_BASE_URL}`
echo "Http status code : ${res}"
until [ "-${res}-" == "-200-" ]; do
echo "Sleeping for 5 seconds"
sleep 5
res=`curl -s -o /dev/null -w"%{http_code}" -X GET -I ${PSS_BASE_URL}`
echo "Http status code : ${res}"
done
echo "Services ready"



echo -e "\n\n"
echo "Inserting Datasets score information..."
for dataset in `jq '.models.Dataset[]' ${DATA_FILE}`; do
echo "------------"
temp1=`echo ${dataset:1:-1} | sed 's#\\\"#\"#g'`  # strip the surrounding quotes and unescape
pid=`echo $temp1 | jq -r '.pid'`  # -r emits the raw string, no quotes to strip
echo "Pid ${pid}"
title=`echo $temp1 | jq -r '.title'`
echo "Title ${title}"
techniques=`echo $temp1 | jq '.techniques' | jq . -c`
echo "Techniques ${techniques}"


data='{"id":"'${pid}'", "group":"Dataset", "fields":{ "title":"'${title}'", "techniques":'${techniques}'} }'
echo "Command =>curl -X POST -i -L -H \"Content-Type: application/json\" -d '${data}' ${DATASET_URL}<="
curl \
-X POST \
-i -L \
-H "Content-Type: application/json" \
-d "${data}" \
${DATASET_URL}
res=$?
echo -e "\n\n"
echo "Result =>${res}<="
done
echo "...Datasets score information inserted"


echo "Inserting Documents score information..."
for document in `jq '.models.Document[]' ${DATA_FILE}`; do
echo "------------"
temp1=`echo ${document:1:-1} | sed 's#\\\"#\"#g'`  # strip the surrounding quotes and unescape
pid=`echo $temp1 | jq -r '.pid'`
echo "Pid ${pid}"
title=`echo $temp1 | jq -r '.title'`
echo "Title ${title}"
type=`echo $temp1 | jq -r '.type'`
echo "Type ${type}"


data='{"id":"'${pid}'", "group":"Document", "fields":{ "title":"'${title}'", "type":"'${type}'"} }'
echo "Command =>curl -X POST -i -L -H \"Content-Type: application/json\" -d '${data}' ${DATASET_URL}<="
curl \
-X POST \
-i -L \
-H "Content-Type: application/json" \
-d "${data}" \
${DATASET_URL}
res=$?
echo -e "\n\n"
echo "Result =>${res}<="
done
echo "...Document score information inserted"
echo -e "\n"

echo "Triggering weights computation..."
echo "Command =>curl -X POST -i -L ${COMPUTE_URL}<="
curl -X POST -i -L ${COMPUTE_URL}
echo -e "\n"

res=`curl -X GET -L ${COMPUTE_URL} 2>/dev/null | jq . | grep progressPercent | sed "s#[ ,]##g" | cut -d: -f2`
while [ "-${res}-" == "-1-" ]; do
echo "Computation still running... Sleeping for 5 seconds"
sleep 5
res=`curl -X GET -L ${COMPUTE_URL} 2>/dev/null | jq . | grep progressPercent | sed "s#[ ,]##g" | cut -d: -f2`
echo -e "\n"
done
echo "Completed weights computation..."
echo -e "\n"

number_of_terms=`curl -X GET -L ${TERMS_URL} 2>/dev/null | jq . | grep "term" | wc -l`
number_of_weights=`curl -X GET -L ${WEIGHTS_URL} 2>/dev/null | jq . | grep "_id" | wc -l`
echo "Number of terms extracted : " ${number_of_terms}
echo "number of weights computed : " ${number_of_weights}
echo -e "\n\n"

echo "Starting PaNOSC search API - reference implementation"
export PSS_ENABLE=1
export PSS_BASE_URL=${PSS_BASE_URL}
export PANET_BASE_URL=${PANET_BASE_URL}
npm start