Scoring integration #66

Open · wants to merge 4 commits into master
15 changes: 12 additions & 3 deletions README.md
@@ -34,19 +34,28 @@
npm start
```

4. Optionally enable PaNET ontology fetching from an external URL (if this step is skipped no PaNET ontology is used)
4. Optionally, scoring can be enabled by setting the environment variable PSS_ENABLE to true and providing the scoring service URL in PSS_BASE_URL

```bash
export PSS_ENABLE=1
export PSS_BASE_URL=<the URL of the deployed PaNOSC scoring service>
```

([PaNOSC Search Score](https://github.com/panosc-eu/panosc-search-scoring))
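
Once enabled, a free-text `query` can be added to a dataset filter and the returned items carry a `score` property. A minimal sketch, assuming the API runs on localhost:3000 with the default LoopBack REST root and the example data loaded:

```bash
curl -G 'http://localhost:3000/api/Datasets' \
  --data-urlencode 'filter={"query":"neutron scattering"}'
```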

5. Optionally enable PaNET ontology fetching from an external URL (if this step is skipped no PaNET ontology is used)

```bash
export PANET_BASE_URL=<the URL of the deployed pan-ontologies-api service>
```

([pan-ontologies-api source code and container](https://github.com/ExPaNDS-eu/pan-ontologies-api))

5. Try out the API using the example queries, either through http://localhost:3000/explorer or Curl.
6. Try out the API using the example queries, either through http://localhost:3000/explorer or Curl.

- [Dataset Example Queries](./doc/dataset-example-queries.md)
- [Document Example Queries](./doc/document-example-queries.md)
- [Instrument Example Queries](./doc/instrument-example-queries.md)


## Acceptance and integration tests

3 changes: 2 additions & 1 deletion common/mixins/panet.js
@@ -20,7 +20,7 @@ class Panet {
async panet(techniqueLoopbackWhere) {

console.log(">>> Panet.panet: panet requested");
console.log(" - where filter : ", techniqueLoopbackWhere);
console.log(" - original filter : ", techniqueLoopbackWhere);

const res = await superagent
.get(this.panetUrl)
@@ -31,6 +31,7 @@
() => true,
(obj) => (obj.panetId = obj.pid, delete obj.pid)
);
console.log(" - expanded filter : ", resJSON);
return resJSON
}
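
The map step above renames each technique's `pid` key to `panetId` in place; a small illustration of that comma-operator expression on a hypothetical response object:

```js
// hypothetical technique entry as returned by the ontology service
const obj = { pid: "http://purl.org/pan-science/PaNET/PaNET01189" };
(obj.panetId = obj.pid, delete obj.pid);
console.log(obj); // { panetId: "http://purl.org/pan-science/PaNET/PaNET01189" }
```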

55 changes: 50 additions & 5 deletions common/mixins/score.js
@@ -1,9 +1,54 @@

const PSSService = require("../pss-service");
const modelsWithScore = ["Dataset","Document"];

module.exports = (Model, options) => {

const pssScoreService = new PSSService.Score();
  // env vars are strings: accept "1"/"true" (so PSS_ENABLE=0 does not enable scoring)
  const pssScoreEnabled = ["1", "true"].includes((process.env.PSS_ENABLE || "").toLowerCase());

// Set score property
Model.afterRemote('find', (ctx, result, next) => {
ctx.result.forEach((instance) => {
instance.score = 0;
});
next();
Model.afterRemote('find', async (ctx, result, next) => {
// check if we received a query
console.log("Filter : " + JSON.stringify(ctx.args));
    // extract the free-text query from the loopback filter, if present
    const query = (ctx.args.filter && typeof ctx.args.filter === "object")
      ? ctx.args.filter.query || null
      : null;
console.log("Requested query : " + query);
// check if we are working with Datasets and Documents
    // check if we are working with a scored model (Dataset or Document)
    const [requestedModel, operation] = ctx.methodString.split(".");
    const modelWithScore = modelsWithScore.includes(requestedModel);
// check scoring is enabled and we are working with Datasets and Documents
    // score only when scoring is enabled, a query was given and the model supports it
    if (query && pssScoreEnabled && modelWithScore && operation === "find") {
      // extract the ids of the items returned by SciCat and request their scores
      const itemIds = ctx.result.map((i) => i.pid);
      const scores = await pssScoreService.score(query, itemIds, requestedModel);
      ctx.result.forEach((instance) => {
        // items unknown to the scoring service default to 0
        instance.score = scores[instance.pid] || 0;
      });
    }
    else {
      ctx.result.forEach((instance) => {
        instance.score = 0;
      });
    }
    // async hook: LoopBack accepts the returned promise, so next() is not called
});
};
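
A runnable sketch of the score-assignment step in isolation; the score value is hypothetical, the pids come from data/db.json:

```js
// scores as returned by pss-service: a map from item id to score
const scores = { "20.500.12269/panosc-dataset1": 0.83 };
const result = [
  { pid: "20.500.12269/panosc-dataset1" },
  { pid: "20.500.12269/panosc-dataset2" },
];
result.forEach((instance) => {
  // items unknown to the scoring service default to 0
  instance.score = scores[instance.pid] || 0;
});
console.log(result); // dataset1 gets 0.83, dataset2 falls back to 0
```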
45 changes: 45 additions & 0 deletions common/pss-service.js
@@ -0,0 +1,45 @@
"use strict";

const superagent = require("superagent");

const baseUrl = process.env.PSS_BASE_URL || "http://localhost:8000";

exports.Score = class {

constructor() {
this.pssScoreUrl = baseUrl + "/score";
}

  /**
   * Request scoring from the PSS subsystem
   * @param {string} query plain-English query used to score the entries
   * @param {string[]} itemIds ids of the items to be scored
   * @param {string} group type of items the scoring is requested for
   * @param {number} limit maximum number of items returned (-1 for no limit)
   * @returns {Object.<string, number>} map from item id to its score
   */
async score(query, itemIds, group = "default", limit = -1) {

console.log(">>> Score.score: score requested");
console.log(" - query : ", query);
console.log(" - number of items : ", itemIds.length);
console.log(" - group : ", group);
console.log(" - limit : ", limit);

const res = await superagent
.post(this.pssScoreUrl)
.send({
query: query,
itemIds: itemIds,
group: group,
limit: limit
});

const jsonRes = JSON.parse(res.text);

const scores = Object.assign({}, ...jsonRes.scores.map((i) => ({ [i.itemId]: i.score })));

return scores;
}

};
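
A usage sketch for the class above; the query and item id are illustrative, and PSS_BASE_URL must point at a running scoring service:

```js
const PSSService = require("./common/pss-service");

const scorer = new PSSService.Score();
scorer
  .score("neutron scattering", ["20.500.12269/panosc-dataset1"], "Dataset")
  .then((scores) => console.log(scores));
// expected shape: { "20.500.12269/panosc-dataset1": <score> }
```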
12 changes: 10 additions & 2 deletions data/db.json
@@ -41,7 +41,11 @@
"1": "{\"id\":1,\"datasetId\":\"20.500.12269/panosc-dataset1\",\"techniqueId\":\"20.500.12269/panosc-tech1\"}",
"2": "{\"id\":2,\"datasetId\":\"20.500.12269/panosc-dataset2\",\"techniqueId\":\"20.500.12269/panosc-tech1\"}",
"3": "{\"id\":3,\"datasetId\":\"20.500.12269/panosc-dataset3\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}",
"4": "{\"id\":4,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}"
"4": "{\"id\":4,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech2\"}",
"5": "{\"id\":5,\"datasetId\":\"20.500.12269/panosc-dataset1\",\"techniqueId\":\"20.500.12269/panosc-tech3\"}",
"6": "{\"id\":6,\"datasetId\":\"20.500.12269/panosc-dataset2\",\"techniqueId\":\"20.500.12269/panosc-tech4\"}",
"7": "{\"id\":7,\"datasetId\":\"20.500.12269/panosc-dataset3\",\"techniqueId\":\"20.500.12269/panosc-tech5\"}",
"8": "{\"id\":8,\"datasetId\":\"20.500.12269/panosc-dataset4\",\"techniqueId\":\"20.500.12269/panosc-tech6\"}"
},
"Document": {
"10.5072/panosc-document1": "{\"pid\":\"10.5072/panosc-document1\",\"isPublic\":true,\"type\":\"publication\",\"title\":\"PaNOSC Test Publication\"}",
@@ -101,7 +105,11 @@
},
"Technique": {
"20.500.12269/panosc-tech1": "{\"pid\":\"20.500.12269/panosc-tech1\",\"name\":\"small-angle neutron scattering\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01189\"}",
"20.500.12269/panosc-tech2": "{\"pid\":\"20.500.12269/panosc-tech2\",\"name\":\"x-ray absorption\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01227\"}"
"20.500.12269/panosc-tech2": "{\"pid\":\"20.500.12269/panosc-tech2\",\"name\":\"x-ray absorption\", \"panetId\": \"http://purl.org/pan-science/PaNET/PaNET01227\"}",
"20.500.12269/panosc-tech3": "{\"pid\":\"20.500.12269/panosc-tech3\",\"name\":\"inelastic x-ray small angle scattering\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01281\"}",
"20.500.12269/panosc-tech4": "{\"pid\":\"20.500.12269/panosc-tech4\",\"name\":\"cold neutron spectroscopy\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01246\"}",
"20.500.12269/panosc-tech5": "{\"pid\":\"20.500.12269/panosc-tech5\",\"name\":\"thermal neutron scpectroscopy\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01247\"}",
"20.500.12269/panosc-tech6": "{\"pid\":\"20.500.12269/panosc-tech6\",\"name\":\"incoherent scattering\",\"panetId\":\"http://purl.org/pan-science/PaNET/PaNET01033\"}"
},
"User": {}
}
28 changes: 28 additions & 0 deletions docker-compose.yaml
@@ -0,0 +1,28 @@
version: '3.9'

services:
pss-db:
image: bitnami/mongodb:latest
networks:
- panosc-search-api
pss-scoring:
image: nitrosx71/panosc-search-scoring:v1.0-beta-4
depends_on:
- pss-db
environment:
PSS_DEBUG: 1
PSS_MONGODB_URL: mongodb://pss-db:27017
PSS_DATABASE: panosc_test
PSS_DEPLOYMENT: "PaNOSC Search Api - Reference Implementation"
ports:
- 8000:8000
networks:
- panosc-search-api
techniques:
image: ghcr.io/expands-eu/pan-ontologies-api:latest
ports:
- 8001:3000

networks:
panosc-search-api:
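
The stack can be brought up on its own before starting the API, which is what test_locally.bash below does first; PSS is then reachable on port 8000 and the pan-ontologies-api on port 8001:

```bash
docker-compose up -d --remove-orphans
```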

1 change: 1 addition & 0 deletions test/dataset.test.js
@@ -67,6 +67,7 @@ describe('Dataset', () => {
if (err) throw err;

expect(res.body).to.be.an('array');
console.log(res.body);
expect(res.body.length).to.equal(2);
res.body.forEach((dataset) => {
expect(dataset).to.have.property('pid');
123 changes: 123 additions & 0 deletions test_locally.bash
@@ -0,0 +1,123 @@
#!/bin/bash
#
# Script to ingest the data/db.json file and populate the scoring service
# and test the reference implementation locally with scoring and techniques
#

DATA_FILE="data/db.json"

PSS_BASE_URL="http://localhost:8000"
DATASET_URL="${PSS_BASE_URL}/items"
COMPUTE_URL="${PSS_BASE_URL}/compute"
TERMS_URL="${PSS_BASE_URL}/terms"
WEIGHTS_URL="${PSS_BASE_URL}/weights"

PANET_BASE_URL="http://localhost:8001"

IFS=$'\n';


clear
echo "Starting docker containers..."
docker-compose up -d --remove-orphans

echo -e "\n\n"
echo "Waiting for services to be available..."
res=`curl -s -o /dev/null -w"%{http_code}" -X GET -I ${PSS_BASE_URL}`
echo "Http status code : ${res}"
until [ "-${res}-" == "-200-" ]; do
echo "Sleeping for 5 seconds"
sleep 5
res=`curl -s -o /dev/null -w"%{http_code}" -X GET -I ${PSS_BASE_URL}`
echo "Http status code : ${res}"
done
echo "Services ready"



echo -e "\n\n"
echo "Inserting Datasets score information..."
for dataset in `jq '.models.Dataset[]' ${DATA_FILE}`; do
echo "------------"
temp1=`echo ${dataset:1:-1} | sed 's#\\\"#\"#g'`  # strip the surrounding quotes and unescape
pid=`echo $temp1 | jq -r '.pid'`  # -r emits the raw string, no quotes to strip
echo "Pid ${pid}"
title=`echo $temp1 | jq -r '.title'`
echo "Title ${title}"
techniques=`echo $temp1 | jq '.techniques' | jq . -c`
echo "Techniques ${techniques}"


data='{"id":"'${pid}'", "group":"Dataset", "fields":{ "title":"'${title}'", "techniques":'${techniques}'} }'
echo "Command =>curl -X POST -i -L -H \"Content-Type: application/json\" -d '${data}' ${DATASET_URL}<="
curl \
-X POST \
-i -L \
-H "Content-Type: application/json" \
-d "${data}" \
${DATASET_URL}
res=$?
echo -e "\n\n"
echo "Result =>${res}<="
done
echo "...Datasets score information inserted"


echo "Inserting Documents score information..."
for document in `jq '.models.Document[]' ${DATA_FILE}`; do
echo "------------"
temp1=`echo ${document:1:-1} | sed 's#\\\"#\"#g'`  # strip the surrounding quotes and unescape
pid=`echo $temp1 | jq -r '.pid'`
echo "Pid ${pid}"
title=`echo $temp1 | jq -r '.title'`
echo "Title ${title}"
type=`echo $temp1 | jq -r '.type'`
echo "Type ${type}"


data='{"id":"'${pid}'", "group":"Document", "fields":{ "title":"'${title}'", "type":"'${type}'"} }'
echo "Command =>curl -X POST -i -L -H \"Content-Type: application/json\" -d '${data}' ${DATASET_URL}<="
curl \
-X POST \
-i -L \
-H "Content-Type: application/json" \
-d "${data}" \
${DATASET_URL}
res=$?
echo -e "\n\n"
echo "Result =>${res}<="
done
echo "...Document score information inserted"
echo -e "\n"

echo "Triggering weights computation..."
echo "Command =>curl -X POST -i -L ${COMPUTE_URL}<="
curl -X POST -i -L ${COMPUTE_URL}
echo -e "\n"

res=`curl -X GET -L ${COMPUTE_URL} 2>/dev/null | jq . | grep progressPercent | sed "s#[ ,]##g" | cut -d: -f2`
while [ "-${res}-" == "-1-" ]; do
echo "Computation still running... Sleeping for 5 seconds"
sleep 5
res=`curl -X GET -L ${COMPUTE_URL} 2>/dev/null | jq . | grep progressPercent | sed "s#[ ,]##g" | cut -d: -f2`
echo -e "\n"
done
echo "Completed weights computation..."
echo -e "\n"

number_of_terms=`curl -X GET -L ${TERMS_URL} 2>/dev/null | jq . | grep "term" | wc -l`
number_of_weights=`curl -X GET -L ${WEIGHTS_URL} 2>/dev/null | jq . | grep "_id" | wc -l`
echo "Number of terms extracted : " ${number_of_terms}
echo "number of weights computed : " ${number_of_weights}
echo -e "\n\n"

echo "Starting PaNOSC search API - reference implementation"
export PSS_ENABLE=1
export PSS_BASE_URL=${PSS_BASE_URL}
export PANET_BASE_URL=${PANET_BASE_URL}
npm start