Skip to content

Commit

Permalink
adds script to ingest example data into Elasticsearch
Browse files Browse the repository at this point in the history
  • Loading branch information
walterra committed Jan 6, 2023
1 parent 6bd55a5 commit 637f72d
Show file tree
Hide file tree
Showing 3 changed files with 205 additions and 0 deletions.
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
],
"scripts": {
"build": "yarn plugin-helpers build",
"ingest-sample-data": "node scripts/ingest_sample_data",
"plugin-helpers": "node ../../scripts/plugin_helpers",
"kbn": "node ../../scripts/kbn",
"test": "yarn jest"
Expand All @@ -36,6 +37,7 @@
"minimist": "^1.2.6"
},
"devDependencies": {
"@elastic/elasticsearch": "^7.17.0",
"@types/d3-collection": "^1.0.7",
"@types/jest": "^26.0.14",
"jest": "^26.6.3",
Expand Down
171 changes: 171 additions & 0 deletions scripts/ingest_sample_data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
const { Client } = require('@elastic/elasticsearch');
const client = new Client({ node: 'http://localhost:9200' });
const dataCovid19 = require('../node_modules/d3-milestones/src/stories/assets/covid19.json');
const dataLotr = require('../node_modules/d3-milestones/src/stories/assets/lotr.json');
const dataMilestonesEvents = require('../node_modules/d3-milestones/src/stories/assets/milestones-events.json');
const dataMilestones = require('../node_modules/d3-milestones/src/stories/assets/milestones.json');
const dataOsCategoryLabels = require('../node_modules/d3-milestones/src/stories/assets/os-category-labels.json');
const dataUltimaSeries = require('../node_modules/d3-milestones/src/stories/assets/ultima-series.json');
const dataVikings = require('../node_modules/d3-milestones/src/stories/assets/vikings.json');

// Date format shared by the mappings whose date field is named `year`.
const YEAR_DATE_FORMAT = 'yyyy||strict_date_optional_time ||epoch_millis';

// Date format shared by the two milestones indices.
const MILESTONES_DATE_FORMAT = "yyyy.MM.dd'T'HH:mm||strict_date_optional_time ||epoch_millis";

/**
 * Flattens the per-system entries into one document per version,
 * copying the owning entry's `system` onto every version document.
 */
function flattenOsVersions(data) {
  return data.flatMap(({ system, versions }) =>
    versions.map((version) => ({ ...version, system }))
  );
}

/**
 * Left-pads every `year` with zeros to at least four characters so that it
 * lines up with the `yyyy` part of the index's date format.
 */
function padVikingYears(data) {
  return data.map((entry) => ({
    ...entry,
    year: String(entry.year).padStart(4, '0'),
  }));
}

/**
 * Sample datasets to ingest. Each entry names the target index, its field
 * mappings, the source documents and, optionally, a `transform` that is
 * applied to the source documents before they are indexed.
 */
const datasets = [
  {
    index: 'kmv-covid-19',
    mappings: {
      properties: {
        date: { type: 'date' },
        title: { type: 'text' },
      },
    },
    data: dataCovid19,
  },
  {
    index: 'kmv-lotr',
    mappings: {
      properties: {
        timestamp: {
          type: 'date',
          format: 'dd.MM.yyyy||strict_date_optional_time ||epoch_millis',
        },
        character: { type: 'keyword' },
        text: { type: 'text' },
      },
    },
    data: dataLotr,
  },
  {
    index: 'kmv-milestones',
    mappings: {
      properties: {
        timestamp: { type: 'date', format: MILESTONES_DATE_FORMAT },
        detail: { type: 'keyword' },
        giturl: { type: 'keyword' },
      },
    },
    data: dataMilestones,
  },
  {
    index: 'kmv-milestones-events',
    mappings: {
      properties: {
        timestamp: { type: 'date', format: MILESTONES_DATE_FORMAT },
        detail: { type: 'keyword' },
      },
    },
    data: dataMilestonesEvents,
  },
  {
    index: 'kmv-os-category-labels',
    mappings: {
      properties: {
        year: { type: 'date', format: YEAR_DATE_FORMAT },
        title: { type: 'keyword' },
        system: { type: 'keyword' },
      },
    },
    data: dataOsCategoryLabels,
    transform: flattenOsVersions,
  },
  {
    index: 'kmv-ultima-series',
    mappings: {
      properties: {
        year: { type: 'date', format: YEAR_DATE_FORMAT },
        cover: { type: 'keyword' },
        title: { type: 'keyword' },
      },
    },
    data: dataUltimaSeries,
  },
  {
    index: 'kmv-vikings',
    mappings: {
      properties: {
        year: { type: 'date', format: YEAR_DATE_FORMAT },
        title: { type: 'text' },
      },
    },
    data: dataVikings,
    transform: padVikingYears,
  },
];

/**
 * Builds a report entry for every bulk item that carries an `error`.
 *
 * @param {Array<object>} items - `items` array of the bulk response; it is in
 *   the same order as the documents that were sent.
 * @param {Array<object>} body - The bulk request body, laid out as alternating
 *   action line / document pairs.
 * @returns {Array<object>} One `{ status, error, operation, document }` entry
 *   per failed item.
 */
function collectBulkErrors(items, body) {
  return items.flatMap((action, i) => {
    const operation = Object.keys(action)[0];
    const { status, error } = action[operation];

    if (!error) {
      return [];
    }

    // If the status is 429 the document can be retried, otherwise it is very
    // likely a mapping error and the document has to be fixed first.
    return [
      {
        status,
        error,
        // Item i was built from the request pair at positions 2i and 2i + 1.
        operation: body[i * 2],
        document: body[i * 2 + 1],
      },
    ];
  });
}

/**
 * Recreates a single sample index and bulk-loads its documents.
 *
 * An existing index of the same name is deleted first. Documents that fail
 * during the bulk request are logged, then the document count of the index is
 * logged.
 *
 * @param {object} dataset
 * @param {string} dataset.index - Name of the index to (re)create.
 * @param {object} dataset.mappings - Mappings passed to the create-index request.
 * @param {Array<object>} dataset.data - Source documents.
 * @param {Function} [dataset.transform] - Optional function that receives
 *   `data` and returns the documents to index.
 * @returns {Promise<void>} Rejects when one of the client requests rejects.
 */
async function run({ index, mappings, data, transform }) {
  const indexExists = await client.indices.exists({ index });

  if (indexExists.body === true) {
    await client.indices.delete({ index });
  }

  // `ignore: [400]` asks the client not to reject on an HTTP 400 response.
  await client.indices.create({ index, body: { mappings } }, { ignore: [400] });

  const transformedData = transform === undefined ? data : transform(data);

  // Every document is preceded by its own action line.
  const body = transformedData.flatMap((doc) => [{ index: { _index: index } }, doc]);

  // A bulk request without any operations is rejected by Elasticsearch, so
  // there is nothing to send when the dataset is empty.
  if (body.length > 0) {
    const { body: bulkResponse } = await client.bulk({ refresh: true, body });

    if (bulkResponse.errors) {
      console.log(collectBulkErrors(bulkResponse.items, body));
    }
  }

  const { body: count } = await client.count({ index });
  console.log(count);
}

// Ingest every dataset. `run` is started for each entry without awaiting the
// previous one, so a failure in one dataset does not stop the others.
datasets.forEach((dataset) => {
  run(dataset).catch((error) => {
    // Report on stderr and mark the process as failed so that the caller of
    // the script can detect an incomplete ingest.
    console.error(`Failed to ingest sample data into "${dataset.index}":`, error);
    process.exitCode = 1;
  });
});
32 changes: 32 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,16 @@
exec-sh "^0.3.2"
minimist "^1.2.0"

"@elastic/elasticsearch@^7.17.0":
version "7.17.0"
resolved "https://registry.yarnpkg.com/@elastic/elasticsearch/-/elasticsearch-7.17.0.tgz#589fb219234cf1b0da23744e82b1d25e2fe9a797"
integrity sha512-5QLPCjd0uLmLj1lSuKSThjNpq39f6NmlTy9ROLFwG5gjyTgpwSqufDeYG/Fm43Xs05uF7WcscoO7eguI3HuuYA==
dependencies:
debug "^4.3.1"
hpagent "^0.1.1"
ms "^2.1.3"
secure-json-parse "^2.4.0"

"@istanbuljs/load-nyc-config@^1.0.0":
version "1.1.0"
resolved "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz"
Expand Down Expand Up @@ -1155,6 +1165,13 @@ debug@^2.2.0, debug@^2.3.3:
dependencies:
ms "2.0.0"

debug@^4.3.1:
version "4.3.4"
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
dependencies:
ms "2.1.2"

decamelize@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/decamelize/-/decamelize-1.2.0.tgz#f6534d15148269b20352e7bee26f501f9a191290"
Expand Down Expand Up @@ -1579,6 +1596,11 @@ hosted-git-info@^2.1.4:
resolved "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz"
integrity sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==

hpagent@^0.1.1:
version "0.1.1"
resolved "https://registry.yarnpkg.com/hpagent/-/hpagent-0.1.1.tgz#66f67f16e5c7a8b59a068e40c2658c2c749ad5e2"
integrity sha512-IxJWQiY0vmEjetHdoE9HZjD4Cx+mYTr25tR7JCxXaiI3QxW0YqYyM11KyZbHufoa/piWhMb2+D3FGpMgmA2cFQ==

html-encoding-sniffer@^2.0.1:
version "2.0.1"
resolved "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-2.0.1.tgz"
Expand Down Expand Up @@ -2495,6 +2517,11 @@ ms@2.1.2:
resolved "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz"
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==

ms@^2.1.3:
version "2.1.3"
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2"
integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==

nanomatch@^1.2.9:
version "1.2.13"
resolved "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz"
Expand Down Expand Up @@ -2968,6 +2995,11 @@ scheduler@^0.19.1:
loose-envify "^1.1.0"
object-assign "^4.1.1"

secure-json-parse@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/secure-json-parse/-/secure-json-parse-2.4.0.tgz#5aaeaaef85c7a417f76271a4f5b0cc3315ddca85"
integrity sha512-Q5Z/97nbON5t/L/sH6mY2EacfjVGwrCcSi5D3btRO2GZ8pf1K1UN7Z9H5J57hjVU2Qzxr1xO+FmBhOvEkzCMmg==

"semver@2 || 3 || 4 || 5", semver@^5.5.0:
version "5.7.1"
resolved "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz"
Expand Down

0 comments on commit 637f72d

Please sign in to comment.