Skip to content

Commit

Permalink
Added markdown news support
Browse files Browse the repository at this point in the history
  • Loading branch information
jfkonecn committed Feb 18, 2025
1 parent 56c09f4 commit 2580677
Show file tree
Hide file tree
Showing 9 changed files with 723 additions and 63 deletions.
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@
"@types/classnames": "^2.2.9",
"@types/enzyme": "3.10.5",
"@types/jest": "25.2.1",
"@types/markdown-it": "^14.1.2",
"@types/node": "13.13.4",
"@types/pluralize": "0.0.29",
"@types/react": "16.9.34",
Expand Down Expand Up @@ -143,6 +144,7 @@
"lint-staged": "8.2.1",
"mini-css-extract-plugin": "0.9.0",
"moment-locales-webpack-plugin": "1.2.0",
"markdown-it": "^14.1.0",
"node-sass": "^4.12.0",
"optimize-css-assets-webpack-plugin": "5.0.3",
"postcss-loader": "3.0.0",
Expand Down Expand Up @@ -190,6 +192,7 @@
"updateAPI": "yarn run fetchAPI && yarn run buildAPI",
"fetchAPI": "curl http://localhost:9095/v2/api-docs | json | grep -v basePath | grep -v termsOfService | grep -v host > src/main/webapp/app/shared/api/generated/API-docs.json",
"buildAPI": "node scripts/generate-api.js src/main/webapp/app/shared/api/generated API",
"buildNewPages": "node scripts/generate-news-sections.js -f src/main/webapp/app/pages/newsPage/markdown -o src/main/webapp/app/pages/newsPage/code-generated && yarn run prettier --write \"{,src/main/webapp/app/pages/newsPage/code-generated/**/}*.{md,json,ts,tsx,css,scss,yml}\"",
"updateOncoKbAPI": "yarn run fetchOncoKbAPI && yarn run buildOncoKbAPI",
"fetchOncoKbAPI": "curl http://localhost:8080/oncokb-public/api/v1/v2/api-docs?group=Private%20APIs | json | grep -v basePath | grep -v termsOfService | grep -v host > src/main/webapp/app/shared/api/generated/OncoKbAPI-docs.json && curl http://localhost:8080/oncokb-public/api/private/v2/api-docs | json | grep -v basePath | grep -v termsOfService | grep -v host > src/main/webapp/app/shared/api/generated/OncoKbPrivateAPI-docs.json",
"buildOncoKbAPI": "node scripts/generate-api.js src/main/webapp/app/shared/api/generated OncoKbAPI OncoKbPrivateAPI",
Expand Down
359 changes: 359 additions & 0 deletions scripts/generate-news-sections.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,359 @@
const fs = require('fs');
const path = require('path');
const MarkdownIt = require('markdown-it');

const oncokbBaseUrls = [
'https://oncokb.org',
'http://oncokb.org',
'https://www.oncokb.org',
'http://www.oncokb.org',
];

const objectPropertyMark = '-------------------';
const objectPropertyMarkRegex = new RegExp(
`"${objectPropertyMark}(.*)${objectPropertyMark}"`,
'gm'
);

/**
* Escapes special characters in a string to be used in a regular expression.
*
* This ensures that characters such as `.*+?^${}()|[]\` are treated as literals.
*
* @param {string} string - The input string to escape.
* @returns {string} - The escaped string safe for use in a regex.
*
* @example
* const escaped = escapeRegExp("Hello (world)!");
* console.log(escaped); // "Hello \\(world\\)!"
*/
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
}

/**
* Replaces all occurrences of a substring in a given string.
*
* Since `String.prototype.replaceAll()` is not available in older Node.js versions,
* this function uses `RegExp` with the global flag.
*
* @param {string} str - The original string.
* @param {string} match - The substring to be replaced.
* @param {string} replacement - The replacement string.
* @returns {string} - The modified string with all occurrences replaced.
*
* @example
* const result = replaceAll("Hello world, world!", "world", "Earth");
* console.log(result); // "Hello Earth, Earth!"
*/
function replaceAll(str, match, replacement) {
return str.replace(new RegExp(escapeRegExp(match), 'g'), () => replacement);
}

/**
* Decodes specific HTML entities in a given string.
*
* This function currently only replaces `"` with `"`.
* Extend this function if more entities need to be decoded.
*
* @param {string} text - The input string containing HTML entities.
* @returns {string} - The decoded string.
*
* @example
* const decoded = decodeHtmlEntities("This is "quoted" text.");
* console.log(decoded); // 'This is "quoted" text.'
*/
function decodeHtmlEntities(text) {
return replaceAll(text, '"', '"');
}

/**
* Fixes HTML-escaped entities in a specific pattern within a string.
*
* This function finds occurrences matching `objectPropertyMarkRegex`,
* extracts the inner content, decodes HTML entities, and wraps it in `{}`.
*
* @param {string} htmlString - The input HTML string to process.
* @returns {string} - The fixed string with decoded entities.
*
* @example
* const fixed = fixHtmlString('"----["Example"]----"');
* console.log(fixed); // '{["Example]"}'
*/
function fixHtmlString(htmlString) {
return htmlString.replace(objectPropertyMarkRegex, (_, group) => {
return `{${decodeHtmlEntities(group)}}`;
});
}

/**
* @param {import('markdown-it')} md - The markdown-it instance used for parsing.
* @param {import('markdown-it').StateCore} state - The state object containing Markdown parsing tokens.
* @returns {boolean | undefined} - The modified string with all occurrences replaced.
*
* @throws {Error} If a mutation is found in a row without an associated gene.
*/
function newlyAddedGenes(md, state) {
let foundNewlyAddedGenes = false;
let index = 0;
let toRemoveStart = -1;
let toRemoveEnd = -1;
const genes = [];
for (const token of state.tokens) {
if (token.content.toLowerCase().includes('new gene')) {
foundNewlyAddedGenes = true;
toRemoveStart = index;
} else if (foundNewlyAddedGenes && token.type === 'bullet_list_close') {
toRemoveEnd = index;
break;
} else if (foundNewlyAddedGenes && token.type === 'inline') {
genes.push(token.content);
}
index++;
}
if (toRemoveStart < 0 && toRemoveEnd < 0) {
return true;
} else if (toRemoveStart < 0 || toRemoveEnd < 0) {
throw new Error(
`Found one remove gene index, but not the other (${toRemoveStart}, ${toRemoveEnd})`
);
}

const alterationPageLinkTags = createMarkdownToken(
md,
'NewlyAddedGenesListItem'
);
alterationPageLinkTags[0].attrSet(
'genes',
`${objectPropertyMark}${JSON.stringify(genes)}${objectPropertyMark}`
);
alterationPageLinkTags[1].tag = 'NewlyAddedGenesListItem';

state.tokens = state.tokens.filter(
(_, idx) => idx < toRemoveStart || idx > toRemoveEnd
);
state.tokens.splice(toRemoveStart, 0, ...alterationPageLinkTags);

return true;
}

/**
* @param {import('markdown-it').StateCore} state - The state object containing Markdown parsing tokens.
* @returns {boolean | undefined} - The modified string with all occurrences replaced.
*
* @throws {Error} If a mutation is found in a row without an associated gene.
*/
function fixLinks(state) {
for (const token of state.tokens) {
if (token.type === 'inline') {
let foundLocalLink = false;
for (const child of token.children) {
if (child.type === 'link_close' && foundLocalLink) {
foundLocalLink = false;
child.tag = 'Link';
continue;
} else if (child.type !== 'link_open') {
continue;
}
const hrefIndex = child.attrIndex('href');
if (hrefIndex < 0) {
continue;
}
const currentUrl = child.attrs[hrefIndex][1];
let replaceUrlString = '';
for (const url of oncokbBaseUrls) {
if (currentUrl.startsWith(url)) {
replaceUrlString = url;
foundLocalLink = true;
break;
}
}
if (foundLocalLink) {
child.tag = 'Link';
const attr = child.attrs[hrefIndex];
attr[0] = 'to';
attr[1] = currentUrl.replace(replaceUrlString, '');
}
}
}
}
}

/**
* @param {import('markdown-it')} md - The markdown-it instance used for token processing.
* @param {import('markdown-it').StateCore} state - The state object containing Markdown parsing tokens.
* @returns {boolean | undefined} - The modified string with all occurrences replaced.
*
* @throws {Error} If a mutation is found in a row without an associated gene.
*/
function addAutoTableLinks(md, state) {
let inTh = false;
let inTd = false;
let columnIdx = 0;
let geneIdx = -1;
let mutationIdx = -1;
let currentGene = '';
for (const token of state.tokens) {
if (token.type === 'th_open') {
inTh = true;
} else if (token.type === 'th_close') {
inTh = false;
columnIdx++;
} else if (token.type === 'td_open') {
inTd = true;
} else if (token.type === 'td_close') {
inTd = false;
columnIdx++;
} else if (token.type === 'tr_open') {
columnIdx = 0;
currentGene = '';
} else if (inTd && columnIdx === geneIdx && token.type === 'inline') {
const child = token.children[0];
currentGene = child.content;
child.content = `{getAlternativeGenePageLinks('${child.content}')}`;
} else if (inTd && columnIdx === mutationIdx && token.type === 'inline') {
const child = token.children[0];
if (currentGene === '') {
throw new Error(`No gene for this row and mutation "${child.content}"`);
}
const alterationPageLinkTags = createMarkdownToken(
md,
'AlterationPageLink'
);
alterationPageLinkTags[0].attrSet('hugoSymbol', currentGene);
alterationPageLinkTags[0].attrSet('alteration', child.content);
token.children = alterationPageLinkTags;
} else if (inTh && token.content === 'Gene') {
geneIdx = columnIdx;
} else if (inTh && token.content === 'Mutation') {
mutationIdx = columnIdx;
}
}
}

/**
* Creates a Markdown token with a specified tag.
*
* @param {import('markdown-it')} md - The markdown-it instance used for parsing.
* @param {string} tag - The tag to set on the first non-text token.
* @returns {import('markdown-it').Token[]} - An array of markdown-it token objects with the modified tag.
*
* @example
* const md = require('markdown-it')();
* const tokens = createMarkdownToken(md, 'custom-tag');
* console.log(tokens);
*/
function createMarkdownToken(md, tag) {
// can't figure out how to make a token on my own so I'm forcing
// markdown-it to make me one
const alterationPageLinkTags = md
.parse('[placeholder](placeholder)', {})[1]
.children.filter(x => x.type !== 'text');
alterationPageLinkTags[0].type = 'para';
alterationPageLinkTags[0].tag = tag;
alterationPageLinkTags[0].attrs = [];
alterationPageLinkTags[1].tag = tag;
return alterationPageLinkTags;
}

const md = new MarkdownIt({
html: true,
linkify: true,
breaks: true,
}).use(md => {
// https://markdown-it.github.io/markdown-it
// https://github.com/markdown-it/markdown-it/blob/master/docs/examples/renderer_rules.md
md.renderer.rules.table_open = function () {
return '<div className="table-responsive">\n<table className="table">';
};

md.renderer.rules.table_close = function () {
return '</table>\n</div>';
};

md.core.ruler.push('add-auto-table-links', state => {
addAutoTableLinks(md, state);
});

md.core.ruler.push('fix-links', state => fixLinks(state));
md.core.ruler.push('fix-styles', state => {
for (const token of state.tokens) {
if (token.attrs != null && token.attrs.length > 0) {
token.attrs = token.attrs.filter(([name]) => name !== 'style');
}
if (token.type === 'table_open') {
token.attrSet('className', 'table');
}
}
return true;
});
md.core.ruler.push('newly-added-genes', state => {
newlyAddedGenes(md, state);
});
});

const args = process.argv.slice(2);
let inputFolder = null;
let outputFolder = null;

for (let i = 0; i < args.length; i++) {
if (args[i] === '-f' && args[i + 1]) {
inputFolder = path.resolve(args[i + 1]);
} else if (args[i] === '-o' && args[i + 1]) {
outputFolder = path.resolve(args[i + 1]);
}
}

if (!inputFolder || !outputFolder) {
console.error(
'Error: Both -f (input folder) and -o (output folder) arguments are required.'
);
process.exit(1);
}

// Ensure input folder exists
if (!fs.existsSync(inputFolder) || !fs.statSync(inputFolder).isDirectory()) {
console.error(
`Error: Input folder "${inputFolder}" does not exist or is not a directory.`
);
process.exit(1);
}

// Ensure output folder exists, or create it
if (!fs.existsSync(outputFolder)) {
fs.mkdirSync(outputFolder, { recursive: true });
}

// Read all Markdown files in the input folder
const files = fs.readdirSync(inputFolder).filter(file => file.endsWith('.md'));

if (files.length === 0) {
console.warn(`Warning: No markdown files found in "${inputFolder}".`);
}

files.forEach(file => {
const filePath = path.join(inputFolder, file);
const content = fs.readFileSync(filePath, 'utf-8');
const htmlContent = fixHtmlString(md.render(content));

const componentName = path
.basename(file, '.md')
.replace(/[^a-zA-Z0-9]/g, '_');

const tsxContent = `import React from 'react';
import { Link } from 'react-router-dom';
import { AlterationPageLink, getAlternativeGenePageLinks } from 'app/shared/utils/UrlUtils';
import { NewlyAddedGenesListItem } from 'app/pages/newsPage/NewlyAddedGenesListItem';
export default function ${componentName}() {
return (
<>
${htmlContent}
</>
);
}`;

const outputFilePath = path.join(outputFolder, `${componentName}.tsx`);
fs.writeFileSync(outputFilePath, tsxContent, 'utf-8');
console.log(`Generated: ${outputFilePath}`);
});
1 change: 1 addition & 0 deletions src/main/webapp/app/config/constants.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,7 @@ export type DataRelease = {
};

export const DATA_RELEASES: DataRelease[] = [
{ date: '02302025', version: 'v4.26' },
{ date: '01302025', version: 'v4.25' },
{ date: '12192024', version: 'v4.24' },
{ date: '11262024', version: 'v4.23' },
Expand Down
Loading

0 comments on commit 2580677

Please sign in to comment.