Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):

  * Adds .github/workflows/update-json-assets.yaml: on push to the workflow file,
    on manual dispatch, and monthly via cron, it builds the project, runs every
    out/downloader/*.js except the nginx_syntax one, stages assets/**/*.json,
    writes a diff summary to the step summary and step output, and uploads the
    JSON assets as an artifact. The create-pull-request step is commented out.
  * Adds turndown-plugin-gfm ^1.0.2 to package.json and yarn.lock.
  * Adds a "pt" entry to httpHeadersWikiURLs and a matching "pt" block in
    src/downloader/http_headers.ts; renames handleEnglishRow to handleGermanRow
    in the "de" block.
  * Removes commented-out and debug console.log calls from
    src/downloader/lua_openresty.ts.
  * Raises the expected "ul.compact" list count from 6 to 7 in
    src/downloader/nginx_directives.ts.

  NOTE(review): leading whitespace inside the hunks was reconstructed assuming
  2-space indentation. Confirm against the repository, or apply with
  "git apply --ignore-whitespace".
  NOTE(review): the "pt" block selects "#Campos_de_resposta" (Portuguese for
  "response fields") but labels its assertions "request fields" and emits
  ManifestItemType.HttpReqHeader. Confirm which section is intended.
  NOTE(review): "handlePortgueseRow" looks like a misspelling of
  "handlePortugueseRow"; it is kept so the patch matches its index hashes.
  NOTE(review): "Cheerio" appears without a type argument on both the removed
  and added lines. Confirm against the repository.

diff --git a/.github/workflows/update-json-assets.yaml b/.github/workflows/update-json-assets.yaml
new file mode 100644
index 0000000..e465515
--- /dev/null
+++ b/.github/workflows/update-json-assets.yaml
@@ -0,0 +1,86 @@
+name: update json assets
+on:
+  push:
+    paths:
+      - .github/workflows/update-json-assets.yaml
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+  schedule:
+    # runs on the first day of the month at 04:05AM (UTC)
+    - cron: "5 4 1 * *"
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+concurrency:
+  group: "updater"
+  cancel-in-progress: false
+
+jobs:
+  update-assets:
+    runs-on: ubuntu-latest
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: latest
+          cache: yarn
+      - name: Install
+        run: yarn install
+      - name: Build
+        run: yarn run build
+      - name: run all downloaders
+        run: |
+          ls -lisha out/downloader/
+          for i in out/downloader/*.js; do
+            # skipping the syntax downloader
+            if [[ $i != *"nginx_syntax"* ]]; then
+              echo "executing 'node $i' ..."
+              node "$i" || true
+            fi
+          done
+
+          git add assets/**/*.json
+          git status
+
+      - name: creating the pull request body
+        id: pr-body
+        run: |
+          json_path="assets/**/*.json"
+
+          # capturing git diff stats and removing whitespace
+          diff_shortstat=$(git diff --staged --shortstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g')
+          diff_numstat=$(git diff --staged --numstat "$json_path" | sed -re 's/^[[:blank:]]+|[[:blank:]]+$//g' -e 's/[[:blank:]]+/ /g')
+
+          pr_body="This pull request has been generated **automatically** to **update** the following files:\n\n\`\`\`bash\n$diff_shortstat\n\n$diff_numstat\n\`\`\`\n"
+
+          echo -e "\n----------------------\nThe pull request body:\n----------------------\n"
+          echo -e "$pr_body"
+
+          echo -e "$pr_body" >> $GITHUB_STEP_SUMMARY
+
+          echo 'pr_body<<EOF' >> $GITHUB_OUTPUT
+          echo -e "$pr_body" >> $GITHUB_OUTPUT
+          echo 'EOF' >> $GITHUB_OUTPUT
+      - uses: actions/upload-artifact@v4
+        with:
+          name: json-assets
+          path: |
+            assets/**/*.json
+          retention-days: 3
+      # - name: Create Pull Request
+      #   id: cpr
+      #   uses: peter-evans/create-pull-request@v6
+      #   with:
+      #     token: ${{ secrets.GITHUB_TOKEN }}
+      #     add-paths: |
+      #       assets/**/*.json
+      #     commit-message: "chore(assets): update json assets"
+      #     branch: update-json-assets
+      #     delete-branch: true
+      #     base: main
+      #     title: "Update JSON files in assets/"
+      #     body: ${{ steps.pr-body.outputs.pr_body }}
+      #     labels: dependencies
+      #     draft: false
diff --git a/package.json b/package.json
index ccfc5be..a443bd8 100644
--- a/package.json
+++ b/package.json
@@ -72,6 +72,7 @@
     "rimraf": "^5",
     "swc-loader": "^0.2.6",
     "turndown": "^7.1.2",
+    "turndown-plugin-gfm": "^1.0.2",
     "typescript": "^5",
     "webpack": "^5",
     "webpack-cli": "^5"
diff --git a/src/downloader/config_url.ts b/src/downloader/config_url.ts
index 484112d..9d096b7 100644
--- a/src/downloader/config_url.ts
+++ b/src/downloader/config_url.ts
@@ -28,6 +28,7 @@ export const httpHeadersWikiURLs = {
   de: 'https://de.wikipedia.org/wiki/Liste_der_HTTP-Headerfelder',
   en: 'https://en.wikipedia.org/wiki/List_of_HTTP_header_fields',
   es: 'https://es.wikipedia.org/wiki/Anexo:Cabeceras_HTTP',
+  pt: 'https://pt.wikipedia.org/wiki/Lista_de_campos_de_cabe%C3%A7alho_HTTP',
   'zh-Hans': 'https://zh.wikipedia.org/zh-cn/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5',
   'zh-Hant-HK': 'https://zh.wikipedia.org/zh-hk/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5',
   'zh-Hant-TW': 'https://zh.wikipedia.org/zh-tw/HTTP%E5%A4%B4%E5%AD%97%E6%AE%B5',
diff --git a/src/downloader/http_headers.ts b/src/downloader/http_headers.ts
index 66297d5..cbf687e 100644
--- a/src/downloader/http_headers.ts
+++ b/src/downloader/http_headers.ts
@@ -249,7 +249,7 @@ async function main() {
     const output = new JsonFileWriter(manifestFiles.httpHeaders("de"));
     const html = await getText("de", baseUrl);
     const $ = loadHtml(html);
-    const handleEnglishRow = ($row: Cheerio, type: ManifestItemType) => {
+    const handleGermanRow = ($row: Cheerio, type: ManifestItemType) => {
       const $cols = $row.find("td");
       if ($cols.length === 0) return;
       const headerNames = normalizeHeaderName($cols.eq(0).text());
@@ -269,7 +269,39 @@ async function main() {
       const $rows = element.find("tr");
       for (let row = 0; row < $rows.length; row++) {
         const $row = $rows.eq(row);
-        handleEnglishRow($row, ManifestItemType.HttpReqHeader);
+        handleGermanRow($row, ManifestItemType.HttpReqHeader);
+      }
+    }
+    output.close();
+  }
+
+  // pt
+  {
+    const baseUrl = httpHeadersWikiURLs.pt;
+    const output = new JsonFileWriter(manifestFiles.httpHeaders("pt"));
+    const html = await getText("pt", baseUrl);
+    const $ = loadHtml(html);
+    const handlePortgueseRow = ($row: Cheerio, type: ManifestItemType) => {
+      const $cols = $row.find("td");
+      if ($cols.length === 0) return;
+      const headerNames = normalizeHeaderName($cols.eq(0).text());
+      const description = getDescriptionMarkdown($cols.eq(1), baseUrl);
+      if (!description) print.warn(`header ${headerNames[0]} has no description`);
+      for (let j = 0; j < headerNames.length; j++) {
+        const headerName = headerNames[j];
+        output.writeItem(j === 0 ? [type, headerName, description] : [type, headerName, -1]);
+      }
+    };
+
+    const $reqH2 = $("h2 #Campos_de_resposta");
+    assertLength("request fields h2", $reqH2, 1);
+    const $tables = getNextTables($reqH2.parent(), "h2");
+    assertLength("request fields table", $tables, 2);
+    for (const element of $tables) {
+      const $rows = element.find("tr");
+      for (let row = 0; row < $rows.length; row++) {
+        const $row = $rows.eq(row);
+        handlePortgueseRow($row, ManifestItemType.HttpReqHeader);
       }
     }
     output.close();
diff --git a/src/downloader/lua_openresty.ts b/src/downloader/lua_openresty.ts
index b9ed80e..55a1b18 100755
--- a/src/downloader/lua_openresty.ts
+++ b/src/downloader/lua_openresty.ts
@@ -145,14 +145,9 @@ function processDirectiveElement(
     item.desc = item.desc || character;
     item.notes.push(character);
 
-    // console.log(temp.toString());
-    // console.log(" --- --- --- --- ")
-
     docsHTML += temp.toString();
   }
 
-  // console.log(docsHTML);
-
   if (item.def.startsWith("no")) {
     item.def = directiveName + " ;";
   }
@@ -184,11 +179,6 @@ function processDirectiveElement(
     {},
   ]);
 
-  if (directiveName == "server_rewrite_by_lua_block") {
-    // TODO: needs to be changed remove SVG objects and add required contents (!)
-    console.log(docsHTML);
-  }
-
   detailsStream.writeItem([
     ManifestItemType.DirectiveDetails,
     directiveName,
diff --git a/src/downloader/nginx_directives.ts b/src/downloader/nginx_directives.ts
index c76924e..48887d4 100755
--- a/src/downloader/nginx_directives.ts
+++ b/src/downloader/nginx_directives.ts
@@ -40,7 +40,7 @@ async function main() {
   assertLength(`document page title "${titleShouleBe}"`, $title, 1);
 
   const directiveLists = $title.parent().nextAll("ul.compact");
-  assertLength("length(ul.compact)", directiveLists, 6);
+  assertLength("length(ul.compact)", directiveLists, 7);
 
   const modules: Array<{ moduleName: string; moduleIndex: number; uri: string }> = [];
   directiveLists.each((i, list) => {
diff --git a/yarn.lock b/yarn.lock
index 5a2e953..328354a 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2929,6 +2929,11 @@ tunnel@0.0.6:
   resolved "https://registry.yarnpkg.com/tunnel/-/tunnel-0.0.6.tgz#72f1314b34a5b192db012324df2cc587ca47f92c"
   integrity sha512-1h/Lnq9yajKY2PEbBadPXj3VxsDDu844OnaAo52UVmIzIvwwtBPIuNvkjuzBlTWpfJyUbG3ez0KSBibQkj4ojg==
 
+turndown-plugin-gfm@^1.0.2:
+  version "1.0.2"
+  resolved "https://registry.yarnpkg.com/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz#6f8678a361f35220b2bdf5619e6049add75bf1c7"
+  integrity sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg==
+
 turndown@^7.1.2:
   version "7.1.3"
   resolved "https://registry.yarnpkg.com/turndown/-/turndown-7.1.3.tgz#2890eb76c603e66bf0c9e91526582b563065c57d"