diff --git a/.ci/aptPackagesToInstall.txt b/.ci/aptPackagesToInstall.txt new file mode 100644 index 0000000..e69de29 diff --git a/.ci/pythonPackagesToInstallFromGit.txt b/.ci/pythonPackagesToInstallFromGit.txt new file mode 100644 index 0000000..84ad2e6 --- /dev/null +++ b/.ci/pythonPackagesToInstallFromGit.txt @@ -0,0 +1 @@ +https://github.com/KOLANICH-libs/WordSplitAbs.py diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..843ba14 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +root = true + +[*] +charset = utf-8 +indent_style = tab +indent_size = 4 +insert_final_newline = true +end_of_line = lf + +[*.{yml,yaml,ksy}] +indent_style = space +indent_size = 2 diff --git a/.github/.templateMarker b/.github/.templateMarker new file mode 100644 index 0000000..5e3a3e0 --- /dev/null +++ b/.github/.templateMarker @@ -0,0 +1 @@ +KOLANICH/python_project_boilerplate.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..89ff339 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + allow: + - dependency-type: "all" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..805a383 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,15 @@ +name: CI +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - name: typical python workflow + uses: KOLANICH-GHActions/typical-python-workflow@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d49db2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +__pycache__ +*.pyc +*.pyo +/*.egg-info +/cache +#/mapi_tags.ksy +/mapi_tags.py + +*.srctrlbm +*.srctrldb +build +dist +.eggs +monkeytype.sqlite3 +/.ipynb_checkpoints diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml 
new file mode 100644 index 0000000..6e5ddf8 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,51 @@ +image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest + +variables: + DOCKER_DRIVER: overlay2 + SAST_ANALYZER_IMAGE_TAG: latest + SAST_DISABLE_DIND: "true" + SAST_CONFIDENCE_LEVEL: 5 + CODECLIMATE_VERSION: latest + +include: + - template: SAST.gitlab-ci.yml + - template: Code-Quality.gitlab-ci.yml + - template: License-Management.gitlab-ci.yml + +build: + tags: + - shared + - linux + stage: build + variables: + GIT_DEPTH: "1" + PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages + + before_script: + - export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables` + - apt-get update + # todo: + #- apt-get -y install + #- pip3 install --upgrade + #- python3 ./fix_python_modules_paths.py + + script: + - python3 -m build -nw bdist_wheel + - mv ./dist/*.whl ./dist/python_project_boilerplate-0.CI-py3-none-any.whl + - pip3 install --upgrade ./dist/*.whl + - coverage run --source=python_project_boilerplate -m --branch pytest --junitxml=./rspec.xml ./tests/test.py + - coverage report -m + - coverage xml + + coverage: /^TOTAL(?:\s+\d+){4}\s+(\d+%).+/ + + cache: + paths: + - $PYTHONUSERBASE + + artifacts: + paths: + - dist + reports: + junit: ./rspec.xml + cobertura: ./coverage.xml diff --git a/Code_Of_Conduct.md b/Code_Of_Conduct.md new file mode 100644 index 0000000..bcaa2bf --- /dev/null +++ b/Code_Of_Conduct.md @@ -0,0 +1 @@ +No codes of conduct! 
def getTextFromNodes(node: Element) -> typing.Iterable[str]:
	"""Recursively yield the text content of *node* in document order."""
	if node.nodeType != node.TEXT_NODE:
		for child in node.childNodes:
			yield from getTextFromNodes(child)
	else:
		yield node.data


def node2text(node: Element) -> str:
	"""Flatten *node* (and all of its descendants) into a single string."""
	return "".join(getTextFromNodes(node))


def iterNextSiblings(n: Element) -> typing.Iterable[Element]:
	"""Yield every sibling that follows *n* within its parent, in order."""
	sib = n.nextSibling
	while sib:
		yield sib
		sib = sib.nextSibling


def textAfter(n: Element) -> str:
	"""Get the text after the tag within the parent element until its end."""
	return "".join(node2text(sib) for sib in iterNextSiblings(n))
def cachedFetchFile(cacheFile, uri):
	"""Return the bytes at *uri*, caching them in *cacheFile*.

	The parent directory is created on demand. If the cache file already
	exists its contents are returned without any network access; otherwise
	the resource is downloaded, stored, and returned.
	"""
	cacheFile.parent.mkdir(parents=True, exist_ok=True)

	if not cacheFile.is_file():
		import httpx  # imported lazily: only needed on a cache miss

		payload = httpx.get(uri).content
		cacheFile.write_bytes(payload)
		return payload

	return cacheFile.read_bytes()
def getTagsWithNonUniqueNames(fileDir: Path = Path(".")):
	"""Return ``(id, origIds)`` pairs for enum entries whose generated name is
	ambiguous (contains ``_or_``), to guide manual name assignment.

	Reads the current ``.ksy`` spec from *fileDir*, falling back to the
	bundled template when the file is absent.
	"""
	# BUG FIX: previously referenced an undefined module-level `parsed` and
	# took no argument, while `__main__.py` calls this with a directory.
	parsed = _kaitai.parseEnumValues(fileDir)
	return [(el.id, el.origIds) for el in parsed if "_or_" in el.id]


def normalizeUniqueNames(t):
	"""Where an entry is marked ``unkn`` but has exactly one distinct
	``-orig-id``, derive its ``id`` from that orig id and collapse
	``-orig-id`` to a scalar."""
	from .nameNormalizer import convertName

	for k in list(t.keys()):
		if not isinstance(t[k], str):
			if t[k]["id"] == "unkn":
				oids = list(dedupPreservingOrder(t[k]["-orig-id"]))
				if len(oids) == 1:
					v = oids[0]
					t[k]["id"] = convertName(v)
					t[k]["-orig-id"] = v


def mergeSourceIntoContext(ctx: SerializingContext, s: "Source.Source", cacheDir: Path) -> None:
	"""Parse *s* (using *cacheDir* for downloads) and merge its enum values
	and provenance (uri + license) into *ctx*.

	BUG FIX: the ``Source.Source`` annotation is a string on purpose — the
	`Source` module is not imported in this file, so the previously eager
	annotation raised ``NameError`` when this module was imported.
	"""
	tagsFromSource = s.parseEnumValues(cacheDir)
	ctx.enumValues2KSEnumDict(tagsFromSource)
	ctx.insertSource(s.uri, s.license)


def fullPipeline(outputDir: Path, sourcesList: "typing.Iterable[Source.Source]", cacheDir: Path) -> None:
	"""Merge every source in *sourcesList* into the spec found in *outputDir*,
	sort the enum by value and write the result back."""
	ctx = _kaitai._getCtxFromDir(outputDir)

	for s in sourcesList:
		mergeSourceIntoContext(ctx, s, cacheDir)

	ctx.sortByKey()
	ctx.dump(outputDir)
@CLI.subcommand("convert")
class ConvertCLI(cli.Application):
	"""Converts the files into Kaitai Struct spec with tag definitions"""

	def main(self, sourceNames: sourcesParamValidator, cacheDir: str = defaultCacheDir):
		# Imported here (not at module top) to keep CLI startup cheap.
		from . import fullPipeline

		selected = [sources[name] for name in sourceNames]
		fullPipeline(Path("."), selected, Path(cacheDir))
+ +doc-ref: + - https://docs.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxprops/f6ab1613-aefe-447d-a49c-18217230b148 + - https://docs.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxocntc/9b636532-9150-4836-9635-9c9b756c9ccf + - https://github.com/hfig/MAPI/blob/master/src/MAPI/Schema/MapiFieldsMessage.yaml # MIT + - https://github.com/hfig/MAPI/blob/master/src/MAPI/Schema/MapiFieldsOther.yaml # MIT + - https://github.com/nektra/outlook-autocomplete/blob/master/OlAutoComplete/nk2props.h # MIT + - https://github.com/stephenegriffin/mfcmapi/blob/151856e6ef5af42368a49a1340060aa58d981e8e/core/interpret/genTagArray.h # MIT + - https://github.com/dbremner/pstviewtool/blob/52f59893ad4390358053541b0257b4a7f2767024/ptags.h # Likely Apache. The repo contains no license, but the news (https://www.infoq.com/news/2010/05/Outlook-PST-View-Tool-and-SDK/, also https://web.archive.org/web/20140704101722/http://www.microsoft.com/en-us/news/press/2010/may10/05-24psttoolspr.aspx) claim that this tool and https://github.com/enrondata/pstsdk were published under Apache. 
W_POSTFIX = "_W"
A_POSTFIX = "_A"


def clearPostfixes(n: str) -> str:
	"""Strip a single trailing wide/ANSI marker (``_W`` / ``_A``) from a
	property name, if one is present."""
	for suffix in (W_POSTFIX, A_POSTFIX):
		if n.endswith(suffix):
			return n[: -len(suffix)]
	return n
'_svr_eid'), + ('seqid', 'seq_id'), + ('draftid', 'draft_id'), + ('srchid', 'srch_id'), + ('oflid', 'ofl_id'), + ('entryid', 'entry_id'), + ('linkid', 'link_id'), + ('replacetime', 'replace_time'), + ('trackstatus', 'track_status'), + ('clientid', 'client_id'), + ('parentid', 'parent_id'), + ('enabledon', 'enabled_on'), + ('onserver', 'on_server'), + ('schdinfo_', 'schd_info_'), + ('_freebusy_', '_free_busy_'), + ('_mtsout_', '_mts_out_'), + ('_mtsin_', '_mts_in_'), + ('xmlstream', 'xml_stream'), + ('containerid', 'container_id'), + ('templateid', 'template_id'), + ('proposedendtime', 'proposed_end_time'), + ('proposedstarttime', 'proposed_starttime'), + ('starttime', 'start_time'), + ('contactphoto', 'contact_photo'), + ('freebusy', 'free_busy'), + ('sendpost', 'send_post'), + ('readpost', 'read_post'), + ('reportnote', 'report_note'), + ('sendnote', 'send_note'), + ('readnote', 'read_note'), + ('endtxt', 'end_txt'), + ('begintxt', 'begin_txt'), + ('bodytag', 'body_tag'), + ('migrateprofile', 'migrate_profile'), + ('changenum', 'change_num'), + ('versionhistory', 'version_history'), + ('versionskeleton', 'version_skeleton'), + ('serverid', 'server_id'), + ('subitemid', 'subitem_id'), + ('inetmail', 'inet_mail'), + ('dotstuff', 'dot_stuff'), + ('newsfeed', 'news_feed'), + ('peruser', 'per_user'), + ('mailbeat', 'mail_beat'), + ('hotsite', 'hot_site'), + ('endtime', 'end_time'), + ('fixfont', 'fix_font'), + ('ccwrap', 'cc_wrap'), + ('metatag', 'meta_tag'), + ('iconurl', 'icon_url'), + ('itemproc', 'item_proc'), + ('viewinfo', 'view_info'), + ('displayname', 'display_name'), + ('fxsrcstream', 'fx_src_stream'), + ('fxdeststream', 'fx_dest_stream'), + ('othermailbox', 'other_mailbox'), + ('viewprivate', 'view_private'), + ('foldertype', 'folder_type'), + ('viewtype', 'view_type'), + ('ostid', 'ost_id'), + ('shareddata', 'shared_data'), + ('notfound', 'not_found'), + ('mapiuid', 'mapi_uid'), + ('mapiform', 'mapi_form'), + ('phonebook', 'phone_book'), + ('testclsid', 
'test_clsid'), + ('labeleduri', 'labeled_uri'), + ('dispname', 'disp_name'), + ('syncevent', 'sync_event'), + ('slowlink', 'slow_link'), + ('dialup', 'dial_up'), + ('waitfor', 'wait_for'), + ('mimewrap', 'mime_wrap'), + ('tcpip', 'tcp_ip'), + ('traceinfo', 'trace_info'), + ('spamtype', 'spam_type'), + ('userfields', 'user_fields'), + ('viewlist', 'view_list'), + ('clearprops', 'clear_props'), + ('logfile', 'log_file'), + ('deltax', 'delta_x'), + ('deltay', 'delta_y'), + ('xpos', 'x_pos'), + ('ypos', 'y_pos'), + ('mailfrom', 'mail_from'), + ('datainit', 'data_init'), + ('dataterm', 'data_term'), + ('outq_', 'out_q_'), + ('inq_', 'in_q_'), + ('datablock', 'data_block'), + ('viewflags', 'view_flags'), + ('saveas', 'save_as'), + ('folderid', 'folder_id'), + ('portno', 'port_no'), + ('bifinfo', 'bif_info'), + ('msgtracking', 'msg_tracking'), + ('autoresponse', 'auto_response'), + ('favfld', 'fav_fld'), + ('bodypart', 'body_part'), + ('listinfo', 'list_info'), + ('reqcn', 'req_cn'), + ('reqname', 'req_name'), + ('insadmin', 'ins_admin') + ), + OrigIdType.PID_TAG: ( + ('_un_modified', '_unmodified'), + ('msgid', 'msg_id'), + ('itemid', 'item_id'), + ('replid', 'repl_id'), + ('guid', 'guid_'), + ), + OrigIdType.PTAG: ( + ("replid", "repl_id"), + ), + None: ( + ('temporaryflags', 'temporary_flags'), + ('errorinfo', 'error_info'), + ('msgsize', 'msg_size'), + ('attachlist', 'attach_list'), + ('changenum', 'change_num'), + ('addrbook', 'addr_book'), + ('rootdir', 'root_dir'), + ('msgclass', 'msg_class'), + ('messageclass', 'message_class'), + ('mtsid', 'mts_id'), + ('sentmail', 'sent_mail'), + ('to_do_', 'todo_'), + ('subfolder', 'sub_folder'), + ('rowid', 'row_id'), + ('recurrenceid', 'recurrence_id'), + ('readonly', 'read_only'), + ('pathname', 'path_name'), + ('templateid', 'template_id'), + ('datatype', 'data_type'), + ('codepage', 'code_page'), + ('_replid', '_repl_id'), + ('webviewinfo', 'webview_info'), + ('webview', 'web_view'), + ('mailuser', 'mail_user'), + 
('longterm', 'long_term'), + ('newsfeed', 'news_feed') + ) +} + +filters = { + OrigIdType.PR_TAG: ( + ('_oab_', '_offline_address_book_'), + ('ems_ab_', 'address_book_'), + ('_addr_', '_address_'), + ('_auth_', '_authorized_'), + ('_deliv_', '_delivery_'), + ('abeid', 'address_book_eid'), + ('_eid', '_entry_id'), + ('splus', 'schd_plus'), + ('_hab_', '_hier_'), + ('_dl', '_distr_list'), + ('_mhs_', '_message_handling_system_'), + ('_mta', '_message_transfer_agent'), + ('_reckey', '_record_key'), + ('wb_sf_', 'wb_search_folder_'), + ('_cont_', '_content_'), + ('_eid', '_entry_id'), + ('loglev', 'log_level'), + ('vrfy', 'verify'), + ('_hdrs_', '_headers_') + ), + OrigIdType.PID_TAG: ( + ('security_descriptor', 'nt_security_descriptor'), + ('_distribution_list', '_distr_list'), + ('_unauthorized_', '_unauth_'), + ('_away', 'oof'), + ('_t_bl_', '_table_') + ), + None: ( + ('appointment', 'appt'), + ('certificate', 'cert'), + ('recipient_', 'rcpt_'), + ('access_control_list_', 'acl_'), + ('hierarchical', 'hier'), + ('address', 'addr'), + ('message', 'msg'), + ('hasattach', 'has_attachments'), + ('_extended', '_ex'), + ('_eid', '_entry_id'), + ('_telephone_', '_phone_'), + ('received_', 'rcvd_'), + ('number', 'num'), + ('_object_', '_obj_'), + ('_message_', '_msg_'), + ('internet', 'inet'), + ('acct', 'account'), + ('maximum', 'max'), + ('minimum', 'min'), + ('transmitable', 'transmittable'), + ('_binary', '_bin'), + ('_mid_', '_msg_id_'), + ('_cpid', '_code_page_id'), + ('dam_', 'deferred_action_message_'), + ('attribute', 'attr'), + ('schedule_', 'schd_'), + ) +} + + +def processFilterBank(s, bank): + for f in bank: + s = s.replace(*f) + return s + + +wordninjaFalsePositives = ( + "corre_lat_or", + "e_its", + "in_it", + "i_pms", + "rec_ip", + "i_pm", + "x_400", + "x_500", + "x_509", + "x_25", + "rfc_1006", + "a_ddr", + "re_pl", + "rc_vd", + "a_ppt", + "tn_ef", + "ds_a", + "fr_eq", + "a_lg", + "auto_reply", + "time_out", + "a_ck", + "re_cv", + "rcp_t", + 
"canonical_iz_ation", + "map_i", + "tn_s", + "e_smtp", + "e_trn", + "s_mime", + "synchronize_r", + "rt_f", + "acc_t", + "gui_d", + "mid_set", + "x_mt", + "sch_d", + "spool_er", + "nts_d", + "n_td_n", + "s_rc", + "s_can", + "de_st", + "i_mail", + "rm_q", + "x_ref", + "t_bl", + "ow_a", + "at_tr", + "p_1", + "u_id", + "cl_sid", + "out_box", + "m_db", + "as_soc", + "p_2", + "pre_c", + "loop_back", + "re_calc", + "de_queue", + "m_gr", + "au_th", + "start_tls", + "ku_lane", + "dia_g", + "d_is_tr", + "n_ntp", + "if_s", + "an_r", + "c_dorm", + "c_doo_or", + "cd_of_bc", + "s_vr", + "transmit_able", + "tty_tdd", + "pa_b", + "a_cl", + "du_a", + "ad_atp_3", + "con_v", + "p_km", + "version_ing", + "l_cid", + "in_cr", + "re_q", + "rg_m", + "c_pid", + "fl_d", + "ex_ch_50", + "mb_in", + "addr_s", + "o_of", + "sr_ch", + "o_ab", + "of_l", + "open_ning", + "encrypt_er", + "fa_v", + "m_sdos", + "dx_a", + "roll_over", + "back_off", + "de_sig", + "una_u_th", + "x_121", + "xm_it", + "l_dap", + "cf_g", + "adr_s", + "mt_s", + "pui_d", + "mon_the_s", + "x_view", + "log_on", + "cate_g", + "back_fill", + "in_st", + "de_liv", + "appt_s", + "del_s", + "reqc_n", + "telet_ex" +) +wordninjaFalsePositives = [(el, el.replace("_", "")) for el in wordninjaFalsePositives] + + +def fix_after_wordninja(name): + return attachNumber(processFilterBank(name, wordninjaFalsePositives)) + + +def detectOrigIdTypeAndSplitFromRestOfName(name: str) -> OrigIdType: + for k, v in origIdTypeToPrefixMapping.items(): + if k: + if name.startswith(v): + return k, name[len(v) :] + + return OrigIdType.unknown, name + + +def processFilterBundle(tp: OrigIdType, name: str, bundle, middleFixerFunc = None) -> str: + filterBank = bundle.get(tp, ()) + name = processFilterBank(name, filterBank) + + if middleFixerFunc: + name = middleFixerFunc(name) + + name = processFilterBank(name, bundle[None]) + + return name + + +def splitJoinedWords(tp: OrigIdType, n: str, useWordNinja: bool = useWordNinja) -> str: + """Tries to normalize a 
def convertName(n: str, useWordNinja: bool = useWordNinja) -> str:
	"""Tries to normalize a name the way that different kinds of source names result into the same name. Also tries to make the name more easy to read"""

	tp, n = detectOrigIdTypeAndSplitFromRestOfName(n)

	# All schemes except PR_TAG use CamelCase and must be snake_cased first;
	# PR_TAG names are already underscore-separated, so lowercasing suffices.
	if tp != OrigIdType.PR_TAG:
		n = inflection.underscore(n)
	else:
		n = n.lower()

	def middleFixerFunc(n):
		if tp == OrigIdType.PR_TAG:
			n = attachNumber(n)
		elif tp == OrigIdType.PTAG:
			n = n.replace("MTA", "MessageTransferAgent")

		n = n.lower()
		if "attachment" not in n:
			# BUG FIX: the result of `str.replace` was previously discarded
			# (strings are immutable), making this normalization a no-op.
			n = n.replace("has_attach", "has_attachments")

		n = splitJoinedWords(tp, n, useWordNinja=useWordNinja)
		return n

	n = processFilterBundle(tp, n, filters, middleFixerFunc)

	return n
def filterPropsInDoc(doc: str):
	"""Replace ``PidXxx property (section N.N)`` references inside *doc* with
	backtick-quoted normalized tag names."""

	def _repl(m):
		return "`" + convertName(canonicalizeOrigName(m.group("propName"))) + "`"

	newDoc, _count = propsInDocsRX.subn(_repl, doc)
	return newDoc
class SectionDict2Enum:
	"""Extracts ``(origIds, valueSrc, doc)`` from the key/value map built out
	of one PidTag documentation section."""

	__slots__ = ()

	DESCR_NAME = "Description"
	CAN_NAME = "Canonical name"
	PROP_ID_NAME = "Property ID"
	ALT_NAMES_NAME = "Alternate names"

	def __call__(self, smap, origIds):
		cls = self.__class__
		doc = smap.get(cls.DESCR_NAME)
		valueSrc = smap.get(cls.PROP_ID_NAME)

		canonicalName = smap.get(cls.CAN_NAME)
		if canonicalName is not None:
			origIds.append(canonicalName)

		altNames = smap.get(cls.ALT_NAMES_NAME)
		if altNames:
			for alias in altNames.split(","):
				origIds.append(alias.strip())

		return origIds, valueSrc, doc
def getOxPropsDocxLink():
	"""Resolve the download URL of the current MS-OXPROPS ``.docx``.

	Fetches the MS-OXPROPS RSS feed, selects an ``item`` element and scans
	the HTML embedded in its ``description`` for the first anchor whose
	``href`` ends in ``.docx``. Returns that URL, or implicitly ``None``
	when no such link is present.
	"""
	import httpx
	from bs4 import BeautifulSoup
	from dom_query import select

	# NOTE(review): no timeout or status check on the request — presumably
	# acceptable for a manually-run scraper; confirm.
	r = httpx.get(oxprops_feed)
	d = xml.dom.minidom.parseString(r.text)

	i = select(d, "item")

	# The description holds HTML, so it is re-parsed with BeautifulSoup.
	h = node2text(select(i, "description"))
	hd = BeautifulSoup(h, "lxml")
	for el in hd.select("a"):
		lh = el["href"]
		if lh.endswith(".docx"):
			return lh
DocxMarkdownSource("oxprops", "[MS-OXPROPS]", oxprops_feed, "Microsoft proprietary, but reuse in other impls is explicitly allowed") diff --git a/MAPITagsScraper/sources/__init__.py b/MAPITagsScraper/sources/__init__.py new file mode 100644 index 0000000..e240ed2 --- /dev/null +++ b/MAPITagsScraper/sources/__init__.py @@ -0,0 +1,10 @@ +from .genTagArray import genTagArray +from .kaitai import _kaitai +from .mfmy_mfoy import mfmy, mfoy +from .OxProps import oxprops +from .ptags import ptags +from .openchange import OpenChange + +sources = (oxprops, mfmy, mfoy, genTagArray, ptags, OpenChange) + +sources = {s.name: s for s in sources} diff --git a/MAPITagsScraper/sources/genTagArray.py b/MAPITagsScraper/sources/genTagArray.py new file mode 100644 index 0000000..2481b4e --- /dev/null +++ b/MAPITagsScraper/sources/genTagArray.py @@ -0,0 +1,35 @@ +import re +from ast import literal_eval + +from ..consts import GitHubRawBase +from ..KSEnumValue import KSEnumValue +from ..nameNormalizer import canonicalizeOrigName, convertName +from ..Source import Source + +parserGenTagRecordRx = re.compile(r"\s*\{\s*(0x[\da-f]+)\s*,\s*(0x[\da-f]+)\s*,\s*L?\"(\w+)\"\s*\}\s*(?:,\s*)") + + +def parseGenTagArrayLines(headerFileLines): + for l in headerFileLines: + m = parserGenTagRecordRx.match(l) + if m: + yield m.groups() + + +def KSEnumValueFromGenTagArrayTriple(valueStr, typeStr, nameStr): + origName = canonicalizeOrigName(nameStr) + name = convertName(origName) + rawValue = literal_eval(valueStr) + value = rawValue >> (8 * 2) + + return KSEnumValue(name, value, origName, None, None, None) + + +class GenTagArraySource(Source): + __slots__ = () + + def parseValuesFromSrc(self, src): + return [KSEnumValueFromGenTagArrayTriple(*el) for el in parseGenTagArrayLines(src.splitlines())] + + +genTagArray = GenTagArraySource("genTagArray", "genTagArray.h", GitHubRawBase + "/stephenegriffin/mfcmapi/151856e6ef5af42368a49a1340060aa58d981e8e/core/interpret/genTagArray.h", "MIT") diff --git 
import typing
from pathlib import Path

from ..KSEnumValue import KSEnumValue
from ..Source import ProtoSource
from ..utils import sortedDictByKey
from ..utils.yaml import dumpYaml


def KSEnumValueFromKSEnumDictKeyValuePair(key, value):
	"""Build a `KSEnumValue` from one `<int key>: <mapping-or-str>` pair of a ksy enum."""
	if isinstance(value, str):
		# shorthand form: the value is just the enum member id
		oid = []
		doc = None
		iD = value
	else:
		oid = value.get("-orig-id", [])
		if isinstance(oid, str):
			oid = [oid]  # normalize the single-scalar form to a list
		doc = value.get("doc", None)
		iD = value["id"]

	return KSEnumValue(iD, key, oid, doc)


class SerializingContext:
	"""Round-trip editing context for one enum inside a loaded `.ksy` document.

	Loads (or creates from the template) the ruamel.yaml round-trip tree and
	exposes handles to its `meta`, `doc-ref` and the target enum mapping.
	"""

	__slots__ = ("parent", "ks", "enums", "enum", "meta", "docRef")

	def __init__(self, parent, src):
		self.parent = parent

		import ruamel.yaml

		y = ruamel.yaml.YAML(typ="rt")
		ks = y.load(src)
		if ks is None:  # empty file, usually when serialization has failed
			ks = y.load(parent.getTemplateFilePath().read_text("utf-8"))
		self.ks = ks

		meta = ks.get("meta", None)
		if meta is None:
			ks["meta"] = meta = ruamel.yaml.comments.CommentedMap()
		self.meta = meta

		meta["id"] = self.parent.traditionalFileNameStem

		docRef = ks.get("doc-ref", None)
		# BUGFIX: this guard originally tested `meta is None` (copy-paste from
		# the block above), so a missing `doc-ref` was never created and
		# `self.docRef` stayed None, making `insertSource` crash.
		if docRef is None:
			ks["doc-ref"] = docRef = ruamel.yaml.comments.CommentedSeq()
		# NOTE(review): a ksy `doc-ref` may also be a plain string; this code
		# assumes the sequence form — confirm against the template file.
		self.docRef = docRef

		e = ks.get("enums", None)
		if e is None:
			ks["enums"] = e = ruamel.yaml.comments.CommentedMap()
		self.enums = e

		enumName = self.parent.enumName
		ee = e.get(enumName, None)
		if ee is None:
			e[enumName] = ee = ruamel.yaml.comments.CommentedMap()

		self.enum = ee

	def sortByKey(self):
		"""Replace the enum mapping with a copy sorted by numeric key.

		BUGFIX: also rebinds `self.enum`; originally only `enums[...]` was
		replaced, so later `parseValues()` / `enumValues2KSEnumDict()` calls
		kept operating on the stale, unsorted mapping.
		"""
		self.enums[self.parent.enumName] = self.enum = sortedDictByKey(self.enum)

	def parseValues(self):
		"""Return all enum entries as `KSEnumValue`s."""
		return [KSEnumValueFromKSEnumDictKeyValuePair(k, v) for k, v in self.enum.items()]

	@classmethod
	def intoKSEnumDict(cls, ksEnumValue: KSEnumValue, enumInstanceDict, merge=True):
		"""Write `ksEnumValue` into `enumInstanceDict` (one ksy enum entry).

		With `merge=True`, keys absent from `ksEnumValue` are deleted from an
		existing entry instead of being left stale.
		"""
		oid = ksEnumValue.origIds

		enumInstanceDict["id"] = ksEnumValue.id

		if oid:
			if len(oid) == 1:
				oid = oid[0]  # a single orig-id is stored as a plain scalar
			enumInstanceDict["-orig-id"] = oid
		else:
			if merge and "-orig-id" in enumInstanceDict:
				del enumInstanceDict["-orig-id"]

		if ksEnumValue.doc:
			enumInstanceDict["doc"] = ksEnumValue.doc
		else:
			if merge and "doc" in enumInstanceDict:
				del enumInstanceDict["doc"]

		if ksEnumValue.companion:
			enumInstanceDict["-companion"] = ksEnumValue.companion

		if ksEnumValue.subject:
			enumInstanceDict["-subject"] = ksEnumValue.subject

	def decorateInt(self, i):
		"""Wrap `i` so it round-trips as a fixed-width hex literal."""
		import ruamel.yaml

		return ruamel.yaml.scalarint.HexInt(i, width=self.parent.hexWidth)

	def insertSource(self, uri, license):
		"""Append `uri` to `doc-ref` with its license as an end-of-line comment."""
		idx = len(self.docRef)
		self.docRef.append(uri)
		self.docRef.yaml_add_eol_comment(license, idx, column=0)

	def enumValues2KSEnumDict(self, enumValues: typing.Iterable[KSEnumValue]):
		"""Merge `enumValues` into the enum mapping, creating entries as needed."""
		import ruamel.yaml

		for el in enumValues:
			k = self.decorateInt(el.value)
			v = self.enum.get(k, None)
			if v is None:
				v = ruamel.yaml.comments.CommentedMap()
				self.enum[k] = v

			self.__class__.intoKSEnumDict(el, v, merge=True)

	def dump(self, outputDir):
		"""Serialize the whole document into `outputDir`."""
		dumpYaml(self.ks, outputDir / self.parent.traditionalFileName)


class KaitaiSource(ProtoSource):
	"""Source backed by our own `.ksy` file (or its bundled template when absent)."""

	__slots__ = ("enumName", "traditionalFileNameStem", "hexWidth")

	def __init__(self, name, traditionalFileNameStem, enumName, hexWidth):
		self.traditionalFileNameStem = traditionalFileNameStem
		super().__init__(name, traditionalFileNameStem + ".ksy")
		self.enumName = enumName
		self.hexWidth = hexWidth

	def _getCtxFromSrc(self, src):
		return SerializingContext(self, src)

	def _getCtxFromDir(self, fileDir: Path):
		return self._getCtxFromSrc(self.fetch(fileDir))

	def parseValuesFromSrc(self, src):
		return self._getCtxFromSrc(src).parseValues()

	def fetch(self, fileDir: Path) -> str:
		"""Read the ksy file from `fileDir`, falling back to the template."""
		ksyFile = fileDir / self.traditionalFileName
		ksyFileToLoad = ksyFile if ksyFile.exists() else self.getTemplateFilePath()
		return ksyFileToLoad.read_text(encoding="utf-8")

	def getTemplatefileName(self):
		return self.traditionalFileNameStem + ".template.ksy"

	def getTemplateFilePath(self):
		# the template ships next to the package root, one level above `sources/`
		return Path(__file__).parent.parent / self.getTemplatefileName()


_kaitai = KaitaiSource("_kaitai", "mapi_tags", "tag", 4)
from ..consts import GitHubRawBase
from ..KSEnumValue import KSEnumValue
from ..nameNormalizer import prepareNamesAndOrigIds
from ..Source import Source

MFMRepo = "hfig/MAPI"
MFMRepoSchemaPath = "src/MAPI/Schema/"
MFMRepoBranch = "master"

MFMRepoSchemaBase = GitHubRawBase + MFMRepo + "/" + MFMRepoBranch + "/" + MFMRepoSchemaPath


class MFMSource(Source):
	"""Common machinery for the hfig/MAPI YAML schema files."""

	__slots__ = ()

	def processItem(self, k, v_origIds):
		"""Build one KSEnumValue from a numeric key and its original id names."""
		v_origIds, names = prepareNamesAndOrigIds(v_origIds, True)
		return KSEnumValue("_or_".join(names), k, v_origIds, None, None, None)

	def processItems(self, enumValuesDictItems):
		for k, v in enumValuesDictItems:
			yield self.processItem(k, v)

	def parseValuesFromYaml(self, y):
		raise NotImplementedError

	def parseValuesFromSrc(self, src):
		import ruamel.yaml

		y = ruamel.yaml.YAML(typ="safe")
		y = y.load(src)
		return self.parseValuesFromYaml(y)


class MFMYSource(MFMSource):
	"""MapiFieldsMessage.yaml: flat `hex-string -> [names...]` mapping."""

	__slots__ = ()

	def parseValuesFromYaml(self, y):
		# only the first listed name is kept (v[0:1]); keys are hex strings
		itemsToProcess = ((int(k, 16), v[0:1]) for k, v in y.items())
		return self.processItems(itemsToProcess)


# NOTE(review): removed a stray `from icecream import ic` debug import that was
# left between the classes; `ic` was never used in this file.
class MFOYSource(MFMSource):
	"""MapiFieldsOther.yaml: sections of `name -> value` mappings."""

	__slots__ = ()

	def parseValuesFromYaml(self, y):
		# PS_PUBLIC_STRINGS is skipped — presumably not numeric tag values;
		# TODO confirm against the upstream schema file.
		del y["PS_PUBLIC_STRINGS"]
		for _sectionName, enumValuesDict in y.items():
			itemsToProcess = ((k, (v,)) for k, v in enumValuesDict.items())
			yield from self.processItems(itemsToProcess)


mfoy = MFOYSource("mfoy", "MapiFieldsOther.yaml", MFMRepoSchemaBase + "MapiFieldsOther.yaml", "MIT")
mfmy = MFMYSource("mfmy", "MapiFieldsMessage.yaml", MFMRepoSchemaBase + "MapiFieldsMessage.yaml", "MIT")
def if1LevelAttr(firstLevel: str, secondLevel: str):
	"""Make a predicate matching AST call targets of the exact form `firstLevel.secondLevel`."""

	def res(funcExpr) -> bool:
		return isinstance(funcExpr, ast.Attribute) and isinstance(funcExpr.value, ast.Name) and funcExpr.value.id == firstLevel and funcExpr.attr == secondLevel

	return res


ifPropertiesAppend = if1LevelAttr("properties", "append")
ifAltnamelinesAppend = if1LevelAttr("altnamelines", "append")
ifFWrite = if1LevelAttr("f", "write")
ifStrLjust = if1LevelAttr("string", "ljust")


class OpenChangeSectionDict2Enum(SectionDict2Enum):
	"""SectionDict2Enum specialised to OpenChange's record-dict key names."""

	__slots__ = ()

	CAN_NAME = "CanonicalName"
	PROP_ID_NAME = "PropertyId"
	ALT_NAMES_NAME = "AlternateNames"


sectDict2Enum = OpenChangeSectionDict2Enum()


def parsePropertiesAppend(xprs):
	"""Yield KSEnumValues from `properties.append({...})` calls in `xprs`."""
	for el in xprs:
		v = el.value
		if ifPropertiesAppend(v.func):
			record = correctedLiteralEval(v.args[0])
			origIds, value, doc = sectDict2Enum(record, [])
			origId, name = next(zip(*prepareNamesAndOrigIds(origIds)))
			yield KSEnumValue(name, value, origId, doc, None, None)


def parseAltnamelinesAppend(xprs):
	"""Yield KSEnumValues from `altnamelines.append("#define ...")` calls in `xprs`."""
	for el in xprs:
		v = el.value
		if ifAltnamelinesAppend(v.func):
			res = parseValueFromSourceEnumStr(correctedLiteralEval(v.args[0]).replace("\n", ""))
			if res:
				yield res


# NOTE(review): removed a stray `from icecream import ic` debug import that sat
# between these helpers; `ic` was never used in this file.


def parseFWrites(xprs):
	"""Yield KSEnumValues recovered from `f.write(... string.ljust(name) ... + value)` calls.

	Expects the written expression to be `(<prefix> + string.ljust(<name>)) + <value>`
	where `<value>` looks like `= <literal>,`.
	"""
	for el in xprs:
		v = el.value
		if ifFWrite(v.func):
			a = v.args[0]
			if isinstance(a, ast.BinOp):
				l = a.left
				if isinstance(l, ast.BinOp):
					lr = l.right
					if isinstance(lr, ast.Call) and ifStrLjust(lr.func):
						r = a.right
						name = correctedLiteralEval(lr.args[0])
						val = correctedLiteralEval(r).strip()
						if val[0] == "=" and val[-1] == ",":
							val = val[1:-1].strip()  # peel `= ... ,` down to the literal
						val = correctedLiteralEval(val)
						origId, name = next(zip(*prepareNamesAndOrigIds([name])))
						yield KSEnumValue(name, val, origId, None, None, None)


def parseMMPF(sourceAST):
	"""Parse every extractable tag from the body of `make_mapi_properties_file`."""
	mmpfB = [el for el in sourceAST.body if isinstance(el, ast.FunctionDef) and el.name == "make_mapi_properties_file"][0].body
	xprs = [el for el in mmpfB if isinstance(el, ast.Expr) and isinstance(el.value, ast.Call)]
	yield from parsePropertiesAppend(xprs)
	yield from parseAltnamelinesAppend(xprs)
	yield from parseFWrites(xprs)
from ..consts import GitHubRawBase
from ..KSEnumValue import KSEnumValue
from ..nameNormalizer import prepareNamesAndOrigIds
from ..Source import Source

import ast
import re
import typing

import simpleeval

# BUGFIX: the named groups had lost their `<name>` / `<payload>` parts —
# `(?P\w+)` is not valid regex syntax and raises `re.error` at import time,
# while the code below addresses `m.group("name")` and `m.group("payload")`.
defineRx = re.compile(r"^\s*#define\s+(?P<name>\w+)\s+(?P<payload>.+)\s*$")
removeEndNoRx = re.compile(r"^(\w+?)(?:_\d+)?$")

evaluator = simpleeval.SimpleEval()
# restrict the evaluator to pure arithmetic / bit-twiddling node types
# (duplicate ast.BinOp entry deduplicated)
allowedNodeTypes = {ast.Constant, ast.Num, ast.UnaryOp, ast.BinOp, ast.Compare, ast.Expr, ast.Tuple, ast.BitOr}
evaluator.nodes = {nt: cb for nt, cb in evaluator.nodes.items() if nt in allowedNodeTypes}


def parseValueFromSourceEnumStr(l: str) -> typing.Optional[KSEnumValue]:
	"""Parse one C `#define NAME PAYLOAD` line into a KSEnumValue, or None.

	PAYLOAD is either a plain integer expression or a `PROP_TAG(<type>, <id>)`
	invocation; only the id part contributes to the value. Numeric `_N`
	suffixes on NAME are stripped. Lines that don't parse produce a diagnostic
	print (best-effort scraping) and return None.
	"""
	m = defineRx.match(l)
	if not m:
		return None

	origId = removeEndNoRx.match(m.group("name")).group(1)  # strip `_N` suffix
	payload = m.group("payload")
	payload = payload.rsplit(")", 1)
	typ = None
	valueRaw = None

	if len(payload) > 1:
		payload = "".join(payload[:-1])
		if payload:
			v = payload.replace("PROP_TAG(", "").replace("(ULONG)", "").split(",")
			if len(v) == 2:
				typ, valueRaw = v
			else:
				print(v)  # diagnostic: unexpected PROP_TAG arity
		else:
			print(l)  # diagnostic: nothing before the closing paren
	else:
		valueRaw = payload[0]  # plain (non-PROP_TAG) payload

	if valueRaw:
		try:
			value = evaluator.eval(valueRaw)
		except simpleeval.FeatureNotAvailable:
			pass  # payload used a construct we deliberately disallow
		else:
			# typ = evaluator.eval(typ) # not needed
			origId, name = next(zip(*prepareNamesAndOrigIds([origId])))
			return KSEnumValue(name, value, origId, None, None, None)
import typing


def dedupPreservingOrder(args: typing.Iterable[str]) -> typing.Iterator[str]:
	"""Yield the items of *args* in their original order, skipping repeats."""
	seen = set()
	for item in args:
		if item in seen:
			continue
		seen.add(item)
		yield item


def sortedDictByKey(dic):
	"""Return a new mapping of the same class as *dic* with items ordered by key."""
	return dic.__class__(sorted(dic.items()))
0000000..717f44e --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,11 @@ +MAPITagsScraper.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/) +=============== +~~[wheel (GitLab)](https://gitlab.com/KOLANICH-tools/MAPITagsScraper.py/-/jobs/artifacts/master/raw/dist/MAPITagsScraper-0.CI-py3-none-any.whl?job=build)~~ +~~[wheel (GHA via `nightly.link`)](https://nightly.link/KOLANICH-tools/MAPITagsScraper.py/workflows/CI/master/MAPITagsScraper-0.CI-py3-none-any.whl)~~ +~~![GitLab Build Status](https://gitlab.com/KOLANICH-tools/MAPITagsScraper.py/badges/master/pipeline.svg)~~ +~~![GitLab Coverage](https://gitlab.com/KOLANICH-tools/MAPITagsScraper.py/badges/master/coverage.svg)~~ +~~[![GitHub Actions](https://github.com/KOLANICH-tools/MAPITagsScraper.py/workflows/CI/badge.svg)](https://github.com/KOLANICH-tools/MAPITagsScraper.py/actions/)~~ +[![Libraries.io Status](https://img.shields.io/librariesio/github/KOLANICH-tools/MAPITagsScraper.py.svg)](https://libraries.io/github/KOLANICH-tools/MAPITagsScraper.py) +[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://github.com/KOLANICH-tools/antiflash.py) + +Extracts MAPI tags from MS docs and other sources. diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..efb9808 --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. 
For more information, please refer to <https://unlicense.org/>
git+https://github.com/danthedeckie/simpleeval.git"] + +[project.urls] +Homepage = "https://codeberg.org/KOLANICH-tools/MAPITagsScraper.py" + +[project.scripts] +MAPITagsScraper = "MAPITagsScraper.__main__:CLI.run" + +[tool.setuptools] +zip-safe = true +include-package-data = false + +[tool.setuptools.packages] +find = {namespaces = false} + +[tool.setuptools_scm]