Skip to content
This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
KOLANICH committed Oct 9, 2023
0 parents commit 70fc300
Show file tree
Hide file tree
Showing 30 changed files with 1,609 additions and 0 deletions.
Empty file added .ci/aptPackagesToInstall.txt
Empty file.
1 change: 1 addition & 0 deletions .ci/pythonPackagesToInstallFromGit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
https://github.com/KOLANICH-libs/WordSplitAbs.py
12 changes: 12 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
root = true

[*]
charset = utf-8
indent_style = tab
indent_size = 4
insert_final_newline = true
end_of_line = lf

[*.{yml,yaml,ksy}]
indent_style = space
indent_size = 2
1 change: 1 addition & 0 deletions .github/.templateMarker
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
KOLANICH/python_project_boilerplate.py
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
allow:
- dependency-type: "all"
15 changes: 15 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: CI
on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]

jobs:
build:
runs-on: ubuntu-20.04
steps:
- name: typical python workflow
uses: KOLANICH-GHActions/typical-python-workflow@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
__pycache__
*.pyc
*.pyo
/*.egg-info
/cache
#/mapi_tags.ksy
/mapi_tags.py

*.srctrlbm
*.srctrldb
build
dist
.eggs
monkeytype.sqlite3
/.ipynb_checkpoints
51 changes: 51 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest

variables:
DOCKER_DRIVER: overlay2
SAST_ANALYZER_IMAGE_TAG: latest
SAST_DISABLE_DIND: "true"
SAST_CONFIDENCE_LEVEL: 5
CODECLIMATE_VERSION: latest

include:
- template: SAST.gitlab-ci.yml
- template: Code-Quality.gitlab-ci.yml
- template: License-Management.gitlab-ci.yml

build:
tags:
- shared
- linux
stage: build
variables:
GIT_DEPTH: "1"
PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages

before_script:
- export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables`
- apt-get update
# todo:
#- apt-get -y install
#- pip3 install --upgrade
#- python3 ./fix_python_modules_paths.py

script:
- python3 -m build -nw bdist_wheel
- mv ./dist/*.whl ./dist/python_project_boilerplate-0.CI-py3-none-any.whl
- pip3 install --upgrade ./dist/*.whl
- coverage run --source=python_project_boilerplate -m --branch pytest --junitxml=./rspec.xml ./tests/test.py
- coverage report -m
- coverage xml

coverage: /^TOTAL(?:\s+\d+){4}\s+(\d+%).+/

cache:
paths:
- $PYTHONUSERBASE

artifacts:
paths:
- dist
reports:
junit: ./rspec.xml
cobertura: ./coverage.xml
1 change: 1 addition & 0 deletions Code_Of_Conduct.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No codes of conduct!
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include UNLICENSE
include *.md
include tests
include .editorconfig
38 changes: 38 additions & 0 deletions MAPITagsScraper/DOMUtils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import typing
from io import StringIO
from xml.dom.minidom import Element


def minidom2str(md: Element) -> str:
    """Serialize a minidom node into an indented XML string.

    The node is written with ``writexml`` using a tab both as the initial
    indent and as the per-level indent, and ``"\\n"`` as the line separator.

    :param md: the minidom node to serialize.
    :returns: the text produced by ``md.writexml``.
    """
    buffer = StringIO()
    try:
        md.writexml(buffer, indent="\t", addindent="\t", newl="\n")
        return buffer.getvalue()
    finally:
        # Release the in-memory buffer whether or not serialization succeeded.
        buffer.close()


def minidom2bs4(md: Element) -> "bs4.BeautifulSoup":
    """Convert a minidom node into a BeautifulSoup document.

    The node is first serialized with ``minidom2str`` and the resulting
    markup is then parsed by BeautifulSoup using the ``"lxml"`` parser.

    :param md: the minidom node to convert.
    :returns: the ``BeautifulSoup`` object built from the serialized markup.
    """
    # Imported lazily so that bs4 is only required when this helper is used.
    from bs4 import BeautifulSoup

    markup = minidom2str(md)
    return BeautifulSoup(markup, "lxml")


def getTextFromNodes(node: Element) -> typing.Iterable[str]:
    """Lazily yield the data of every text node in the subtree of ``node``.

    Text nodes are visited depth-first in document order. If ``node`` itself
    is a text node, only its own data is yielded.

    :param node: the minidom node whose subtree is walked.
    :returns: a generator of text-node data strings.
    """
    if node.nodeType != node.TEXT_NODE:
        # Not a text node: descend into the children and forward their text.
        for child in node.childNodes:
            yield from getTextFromNodes(child)
        return

    yield node.data


def node2text(node: Element) -> str:
    """Return all text found in the subtree of ``node`` as a single string.

    :param node: the minidom node to extract text from.
    :returns: the pieces yielded by ``getTextFromNodes`` joined without a separator.
    """
    pieces = getTextFromNodes(node)
    return "".join(pieces)


def iterNextSiblings(n: Element) -> typing.Iterable[Element]:
    """Lazily yield the siblings that follow ``n``, nearest first.

    ``n`` itself is not yielded; iteration stops at the first falsy
    ``nextSibling``.

    :param n: the node whose following siblings are wanted.
    :returns: a generator of the following sibling nodes.
    """
    sibling = n.nextSibling
    while sibling:
        yield sibling
        sibling = sibling.nextSibling


def textAfter(n: Element) -> str:
    """Get the text that follows the tag ``n`` within its parent element, up to the parent's end.

    :param n: the node after which text is collected (its own text is not included).
    :returns: the text of all following siblings, concatenated in order.
    """
    return "".join(node2text(sibling) for sibling in iterNextSiblings(n))
16 changes: 16 additions & 0 deletions MAPITagsScraper/KSEnumValue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import typing


class KSEnumValue:
    """Plain record describing one enum value.

    The instance stores exactly the attributes listed in ``__slots__``; no
    validation or conversion is applied to the constructor arguments.
    """

    __slots__ = ("id", "value", "origIds", "doc", "companion", "subject")

    def __init__(self, iD: str, value: int, origIds: typing.Iterable[str], doc: str, companion: str = None, subject: str = None):
        """Store the given fields as-is.

        :param iD: stored as ``id``.
        :param value: stored as ``value``.
        :param origIds: stored as ``origIds``.
        :param doc: stored as ``doc``.
        :param companion: stored as ``companion``; defaults to ``None``.
        :param subject: stored as ``subject``; defaults to ``None``.
        """
        self.id, self.value = iD, value
        self.origIds, self.doc = origIds, doc
        self.companion, self.subject = companion, subject

    def __repr__(self):
        """Render as ``ClassName(<repr of each slot, in ``__slots__`` order>)``."""
        cls = self.__class__
        renderedFields = [repr(getattr(self, slot)) for slot in cls.__slots__]
        return "{}({})".format(cls.__name__, ", ".join(renderedFields))
53 changes: 53 additions & 0 deletions MAPITagsScraper/Source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from pathlib import Path


def cachedFetchFile(cacheFile, uri):
    """Return the bytes of ``uri``, using ``cacheFile`` as an on-disk cache.

    If ``cacheFile`` already exists, its content is returned and no network
    request is made. Otherwise ``uri`` is downloaded with ``httpx``, the body
    is written to ``cacheFile`` and then returned.

    :param cacheFile: ``pathlib.Path`` of the cache entry; its parent
        directory is created if missing.
    :param uri: the URI to download on a cache miss.
    :returns: the file content as ``bytes``.
    :raises httpx.HTTPStatusError: if the server answers with an error status.
    """
    cacheFile.parent.mkdir(parents=True, exist_ok=True)

    if cacheFile.is_file():
        return cacheFile.read_bytes()

    # Imported lazily so that httpx is only required on a cache miss.
    import httpx

    response = httpx.get(uri)
    # Fail loudly instead of caching an error page: once written, a bad body
    # would be served from the cache on every later call.
    response.raise_for_status()

    data = response.content
    cacheFile.write_bytes(data)
    return data


class ProtoSource:
    """Base description of a source that is read from a local file.

    Subclasses are expected to override ``parseValuesFromSrc``; the base
    implementation raises ``NotImplementedError``.
    """

    __slots__ = ("name", "traditionalFileName")

    def __init__(self, name, traditionalFileName):
        """Remember the source ``name`` and the file name it is read from."""
        self.name = name
        self.traditionalFileName = traditionalFileName

    def fetch(self, fileDir: Path) -> str:
        """Read ``traditionalFileName`` inside ``fileDir`` as UTF-8 text."""
        sourcePath = fileDir / self.traditionalFileName
        return sourcePath.read_text(encoding="utf-8")

    def parseEnumValues(self, fileDir: Path):
        """Fetch the source text from ``fileDir`` and parse it with ``parseValuesFromSrc``."""
        src = self.fetch(fileDir)
        return self.parseValuesFromSrc(src)

    def parseValuesFromSrc(self, src):
        """Parse the fetched text; must be provided by a subclass."""
        raise NotImplementedError

    def __repr__(self):
        """Render as ``ClassName(...)`` listing the slots declared on ``ProtoSource``."""
        # ``__class__`` is the class this method is defined in, not type(self),
        # so only the slots of this class are listed even for subclasses.
        ownSlots = __class__.__slots__
        renderedFields = [repr(getattr(self, slot)) for slot in ownSlots]
        return "{}({})".format(self.__class__.__name__, ", ".join(renderedFields))


class Source(ProtoSource):
    """A source that is downloaded from a URI and cached on disk.

    In addition to the base-class fields it records the ``uri`` to download
    from and the ``license`` associated with the source.
    """

    __slots__ = ("uri", "license")

    def __init__(self, name, cachedFileName, uri, license):
        """Store the fields; ``cachedFileName`` is kept as the base class's file name."""
        super().__init__(name, cachedFileName)
        self.uri = uri
        self.license = license

    @property
    def cachedFileName(self):
        """Alias of ``traditionalFileName``."""
        return self.traditionalFileName

    def fetch(self, cacheDir: Path) -> str:
        """Return the source text, obtained through ``cachedFetchFile`` and decoded as UTF-8."""
        cacheFile = cacheDir / self.cachedFileName
        rawBytes = cachedFetchFile(cacheFile, self.uri)
        return rawBytes.decode("utf-8")

    def __repr__(self):
        """Render as ``ClassName(...)``: this class's slots first, then the parent's."""
        slotNames = __class__.__slots__ + super().__slots__
        renderedFields = [repr(getattr(self, slot)) for slot in slotNames]
        return "{}({})".format(self.__class__.__name__, ", ".join(renderedFields))
51 changes: 51 additions & 0 deletions MAPITagsScraper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3

import re
import typing
from ast import literal_eval
from copy import deepcopy
from io import StringIO
from pathlib import Path
from warnings import warn

from .KSEnumValue import KSEnumValue
from .nameNormalizer import *
from .sources.kaitai import _kaitai
from .utils import dedupPreservingOrder
from .sources.kaitai import SerializingContext

__all__ = ("fullPipeline",)


def getTagsWithNonUniqueNames(cacheDir=None, parsed=None):
    """List the enum values whose id contains ``"_or_"``.

    The original body read an undefined name ``parsed`` and accepted no
    arguments, although the ``check`` subcommand calls it with a cache
    directory. Both are now optional parameters.

    :param cacheDir: directory the sources are read from when ``parsed`` is
        not given; defaults to ``consts.defaultCacheDir``.
    :param parsed: iterable of objects exposing ``id`` and ``origIds``. When
        omitted, it is built by calling ``parseEnumValues(cacheDir)`` on every
        registered source.
        NOTE(review): assumes ``parseEnumValues`` returns an iterable of such
        objects and that scanning each source separately is the intended
        input -- confirm against the sources package.
    :returns: a list of ``(id, origIds)`` tuples, in iteration order.
    """
    if parsed is None:
        # Imported lazily: only needed when the values must be loaded here.
        from .consts import defaultCacheDir
        from .sources import sources

        cacheDir = Path(defaultCacheDir if cacheDir is None else cacheDir)

        parsed = []
        for sourceName in sources:
            parsed.extend(sources[sourceName].parseEnumValues(cacheDir))

    return [(el.id, el.origIds) for el in parsed if "_or_" in el.id]


def normalizeUniqueNames(t):
    """Give a real name to ``"unkn"`` entries that have exactly one original id.

    ``t`` is modified in place. String values are skipped. For every other
    entry whose ``"id"`` is ``"unkn"``, the ``"-orig-id"`` collection is
    de-duplicated; if a single id remains, ``"id"`` becomes its converted
    name and ``"-orig-id"`` becomes that single id.

    :param t: mapping whose non-string values are mappings with ``"id"`` and
        ``"-orig-id"`` keys.
    """
    from .nameNormalizer import convertName

    # Snapshot the keys up front, as the entries are edited while looping.
    for key in list(t.keys()):
        entry = t[key]
        if isinstance(entry, str):
            continue
        if entry["id"] != "unkn":
            continue

        uniqueOrigIds = list(dedupPreservingOrder(entry["-orig-id"]))
        if len(uniqueOrigIds) != 1:
            continue

        onlyOrigId = uniqueOrigIds[0]
        entry["id"] = convertName(onlyOrigId)
        entry["-orig-id"] = onlyOrigId


def mergeSourceIntoContext(ctx: SerializingContext, s: Source.Source, cacheDir: Path) -> None:
    """Parse one source and add its values and attribution to ``ctx``.

    :param ctx: the context that receives the parsed values and the source record.
    :param s: the source to parse; its ``uri`` and ``license`` are recorded.
    :param cacheDir: directory passed to ``s.parseEnumValues``.
    """
    ctx.enumValues2KSEnumDict(s.parseEnumValues(cacheDir))
    ctx.insertSource(s.uri, s.license)


def fullPipeline(outputDir: Path, sourcesList: typing.Iterable[Source.Source], cacheDir: Path) -> None:
    """Merge every given source into the context of ``outputDir`` and write it back.

    :param outputDir: directory the context is obtained from and dumped to.
    :param sourcesList: the sources to merge, processed in iteration order.
    :param cacheDir: directory handed to each source for reading its data.
    """
    ctx = _kaitai._getCtxFromDir(outputDir)

    for source in sourcesList:
        mergeSourceIntoContext(ctx, source, cacheDir)

    # Sort only after all sources have been merged, then persist the result.
    ctx.sortByKey()
    ctx.dump(outputDir)
57 changes: 57 additions & 0 deletions MAPITagsScraper/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from pathlib import Path

from plumbum import cli

from .consts import defaultCacheDir
from .sources import sources


# Validator used as the annotation of the ``sourceNames`` argument of the
# subcommands below. It is built from the entries of ``sources`` and is
# case-sensitive; ``csv=True`` presumably allows several comma-separated
# names in one argument -- confirm against the plumbum documentation.
sourcesParamValidator = cli.Set(*sources, case_sensitive=True, csv=True)


# Root command-line application. It has no behavior of its own; the
# subcommands below are attached to it with ``@CLI.subcommand``.
class CLI(cli.Application):
    """A tool to generate a Kaitai Struct spec with MAPI tags enum"""


@CLI.subcommand("fetch")
class FetchCLI(cli.Application):
    """Just downloads the files into a cache dir"""

    def main(self, sourceNames: sourcesParamValidator, cacheDir: str = defaultCacheDir):
        # For each requested source: look it up by name, announce it, and
        # fetch it against the cache directory. The fetched text is discarded.
        cacheRoot = Path(cacheDir)

        for name in sourceNames:
            source = sources[name]
            print("Ensuring", source)
            source.fetch(cacheRoot)


@CLI.subcommand("convert")
class ConvertCLI(cli.Application):
    """Converts the files into Kaitai Struct spec with tag definitions"""

    def main(self, sourceNames: sourcesParamValidator, cacheDir: str = defaultCacheDir):
        # Resolve the requested names to source objects and run the whole
        # pipeline, using the current working directory as the output directory.
        cacheRoot = Path(cacheDir)

        from . import fullPipeline

        selectedSources = [sources[name] for name in sourceNames]
        outputDir = Path(".")

        fullPipeline(outputDir, selectedSources, cacheRoot)


@CLI.subcommand("check")
class CheckCLI(cli.Application):
    """Just a sanity check to guide manual name assigning"""

    def main(self, cacheDir: str = defaultCacheDir):
        # Pretty-print whatever getTagsWithNonUniqueNames reports for the
        # given cache directory.
        from pprint import pprint

        from . import getTagsWithNonUniqueNames

        cacheRoot = Path(cacheDir)
        nonUnique = getTagsWithNonUniqueNames(cacheRoot)
        pprint(nonUnique)


# Entry point: start the command-line application when this module is
# executed as the main module (e.g. ``python -m MAPITagsScraper``).
if __name__ == "__main__":
    CLI.run()
5 changes: 5 additions & 0 deletions MAPITagsScraper/consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from pathlib import Path

# Default cache directory, relative to the current working directory.
defaultCacheDir = Path("./cache")
# File name of the Kaitai Struct spec -- presumably the generated output; confirm against its users.
defaultKSYFileName = "mapi_tags.ksy"
# URL prefix of GitHub's raw file host.
GitHubRawBase = "https://raw.githubusercontent.com/"
18 changes: 18 additions & 0 deletions MAPITagsScraper/mapi_tags.template.ksy
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
meta:
id: mapi_tags
title: Outlook MAPI tags
application:
- Microsoft Outlook MAPI
- Microsoft Exchange

doc: |
Outlook MAPI tags are enum values used to identify various types of entities in various formats.
doc-ref:
- https://docs.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxprops/f6ab1613-aefe-447d-a49c-18217230b148
- https://docs.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxocntc/9b636532-9150-4836-9635-9c9b756c9ccf
- https://github.com/hfig/MAPI/blob/master/src/MAPI/Schema/MapiFieldsMessage.yaml # MIT
- https://github.com/hfig/MAPI/blob/master/src/MAPI/Schema/MapiFieldsOther.yaml # MIT
- https://github.com/nektra/outlook-autocomplete/blob/master/OlAutoComplete/nk2props.h # MIT
- https://github.com/stephenegriffin/mfcmapi/blob/151856e6ef5af42368a49a1340060aa58d981e8e/core/interpret/genTagArray.h # MIT
- https://github.com/dbremner/pstviewtool/blob/52f59893ad4390358053541b0257b4a7f2767024/ptags.h # Likely Apache. The repo contains no license, but the news (https://www.infoq.com/news/2010/05/Outlook-PST-View-Tool-and-SDK/, also https://web.archive.org/web/20140704101722/http://www.microsoft.com/en-us/news/press/2010/may10/05-24psttoolspr.aspx) claim that this tool and https://github.com/enrondata/pstsdk were published under Apache. Looks plausible since both software were authored by Terry Mahaffey (psviewtool has user name terrymah (though without a proper email) in git commits, likely the same guy as https://github.com/terrymah, pstsdk has the lines `\author Terry Mahaffey`)
Loading

0 comments on commit 70fc300

Please sign in to comment.