Skip to content

Commit

Permalink
Changes to handle unpaired surrogates in LNK
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jul 29, 2023
1 parent d478f74 commit ea82986
Show file tree
Hide file tree
Showing 12 changed files with 204 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
run: |
add-apt-repository -y ppa:gift/dev
apt-get update -q
apt-get install -y build-essential python3 python3-dev libbde-python3 libcaes-python3 libcreg-python3 libesedb-python3 libevt-python3 libevtx-python3 libewf-python3 libfsapfs-python3 libfsext-python3 libfsfat-python3 libfshfs-python3 libfsntfs-python3 libfsxfs-python3 libfvde-python3 libfwnt-python3 libfwsi-python3 liblnk-python3 libluksde-python3 libmodi-python3 libmsiecf-python3 libolecf-python3 libphdi-python3 libqcow-python3 libregf-python3 libscca-python3 libsigscan-python3 libsmdev-python3 libsmraw-python3 libvhdi-python3 libvmdk-python3 libvsgpt-python3 libvshadow-python3 libvslvm-python3 python3-acstore python3-artifacts python3-bencode python3-certifi python3-cffi-backend python3-chardet python3-cryptography python3-dateutil python3-defusedxml python3-dfdatetime python3-dfvfs python3-dfwinreg python3-distutils python3-dtfabric python3-fakeredis python3-flor python3-future python3-idna python3-lz4 python3-mock python3-opensearch python3-pefile python3-psutil python3-pyparsing python3-pytsk3 python3-pyxattr python3-redis python3-requests python3-setuptools python3-six python3-tz python3-urllib3 python3-xlsxwriter python3-yaml python3-yara python3-zmq
apt-get install -y build-essential python3 python3-dev libbde-python3 libcaes-python3 libcreg-python3 libesedb-python3 libevt-python3 libevtx-python3 libewf-python3 libfsapfs-python3 libfsext-python3 libfsfat-python3 libfshfs-python3 libfsntfs-python3 libfsxfs-python3 libfvde-python3 libfwnt-python3 libfwsi-python3 liblnk-python3 libluksde-python3 libmodi-python3 libmsiecf-python3 libolecf-python3 libphdi-python3 libqcow-python3 libregf-python3 libscca-python3 libsigscan-python3 libsmdev-python3 libsmraw-python3 libssl-dev libvhdi-python3 libvmdk-python3 libvsgpt-python3 libvshadow-python3 libvslvm-python3 python3-acstore python3-artifacts python3-bencode python3-certifi python3-cffi-backend python3-chardet python3-cryptography python3-dateutil python3-defusedxml python3-dfdatetime python3-dfvfs python3-dfwinreg python3-distutils python3-dtfabric python3-fakeredis python3-flor python3-future python3-idna python3-lz4 python3-mock python3-opensearch python3-pefile python3-psutil python3-pyparsing python3-pytsk3 python3-pyxattr python3-redis python3-requests python3-setuptools python3-six python3-tz python3-urllib3 python3-xlsxwriter python3-yaml python3-yara python3-zmq
- name: Run tests
env:
LANG: en_US.UTF-8
Expand Down
2 changes: 1 addition & 1 deletion config/dpkg/control
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Description: Data files for plaso (log2timeline)

Package: python3-plaso
Architecture: all
Depends: plaso-data (>= ${binary:Version}), libbde-python3 (>= 20220121), libcaes-python3 (>= 20221127), libcreg-python3 (>= 20200725), libesedb-python3 (>= 20220806), libevt-python3 (>= 20191104), libevtx-python3 (>= 20220724), libewf-python3 (>= 20131210), libfsapfs-python3 (>= 20201107), libfsext-python3 (>= 20220112), libfsfat-python3 (>= 20220816), libfshfs-python3 (>= 20220115), libfsntfs-python3 (>= 20211229), libfsxfs-python3 (>= 20220113), libfvde-python3 (>= 20220121), libfwnt-python3 (>= 20210717), libfwsi-python3 (>= 20150606), liblnk-python3 (>= 20230205), libluksde-python3 (>= 20220121), libmodi-python3 (>= 20210405), libmsiecf-python3 (>= 20150314), libolecf-python3 (>= 20151223), libphdi-python3 (>= 20220110), libqcow-python3 (>= 20201213), libregf-python3 (>= 20201002), libscca-python3 (>= 20190605), libsigscan-python3 (>= 20230109), libsmdev-python3 (>= 20140529), libsmraw-python3 (>= 20140612), libvhdi-python3 (>= 20201014), libvmdk-python3 (>= 20140421), libvsgpt-python3 (>= 20211115), libvshadow-python3 (>= 20160109), libvslvm-python3 (>= 20160109), python3-acstore (>= 20230519), python3-artifacts (>= 20220219), python3-bencode, python3-certifi (>= 2016.9.26), python3-cffi-backend (>= 1.9.1), python3-chardet (>= 2.0.1), python3-cryptography (>= 2.0.2), python3-dateutil (>= 1.5), python3-defusedxml (>= 0.5.0), python3-dfdatetime (>= 20221112), python3-dfvfs (>= 20230407), python3-dfwinreg (>= 20211207), python3-dtfabric (>= 20230518), python3-flor (>= 1.1.3), python3-future (>= 0.16.0), python3-idna (>= 2.5), python3-lz4 (>= 0.10.0), python3-opensearch, python3-pefile (>= 2021.5.24), python3-psutil (>= 5.4.3), python3-pyparsing (>= 2.4.2), python3-pytsk3 (>= 20210419), python3-pyxattr (>= 0.7.2), python3-redis (>= 3.4), python3-requests (>= 2.18.0), python3-six (>= 1.1.0), python3-tz, python3-urllib3 (>= 1.21.1), python3-xlsxwriter (>= 0.9.3), python3-yaml (>= 3.10), python3-yara (>= 3.4.0), python3-zmq (>= 2.1.11), ${misc:Depends}
Depends: plaso-data (>= ${binary:Version}), libbde-python3 (>= 20220121), libcaes-python3 (>= 20221127), libcreg-python3 (>= 20200725), libesedb-python3 (>= 20220806), libevt-python3 (>= 20191104), libevtx-python3 (>= 20220724), libewf-python3 (>= 20131210), libfsapfs-python3 (>= 20201107), libfsext-python3 (>= 20220112), libfsfat-python3 (>= 20220816), libfshfs-python3 (>= 20220115), libfsntfs-python3 (>= 20211229), libfsxfs-python3 (>= 20220113), libfvde-python3 (>= 20220121), libfwnt-python3 (>= 20210717), libfwsi-python3 (>= 20230710), liblnk-python3 (>= 20230716), libluksde-python3 (>= 20220121), libmodi-python3 (>= 20210405), libmsiecf-python3 (>= 20150314), libolecf-python3 (>= 20151223), libphdi-python3 (>= 20220110), libqcow-python3 (>= 20201213), libregf-python3 (>= 20201002), libscca-python3 (>= 20190605), libsigscan-python3 (>= 20230109), libsmdev-python3 (>= 20140529), libsmraw-python3 (>= 20140612), libvhdi-python3 (>= 20201014), libvmdk-python3 (>= 20140421), libvsgpt-python3 (>= 20211115), libvshadow-python3 (>= 20160109), libvslvm-python3 (>= 20160109), python3-acstore (>= 20230519), python3-artifacts (>= 20220219), python3-bencode, python3-certifi (>= 2016.9.26), python3-cffi-backend (>= 1.9.1), python3-chardet (>= 2.0.1), python3-cryptography (>= 2.0.2), python3-dateutil (>= 1.5), python3-defusedxml (>= 0.5.0), python3-dfdatetime (>= 20221112), python3-dfvfs (>= 20230407), python3-dfwinreg (>= 20211207), python3-dtfabric (>= 20230518), python3-flor (>= 1.1.3), python3-future (>= 0.16.0), python3-idna (>= 2.5), python3-lz4 (>= 0.10.0), python3-opensearch, python3-pefile (>= 2021.5.24), python3-psutil (>= 5.4.3), python3-pyparsing (>= 2.4.2), python3-pytsk3 (>= 20210419), python3-pyxattr (>= 0.7.2), python3-redis (>= 3.4), python3-requests (>= 2.18.0), python3-six (>= 1.1.0), python3-tz, python3-urllib3 (>= 1.21.1), python3-xlsxwriter (>= 0.9.3), python3-yaml (>= 3.10), python3-yara (>= 3.4.0), python3-zmq (>= 2.1.11), ${misc:Depends}
Description: Python 3 module of plaso (log2timeline)
Plaso (log2timeline) is a framework to create super timelines. Its
purpose is to extract timestamps from various files found on typical
Expand Down
1 change: 1 addition & 0 deletions config/linux/ubuntu_install_plaso.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ PYTHON_DEPENDENCIES="libbde-python3
libsigscan-python3
libsmdev-python3
libsmraw-python3
libssl-dev
libvhdi-python3
libvmdk-python3
libvsgpt-python3
Expand Down
4 changes: 2 additions & 2 deletions dependencies.ini
Original file line number Diff line number Diff line change
Expand Up @@ -252,15 +252,15 @@ version_property: get_version()
[pyfwsi]
dpkg_name: libfwsi-python3
l2tbinaries_name: libfwsi
minimum_version: 20150606
minimum_version: 20230710
pypi_name: libfwsi-python
rpm_name: libfwsi-python3
version_property: get_version()

[pylnk]
dpkg_name: liblnk-python3
l2tbinaries_name: liblnk
minimum_version: 20230205
minimum_version: 20230716
pypi_name: liblnk-python
rpm_name: liblnk-python3
version_property: get_version()
Expand Down
4 changes: 2 additions & 2 deletions plaso/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@
'pyfsxfs': ('get_version()', '20220113', None, True),
'pyfvde': ('get_version()', '20220121', None, True),
'pyfwnt': ('get_version()', '20210717', None, True),
'pyfwsi': ('get_version()', '20150606', None, True),
'pylnk': ('get_version()', '20230205', None, True),
'pyfwsi': ('get_version()', '20230710', None, True),
'pylnk': ('get_version()', '20230716', None, True),
'pyluksde': ('get_version()', '20220121', None, True),
'pymodi': ('get_version()', '20210405', None, True),
'pymsiecf': ('get_version()', '20150314', None, True),
Expand Down
86 changes: 57 additions & 29 deletions plaso/lib/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,33 @@
MICROSECONDS_PER_MINUTE = 60000000
NANOSECONDS_PER_SECOND = 1000000000

SOURCE_TYPE_ARCHIVE = 'archive'
# Unicode code points that are considered non-printable, mapped to the
# escaped text that replaces them:
#
# * C0 control characters (U+0000 - U+001F) are escaped as \x##
# * DEL and the C1 control characters (U+007F - U+009F) are escaped as \x##
# * Unicode surrogate characters (U+D800 - U+DFFF) are escaped as \U########
NON_PRINTABLE_CHARACTERS = {
    **{code_point: f'\\x{code_point:02x}'
       for code_point in range(0x00, 0x20)},
    **{code_point: f'\\x{code_point:02x}'
       for code_point in range(0x7f, 0xa0)},
    **{code_point: f'\\U{code_point:08x}'
       for code_point in range(0xd800, 0xe000)}}

# Translation table for str.translate() that applies the escapes above.
NON_PRINTABLE_CHARACTER_TRANSLATION_TABLE = str.maketrans(
    NON_PRINTABLE_CHARACTERS)

# Compression formats.
COMPRESSION_FORMAT_NONE = 'none'
COMPRESSION_FORMAT_ZLIB = 'zlib'

COMPRESSION_FORMATS = frozenset([
COMPRESSION_FORMAT_NONE,
COMPRESSION_FORMAT_ZLIB])

# Default worker process memory limit of 2 GiB.
DEFAULT_WORKER_MEMORY_LIMIT = 2048 * 1024 * 1024

# Consider a worker process inactive after 15 minutes of no status updates.
DEFAULT_WORKER_TIMEOUT = 15.0 * 60.0

FAILURE_MODE_EXHAUST_MEMORY = 'exhaust_memory'
FAILURE_MODE_NOT_RESPONDING = 'not_responding'
FAILURE_MODE_TERMINATED = 'terminated'
FAILURE_MODE_TIME_OUT = 'time_out'

# Operating system families.
OPERATING_SYSTEM_FAMILY_LINUX = 'Linux'
OPERATING_SYSTEM_FAMILY_MACOS = 'MacOS'
OPERATING_SYSTEM_FAMILY_UNKNOWN = 'Unknown'
Expand All @@ -40,10 +47,47 @@
OPERATING_SYSTEM_FAMILY_WINDOWS_9x,
OPERATING_SYSTEM_FAMILY_WINDOWS_NT])

# Serialization formats.
SERIALIZER_FORMAT_JSON = 'json'

SERIALIZER_FORMATS = frozenset([SERIALIZER_FORMAT_JSON])

# Source types.
SOURCE_TYPE_ARCHIVE = 'archive'

# Storage formats.
STORAGE_FORMAT_SQLITE = 'sqlite'
STORAGE_FORMAT_REDIS = 'redis'

SESSION_STORAGE_FORMATS = frozenset([STORAGE_FORMAT_SQLITE])
TASK_STORAGE_FORMATS = frozenset([STORAGE_FORMAT_SQLITE, STORAGE_FORMAT_REDIS])

DEFAULT_STORAGE_FORMAT = STORAGE_FORMAT_SQLITE

# Storage types.

# The session storage contains the results of one or more sessions.
# A typical session is a single run of a tool (log2timeline.py).
# The task storage contains the results of one or more tasks. Tasks
# are used to split work within a session. A typical task is a single
# run of a worker process.

STORAGE_TYPE_SESSION = 'session'
STORAGE_TYPE_TASK = 'task'

STORAGE_TYPES = frozenset([STORAGE_TYPE_SESSION, STORAGE_TYPE_TASK])

# Default worker process memory limit of 2 GiB.
DEFAULT_WORKER_MEMORY_LIMIT = 2048 * 1024 * 1024

# Consider a worker process inactive after 15 minutes of no status updates.
DEFAULT_WORKER_TIMEOUT = 15.0 * 60.0

FAILURE_MODE_EXHAUST_MEMORY = 'exhaust_memory'
FAILURE_MODE_NOT_RESPONDING = 'not_responding'
FAILURE_MODE_TERMINATED = 'terminated'
FAILURE_MODE_TIME_OUT = 'time_out'

STATUS_INDICATOR_ABORTED = 'aborted'
STATUS_INDICATOR_ANALYZING = 'analyzing'
STATUS_INDICATOR_COLLECTING = 'collecting'
Expand All @@ -69,23 +113,7 @@
STATUS_INDICATOR_NOT_RESPONDING,
STATUS_INDICATOR_KILLED])

STORAGE_FORMAT_SQLITE = 'sqlite'
STORAGE_FORMAT_REDIS = 'redis'

SESSION_STORAGE_FORMATS = frozenset([STORAGE_FORMAT_SQLITE])
TASK_STORAGE_FORMATS = frozenset([STORAGE_FORMAT_SQLITE, STORAGE_FORMAT_REDIS])

DEFAULT_STORAGE_FORMAT = STORAGE_FORMAT_SQLITE

# The session storage contains the results of one or more sessions.
# A typical session is a single run of a tool (log2timeline.py).
# The task storage contains the results of one or more tasks. Tasks
# are used to split work within a session. A typical task is a single
# run of a worker process.
STORAGE_TYPE_SESSION = 'session'
STORAGE_TYPE_TASK = 'task'

STORAGE_TYPES = frozenset([STORAGE_TYPE_SESSION, STORAGE_TYPE_TASK])
# Time descriptions.

TIME_DESCRIPTION_ADDED = 'Added Time'
TIME_DESCRIPTION_BACKUP = 'Backup Time'
Expand Down
36 changes: 28 additions & 8 deletions plaso/parsers/shared/shell_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,17 @@

from plaso.containers import windows_events
from plaso.helpers.windows import shell_folders
from plaso.lib import definitions


class ShellItemsParser(object):
"""Parser for Windows NT shell items."""

NAME = 'shell_items'

_PATH_ESCAPE_CHARACTERS = {'\\': '\\\\'}
_PATH_ESCAPE_CHARACTERS.update(definitions.NON_PRINTABLE_CHARACTERS)

def __init__(self, origin):
"""Initializes the parser.
Expand All @@ -22,6 +26,7 @@ def __init__(self, origin):
"""
super(ShellItemsParser, self).__init__()
self._origin = origin
self._path_escape_characters = str.maketrans(self._PATH_ESCAPE_CHARACTERS)
self._path_segments = []

def _GetDateTime(self, fat_date_time):
Expand All @@ -38,6 +43,20 @@ def _GetDateTime(self, fat_date_time):

return dfdatetime_fat_date_time.FATDateTime(fat_date_time=fat_date_time)

def _GetSanitizedPathString(self, path):
"""Retrieves a sanitized path string.
Args:
path (str): path.
Returns:
str: sanitized path string.
"""
if not path:
return None

return path.translate(self._path_escape_characters)

def _ParseShellItem(self, parser_mediator, shell_item):
"""Parses a shell item.
Expand All @@ -54,7 +73,7 @@ def _ParseShellItem(self, parser_mediator, shell_item):
event_data = windows_events.WindowsShellItemFileEntryEventData()
event_data.modification_time = self._GetDateTime(
shell_item.get_modification_time_as_integer())
event_data.name = shell_item.name
event_data.name = self._GetSanitizedPathString(shell_item.name)
event_data.origin = self._origin
event_data.shell_item_path = self.CopyToPath()

Expand All @@ -72,7 +91,8 @@ def _ParseShellItem(self, parser_mediator, shell_item):
extension_block.get_creation_time_as_integer())
event_data.file_reference = file_reference
event_data.localized_name = extension_block.localized_name
event_data.long_name = extension_block.long_name
event_data.long_name = self._GetSanitizedPathString(
extension_block.long_name)

# TODO: change to generate an event_data for each extension block.
if (event_data.access_time or event_data.creation_time or
Expand Down Expand Up @@ -109,20 +129,20 @@ def _ParseShellItemPathSegment(self, shell_item):

elif isinstance(shell_item, pyfwsi.volume):
if shell_item.name:
path_segment = shell_item.name
path_segment = self._GetSanitizedPathString(shell_item.name)
elif shell_item.identifier:
path_segment = '{{{0:s}}}'.format(shell_item.identifier)

elif isinstance(shell_item, pyfwsi.file_entry):
long_name = ''
for extension_block in shell_item.extension_blocks:
if isinstance(extension_block, pyfwsi.file_entry_extension):
long_name = extension_block.long_name
long_name = self._GetSanitizedPathString(extension_block.long_name)

if long_name:
path_segment = long_name
elif shell_item.name:
path_segment = shell_item.name
path_segment = self._GetSanitizedPathString(shell_item.name)

elif isinstance(shell_item, pyfwsi.network_location):
if shell_item.location:
Expand Down Expand Up @@ -151,16 +171,16 @@ def CopyToPath(self):
number_of_path_segments -= 1
for path_segment in self._path_segments[1:]:
# Remove a trailing \ except for the last path segment.
if path_segment.endswith('\\') and number_of_path_segments > 1:
path_segment = path_segment[:-1]
if path_segment.endswith('\\\\') and number_of_path_segments > 1:
path_segment = path_segment[:-2]

if ((path_segment.startswith('<') and path_segment.endswith('>')) or
len(strings) == 1):
strings.append(' {0:s}'.format(path_segment))
elif path_segment.startswith('\\'):
strings.append('{0:s}'.format(path_segment))
else:
strings.append('\\{0:s}'.format(path_segment))
strings.append('\\\\{0:s}'.format(path_segment))
number_of_path_segments -= 1

return ''.join(strings)
Expand Down
Loading

0 comments on commit ea82986

Please sign in to comment.