
Commit

Automatically fetch windows profile from Microsoft when required.

This prevents Rekall from appearing broken immediately after a Windows update: users will still get it to work, it will just take a few more minutes while profiles are fetched from Microsoft. This is the same approach the Windows debugger uses.

Diff based off https://codereview.appspot.com/223800043/

[email protected]

Review URL: https://codereview.appspot.com/222920043
scudette committed Mar 31, 2015
1 parent c1f4555 commit aac2965
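
The new behaviour is driven by the autodetect_build_local option added in this commit. A minimal usage sketch, assuming Rekall's Python session API of this era (the image path and cache directory here are made up):

    from rekall import session

    s = session.Session(
        filename="win7_after_update.raw",   # hypothetical image
        autodetect_build_local="basic",     # "full", "basic" (default) or "none"
        cache_dir="/tmp/rekall_cache")      # fetched profiles are cached here

    # On a profile repository miss, autodetection now fetches the PDB from
    # the Microsoft symbol server and builds a profile locally before failing.
    s.RunPlugin("pslist")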
Showing 14 changed files with 269 additions and 200 deletions.
3 changes: 3 additions & 0 deletions rekall/args.py
@@ -368,6 +368,9 @@ def ConfigureCommandLineParser(command_metadata, parser, critical=False):
             kwargs["nargs"] = "+" if required else "*"
             kwargs["choices"] = list(kwargs["choices"])
 
+        elif arg_type == "Choices":
+            kwargs["choices"] = list(kwargs["choices"])
+
         # Skip option if not critical.
         critical_arg = kwargs.pop("critical", False)
         if critical and critical_arg:
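
The new "Choices" branch simply forwards the declared values to argparse, which then rejects anything else at parse time. A standalone illustration of the resulting behaviour (plain argparse, no Rekall code):

    import argparse

    parser = argparse.ArgumentParser()
    # What a type="Choices" option amounts to once handed to argparse.
    parser.add_argument("--autodetect_build_local",
                        choices=["full", "basic", "none"], default="basic")

    print parser.parse_args(["--autodetect_build_local", "none"])
    # Passing an unlisted value, e.g. "maybe", makes argparse exit with
    # "invalid choice" instead of silently accepting it.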
2 changes: 1 addition & 1 deletion rekall/constants.py
@@ -18,7 +18,7 @@
 #
 import time
 
-VERSION = "1.3.0"
+VERSION = "1.3.1"
 CODENAME = "Dammastock"
 SCAN_BLOCKSIZE = 1024 * 1024 * 10
21 changes: 21 additions & 0 deletions rekall/plugins/guess_profile.py
@@ -64,6 +64,21 @@ def Keywords(self):
 
     def VerifyProfile(self, profile_name):
         profile = self.session.LoadProfile(profile_name)
+
+        # If the user allows it we can just try to fetch and build the profile
+        # locally.
+        if profile == None and self.session.GetParameter(
+                "autodetect_build_local") in ("full", "basic"):
+            build_local_profile = self.session.plugins.build_local_profile()
+            try:
+                logging.debug("Will build local profile %s", profile_name)
+                build_local_profile.fetch_and_parse(profile_name)
+                profile = self.session.LoadProfile(
+                    profile_name, use_cache=False)
+
+            except IOError:
+                pass
+
         if profile != None:
             return self._ApplyFindDTB(self.find_dtb_impl, profile)
 
@@ -115,6 +130,12 @@ def DetectFromHit(self, hit, file_offset, address_space):
                  " (Default 1.0)",
                  type="Float")
 
+config.DeclareOption("autodetect_build_local", default="basic",
+                     group="Autodetection Overrides",
+                     choices=["full", "basic", "none"],
+                     help="Attempts to fetch and build profile locally.",
+                     type="Choices")
+
 config.DeclareOption("autodetect_scan_length", default=1000000000,
                      group="Autodetection Overrides",
                      help="How much of physical memory to scan before failing")
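
The heart of the change is the fallback in VerifyProfile: on a repository miss, and only when the user allows it, fetch the PDB, build the profile locally, and reload it while bypassing the cache. The same control flow as a standalone sketch (plain function, placeholder for Rekall's actual plugin API):

    def load_profile_with_fallback(session, profile_name):
        profile = session.LoadProfile(profile_name)

        if profile is None and session.GetParameter(
                "autodetect_build_local") in ("full", "basic"):
            try:
                # Fetch the PDB from Microsoft and convert it to a profile.
                session.plugins.build_local_profile().fetch_and_parse(
                    profile_name)
                # Bypass the cache so the freshly built profile is seen.
                profile = session.LoadProfile(profile_name, use_cache=False)
            except IOError:
                pass  # Offline or unknown GUID: report "not found" as before.

        return profile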
18 changes: 13 additions & 5 deletions rekall/plugins/tools/caching_url_manager.py
@@ -41,10 +41,14 @@
              help="Location of the profile cache directory.")
 
 
-class CachingURLManager(io_manager.IOManager):
+class CachingManager(io_manager.IOManager):
+    # We wrap this io manager class
+    DELEGATE = io_manager.URLManager
+
     # If the cache is available we should be selected before the regular
-    # URLManager.
-    order = io_manager.URLManager.order - 10
+    # manager.
+    order = DELEGATE.order - 10
 
     def __init__(self, session=None, **kwargs):
         cache_dir = session.GetParameter("cache_dir")
@@ -67,11 +71,11 @@ def __init__(self, session=None, **kwargs):
 
         # We use an IO manager to manage the cache directory directly.
         self.cache_io_manager = io_manager.DirectoryIOManager(urn=cache_dir)
-        self.url_manager = io_manager.URLManager(session=session, **kwargs)
+        self.url_manager = self.DELEGATE(session=session, **kwargs)
 
         self.CheckUpstreamRepository()
 
-        super(CachingURLManager, self).__init__(session=session, **kwargs)
+        super(CachingManager, self).__init__(session=session, **kwargs)
 
     def __str__(self):
         return "Local Cache %s" % self.cache_io_manager
@@ -118,3 +122,7 @@ def CheckUpstreamRepository(self):
 
         if modified:
             self.cache_io_manager.FlushInventory()
+
+
+class CacheDirectoryManager(CachingManager):
+    DELEGATE = io_manager.DirectoryIOManager
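
The rename from CachingURLManager to CachingManager is a small delegation refactor: the caching logic stays in the base class, and a subclass chooses what it wraps by overriding the DELEGATE class attribute. A toy sketch of the pattern (illustrative classes, not the io_manager API):

    class Backend(object):
        order = 100

        def get(self, name):
            return "remote payload for %s" % name


    class CachingManager(object):
        DELEGATE = Backend              # subclasses override this
        order = DELEGATE.order - 10     # evaluated at class-definition time

        def __init__(self):
            self.cache = {}
            self.delegate = self.DELEGATE()

        def get(self, name):
            # Serve from the cache, falling back to the wrapped backend.
            if name not in self.cache:
                self.cache[name] = self.delegate.get(name)
            return self.cache[name]


    class DirectoryBackend(Backend):
        def get(self, name):
            return "local payload for %s" % name


    class CacheDirectoryManager(CachingManager):
        DELEGATE = DirectoryBackend

One subtlety carries over from the real code: order = DELEGATE.order - 10 runs when the class body executes, so CacheDirectoryManager keeps the order computed from the base DELEGATE unless it also redefines order.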
4 changes: 2 additions & 2 deletions rekall/plugins/tools/ipython.py
@@ -168,8 +168,8 @@ def args(cls, parser):
                  help="The name of a program to page output "
                  "(e.g. notepad or less).")
 
-    def __init__(self, session=None, **kwargs):
-        super(SessionMod, self).__init__(session=session)
+    def __init__(self, **kwargs):
+        super(SessionMod, self).__init__(session=kwargs.pop("session"))
         self.kwargs = kwargs
 
     def render(self, renderer):
174 changes: 113 additions & 61 deletions rekall/plugins/tools/mspdb.py
@@ -88,6 +88,7 @@
 import logging
 import ntpath
 import os
+import platform
 import subprocess
 import urllib2
 
@@ -157,32 +158,68 @@ def render(self, renderer):
             self.pdb_filename = self.filename
             self.guid = self.guid.upper()
 
+        # Write the file data to the renderer.
+        pdb_file_data = self.FetchPDBFile(self.pdb_filename, self.guid)
+        with renderer.open(filename=self.pdb_filename,
+                           directory=self.dump_dir,
+                           mode="wb") as fd:
+            fd.write(pdb_file_data)
+
+    def FetchPDBFile(self, pdb_filename, guid):
+        # Ensure the pdb filename has the correct extension.
+        if not pdb_filename.endswith(".pdb"):
+            pdb_filename += ".pdb"
+
         for url in self.SYM_URLS:
-            basename = ntpath.splitext(self.pdb_filename)[0]
-            url += "/%s/%s/%s.pd_" % (self.pdb_filename,
-                                      self.guid, basename)
+            basename = ntpath.splitext(pdb_filename)[0]
+            url += "/%s/%s/%s.pd_" % (pdb_filename, guid, basename)
 
-            renderer.format("Trying to fetch {0}\n", url)
+            self.session.report_progress("Trying to fetch %s\n", url)
             request = urllib2.Request(url, None, headers={
                 'User-Agent': self.USER_AGENT})
 
-            data = urllib2.urlopen(request).read()
-            renderer.format("Received {0} bytes\n", len(data))
-
-            output_file = "%s.pd_" % basename
-            with renderer.open(filename=output_file,
-                               directory=self.dump_dir,
-                               mode="wb") as fd:
-                fd.write(data)
-
-            try:
-                subprocess.check_call(["cabextract",
-                                       os.path.basename(output_file)],
-                                      cwd=self.dump_dir)
-            except subprocess.CalledProcessError:
-                renderer.report_error(
-                    "Failed to decompress output file {0}. "
-                    "Ensure cabextract is installed.\n", output_file)
+            url_handler = urllib2.urlopen(request)
+            with utils.TempDirectory() as temp_dir:
+                compressed_output_file = os.path.join(
+                    temp_dir, "%s.pd_" % basename)
+
+                output_file = os.path.join(
+                    temp_dir, "%s.pdb" % basename)
+
+                # Download the compressed file to a temp file.
+                with open(compressed_output_file, "wb") as outfd:
+                    while True:
+                        data = url_handler.read(8192)
+                        if not data:
+                            break
+
+                        outfd.write(data)
+                        self.session.report_progress(
+                            "%s: Downloaded %s bytes", basename, outfd.tell())
+
+                # Now try to decompress it with system tools. This might fail.
+                try:
+                    if platform.system() == "Windows":
+                        # This should already be installed on windows systems.
+                        subprocess.check_call(
+                            ["expand", compressed_output_file, output_file],
+                            cwd=self.dump_dir)
+                    else:
+                        # In Linux we just hope the cabextract program was
+                        # installed.
+                        subprocess.check_call(
+                            ["cabextract", compressed_output_file],
+                            cwd=self.dump_dir)
+
+                except subprocess.CalledProcessError:
+                    raise RuntimeError(
+                        "Failed to decompress output file %s. "
+                        "Ensure cabextract is installed.\n" % output_file)
+
+                # We read the entire file into memory here - it should not be
+                # larger than approximately 10mb.
+                with open(output_file, "rb") as fd:
+                    return fd.read(50 * 1024 * 1024)
 
 
 class TestFetchPDB(testlib.DisabledTest):
@@ -1044,6 +1081,13 @@ def Resolve(self, idx):
         except KeyError:
             return obj.NoneObject("Index not known")
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, trace):
+        self.address_space.close()
+
 
 class ParsePDB(core.DirectoryDumperMixin, plugin.Command):
     """Parse the PDB streams."""
@@ -1161,49 +1205,57 @@ def PostProcessVTypes(self, vtypes):
 
         return vtypes
 
+    def parse_pdb(self):
+        with self.tpi:
+            vtypes = {}
+
+            for i, (struct_name, definition) in enumerate(self.tpi.Structs()):
+                self.session.report_progress(
+                    " Exporting %s: %s", i, struct_name)
+
+                struct_name = str(struct_name)
+                existing_definition = vtypes.get(struct_name)
+                if existing_definition:
+                    # Merge the old definition into the new definition.
+                    definition[1].update(existing_definition[1])
+
+                vtypes[struct_name] = definition
+
+            self.metadata.update(dict(
+                ProfileClass=self.profile_class,
+                Type="Profile",
+                PDBFile=os.path.basename(self.filename),
+                ))
+
+            self.metadata.update(self.tpi.metadata)
+
+            # Demangle all constants.
+            demangler = pe_vtypes.Demangler(self.metadata)
+            constants = {}
+            for name, value in self.tpi.constants.iteritems():
+                constants[demangler.DemangleName(name)] = value
+
+            functions = {}
+            for name, value in self.tpi.functions.iteritems():
+                functions[demangler.DemangleName(name)] = value
+
+            vtypes = self.PostProcessVTypes(vtypes)
+
+            result = {
+                "$METADATA": self.metadata,
+                "$STRUCTS": vtypes,
+                "$ENUMS": self.tpi.enums,
+                }
+
+            if not self.concise:
+                result["$REVENUMS"] = self.tpi.rev_enums
+                result["$CONSTANTS"] = constants
+                result["$FUNCTIONS"] = functions
+
+            return result
+
     def render(self, renderer):
-        vtypes = {}
-
-        for i, (struct_name, definition) in enumerate(self.tpi.Structs()):
-            self.session.report_progress(" Exporting %s: %s", i, struct_name)
-            struct_name = str(struct_name)
-            existing_definition = vtypes.get(struct_name)
-            if existing_definition:
-                # Merge the old definition into the new definition.
-                definition[1].update(existing_definition[1])
-
-            vtypes[struct_name] = definition
-
-        self.metadata.update(dict(
-            ProfileClass=self.profile_class,
-            Type="Profile",
-            PDBFile=os.path.basename(self.filename),
-            ))
-
-        self.metadata.update(self.tpi.metadata)
-
-        # Demangle all constants.
-        demangler = pe_vtypes.Demangler(self.metadata)
-        constants = {}
-        for name, value in self.tpi.constants.iteritems():
-            constants[demangler.DemangleName(name)] = value
-
-        functions = {}
-        for name, value in self.tpi.functions.iteritems():
-            functions[demangler.DemangleName(name)] = value
-
-        vtypes = self.PostProcessVTypes(vtypes)
-
-        result = {
-            "$METADATA": self.metadata,
-            "$STRUCTS": vtypes,
-            "$ENUMS": self.tpi.enums,
-            }
-
-        if not self.concise:
-            result["$REVENUMS"] = self.tpi.rev_enums
-            result["$CONSTANTS"] = constants
-            result["$FUNCTIONS"] = functions
+        result = self.parse_pdb()
 
         if self.output_filename:
             with renderer.open(filename=self.output_filename,
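
FetchPDBFile replaces the old single .read() of the whole response with a chunked download into a temporary file, followed by shelling out to a cabinet decompressor. The same two steps as a standalone sketch (Python 2 to match the urllib2 import above; URL, user agent and paths are placeholders):

    import platform
    import subprocess
    import urllib2

    def fetch_and_expand(url, compressed_path, output_path):
        request = urllib2.Request(url, None, headers={
            "User-Agent": "Symbol-Server-Client"})  # placeholder agent
        handle = urllib2.urlopen(request)

        # Stream in 8kb chunks rather than reading the whole body at once,
        # so large PDBs never sit fully in memory and progress is reportable.
        with open(compressed_path, "wb") as outfd:
            while True:
                data = handle.read(8192)
                if not data:
                    break
                outfd.write(data)

        # The .pd_ payload is a Microsoft cabinet: "expand" ships with
        # Windows, while other systems need cabextract installed.
        if platform.system() == "Windows":
            subprocess.check_call(["expand", compressed_path, output_path])
        else:
            subprocess.check_call(["cabextract", compressed_path])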