Skip to content

Commit

Permalink
Add capability for persisting the state of IkvmFileIdentityUtil and I…
Browse files Browse the repository at this point in the history
…kvmAssemblyInfoUtil between runs of the tasks that use them. This should cut down on run time by avoiding re-examination of files that were already examined in a previous run. It does require that we retrieve the last write times on each pass; even though those lookups aren't great, they're still better than the alternative.
  • Loading branch information
wasabii committed Oct 15, 2023
1 parent 8b9865f commit b89e06e
Show file tree
Hide file tree
Showing 8 changed files with 347 additions and 40 deletions.
34 changes: 34 additions & 0 deletions src/IKVM.MSBuild.Tasks.Tests/IkvmFileIdentityUtilTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;

using FluentAssertions;

using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace IKVM.MSBuild.Tasks.Tests
{

    /// <summary>
    /// Tests for the state persistence of <see cref="IkvmFileIdentityUtil"/>.
    /// </summary>
    [TestClass]
    public class IkvmFileIdentityUtilTests
    {

        /// <summary>
        /// Computes the identity of a temporary file and checks that the saved state XML contains a
        /// "File" element carrying that file's path and identity.
        /// </summary>
        [TestMethod]
        public async Task CanSaveState()
        {
            var u = new IkvmFileIdentityUtil(new IkvmAssemblyInfoUtil());

            // GetTempFileName creates the file on disk, so remove it again once the test is done
            var f = Path.GetTempFileName();
            try
            {
                File.WriteAllText(f, "TEST");
                var i = await u.GetIdentityForFileAsync(f, CancellationToken.None);

                var x = new XElement("Test");
                await u.SaveStateXmlAsync(x);

                x.Should().HaveElement("File").Which.Should().HaveAttribute("Path", f).And.HaveAttribute("Identity", i);
            }
            finally
            {
                File.Delete(f);
            }
        }

    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ public void CanPrepare(string tfm, string targetFrameworkIdentifier, string targ
t.BuildEngine = engine.Object;
t.ToolFramework = tfm;
t.ToolVersion = "";
t.StateFile = Path.GetTempFileName();
t.Items = a.ToArray();
t.References = a.ToArray();
t.Execute().Should().BeTrue();
Expand Down
96 changes: 87 additions & 9 deletions src/IKVM.MSBuild.Tasks/IkvmAssemblyInfoUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,21 @@
using System.Reflection.Metadata;
using System.Reflection.PortableExecutable;
using System.Threading.Tasks;
using System.Xml.Linq;
using System.Xml.Serialization;

public class IkvmAssemblyInfoUtil
{

const string XML_ASSEMBLY_ELEMENT_NAME = "Assembly";
const string XML_PATH_ATTRIBUTE_NAME = "Path";
const string XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME = "LastWriteTimeUtc";
const string XML_ASSEMBLY_INFO_ELEMENT_NAME = "AssemblyInfo";

/// <summary>
/// Defines the cached information per assembly.
/// </summary>
[XmlRoot(XML_ASSEMBLY_INFO_ELEMENT_NAME)]
public struct AssemblyInfo
{

Expand All @@ -25,7 +33,7 @@ public struct AssemblyInfo
/// <param name="name"></param>
/// <param name="mvid"></param>
/// <param name="references"></param>
public AssemblyInfo(string name, Guid mvid, IList<string> references)
public AssemblyInfo(string name, Guid mvid, List<string> references)
{
Name = name;
Mvid = mvid;
Expand All @@ -35,21 +43,27 @@ public AssemblyInfo(string name, Guid mvid, IList<string> references)
/// <summary>
/// Name of the assembly.
/// </summary>
[XmlAttribute("Name")]
public string Name { get; set; }

/// <summary>
/// Gets the MVID of the assembly.
/// </summary>
[XmlAttribute("Mvid")]
public Guid Mvid { get; set; }

/// <summary>
/// Names of the references of the assembly.
/// </summary>
public IList<string> References { get; set; }
[XmlElement("Reference")]
public List<string> References { get; set; }

}

readonly ConcurrentDictionary<string, Task<AssemblyInfo?>> assemblyInfoCache = new();
readonly static XmlSerializer assemblyInfoSerializer = new XmlSerializer(typeof(AssemblyInfo));

readonly Dictionary<string, (DateTime LastWriteTimeUtc, AssemblyInfo? Info)> state = new();
readonly ConcurrentDictionary<string, Task<(DateTime LastWriteTimeUtc, AssemblyInfo? Info)>> cache = new();

/// <summary>
/// Initializes a new instance.
Expand All @@ -59,35 +73,99 @@ public IkvmAssemblyInfoUtil()

}

/// <summary>
/// Loads previously saved state from the given XML element. Each "Assembly" child that carries a
/// path, a last write time and a readable assembly info is recorded; incomplete entries are skipped.
/// </summary>
/// <param name="root">Element whose "Assembly" children hold the saved entries.</param>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is null.</exception>
public void LoadStateXml(XElement root)
{
    if (root == null)
        throw new ArgumentNullException(nameof(root));

    foreach (var element in root.Elements(XML_ASSEMBLY_ELEMENT_NAME))
    {
        var path = (string)element.Attribute(XML_PATH_ATTRIBUTE_NAME);
        if (path == null)
            continue;

        var lastWriteTimeUtc = (DateTime?)element.Attribute(XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME);
        if (lastWriteTimeUtc == null)
            continue;

        // test the child element itself: wrapping a missing element in an XDocument produces an
        // empty but non-null document, so a null check on the document can never skip the entry
        var assemblyInfoElement = element.Element(XML_ASSEMBLY_INFO_ELEMENT_NAME);
        if (assemblyInfoElement == null)
            continue;

        // deserialize from a standalone document rooted at the assembly info element
        AssemblyInfo? assemblyInfo;
        using (var assemblyInfoReader = new XDocument(assemblyInfoElement).CreateReader())
            assemblyInfo = (AssemblyInfo?)assemblyInfoSerializer.Deserialize(assemblyInfoReader);

        if (assemblyInfo == null)
            continue;

        state[path] = (lastWriteTimeUtc.Value, assemblyInfo);
    }
}

/// <summary>
/// Appends the state gathered by this instance to the given XML element. One "Assembly" element is
/// added per cached path that has assembly info, carrying the path, the last write time and the
/// serialized assembly info.
/// </summary>
/// <param name="root">Element that receives the "Assembly" children.</param>
/// <returns>Task that completes once every cached entry has been awaited and written.</returns>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is null.</exception>
public async Task SaveStateXmlAsync(XElement root)
{
    if (root == null)
        throw new ArgumentNullException(nameof(root));

    foreach (var i in cache)
    {
        var (lastWriteTimeUtc, info) = await i.Value;

        // entries without assembly info are not persisted: LoadStateXml discards entries that have
        // no info, and handing a null value to the serializer would not read back as null
        if (info == null)
            continue;

        // serialize assembly info structure
        var infoXmlDoc = new XDocument();
        using (var infoXmlWrt = infoXmlDoc.CreateWriter())
            assemblyInfoSerializer.Serialize(infoXmlWrt, info.Value);

        root.Add(new XElement(XML_ASSEMBLY_ELEMENT_NAME, new XAttribute(XML_PATH_ATTRIBUTE_NAME, i.Key), new XAttribute(XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME, lastWriteTimeUtc), infoXmlDoc.Root));
    }
}

/// <summary>
/// Gets the assembly info for the given assembly path. Results are kept in a per-instance cache
/// keyed by path, so repeated requests for the same path share the same task.
/// </summary>
/// <param name="path">Path of the file to examine; must not be null or whitespace and must exist.</param>
/// <returns>The info produced for the path; its value may be null.</returns>
/// <exception cref="ArgumentException"><paramref name="path"/> is null or whitespace.</exception>
/// <exception cref="FileNotFoundException">No file exists at <paramref name="path"/>.</exception>
public Task<AssemblyInfo?> GetAssemblyInfoAsync(string path)
public async Task<AssemblyInfo?> GetAssemblyInfoAsync(string path)
{
return assemblyInfoCache.GetOrAdd(path, ReadAssemblyInfoAsync);
// validate before touching the cache so invalid paths never get a cache entry
if (string.IsNullOrWhiteSpace(path))
throw new ArgumentException($"'{nameof(path)}' cannot be null or whitespace.", nameof(path));
if (File.Exists(path) == false)
throw new FileNotFoundException($"Could not find file '{path}'.");

// the cache stores the write time alongside the info; callers only receive the info
return (await cache.GetOrAdd(path, CreateAssemblyInfoAsync)).Info;
}

/// <summary>
/// Produces the assembly info for the given path inside a <see cref="Task.Run(Action)"/> work item.
/// The file's last write time is read first; when the loaded state holds an entry for the path with
/// the same write time, that entry's info is reused and the file is not opened.
/// </summary>
/// <param name="path">Path of the file to examine.</param>
/// <returns>The last write time observed for the file together with the info, which is null when reading the file failed.</returns>
Task<AssemblyInfo?> ReadAssemblyInfoAsync(string path)
Task<(DateTime LastWriteTimeUtc, AssemblyInfo? Info)> CreateAssemblyInfoAsync(string path)
{
return Task.Run<AssemblyInfo?>(() =>
return Task.Run(() =>
{
// captured before reading so the stored timestamp describes the content that was examined
var lastWriteTimeUtc = File.GetLastWriteTimeUtc(path);

// check if loaded state contains up to date information
if (state.TryGetValue(path, out var entry))
if (entry.LastWriteTimeUtc == lastWriteTimeUtc)
return (lastWriteTimeUtc, entry.Info);

try
{
// read name, MVID and referenced assembly names from the PE metadata
using var fsstm = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
using var perdr = new PEReader(fsstm);
var mrdr = perdr.GetMetadataReader();
return new AssemblyInfo(mrdr.GetString(mrdr.GetAssemblyDefinition().Name), mrdr.GetGuid(mrdr.GetModuleDefinition().Mvid), mrdr.AssemblyReferences.Select(i => mrdr.GetString(mrdr.GetAssemblyReference(i).Name)).ToList());
return (lastWriteTimeUtc, new AssemblyInfo(mrdr.GetString(mrdr.GetAssemblyDefinition().Name), mrdr.GetGuid(mrdr.GetModuleDefinition().Mvid), mrdr.AssemblyReferences.Select(i => mrdr.GetString(mrdr.GetAssemblyReference(i).Name)).ToList()));
}
catch
{
// any failure while opening or reading the file is reported as "no info" rather than an error
return null;
return (lastWriteTimeUtc, null);
}
});
}
Expand Down
102 changes: 79 additions & 23 deletions src/IKVM.MSBuild.Tasks/IkvmFileIdentityUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,82 +3,138 @@

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Xml.Linq;

public class IkvmFileIdentityUtil
{

const string XML_FILE_ELEMENT_NAME = "File";
const string XML_PATH_ATTRIBUTE_NAME = "Path";
const string XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME = "LastWriteTimeUtc";
const string XML_IDENTITY_ATTRIBUTE_NAME = "Identity";

readonly static Regex sha1Regex = new(@"^([\w\-]+)", RegexOptions.Compiled);
readonly static Regex md5Regex = new(@"^([\w\-]+)", RegexOptions.Compiled);

readonly IkvmAssemblyInfoUtil assemblyInfoUtil;
readonly ConcurrentDictionary<string, Task<string>> fileIdentityCache = new();
readonly Dictionary<string, (DateTime LastWriteTimeUtc, string Identity)> state = new();
readonly ConcurrentDictionary<string, Task<(DateTime LastWriteTimeUtc, string Identity)>> cache = new();

/// <summary>
/// Creates a new file identity utility.
/// </summary>
/// <param name="assemblyInfoUtil">Utility used to obtain assembly information; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="assemblyInfoUtil"/> is null.</exception>
public IkvmFileIdentityUtil(IkvmAssemblyInfoUtil assemblyInfoUtil)
{
    if (assemblyInfoUtil is null)
        throw new ArgumentNullException(nameof(assemblyInfoUtil));

    this.assemblyInfoUtil = assemblyInfoUtil;
}

/// <summary>
/// Reads previously saved state from the given XML element. Every "File" child that carries a path,
/// a last write time and an identity is recorded; children missing any of the three are ignored.
/// </summary>
/// <param name="root">Element whose "File" children hold the saved entries.</param>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is null.</exception>
public void LoadStateXml(XElement root)
{
    if (root is null)
        throw new ArgumentNullException(nameof(root));

    foreach (var fileElement in root.Elements(XML_FILE_ELEMENT_NAME))
    {
        // attributes are read one at a time, in order, so a later attribute is never parsed
        // for an element that was already rejected
        if ((string)fileElement.Attribute(XML_PATH_ATTRIBUTE_NAME) is not string filePath)
            continue;

        if ((DateTime?)fileElement.Attribute(XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME) is not DateTime stamp)
            continue;

        if ((string)fileElement.Attribute(XML_IDENTITY_ATTRIBUTE_NAME) is not string fileIdentity)
            continue;

        state[filePath] = (stamp, fileIdentity);
    }
}

/// <summary>
/// Appends the state gathered by this instance to the given XML element. One "File" element is added
/// per cached path, carrying the path, the last write time and the identity.
/// </summary>
/// <param name="root">Element that receives the "File" children.</param>
/// <returns>Task that completes once every cached entry has been awaited and written.</returns>
/// <exception cref="ArgumentNullException"><paramref name="root"/> is null.</exception>
public async Task SaveStateXmlAsync(XElement root)
{
    // validate up front, like LoadStateXml, instead of failing part-way through the cache
    if (root == null)
        throw new ArgumentNullException(nameof(root));

    foreach (var i in cache)
    {
        var (lastWriteTimeUtc, identity) = await i.Value;
        root.Add(new XElement(XML_FILE_ELEMENT_NAME, new XAttribute(XML_PATH_ATTRIBUTE_NAME, i.Key), new XAttribute(XML_LAST_WRITE_TIME_UTC_ATTRIBUTE_NAME, lastWriteTimeUtc), new XAttribute(XML_IDENTITY_ATTRIBUTE_NAME, identity)));
    }
}

/// <summary>
/// Gets the identity string for the given file. Results are kept in a per-instance cache keyed by
/// path, so repeated requests for the same path share the same task.
/// </summary>
/// <param name="file">Path of the file to identify; must not be null or whitespace and must exist.</param>
/// <param name="path">Path of the file to identify; must not be null or whitespace and must exist.</param>
/// <param name="cancellationToken">Cancellation token. NOTE(review): not referenced in this method's body.</param>
/// <returns>The identity string produced for the file.</returns>
/// <exception cref="ArgumentException">The path is null or whitespace.</exception>
/// <exception cref="FileNotFoundException">No file exists at the path.</exception>
public Task<string> GetIdentityForFileAsync(string file, CancellationToken cancellationToken)
public async Task<string> GetIdentityForFileAsync(string path, CancellationToken cancellationToken)
{
if (string.IsNullOrWhiteSpace(file))
throw new ArgumentException($"'{nameof(file)}' cannot be null or whitespace.", nameof(file));
if (File.Exists(file) == false)
throw new FileNotFoundException($"Could not find file '{file}'.");
// validate before touching the cache so invalid paths never get a cache entry
if (string.IsNullOrWhiteSpace(path))
throw new ArgumentException($"'{nameof(path)}' cannot be null or whitespace.", nameof(path));
if (File.Exists(path) == false)
throw new FileNotFoundException($"Could not find file '{path}'.");

return fileIdentityCache.GetOrAdd(file, CreateIdentityForFileAsync);
// the cache stores the write time alongside the identity; callers only receive the identity
return (await cache.GetOrAdd(path, CreateIdentityForFileAsync)).Identity;
}

/// <summary>
/// Produces the identity for the given file inside a <see cref="Task.Run(Action)"/> work item. The
/// sources are tried in order: an entry in the loaded state with a matching last write time, a
/// companion ".sha1" file, a companion ".md5" file, the MVID of a ".dll"/".exe" assembly, and finally
/// a hex-encoded hash of the file content.
/// </summary>
/// <param name="file">Path of the file to identify.</param>
/// <param name="path">Path of the file to identify.</param>
/// <returns>The last write time observed for the file together with its identity string.</returns>
Task<string> CreateIdentityForFileAsync(string file)
Task<(DateTime LastWriteTimeUtc, string Identity)> CreateIdentityForFileAsync(string path)
{
return Task.Run(async () =>
{
if (string.IsNullOrWhiteSpace(file))
throw new ArgumentException($"'{nameof(file)}' cannot be null or whitespace.", nameof(file));
if (File.Exists(file) == false)
throw new FileNotFoundException($"Could not find file '{file}'.");
// captured before any reading so the stored timestamp describes the content that was examined
var lastWriteTimeUtc = File.GetLastWriteTimeUtc(path);

// check if loaded state contains up to date information
if (state.TryGetValue(path, out var entry))
if (entry.LastWriteTimeUtc == lastWriteTimeUtc)
return (lastWriteTimeUtc, entry.Identity);

// file might have a companion SHA1 hash, let's use it, no calculation required
var sha1File = file + ".sha1";
var sha1File = path + ".sha1";
if (File.Exists(sha1File))
if (File.ReadAllText(sha1File) is string h)
return $"SHA1:{Regex.Match(h.Trim(), @"^([\w\-]+)").Value}";
// NOTE(review): the text is matched without Trim() here; the pattern is anchored at the start,
// so leading whitespace in the hash file would give an empty match — confirm this is intended
return (lastWriteTimeUtc, $"SHA1:{sha1Regex.Match(h).Value}");

// file might have a companion MD5 hash, let's use it, no calculation required
var md5File = file + ".md5";
var md5File = path + ".md5";
if (File.Exists(md5File))
if (File.ReadAllText(md5File) is string h)
return $"MD5:{Regex.Match(h.Trim(), @"^([\w\-]+)").Value}";
// NOTE(review): same as the SHA1 case above, the text is matched without Trim()
return (lastWriteTimeUtc, $"MD5:{md5Regex.Match(h).Value}");

// if the file is potentially a .NET assembly
// the extension comparison is case-sensitive, so e.g. ".DLL" does not take this branch
if (Path.GetExtension(file) == ".dll" || Path.GetExtension(file) == ".exe")
if (await assemblyInfoUtil.GetAssemblyInfoAsync(file) is IkvmAssemblyInfoUtil.AssemblyInfo a)
return $"MVID:{a.Mvid}";
if (Path.GetExtension(path) == ".dll" || Path.GetExtension(path) == ".exe")
if (await assemblyInfoUtil.GetAssemblyInfoAsync(path) is IkvmAssemblyInfoUtil.AssemblyInfo a)
return (lastWriteTimeUtc, $"MVID:{a.Mvid}");

// fallback to a standard full MD5 of the file
using var stm = File.OpenRead(file);
using var stm = File.OpenRead(path);
var hsh = ComputeHash(stm);
// hex-encode the hash bytes, two lowercase digits per byte; this result carries no prefix
var bld = new StringBuilder(hsh.Length * 2);
foreach (var b in hsh)
bld.Append(b.ToString("x2"));

return bld.ToString();
return (lastWriteTimeUtc, bld.ToString());
});
}

Expand Down
Loading

0 comments on commit b89e06e

Please sign in to comment.