Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a basic version of data generator #25

Merged
merged 6 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DbSeeder.sln
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
ProjectSection(SolutionItems) = preProject
.gitignore = .gitignore
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbSeeder", "DbSeeder\DbSeeder.csproj", "{AA89F82A-3339-4168-B56C-22F5559700F8}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbSeeder.Tests", "DbSeeder.Tests\DbSeeder.Tests.csproj", "{4C3EA657-97AD-4E2E-AC54-57F4A05FA8CD}"
Expand Down
7 changes: 7 additions & 0 deletions DbSeeder/Data/Bogus/BogusGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
namespace DbSeeder.Data.Bogus;

/// <summary>
/// Describes a single data generator discovered on a Bogus dataset.
/// </summary>
/// <param name="Category">
/// Generator category; <c>BogusUtilities.GetBogusGenerators</c> fills it with the lower-cased name
/// of the <c>Faker</c> property that exposes the dataset.
/// </param>
/// <param name="GeneratorIdentifier">
/// Identifier of the generator; built by <c>BogusUtilities.GetBogusGenerators</c> as the category
/// followed by the lower-cased method name.
/// </param>
/// <param name="ReturnType">Return type of the underlying generator method.</param>
/// <param name="Params">Parameters of the underlying generator method, keyed by parameter name.</param>
public record BogusGenerator(
string Category,
string GeneratorIdentifier,
Type ReturnType,
Dictionary<string, Type> Params);
117 changes: 117 additions & 0 deletions DbSeeder/Data/Bogus/BogusUtilities.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
using System.Reflection;
using Bogus;

namespace DbSeeder.Data.Bogus;

/// <summary>
/// Reflection-based helpers that discover the generator methods exposed by the Bogus
/// <see cref="Faker"/> object, filter them by return type and invoke them.
/// </summary>
public static class BogusUtilities
{
    /// <summary>Binding flags shared by every reflection lookup in this class.</summary>
    private const BindingFlags PublicInstance = BindingFlags.Instance | BindingFlags.Public;

    /// <summary>
    /// Maps a logical type group ("string", "number") to the CLR return types that are
    /// accepted for that group.
    /// </summary>
    private static readonly Dictionary<string, HashSet<Type>> AllowedTypes = new()
    {
        { "string", [typeof(string), typeof(char?), typeof(char[])] },
        {
            "number",
            [
                typeof(int?), typeof(float?), typeof(double?), typeof(byte?), typeof(int), typeof(float),
                typeof(double), typeof(byte)
            ]
        }
    };

    /// <summary>
    /// Collects every public instance method declared on the types of the <see cref="Faker"/>
    /// properties that carry the <c>RegisterMustasheMethodsAttribute</c> attribute.
    /// </summary>
    /// <returns>
    /// Generators grouped by category, where the category is the lower-cased property name.
    /// </returns>
    public static Dictionary<string, List<BogusGenerator>> GetBogusGenerators()
    {
        var generators = new Dictionary<string, List<BogusGenerator>>();

        var datasetProperties = typeof(Faker)
            .GetProperties()
            .Where(p => p.CustomAttributes.Any(a => a.AttributeType.Name == "RegisterMustasheMethodsAttribute"));

        foreach (var property in datasetProperties)
        {
            // Invariant casing keeps identifiers stable regardless of the current culture
            // (e.g. the Turkish dotless 'i').
            var category = property.Name.ToLowerInvariant();
            if (!generators.TryGetValue(category, out var categoryGenerators))
            {
                categoryGenerators = [];
                generators[category] = categoryGenerators;
            }

            // Property accessors are compiled to get_/set_ methods; they are not generators.
            var methods = property.PropertyType
                .GetMethods(PublicInstance | BindingFlags.DeclaredOnly)
                .Where(m => !m.Name.StartsWith("get_", StringComparison.Ordinal) &&
                            !m.Name.StartsWith("set_", StringComparison.Ordinal));

            foreach (var method in methods)
            {
                var methodParams = method.GetParameters().ToDictionary(x => x.Name!, x => x.ParameterType);

                categoryGenerators.Add(new BogusGenerator(
                    category,
                    category + method.Name.ToLowerInvariant(),
                    method.ReturnType,
                    methodParams));
            }
        }

        return generators;
    }

    /// <summary>
    /// Keeps only the generators whose return type belongs to the requested type group.
    /// </summary>
    /// <param name="src">Generators grouped by category.</param>
    /// <param name="returnType">Name of the type group, e.g. "string" or "number".</param>
    /// <returns>
    /// A new dictionary with the same categories as <paramref name="src"/>; a category whose
    /// generators were all filtered out is kept with an empty list.
    /// </returns>
    /// <exception cref="KeyNotFoundException">
    /// <paramref name="returnType"/> is not a known type group.
    /// </exception>
    public static Dictionary<string, List<BogusGenerator>> GetFiltersForReturnType(
        this Dictionary<string, List<BogusGenerator>> src, string returnType)
    {
        if (!AllowedTypes.TryGetValue(returnType, out var allowed))
        {
            throw new KeyNotFoundException(
                $"Unknown return type group '{returnType}'. " +
                $"Supported groups: {string.Join(", ", AllowedTypes.Keys)}");
        }

        return src.ToDictionary(
            entry => entry.Key,
            entry => entry.Value.Where(g => allowed.Contains(g.ReturnType)).ToList());
    }

    /// <summary>
    /// Invokes the Bogus method described by <paramref name="generator"/> on a fresh
    /// <see cref="Faker"/> instance.
    /// </summary>
    /// <param name="generator">Descriptor of the generator to run.</param>
    /// <returns>
    /// The generated value, or <c>null</c> when the category or the method cannot be found
    /// (a message is written to the console in that case).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="generator"/> is <c>null</c>.</exception>
    /// <exception cref="NotImplementedException">
    /// The method exists but has no parameterless overload.
    /// </exception>
    public static dynamic? Generate(BogusGenerator generator)
    {
        ArgumentNullException.ThrowIfNull(generator);

        var faker = new Faker();

        // The identifier is "<category><method>", so the method name is what follows the category.
        var generationMethod = generator.GeneratorIdentifier[generator.Category.Length..];

        var generatorProperty = faker.GetType()
            .GetProperty(generator.Category, PublicInstance | BindingFlags.IgnoreCase);
        if (generatorProperty?.GetValue(faker) is not { } categoryGenerator)
        {
            Console.WriteLine($"Category '{generator.Category}' not found on Faker object");
            return null;
        }

        // Type.GetMethod(name, flags) throws AmbiguousMatchException when the method is
        // overloaded, so enumerate the candidates and pick the overload explicitly.
        var candidates = categoryGenerator.GetType()
            .GetMethods(PublicInstance)
            .Where(m => string.Equals(m.Name, generationMethod, StringComparison.OrdinalIgnoreCase))
            .ToList();
        if (candidates.Count == 0)
        {
            Console.WriteLine($"Method '{generationMethod}' not found in '{generator.Category}' category");
            return null;
        }

        var generatorMethod = candidates.FirstOrDefault(m => m.GetParameters().Length == 0);
        if (generatorMethod is null)
        {
            // TODO[#26]: Implement generators with params
            throw new NotImplementedException("This feature is currently not implemented. " +
                                              "Only parameterless generators can be used");
        }

        // TODO[#27]: Implement constraints handling
        // The invocation result already has the method's return type. Passing it through
        // Convert.ChangeType would throw InvalidCastException for Nullable<T> return types.
        return generatorMethod.Invoke(categoryGenerator, null);
    }
}
65 changes: 65 additions & 0 deletions DbSeeder/Data/Bogus/DataGeneratorsEvaluator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
using DbSeeder.Schema;

namespace DbSeeder.Data.Bogus;

/// <summary>
/// Ranks Bogus generators by how closely their identifier matches a column name.
/// </summary>
public static class DataGeneratorsEvaluator
{
    /// <summary>
    /// Returns the <paramref name="n"/> parameterless generators whose identifier has the
    /// smallest Levenshtein distance to the name of <paramref name="column"/>.
    /// </summary>
    /// <param name="allGenerators">Generators grouped by category.</param>
    /// <param name="column">Column the generators are ranked for.</param>
    /// <param name="n">Maximum number of generators to return.</param>
    /// <returns>
    /// Up to <paramref name="n"/> generators ordered from the closest match to the farthest;
    /// generators with equal distance keep their enumeration order.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="allGenerators"/> or <paramref name="column"/> is <c>null</c>.
    /// </exception>
    public static List<BogusGenerator> FindBestNGenerators(
        this Dictionary<string, List<BogusGenerator>> allGenerators,
        Column column,
        int n = 1)
    {
        ArgumentNullException.ThrowIfNull(allGenerators);
        ArgumentNullException.ThrowIfNull(column);

        // Generator identifiers are lower-cased, so the column name is normalized the same
        // way; otherwise casing alone ("FirstName") would inflate the distance.
        var columnName = column.Name.ToLowerInvariant();

        // TODO[#26]: Implement generators with params
        return allGenerators.Values
            .SelectMany(category => category)
            .Where(generator => generator.Params.Count == 0)
            .Select(generator => (
                Generator: generator,
                Distance: CalculateLevenshteinDistance(
                    columnName,
                    generator.GeneratorIdentifier.ToLowerInvariant())))
            .OrderBy(candidate => candidate.Distance) // OrderBy is stable: ties keep discovery order
            .Take(n)
            .Select(candidate => candidate.Generator)
            .ToList();
    }

    /// <summary>
    /// Computes the Levenshtein (edit) distance between two strings using two rolling rows
    /// instead of the full matrix.
    /// </summary>
    /// <param name="word1">First string.</param>
    /// <param name="word2">Second string.</param>
    /// <returns>
    /// Minimum number of single-character insertions, deletions and substitutions needed to
    /// turn <paramref name="word1"/> into <paramref name="word2"/>.
    /// </returns>
    private static int CalculateLevenshteinDistance(string word1, string word2)
    {
        var previous = new int[word2.Length + 1];
        var current = new int[word2.Length + 1];

        // Distance from the empty prefix of word1 to each prefix of word2.
        for (var j = 0; j <= word2.Length; j++)
        {
            previous[j] = j;
        }

        for (var i = 1; i <= word1.Length; i++)
        {
            current[0] = i;

            for (var j = 1; j <= word2.Length; j++)
            {
                var cost = word1[i - 1] == word2[j - 1] ? 0 : 1;

                current[j] = Math.Min(
                    Math.Min(
                        previous[j] + 1, // Deletion
                        current[j - 1] + 1), // Insertion
                    previous[j - 1] + cost); // Substitution
            }

            (previous, current) = (current, previous);
        }

        // After the final swap the last computed row lives in 'previous'.
        return previous[word2.Length];
    }
}
57 changes: 57 additions & 0 deletions DbSeeder/Data/GeneratorFactory.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
using System.Data;
using DbSeeder.Data.Bogus;
using DbSeeder.Schema;

namespace DbSeeder.Data;

/// <summary>
/// Chooses a value generator for a column based on its SQL data type and name.
/// </summary>
internal static class GeneratorFactory
{
    // The sets use a case-insensitive comparer so that membership checks are hash lookups
    // rather than LINQ's linear Contains(value, comparer) scan.
    private static readonly HashSet<string> StringTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "text", "varchar", "nvarchar", "char", "nchar", "ntext"
    };

    private static readonly HashSet<string> NumeralTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "bigint", "int", "smallint", "tinyint", "bit", "decimal", "money", "smallmoney", "float", "real"
    };

    private static readonly HashSet<string> BinaryTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "binary", "varbinary", "image"
    };

    private static readonly HashSet<string> DateTimeTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "datetime", "smalldatetime", "date", "time", "datetime2", "datetimeoffset"
    };

    private static readonly HashSet<string> OtherTypes = new(StringComparer.OrdinalIgnoreCase)
    {
        "uniqueidentifier", "timestamp", "xml", "udt", "structured", "variant"
    };

    /// <summary>All Bogus generators, discovered once when the type is initialized.</summary>
    private static readonly Dictionary<string, List<BogusGenerator>> Generators = BogusUtilities.GetBogusGenerators();

    /// <summary>
    /// Generates a value for <paramref name="col"/> with the Bogus generator that best
    /// matches the column's data type and name.
    /// </summary>
    /// <param name="col">Column to generate a value for.</param>
    /// <returns>The generated value; may be <c>null</c> when the generator yields nothing.</returns>
    /// <exception cref="InvalidOperationException">No generator matches the column.</exception>
    public static object? GetGeneratorByColumnV2(Column col)
    {
        // type filter
        var generators = Generators;
        if (StringTypes.Contains(col.DataType))
        {
            generators = generators.GetFiltersForReturnType("string");
        }
        else if (NumeralTypes.Contains(col.DataType))
        {
            generators = generators.GetFiltersForReturnType("number");
        }

        // semantic filter
        var generator = generators.FindBestNGenerators(col, n: 1).FirstOrDefault()
                        ?? throw new InvalidOperationException(
                            $"No suitable data generator was found for column '{col.Name}' " +
                            $"of type '{col.DataType}'");

        return (object?)BogusUtilities.Generate(generator);
    }

    /// <summary>
    /// Returns the CLR type and a value factory for <paramref name="col"/> based only on
    /// its SQL data type.
    /// </summary>
    /// <param name="col">Column to pick a generator for.</param>
    /// <returns>The CLR type of the generated values and a delegate that produces them.</returns>
    /// <exception cref="NotImplementedException">
    /// The column's data type is neither a string nor a numeral type.
    /// </exception>
    public static (Type, Func<object>) GetGeneratorByColumn(Column col)
    {
        if (StringTypes.Contains(col.DataType))
        {
            return (typeof(string), () => Guid.NewGuid().ToString("N"));
        }

        if (NumeralTypes.Contains(col.DataType)) // add type limits check
        {
            // NOTE(review): the product can exceed long.MaxValue and wraps around, assuming
            // the default unchecked arithmetic context - confirm this is intended.
            return (typeof(long), () => Random.Shared.NextInt64() * 17);
        }

        throw new NotImplementedException($"{col.DataType} is not currently supported");
    }
}
23 changes: 23 additions & 0 deletions DbSeeder/Data/IDataGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
using DbSeeder.Schema;

namespace DbSeeder.Data;

/// <summary>
/// Contract for components that generate data for a table schema.
/// </summary>
/// <typeparam name="TOut">
/// Type of a single generated record. Declared covariant (<c>out</c>), so it only appears in
/// output positions.
/// </typeparam>
public interface IDataGenerator<out TOut>
{
/// <summary>
/// Generates a single instance of an object that matches the SQL schema's entry for the specified table.
/// </summary>
/// <param name="table">The table schema for which to generate an instance.</param>
/// <returns>An object representing an instance of the SQL schema's entry.</returns>
/// <remarks>Any constraints encountered are only applicable within the scope of this method call.</remarks>
TOut Generate(Table table);

/// <summary>
/// Generates the requested number of records for the specified table.
/// </summary>
/// <param name="table">The table schema for which to generate records.</param>
/// <param name="count">The number of records to generate.</param>
/// <returns>An asynchronous stream of objects representing the generated records.</returns>
/// <remarks>Any constraints encountered are only applicable within the scope of this method call.</remarks>
IAsyncEnumerable<TOut> GenerateMultiple(Table table, int count);
}
8 changes: 8 additions & 0 deletions DbSeeder/Data/Sql/BulkInsertOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace DbSeeder.Data.Sql;

/// <summary>
/// Settings that control how bulk insert SQL queries are generated.
/// </summary>
/// <param name="TotalRecords">Overall number of records to generate.</param>
/// <param name="RecordsPerInsert">Number of records placed into a single insert query.</param>
public record BulkInsertOptions(
    int TotalRecords,
    int RecordsPerInsert);
8 changes: 8 additions & 0 deletions DbSeeder/Data/Sql/BulkInsertSqlQuery.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace DbSeeder.Data.Sql;

/// <summary>
/// A bulk insert query: the target table plus the rows to insert.
/// </summary>
/// <param name="Table">Name of the table the rows are inserted into.</param>
/// <param name="Values">Rows to insert; each row maps a column name to its value.</param>
public record BulkInsertSqlQuery(
    string Table,
    IReadOnlyList<IReadOnlyDictionary<string, string>> Values);
15 changes: 15 additions & 0 deletions DbSeeder/Data/Sql/ISqlDataGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
using DbSeeder.Schema;

namespace DbSeeder.Data.Sql;

/// <summary>
/// Data generator that produces <see cref="InsertSqlQuery"/> records and additionally
/// supports generating bulk insert queries.
/// </summary>
public interface ISqlDataGenerator : IDataGenerator<InsertSqlQuery>
{
/// <summary>
/// Generates bulk insert SQL queries for inserting multiple instances of objects that match the SQL schema's entry for the specified table.
/// </summary>
/// <param name="table">The table schema for which to generate bulk insert queries.</param>
/// <param name="options">Options specifying the total number of records and the number of records per insert.</param>
/// <returns>An asynchronous stream of bulk insert SQL queries.</returns>
/// <remarks>Any constraints encountered are only applicable within the scope of this method call.</remarks>
IAsyncEnumerable<BulkInsertSqlQuery> GenerateBulk(Table table, BulkInsertOptions options);
}
41 changes: 41 additions & 0 deletions DbSeeder/Data/Sql/InsertSqlQuery.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using System.Text;

namespace DbSeeder.Data.Sql;

/// <summary>
/// A single-row insert query, optionally preceded by other insert queries.
/// </summary>
/// <param name="Table">Name of the table the row is inserted into.</param>
/// <param name="Value">The row to insert, mapping a column name to its SQL value text.</param>
/// <param name="RelatedQueries">
/// Queries rendered before this one by <see cref="ToString"/>; <c>null</c> when there are none.
/// </param>
public record InsertSqlQuery(
    string Table,
    IReadOnlyDictionary<string, string> Value,
    IReadOnlyList<InsertSqlQuery>? RelatedQueries = null)
{
    /// <summary>
    /// Renders the related queries (if any) followed by this query as SQL text.
    /// </summary>
    /// <returns>The SQL statements, each terminated by <c>;</c> and a line break.</returns>
    public override string ToString()
    {
        var sb = new StringBuilder();

        if (RelatedQueries is not null)
        {
            foreach (var query in RelatedQueries)
            {
                sb.AppendLine(query.ToString());
            }
        }

        // Standard SQL (and T-SQL) requires the VALUES keyword; VALUE is a MySQL-only synonym.
        sb.AppendLine($"insert into {Table}({GetInsertCols()}) values ({GetInsertValues()});");
        return sb.ToString();
    }

    /// <summary>Comma-separated list of the column names.</summary>
    private string GetInsertCols() => string.Join(",", Value.Keys);

    /// <summary>Comma-separated list of the column values, in the same order as the columns.</summary>
    private string GetInsertValues() => string.Join(",", Value.Values);
}
Loading
Loading