From f390f5ee46c54906b3c4176528b72efb31a34fc0 Mon Sep 17 00:00:00 2001 From: Bardin08 Date: Sun, 28 Apr 2024 19:36:57 +0300 Subject: [PATCH 1/5] Implement a basic version of data generator --- DbSeeder/Data/IDataGenerator.cs | 23 ++++++ DbSeeder/Data/SqlQueryGenerator.cs | 121 +++++++++++++++++++++++++++++ DbSeeder/Program.cs | 15 +++- DbSeeder/Schema/Table.cs | 1 + 4 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 DbSeeder/Data/IDataGenerator.cs create mode 100644 DbSeeder/Data/SqlQueryGenerator.cs diff --git a/DbSeeder/Data/IDataGenerator.cs b/DbSeeder/Data/IDataGenerator.cs new file mode 100644 index 0000000..f59c7cd --- /dev/null +++ b/DbSeeder/Data/IDataGenerator.cs @@ -0,0 +1,23 @@ +using DbSeeder.Schema; + +namespace DbSeeder.Data; + +public interface IDataGenerator +{ + /// + /// Generates a single instance of an object that matches the SQL schema's entry for the specified table. + /// + /// The table schema for which to generate an instance. + /// An object representing an instance of the SQL schema's entry. + /// Any constraints encountered are only applicable within the scope of this method call. + TOut Generate(Table table); + + /// + /// Generates the requested number of records for the specified table. + /// + /// The table schema for which to generate records. + /// The number of records to generate. + /// An asynchronous stream of objects representing the generated records. + /// Any constraints encountered are only applicable within the scope of this method call. 
+ IAsyncEnumerable GenerateMultiple(Table table, int count); +} diff --git a/DbSeeder/Data/SqlQueryGenerator.cs b/DbSeeder/Data/SqlQueryGenerator.cs new file mode 100644 index 0000000..0e542f0 --- /dev/null +++ b/DbSeeder/Data/SqlQueryGenerator.cs @@ -0,0 +1,121 @@ +using DbSeeder.Schema; + +namespace DbSeeder.Data; + +public interface ISqlDataGenerator : IDataGenerator +{ + /// + /// Generates bulk insert SQL queries for inserting multiple instances of objects that match the SQL schema's entry for the specified table. + /// + /// The table schema for which to generate bulk insert queries. + /// Options specifying the total number of records and the number of records per insert. + /// An asynchronous stream of bulk insert SQL queries. + /// Any constraints encountered are only applicable within the scope of this method call. + IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options); +} + +/// +/// Represents options for generating bulk insert SQL queries. +/// +/// The total number of records to generate. +/// The number of records per insert query. +public record BulkInsertOptions(int TotalRecords, int RecordsPerInsert); + +internal class SqlQueryGenerator : ISqlDataGenerator +{ + public InsertSqlQuery Generate(Table table) + { + if (table.PrimaryKey is null) + { + throw new NotImplementedException("Currently unsupported. 
Check GitHub issue #21"); + } + + var isPkAutoIncremental = table.PrimaryKey.Column.IsAutoIncrement; + var cols = table.Columns.AsEnumerable(); + + if (isPkAutoIncremental) + { + cols = cols.Where(c => !c.IsPrimaryKey); + } + + var dict = new Dictionary(); + foreach (var col in cols.ToList()) + { + var (_, func) = GeneratorFactory.GetGeneratorByColumn(col); + dict.Add(col.Name, func().ToString()); + } + + return new InsertSqlQuery(table.Name, Value: dict); + } + + + + public IAsyncEnumerable GenerateMultiple(Table table, int count) + { + throw new NotImplementedException(); + } + + public IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options) + { + throw new NotImplementedException(); + } +} + +internal static class GeneratorFactory +{ + private static readonly string[] StringTypes = ["text", "varchar", "nvarchar", "char"]; + private static readonly string[] NumeralTypes = ["int", "long", "bit"]; + + public static (Type, Func) GetGeneratorByColumn(Column col) + { + if (StringTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) + { + return (typeof(string), () => Guid.NewGuid().ToString("N")); + } + + if (NumeralTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) // add type limits check + { + return (typeof(long), () => Random.Shared.NextInt64() * 17); + } + + throw new NotImplementedException($"{col.DataType} is not currently supported"); + } +} + + + +public record InsertSqlQuery( + string Table, + IReadOnlyDictionary Value) // col: value +{ + public override string ToString() + { + return $"insert into {Table}({GetInsertCols()}) " + + $"value ({GetInsertValues()});"; + } + + private string GetInsertCols() + { + return string.Join(",", Value.Keys); + } + + private string GetInsertValues() + { + return string.Join(",", Value.Values); + } +} + +// --> insert into {table} +// value ( { string.Join(",", Values[i]) } ); + +public record BulkInsertSqlQuery( + string Table, + IReadOnlyList> Values) // [col: value] +{ + +} + +// --> 
insert into {table} +// values ( +// foreach Values[i]: {string.Join(",", Values[i])} +// ); diff --git a/DbSeeder/Program.cs b/DbSeeder/Program.cs index 9e09733..20c835d 100644 --- a/DbSeeder/Program.cs +++ b/DbSeeder/Program.cs @@ -1,4 +1,5 @@ -using DbSeeder.Schema; +using DbSeeder.Data; +using DbSeeder.Schema; using DbSeeder.SqlParser; using DbSeeder.SqlParser.SyntaxTree; using DbSeeder.SqlParser.SyntaxTree.Validation; @@ -47,10 +48,18 @@ FOREIGN KEY (profile_id) REFERENCES profiles(id) var sqlSchema = new SqlSchemaBuilder().Build(astRoot); - Console.WriteLine("Sql Schema Parsed"); + Console.WriteLine("\t\t // --- Sql Schema Parsed --- \\\\"); foreach (var table in sqlSchema.Tables) { - Console.WriteLine($"{table.Name}({string.Join(", ", table.Columns.Select(x => x.Name))})"); + Console.WriteLine($"\t --> {table.Name}({string.Join(", ", table.Columns.Select(x => x.Name))})"); + } + + Console.WriteLine("\n\n\t\t// --- Data Generation --- \\\\"); + var generator = new SqlQueryGenerator(); + for (var i = 0; i < 15; i++) + { + var profile = generator.Generate(sqlSchema.Tables.First(x => x.Name == "users")); + Console.WriteLine("\t {0}", profile); } } diff --git a/DbSeeder/Schema/Table.cs b/DbSeeder/Schema/Table.cs index 3cded09..4f771d5 100644 --- a/DbSeeder/Schema/Table.cs +++ b/DbSeeder/Schema/Table.cs @@ -5,6 +5,7 @@ public class Table(string name, SqlSchema schema) private readonly List _columns = []; private readonly List _foreignKeys = []; + // TODO[#21]: Implement key define logic if not specified manually // There are some rules how to deal if PK is not defined // ex: find the first attribute with NOT NULL & UNIQUE constraints public PrimaryKey? 
PrimaryKey { get; private set; } From 689254d736b4dde80a93df451dc3527fce6eddf0 Mon Sep 17 00:00:00 2001 From: Bardin08 Date: Sun, 28 Apr 2024 19:39:22 +0300 Subject: [PATCH 2/5] Adjust namespaces --- DbSeeder/Data/GeneratorFactory.cs | 24 +++++ DbSeeder/Data/Sql/BulkInsertOptions.cs | 8 ++ DbSeeder/Data/Sql/BulkInsertSqlQuery.cs | 8 ++ DbSeeder/Data/Sql/ISqlDataGenerator.cs | 15 +++ DbSeeder/Data/Sql/InsertSqlQuery.cs | 22 +++++ DbSeeder/Data/Sql/SqlQueryGenerator.cs | 51 ++++++++++ DbSeeder/Data/SqlQueryGenerator.cs | 121 ------------------------ DbSeeder/Program.cs | 1 + 8 files changed, 129 insertions(+), 121 deletions(-) create mode 100644 DbSeeder/Data/GeneratorFactory.cs create mode 100644 DbSeeder/Data/Sql/BulkInsertOptions.cs create mode 100644 DbSeeder/Data/Sql/BulkInsertSqlQuery.cs create mode 100644 DbSeeder/Data/Sql/ISqlDataGenerator.cs create mode 100644 DbSeeder/Data/Sql/InsertSqlQuery.cs create mode 100644 DbSeeder/Data/Sql/SqlQueryGenerator.cs delete mode 100644 DbSeeder/Data/SqlQueryGenerator.cs diff --git a/DbSeeder/Data/GeneratorFactory.cs b/DbSeeder/Data/GeneratorFactory.cs new file mode 100644 index 0000000..599ceb2 --- /dev/null +++ b/DbSeeder/Data/GeneratorFactory.cs @@ -0,0 +1,24 @@ +using DbSeeder.Schema; + +namespace DbSeeder.Data; + +internal static class GeneratorFactory +{ + private static readonly string[] StringTypes = ["text", "varchar", "nvarchar", "char"]; + private static readonly string[] NumeralTypes = ["int", "long", "bit"]; + + public static (Type, Func) GetGeneratorByColumn(Column col) + { + if (StringTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) + { + return (typeof(string), () => Guid.NewGuid().ToString("N")); + } + + if (NumeralTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) // add type limits check + { + return (typeof(long), () => Random.Shared.NextInt64() * 17); + } + + throw new NotImplementedException($"{col.DataType} is not currently supported"); + } +} \ No newline at end of file 
diff --git a/DbSeeder/Data/Sql/BulkInsertOptions.cs b/DbSeeder/Data/Sql/BulkInsertOptions.cs new file mode 100644 index 0000000..d5c1d2a --- /dev/null +++ b/DbSeeder/Data/Sql/BulkInsertOptions.cs @@ -0,0 +1,8 @@ +namespace DbSeeder.Data.Sql; + +/// +/// Represents options for generating bulk insert SQL queries. +/// +/// The total number of records to generate. +/// The number of records per insert query. +public record BulkInsertOptions(int TotalRecords, int RecordsPerInsert); \ No newline at end of file diff --git a/DbSeeder/Data/Sql/BulkInsertSqlQuery.cs b/DbSeeder/Data/Sql/BulkInsertSqlQuery.cs new file mode 100644 index 0000000..e8bb568 --- /dev/null +++ b/DbSeeder/Data/Sql/BulkInsertSqlQuery.cs @@ -0,0 +1,8 @@ +namespace DbSeeder.Data.Sql; + +public record BulkInsertSqlQuery( + string Table, + IReadOnlyList> Values) // [col: value] +{ + +} \ No newline at end of file diff --git a/DbSeeder/Data/Sql/ISqlDataGenerator.cs b/DbSeeder/Data/Sql/ISqlDataGenerator.cs new file mode 100644 index 0000000..fc367e9 --- /dev/null +++ b/DbSeeder/Data/Sql/ISqlDataGenerator.cs @@ -0,0 +1,15 @@ +using DbSeeder.Schema; + +namespace DbSeeder.Data.Sql; + +public interface ISqlDataGenerator : IDataGenerator +{ + /// + /// Generates bulk insert SQL queries for inserting multiple instances of objects that match the SQL schema's entry for the specified table. + /// + /// The table schema for which to generate bulk insert queries. + /// Options specifying the total number of records and the number of records per insert. + /// An asynchronous stream of bulk insert SQL queries. + /// Any constraints encountered are only applicable within the scope of this method call. 
+ IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options); +} \ No newline at end of file diff --git a/DbSeeder/Data/Sql/InsertSqlQuery.cs b/DbSeeder/Data/Sql/InsertSqlQuery.cs new file mode 100644 index 0000000..2a7aca5 --- /dev/null +++ b/DbSeeder/Data/Sql/InsertSqlQuery.cs @@ -0,0 +1,22 @@ +namespace DbSeeder.Data.Sql; + +public record InsertSqlQuery( + string Table, + IReadOnlyDictionary Value) // col: value +{ + public override string ToString() + { + return $"insert into {Table}({GetInsertCols()}) " + + $"value ({GetInsertValues()});"; + } + + private string GetInsertCols() + { + return string.Join(",", Value.Keys); + } + + private string GetInsertValues() + { + return string.Join(",", Value.Values); + } +} \ No newline at end of file diff --git a/DbSeeder/Data/Sql/SqlQueryGenerator.cs b/DbSeeder/Data/Sql/SqlQueryGenerator.cs new file mode 100644 index 0000000..3b549c6 --- /dev/null +++ b/DbSeeder/Data/Sql/SqlQueryGenerator.cs @@ -0,0 +1,51 @@ +using DbSeeder.Schema; + +namespace DbSeeder.Data.Sql; + +internal class SqlQueryGenerator : ISqlDataGenerator +{ + public InsertSqlQuery Generate(Table table) + { + if (table.PrimaryKey is null) + { + throw new NotImplementedException("Currently unsupported. 
Check GitHub issue #21"); + } + + var isPkAutoIncremental = table.PrimaryKey.Column.IsAutoIncrement; + var cols = table.Columns.AsEnumerable(); + + if (isPkAutoIncremental) + { + cols = cols.Where(c => !c.IsPrimaryKey); + } + + var dict = new Dictionary(); + foreach (var col in cols.ToList()) + { + var (_, func) = GeneratorFactory.GetGeneratorByColumn(col); + dict.Add(col.Name, func().ToString()); + } + + return new InsertSqlQuery(table.Name, Value: dict); + } + + + + public IAsyncEnumerable GenerateMultiple(Table table, int count) + { + throw new NotImplementedException(); + } + + public IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options) + { + throw new NotImplementedException(); + } +} + +// --> insert into {table} +// value ( { string.Join(",", Values[i]) } ); + +// --> insert into {table} +// values ( +// foreach Values[i]: {string.Join(",", Values[i])} +// ); diff --git a/DbSeeder/Data/SqlQueryGenerator.cs b/DbSeeder/Data/SqlQueryGenerator.cs deleted file mode 100644 index 0e542f0..0000000 --- a/DbSeeder/Data/SqlQueryGenerator.cs +++ /dev/null @@ -1,121 +0,0 @@ -using DbSeeder.Schema; - -namespace DbSeeder.Data; - -public interface ISqlDataGenerator : IDataGenerator -{ - /// - /// Generates bulk insert SQL queries for inserting multiple instances of objects that match the SQL schema's entry for the specified table. - /// - /// The table schema for which to generate bulk insert queries. - /// Options specifying the total number of records and the number of records per insert. - /// An asynchronous stream of bulk insert SQL queries. - /// Any constraints encountered are only applicable within the scope of this method call. - IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options); -} - -/// -/// Represents options for generating bulk insert SQL queries. -/// -/// The total number of records to generate. -/// The number of records per insert query. 
-public record BulkInsertOptions(int TotalRecords, int RecordsPerInsert); - -internal class SqlQueryGenerator : ISqlDataGenerator -{ - public InsertSqlQuery Generate(Table table) - { - if (table.PrimaryKey is null) - { - throw new NotImplementedException("Currently unsupported. Check GitHub issue #21"); - } - - var isPkAutoIncremental = table.PrimaryKey.Column.IsAutoIncrement; - var cols = table.Columns.AsEnumerable(); - - if (isPkAutoIncremental) - { - cols = cols.Where(c => !c.IsPrimaryKey); - } - - var dict = new Dictionary(); - foreach (var col in cols.ToList()) - { - var (_, func) = GeneratorFactory.GetGeneratorByColumn(col); - dict.Add(col.Name, func().ToString()); - } - - return new InsertSqlQuery(table.Name, Value: dict); - } - - - - public IAsyncEnumerable GenerateMultiple(Table table, int count) - { - throw new NotImplementedException(); - } - - public IAsyncEnumerable GenerateBulk(Table table, BulkInsertOptions options) - { - throw new NotImplementedException(); - } -} - -internal static class GeneratorFactory -{ - private static readonly string[] StringTypes = ["text", "varchar", "nvarchar", "char"]; - private static readonly string[] NumeralTypes = ["int", "long", "bit"]; - - public static (Type, Func) GetGeneratorByColumn(Column col) - { - if (StringTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) - { - return (typeof(string), () => Guid.NewGuid().ToString("N")); - } - - if (NumeralTypes.Contains(col.DataType, StringComparer.OrdinalIgnoreCase)) // add type limits check - { - return (typeof(long), () => Random.Shared.NextInt64() * 17); - } - - throw new NotImplementedException($"{col.DataType} is not currently supported"); - } -} - - - -public record InsertSqlQuery( - string Table, - IReadOnlyDictionary Value) // col: value -{ - public override string ToString() - { - return $"insert into {Table}({GetInsertCols()}) " + - $"value ({GetInsertValues()});"; - } - - private string GetInsertCols() - { - return string.Join(",", Value.Keys); - } 
- - private string GetInsertValues() - { - return string.Join(",", Value.Values); - } -} - -// --> insert into {table} -// value ( { string.Join(",", Values[i]) } ); - -public record BulkInsertSqlQuery( - string Table, - IReadOnlyList> Values) // [col: value] -{ - -} - -// --> insert into {table} -// values ( -// foreach Values[i]: {string.Join(",", Values[i])} -// ); diff --git a/DbSeeder/Program.cs b/DbSeeder/Program.cs index 20c835d..7b07ede 100644 --- a/DbSeeder/Program.cs +++ b/DbSeeder/Program.cs @@ -1,4 +1,5 @@ using DbSeeder.Data; +using DbSeeder.Data.Sql; using DbSeeder.Schema; using DbSeeder.SqlParser; using DbSeeder.SqlParser.SyntaxTree; From 74dbb98805f2bf8d22b2098ea412ceb8000f4868 Mon Sep 17 00:00:00 2001 From: Bardin08 Date: Mon, 29 Apr 2024 15:51:33 +0300 Subject: [PATCH 3/5] Implement FK support --- DbSeeder/Data/Sql/InsertSqlQuery.cs | 27 +++++- DbSeeder/Data/Sql/SqlQueryGenerator.cs | 88 ++++++++++++++++++- DbSeeder/Program.cs | 15 +++- DbSeeder/Schema/Column.cs | 2 + DbSeeder/Schema/Table.cs | 2 +- DbSeeder/SqlParser/SyntaxTree/AstBuilder.cs | 1 + .../SyntaxTree/ForeignKeyConstraintBuilder.cs | 6 ++ 7 files changed, 129 insertions(+), 12 deletions(-) diff --git a/DbSeeder/Data/Sql/InsertSqlQuery.cs b/DbSeeder/Data/Sql/InsertSqlQuery.cs index 2a7aca5..122c35e 100644 --- a/DbSeeder/Data/Sql/InsertSqlQuery.cs +++ b/DbSeeder/Data/Sql/InsertSqlQuery.cs @@ -1,13 +1,32 @@ +using System.Text; + namespace DbSeeder.Data.Sql; public record InsertSqlQuery( string Table, - IReadOnlyDictionary Value) // col: value + IReadOnlyDictionary Value, + IReadOnlyList? 
RelatedQueries = null) { public override string ToString() { - return $"insert into {Table}({GetInsertCols()}) " + - $"value ({GetInsertValues()});"; + var sb = new StringBuilder(); + + if (RelatedQueries is not null && RelatedQueries.Any()) + { + foreach (var query in RelatedQueries) + { + sb.AppendLine(query.ToString()); + } + } + + sb.AppendLine(BuildSqlQuery()); + return sb.ToString(); + + string BuildSqlQuery() + { + return $"insert into {Table}({GetInsertCols()}) " + + $"value ({GetInsertValues()});"; + } } private string GetInsertCols() @@ -19,4 +38,4 @@ private string GetInsertValues() { return string.Join(",", Value.Values); } -} \ No newline at end of file +} diff --git a/DbSeeder/Data/Sql/SqlQueryGenerator.cs b/DbSeeder/Data/Sql/SqlQueryGenerator.cs index 3b549c6..88cbed6 100644 --- a/DbSeeder/Data/Sql/SqlQueryGenerator.cs +++ b/DbSeeder/Data/Sql/SqlQueryGenerator.cs @@ -1,9 +1,14 @@ +using System.Collections.Concurrent; using DbSeeder.Schema; +using Google.Protobuf.WellKnownTypes; +using Org.BouncyCastle.Security; namespace DbSeeder.Data.Sql; internal class SqlQueryGenerator : ISqlDataGenerator { + private readonly ConcurrentDictionary> _generationContextBuffer = []; + public InsertSqlQuery Generate(Table table) { if (table.PrimaryKey is null) @@ -11,7 +16,16 @@ public InsertSqlQuery Generate(Table table) throw new NotImplementedException("Currently unsupported. 
Check GitHub issue #21"); } - var isPkAutoIncremental = table.PrimaryKey.Column.IsAutoIncrement; + var generationContextId = Guid.NewGuid(); + var generationContext = new Dictionary(); + _generationContextBuffer.GetOrAdd(generationContextId, generationContext); + + return GenerateInternal(table, generationContextId); + } + + private InsertSqlQuery GenerateInternal(Table table, Guid generationContextId) + { + var isPkAutoIncremental = table.PrimaryKey!.Column.IsAutoIncrement; var cols = table.Columns.AsEnumerable(); if (isPkAutoIncremental) @@ -19,17 +33,83 @@ public InsertSqlQuery Generate(Table table) cols = cols.Where(c => !c.IsPrimaryKey); } - var dict = new Dictionary(); + var generatedValues = new Dictionary(); foreach (var col in cols.ToList()) { var (_, func) = GeneratorFactory.GetGeneratorByColumn(col); - dict.Add(col.Name, func().ToString()); + generatedValues.Add(col.Name, func().ToString()!); } - return new InsertSqlQuery(table.Name, Value: dict); + var hasReferencedTables = table.ForeignKeys.Any(); + if (!hasReferencedTables) + { + return new InsertSqlQuery(table.Name, generatedValues); + } + + + var relatedDataInsertQueries = new List(); + foreach (var fk in table.ForeignKeys) + { + var generatedContext = _generationContextBuffer[generationContextId]; + if (generatedContext.TryGetValue(fk.RefTable.Name, out var existedRecord)) + { + generatedValues[fk.Column.Name] = existedRecord.Value[fk.RefColumn.Name]; + continue; + } + + var relatedDataInsertQuery = GenerateRelatedData( + generationContextId, generatedValues, fk); + + if (relatedDataInsertQuery is not null) + { + relatedDataInsertQueries.Add(relatedDataInsertQuery); + } + } + + return new InsertSqlQuery(table.Name, generatedValues, relatedDataInsertQueries.AsReadOnly()); } + private InsertSqlQuery? 
GenerateRelatedData( + Guid generatedContextId, + IReadOnlyDictionary referencerValues, + ForeignKey fk) + { + var generatedContext = _generationContextBuffer[generatedContextId]; + if (generatedContext.TryGetValue(fk.RefTable.Name, out var existedRecord)) + { + return null; + } + + var insertSqlQuery = GenerateInternal(fk.RefTable, generatedContextId); + + // set up the proper reference (col == refCol value); + // To get the col's value that was generated earlier, we have to pass the context + var valuesWithValidFkValue = insertSqlQuery.Value.ToDictionary(); + valuesWithValidFkValue[fk.RefColumn.Name] = referencerValues[fk.Column.Name]; + insertSqlQuery = insertSqlQuery with { Value = valuesWithValidFkValue }; + generatedContext.TryAdd(insertSqlQuery.Table, insertSqlQuery); + + var hasReferencedTables = fk.RefTable.ForeignKeys.Any(); + if (hasReferencedTables) + { + foreach (var intFk in fk.RefTable.ForeignKeys) + { + var relatedDataInsertQuery = GenerateRelatedData( + generatedContextId, insertSqlQuery.Value, intFk); + + if (relatedDataInsertQuery is not null) + { + insertSqlQuery = insertSqlQuery with + { + RelatedQueries = [relatedDataInsertQuery] + }; + } + } + } + + return insertSqlQuery; + } public IAsyncEnumerable GenerateMultiple(Table table, int count) { diff --git a/DbSeeder/Program.cs b/DbSeeder/Program.cs index 7b07ede..4c98f0d 100644 --- a/DbSeeder/Program.cs +++ b/DbSeeder/Program.cs @@ -26,6 +26,15 @@ name VARCHAR(122) NOT NULL UNIQUE, profile_id INT, FOREIGN KEY (profile_id) REFERENCES profiles(id) ); + + CREATE TABLE activity + ( + id INT AUTO_INCREMENT PRIMARY KEY, + profile_id INT, + user_id INT, + FOREIGN KEY (profile_id) REFERENCES profiles(id), + FOREIGN KEY (user_id) REFERENCES users(id) + ); """; var lexer = new SqlLexer(sqlScript); var tokens = lexer.Tokenize(); @@ -57,10 +66,10 @@ FOREIGN KEY (profile_id) REFERENCES profiles(id) Console.WriteLine("\n\n\t\t// --- Data Generation --- \\\\"); var generator = new SqlQueryGenerator(); - for 
(var i = 0; i < 15; i++) + for (var i = 0; i < 1; i++) { - var profile = generator.Generate(sqlSchema.Tables.First(x => x.Name == "users")); - Console.WriteLine("\t {0}", profile); + var profile = generator.Generate(sqlSchema.GetTableByName("activity")!); + Console.WriteLine("\n-------\n\n{0}\n-------\n", profile); } } diff --git a/DbSeeder/Schema/Column.cs b/DbSeeder/Schema/Column.cs index ed33d9c..9f448c1 100644 --- a/DbSeeder/Schema/Column.cs +++ b/DbSeeder/Schema/Column.cs @@ -1,3 +1,5 @@ +using ZstdSharp.Unsafe; + namespace DbSeeder.Schema; public record Column diff --git a/DbSeeder/Schema/Table.cs b/DbSeeder/Schema/Table.cs index 4f771d5..6b7b3b4 100644 --- a/DbSeeder/Schema/Table.cs +++ b/DbSeeder/Schema/Table.cs @@ -71,5 +71,5 @@ private void SetTableForeignKey(Column column) } private Column? GetColumnByName(string columnName) - => Columns.FirstOrDefault(c => c.Name.Equals(columnName, StringComparison.Ordinal)); + => Columns.SingleOrDefault(c => c.Name.Equals(columnName, StringComparison.Ordinal)); } diff --git a/DbSeeder/SqlParser/SyntaxTree/AstBuilder.cs b/DbSeeder/SqlParser/SyntaxTree/AstBuilder.cs index f2ccfbe..b7fd5a9 100644 --- a/DbSeeder/SqlParser/SyntaxTree/AstBuilder.cs +++ b/DbSeeder/SqlParser/SyntaxTree/AstBuilder.cs @@ -46,6 +46,7 @@ private void HandleToken(SqlToken token, ref SyntaxTreeNode? 
localRoot) localRoot.Children.Remove(fkDefinition); fkOwnerColRoot!.Children.Add(fkDefinition); + _fkBuilder.Reset(); } return; } diff --git a/DbSeeder/SqlParser/SyntaxTree/ForeignKeyConstraintBuilder.cs b/DbSeeder/SqlParser/SyntaxTree/ForeignKeyConstraintBuilder.cs index d09ebce..7af5248 100644 --- a/DbSeeder/SqlParser/SyntaxTree/ForeignKeyConstraintBuilder.cs +++ b/DbSeeder/SqlParser/SyntaxTree/ForeignKeyConstraintBuilder.cs @@ -116,4 +116,10 @@ private void CreateConstraintRoot(ref SyntaxTreeNode treeLocalRoot) _lastAddedNode = _constraintLocalRoot; } + + public void Reset() + { + IsComplete = _referenceKeywordMet = false; + _constraintLocalRoot = _lastAddedNode = null; + } } From b52f39a23c57e57342a8a7e00427f121cb6bbbb6 Mon Sep 17 00:00:00 2001 From: Bardin08 Date: Mon, 29 Apr 2024 16:10:09 +0300 Subject: [PATCH 4/5] Cleanup fk logic --- DbSeeder/Data/Sql/SqlQueryGenerator.cs | 85 +++++++++++++------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/DbSeeder/Data/Sql/SqlQueryGenerator.cs b/DbSeeder/Data/Sql/SqlQueryGenerator.cs index 88cbed6..7ce5e53 100644 --- a/DbSeeder/Data/Sql/SqlQueryGenerator.cs +++ b/DbSeeder/Data/Sql/SqlQueryGenerator.cs @@ -1,7 +1,5 @@ using System.Collections.Concurrent; using DbSeeder.Schema; -using Google.Protobuf.WellKnownTypes; -using Org.BouncyCastle.Security; namespace DbSeeder.Data.Sql; @@ -46,27 +44,7 @@ private InsertSqlQuery GenerateInternal(Table table, Guid generationContextId) return new InsertSqlQuery(table.Name, generatedValues); } - - var relatedDataInsertQueries = new List(); - foreach (var fk in table.ForeignKeys) - { - var generatedContext = _generationContextBuffer[generationContextId]; - if (generatedContext.TryGetValue(fk.RefTable.Name, out var existedRecord)) - { - generatedValues[fk.Column.Name] = existedRecord.Value[fk.RefColumn.Name]; - continue; - } - - var relatedDataInsertQuery = GenerateRelatedData( - generationContextId, generatedValues, fk); - - if (relatedDataInsertQuery is not 
null) - { - relatedDataInsertQueries.Add(relatedDataInsertQuery); - } - } - - return new InsertSqlQuery(table.Name, generatedValues, relatedDataInsertQueries.AsReadOnly()); + return GenerateRelated(table, generationContextId, generatedValues); } private InsertSqlQuery? GenerateRelatedData( @@ -82,33 +60,58 @@ private InsertSqlQuery GenerateInternal(Table table, Guid generationContextId) var insertSqlQuery = GenerateInternal(fk.RefTable, generatedContextId); - // set up the proper reference (col == refCol value); - // To get the col's value that was generated earlier, we have to pass the context - var valuesWithValidFkValue = insertSqlQuery.Value.ToDictionary(); - valuesWithValidFkValue[fk.RefColumn.Name] = referencerValues[fk.Column.Name]; - insertSqlQuery = insertSqlQuery with { Value = valuesWithValidFkValue }; + insertSqlQuery = UpdateInsertQueryValues(insertSqlQuery, referencerValues, fk.RefColumn.Name, fk.Column.Name); generatedContext.TryAdd(insertSqlQuery.Table, insertSqlQuery); var hasReferencedTables = fk.RefTable.ForeignKeys.Any(); - if (hasReferencedTables) + if (!hasReferencedTables) + { + return insertSqlQuery; + } + + return GenerateRelated(fk.RefTable, generatedContextId, insertSqlQuery.Value.ToDictionary()); + } + + private static InsertSqlQuery UpdateInsertQueryValues( + InsertSqlQuery insertSqlQuery, + IReadOnlyDictionary referencerValues, + string src, + string dest) + { + var updatedValue = new Dictionary(insertSqlQuery.Value) { - foreach (var intFk in fk.RefTable.ForeignKeys) + [src] = referencerValues[dest] + }; + insertSqlQuery = insertSqlQuery with { Value = updatedValue }; + return insertSqlQuery; + } + + private InsertSqlQuery GenerateRelated( + Table table, + Guid generationContextId, + Dictionary generatedValues) + { + var relatedDataInsertQueries = new List(); + foreach (var fk in table.ForeignKeys) + { + var generatedContext = _generationContextBuffer[generationContextId]; + if (generatedContext.TryGetValue(fk.RefTable.Name, out var 
existedRecord)) { - var relatedDataInsertQuery = GenerateRelatedData( - generatedContextId, insertSqlQuery.Value, intFk); - - if (relatedDataInsertQuery is not null) - { - insertSqlQuery = insertSqlQuery with - { - RelatedQueries = [relatedDataInsertQuery] - }; - } + generatedValues[fk.Column.Name] = existedRecord.Value[fk.RefColumn.Name]; + continue; + } + + var relatedDataInsertQuery = GenerateRelatedData( + generationContextId, generatedValues, fk); + + if (relatedDataInsertQuery is not null) + { + relatedDataInsertQueries.Add(relatedDataInsertQuery); } } - return insertSqlQuery; + return new InsertSqlQuery(table.Name, generatedValues, relatedDataInsertQueries.AsReadOnly()); } public IAsyncEnumerable GenerateMultiple(Table table, int count) From 00ec721c803ba91f0ee9fc12d10be50c4e31b2a5 Mon Sep 17 00:00:00 2001 From: Bardin08 Date: Sat, 11 May 2024 01:13:03 +0300 Subject: [PATCH 5/5] Use Bogus as a data provider --- DbSeeder.sln | 1 + DbSeeder/Data/Bogus/BogusGenerator.cs | 7 ++ DbSeeder/Data/Bogus/BogusUtilities.cs | 117 ++++++++++++++++++ .../Data/Bogus/DataGeneratorsEvaluator.cs | 65 ++++++++++ DbSeeder/Data/GeneratorFactory.cs | 39 +++++- DbSeeder/Data/Sql/SqlQueryGenerator.cs | 32 ++--- DbSeeder/DbSeeder.csproj | 1 + DbSeeder/Program.cs | 2 +- docs/db-seeder-logo.svg | 115 +++++++++++++++++ 9 files changed, 353 insertions(+), 26 deletions(-) create mode 100644 DbSeeder/Data/Bogus/BogusGenerator.cs create mode 100644 DbSeeder/Data/Bogus/BogusUtilities.cs create mode 100644 DbSeeder/Data/Bogus/DataGeneratorsEvaluator.cs create mode 100644 docs/db-seeder-logo.svg diff --git a/DbSeeder.sln b/DbSeeder.sln index c62a117..0fcfb1c 100644 --- a/DbSeeder.sln +++ b/DbSeeder.sln @@ -4,6 +4,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution ProjectSection(SolutionItems) = preProject .gitignore = .gitignore EndProjectSection +EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbSeeder", "DbSeeder\DbSeeder.csproj", 
"{AA89F82A-3339-4168-B56C-22F5559700F8}" EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbSeeder.Tests", "DbSeeder.Tests\DbSeeder.Tests.csproj", "{4C3EA657-97AD-4E2E-AC54-57F4A05FA8CD}"
diff --git a/DbSeeder/Data/Bogus/BogusGenerator.cs b/DbSeeder/Data/Bogus/BogusGenerator.cs
new file mode 100644
index 0000000..81702b0
--- /dev/null
+++ b/DbSeeder/Data/Bogus/BogusGenerator.cs
@@ -0,0 +1,14 @@
+namespace DbSeeder.Data.Bogus;
+
+/// <summary>
+/// Describes one generator method discovered on a Bogus <c>Faker</c> data set.
+/// </summary>
+/// <param name="Category">Lower-cased name of the <c>Faker</c> property that exposes the method.</param>
+/// <param name="GeneratorIdentifier">The category followed by the lower-cased method name.</param>
+/// <param name="ReturnType">Declared return type of the method.</param>
+/// <param name="Params">Parameter names mapped to their parameter types.</param>
+public record BogusGenerator(
+    string Category,
+    string GeneratorIdentifier,
+    Type ReturnType,
+    Dictionary<string, Type> Params);
diff --git a/DbSeeder/Data/Bogus/BogusUtilities.cs b/DbSeeder/Data/Bogus/BogusUtilities.cs
new file mode 100644
index 0000000..e079b4b
--- /dev/null
+++ b/DbSeeder/Data/Bogus/BogusUtilities.cs
@@ -0,0 +1,135 @@
+using System.Reflection;
+using Bogus;
+
+namespace DbSeeder.Data.Bogus;
+
+/// <summary>
+/// Reflection helpers that discover the generator methods exposed by Bogus and invoke them by name.
+/// </summary>
+public static class BogusUtilities
+{
+    private const BindingFlags PublicInstance = BindingFlags.Instance | BindingFlags.Public;
+
+    /// <summary>Return types accepted for each type group key ("string", "number").</summary>
+    private static readonly Dictionary<string, HashSet<Type>> AllowedTypes = new()
+    {
+        { "string", [typeof(string), typeof(char?), typeof(char[])] },
+        {
+            "number",
+            [
+                typeof(int?), typeof(float?), typeof(double?), typeof(byte?), typeof(int), typeof(float),
+                typeof(double), typeof(byte)
+            ]
+        }
+    };
+
+    /// <summary>
+    /// Collects the public instance methods declared on every <see cref="Faker"/> property marked with
+    /// <c>RegisterMustasheMethodsAttribute</c>, grouped by lower-cased property name.
+    /// </summary>
+    /// <returns>Generator descriptors keyed by category.</returns>
+    public static Dictionary<string, List<BogusGenerator>> GetBogusGenerators()
+    {
+        var generators = new Dictionary<string, List<BogusGenerator>>();
+
+        var props = typeof(Faker).GetProperties()
+            .Where(p => p.CustomAttributes
+                .Any(x => x.AttributeType.Name == "RegisterMustasheMethodsAttribute"));
+
+        foreach (var p in props)
+        {
+            // Invariant casing: Generate looks the category up again through reflection, so it
+            // must not vary with the current culture (e.g. the Turkish dotless i).
+            var generatorCategory = p.Name.ToLowerInvariant();
+            if (!generators.TryGetValue(generatorCategory, out var categoryGenerators))
+            {
+                categoryGenerators = [];
+                generators[generatorCategory] = categoryGenerators;
+            }
+
+            var methods = p.PropertyType
+                .GetMethods(PublicInstance | BindingFlags.DeclaredOnly)
+                .Where(m => !m.Name.StartsWith("get_", StringComparison.Ordinal) &&
+                            !m.Name.StartsWith("set_", StringComparison.Ordinal));
+
+            foreach (var m in methods)
+            {
+                var methodParams = m.GetParameters().ToDictionary(x => x.Name!, x => x.ParameterType);
+                categoryGenerators.Add(new BogusGenerator(
+                    generatorCategory,
+                    generatorCategory + m.Name.ToLowerInvariant(),
+                    m.ReturnType,
+                    methodParams));
+            }
+        }
+
+        return generators;
+    }
+
+    /// <summary>
+    /// Keeps only the generators whose return type belongs to the given type group.
+    /// </summary>
+    /// <param name="src">Generators keyed by category.</param>
+    /// <param name="returnType">Type group key; "string" and "number" are defined.</param>
+    /// <returns>A new dictionary holding every category of <paramref name="src"/>; a list may be empty.</returns>
+    /// <exception cref="KeyNotFoundException"><paramref name="returnType"/> is not a defined type group.</exception>
+    public static Dictionary<string, List<BogusGenerator>> GetFiltersForReturnType(
+        this Dictionary<string, List<BogusGenerator>> src, string returnType)
+    {
+        var allowed = AllowedTypes[returnType];
+
+        return src.ToDictionary(
+            entry => entry.Key,
+            entry => entry.Value.Where(generator => allowed.Contains(generator.ReturnType)).ToList());
+    }
+
+    /// <summary>
+    /// Invokes the parameterless Bogus method described by <paramref name="generator"/> on a new
+    /// <see cref="Faker"/> and returns the produced value.
+    /// </summary>
+    /// <param name="generator">Descriptor whose identifier is the category followed by the method name.</param>
+    /// <returns>
+    /// The generated value, or <c>null</c> when the category or the method cannot be found
+    /// (a diagnostic line is written to the console in that case).
+    /// </returns>
+    /// <exception cref="NotImplementedException">Every overload of the method takes parameters.</exception>
+    public static dynamic? Generate(BogusGenerator generator)
+    {
+        var faker = new Faker();
+        var generationMethod = generator.GeneratorIdentifier[generator.Category.Length..];
+
+        var generatorProperty = faker.GetType().GetProperty(generator.Category,
+            PublicInstance | BindingFlags.IgnoreCase);
+        var categoryGenerator = generatorProperty?.GetValue(faker);
+        if (categoryGenerator is null)
+        {
+            Console.WriteLine($"Category '{generator.Category}' not found on Faker object");
+            return null;
+        }
+
+        // Match by name across all overloads: Type.GetMethod(name, flags) throws
+        // AmbiguousMatchException as soon as the name is overloaded.
+        var overloads = categoryGenerator.GetType()
+            .GetMethods(PublicInstance)
+            .Where(m => string.Equals(m.Name, generationMethod, StringComparison.OrdinalIgnoreCase))
+            .ToList();
+        if (overloads.Count == 0)
+        {
+            Console.WriteLine($"Method '{generationMethod}' not found in '{generator.Category}' category");
+            return null;
+        }
+
+        var generatorMethod = overloads.Find(m => m.GetParameters().Length == 0);
+        if (generatorMethod is null)
+        {
+            // TODO[#26]: Implement generators with params
+            throw new NotImplementedException("This feature is currently not implemented. " +
+                                              "Only parameterless generators can be used");
+        }
+
+        // TODO[#27]: Implement constraints handling
+        // The boxed result is returned as is: Convert.ChangeType throws for Nullable<T> target
+        // types and for a null result of a value-typed method, and the caller only sees dynamic.
+        return generatorMethod.Invoke(categoryGenerator, null);
+    }
+}
diff --git a/DbSeeder/Data/Bogus/DataGeneratorsEvaluator.cs b/DbSeeder/Data/Bogus/DataGeneratorsEvaluator.cs
new file mode 100644
index 0000000..df205c0
--- /dev/null
+++ b/DbSeeder/Data/Bogus/DataGeneratorsEvaluator.cs
@@ -0,0 +1,63 @@
+using DbSeeder.Schema;
+
+namespace DbSeeder.Data.Bogus;
+
+/// <summary>
+/// Ranks Bogus generators by how closely their identifier matches a column name.
+/// </summary>
+public static class DataGeneratorsEvaluator
+{
+    /// <summary>
+    /// Returns up to <paramref name="n"/> parameterless generators whose identifier has the smallest
+    /// Levenshtein distance to the name of <paramref name="column"/>.
+    /// </summary>
+    /// <param name="allGenerators">Candidate generators keyed by category.</param>
+    /// <param name="column">Column whose name is matched against the generator identifiers.</param>
+    /// <param name="n">Maximum number of generators to return.</param>
+    /// <returns>The closest generators, nearest first; ties keep their enumeration order.</returns>
+    public static List<BogusGenerator> FindBestNGenerators(
+        this Dictionary<string, List<BogusGenerator>> allGenerators,
+        Column column,
+        int n = 1)
+    {
+        // Generator identifiers are lower-cased when they are built, so the column name is
+        // lower-cased too; otherwise every upper-case letter would count as a substitution.
+        var columnName = column.Name.ToLowerInvariant();
+
+        // TODO[#26]: Implement generators with params (only parameterless ones are ranked for now).
+        // OrderBy is a stable sort, so generators with equal distance keep their enumeration order.
+        return allGenerators.Values
+            .SelectMany(category => category)
+            .Where(generator => generator.Params.Count == 0)
+            .OrderBy(generator => CalculateLevenshteinDistance(columnName, generator.GeneratorIdentifier))
+            .Take(n)
+            .ToList();
+    }
+
+    /// <summary>Computes the Levenshtein edit distance between two strings.</summary>
+    private static int CalculateLevenshteinDistance(string word1, string word2)
+    {
+        var n = word1.Length;
+        var m = word2.Length;
+        var distance = new int[n + 1, m + 1];
+
+        for (var i = 0; i <= n; i++) distance[i, 0] = i;
+        for (var j = 0; j <= m; j++) distance[0, j] = j;
+
+        for (var i = 1; i <= n; i++)
+        {
+            for (var j = 1; j <= m; j++)
+            {
+                var cost = (word2[j - 1] == word1[i - 1]) ? 
0 : 1;
+
+                distance[i, j] = Math.Min(
+                    Math.Min(
+                        distance[i - 1, j] + 1, // Deletion
+                        distance[i, j - 1] + 1), // Insertion
+                    distance[i - 1, j - 1] + cost); // Substitution
+            }
+        }
+
+        return distance[n, m];
+    }
+}
diff --git a/DbSeeder/Data/GeneratorFactory.cs b/DbSeeder/Data/GeneratorFactory.cs
index 599ceb2..473b25b 100644
--- a/DbSeeder/Data/GeneratorFactory.cs
+++ b/DbSeeder/Data/GeneratorFactory.cs
@@ -1,11 +1,64 @@
+using System.Data;
+using DbSeeder.Data.Bogus;
 using DbSeeder.Schema;
 
 namespace DbSeeder.Data;
 
 internal static class GeneratorFactory
 {
-    private static readonly string[] StringTypes = ["text", "varchar", "nvarchar", "char"];
-    private static readonly string[] NumeralTypes = ["int", "long", "bit"];
+    // Case-insensitive sets, so a lookup is a plain hash probe on the column's data type.
+    private static readonly HashSet<string> StringTypes = new(StringComparer.OrdinalIgnoreCase)
+    {
+        "text", "varchar", "nvarchar", "char", "nchar", "ntext"
+    };
+
+    private static readonly HashSet<string> NumeralTypes = new(StringComparer.OrdinalIgnoreCase)
+    {
+        "bigint", "int", "smallint", "tinyint", "bit", "decimal", "money", "smallmoney", "float", "real"
+    };
+
+    private static readonly HashSet<string> BinaryTypes = ["binary", "varbinary", "image"];
+
+    private static readonly HashSet<string> DateTimeTypes =
+        ["datetime", "smalldatetime", "date", "time", "datetime2", "datetimeoffset"];
+
+    private static readonly HashSet<string> OtherTypes =
+        ["uniqueidentifier", "timestamp", "xml", "udt", "structured", "variant"];
+
+    private static readonly Dictionary<string, List<BogusGenerator>> Generators =
+        BogusUtilities.GetBogusGenerators();
+
+    // The type-filtered views never change, so they are computed once instead of per column.
+    private static readonly Dictionary<string, List<BogusGenerator>> StringGenerators =
+        Generators.GetFiltersForReturnType("string");
+
+    private static readonly Dictionary<string, List<BogusGenerator>> NumberGenerators =
+        Generators.GetFiltersForReturnType("number");
+
+    /// <summary>
+    /// Generates a value for <paramref name="col"/> with the Bogus generator whose identifier is the
+    /// closest match to the column name, after narrowing the candidates by the column's data type.
+    /// </summary>
+    /// <param name="col">Column to generate a value for.</param>
+    /// <returns>The generated value, or <c>null</c> when no generator matches or generation yields nothing.</returns>
+    public static object? GetGeneratorByColumnV2(Column col)
+    {
+        // type filter
+        var generators = Generators;
+        if (StringTypes.Contains(col.DataType))
+        {
+            generators = StringGenerators;
+        }
+        else if (NumeralTypes.Contains(col.DataType))
+        {
+            generators = NumberGenerators;
+        }
+
+        // semantic filter
+        // FirstOrDefault: the filtered set may hold no parameterless generator at all.
+        var generator = generators.FindBestNGenerators(col, n: 1).FirstOrDefault();
+        return generator is null ? null : (object?)BogusUtilities.Generate(generator);
+    }
 
     public static (Type, Func) GetGeneratorByColumn(Column col)
     {
@@ -21,4 +74,4 @@ public static (Type, Func) GetGeneratorByColumn(Column col)
 
         throw new NotImplementedException($"{col.DataType} is not currently supported");
     }
-}
\ No newline at end of file
+}
diff --git a/DbSeeder/Data/Sql/SqlQueryGenerator.cs b/DbSeeder/Data/Sql/SqlQueryGenerator.cs index 7ce5e53..03409b8 100644 --- a/DbSeeder/Data/Sql/SqlQueryGenerator.cs +++ b/DbSeeder/Data/Sql/SqlQueryGenerator.cs @@ -34,8 +34,8 @@ private InsertSqlQuery GenerateInternal(Table table, Guid generationContextId) var generatedValues = new Dictionary(); foreach (var col in cols.ToList()) { - var (_, func) = GeneratorFactory.GetGeneratorByColumn(col); - generatedValues.Add(col.Name, func().ToString()!); + var value = GeneratorFactory.GetGeneratorByColumnV2(col); + generatedValues.Add(col.Name, value?.ToString() ?? "null"); } var hasReferencedTables = table.ForeignKeys.Any(); @@ -44,33 +44,29 @@ private InsertSqlQuery GenerateInternal(Table table, Guid generationContextId) return new InsertSqlQuery(table.Name, generatedValues); } - return GenerateRelated(table, generationContextId, generatedValues); + return GenerateForeignSubKeys(table, generationContextId, generatedValues); } - private InsertSqlQuery? GenerateRelatedData( + private InsertSqlQuery? 
GenerateForeignKey( Guid generatedContextId, IReadOnlyDictionary referencerValues, ForeignKey fk) { var generatedContext = _generationContextBuffer[generatedContextId]; - if (generatedContext.TryGetValue(fk.RefTable.Name, out var existedRecord)) + if (generatedContext.ContainsKey(fk.RefTable.Name)) { return null; } var insertSqlQuery = GenerateInternal(fk.RefTable, generatedContextId); - insertSqlQuery = UpdateInsertQueryValues(insertSqlQuery, referencerValues, fk.RefColumn.Name, fk.Column.Name); generatedContext.TryAdd(insertSqlQuery.Table, insertSqlQuery); var hasReferencedTables = fk.RefTable.ForeignKeys.Any(); - if (!hasReferencedTables) - { - return insertSqlQuery; - } - - return GenerateRelated(fk.RefTable, generatedContextId, insertSqlQuery.Value.ToDictionary()); + return !hasReferencedTables + ? insertSqlQuery + : GenerateForeignSubKeys(fk.RefTable, generatedContextId, insertSqlQuery.Value.ToDictionary()); } private static InsertSqlQuery UpdateInsertQueryValues( @@ -87,7 +83,7 @@ private static InsertSqlQuery UpdateInsertQueryValues( return insertSqlQuery; } - private InsertSqlQuery GenerateRelated( + private InsertSqlQuery GenerateForeignSubKeys( Table table, Guid generationContextId, Dictionary generatedValues) @@ -102,7 +98,7 @@ private InsertSqlQuery GenerateRelated( continue; } - var relatedDataInsertQuery = GenerateRelatedData( + var relatedDataInsertQuery = GenerateForeignKey( generationContextId, generatedValues, fk); if (relatedDataInsertQuery is not null) @@ -124,11 +120,3 @@ public IAsyncEnumerable GenerateBulk(Table table, BulkInsert throw new NotImplementedException(); } } - -// --> insert into {table} -// value ( { string.Join(",", Values[i]) } ); - -// --> insert into {table} -// values ( -// foreach Values[i]: {string.Join(",", Values[i])} -// ); diff --git a/DbSeeder/DbSeeder.csproj b/DbSeeder/DbSeeder.csproj index 8f67ab3..59f56c1 100644 --- a/DbSeeder/DbSeeder.csproj +++ b/DbSeeder/DbSeeder.csproj @@ -8,6 +8,7 @@ + diff --git 
a/DbSeeder/Program.cs b/DbSeeder/Program.cs index 4c98f0d..a0a4c5d 100644 --- a/DbSeeder/Program.cs +++ b/DbSeeder/Program.cs @@ -66,7 +66,7 @@ FOREIGN KEY (user_id) REFERENCES users(id) Console.WriteLine("\n\n\t\t// --- Data Generation --- \\\\"); var generator = new SqlQueryGenerator(); - for (var i = 0; i < 1; i++) + for (var i = 0; i < 10; i++) { var profile = generator.Generate(sqlSchema.GetTableByName("activity")!); Console.WriteLine("\n-------\n\n{0}\n-------\n", profile); diff --git a/docs/db-seeder-logo.svg b/docs/db-seeder-logo.svg new file mode 100644 index 0000000..c661d55 --- /dev/null +++ b/docs/db-seeder-logo.svg @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + +