From cd10bb952f4f3b5cfb19fb6397313d1f805fbb57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20G=C3=A9rard?= Date: Wed, 11 May 2022 14:55:50 +0200 Subject: [PATCH 1/9] Improve doc of "Choosing from a populated table" models.Language.objects.all() does gets evaluated at runtime but that returns a QuerySet, django only hit the database when we start iterating in the queryset. Previous documentation was a bit misleading. I would find more intuitive to recommend to use `language = factory.Iterator(models.Language.objects.all)` but I havn't changed that here, let me know if you prefer that I change it that way --- docs/recipes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/recipes.rst b/docs/recipes.rst index 6afa9d1a..93ed77b4 100644 --- a/docs/recipes.rst +++ b/docs/recipes.rst @@ -52,8 +52,9 @@ simply use a :class:`factory.Iterator` on the chosen queryset: language = factory.Iterator(models.Language.objects.all()) -Here, ``models.Language.objects.all()`` won't be evaluated until the -first call to ``UserFactory``; thus avoiding DB queries at import time. +Here, ``models.Language.objects.all()`` is a QuerySet and will only hit the database +when factory_boy will start iterating on it, i.e on the first call to ``UserFactory``; +thus avoiding DB queries at import time. Reverse dependencies (reverse ForeignKey) From 31e8a41827aea805e0475f4e6aceaa19d84cdaac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastien=20G=C3=A9rard?= Date: Fri, 13 May 2022 11:51:10 +0200 Subject: [PATCH 2/9] Update docs/recipes.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: François Freitag --- docs/recipes.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/recipes.rst b/docs/recipes.rst index 93ed77b4..a1717ad5 100644 --- a/docs/recipes.rst +++ b/docs/recipes.rst @@ -52,9 +52,10 @@ simply use a :class:`factory.Iterator` on the chosen queryset: language = factory.Iterator(models.Language.objects.all()) -Here, ``models.Language.objects.all()`` is a QuerySet and will only hit the database -when factory_boy will start iterating on it, i.e on the first call to ``UserFactory``; -thus avoiding DB queries at import time. +Here, ``models.Language.objects.all()`` is a +:class:`~django.db.models.query.QuerySet` and will only hit the database when +``factory_boy`` starts iterating on it, i.e on the first call to +``UserFactory``; thus avoiding DB queries at import time. Reverse dependencies (reverse ForeignKey) From ed107a8b399d2a58b1bc67a924b16e5ecae258ef Mon Sep 17 00:00:00 2001 From: Hugo Pellissari Pavan Date: Fri, 13 May 2022 13:40:07 -0300 Subject: [PATCH 3/9] Support creating SQLAlchemy sessions from a callable Allows setting the database session through the sqlalchemy_session_factory attribute, receiving a Callable that returns a sqlalchemy.orm.Session instance. This ensures better control to dynamically set the Session in the factory. --- docs/changelog.rst | 2 ++ docs/orms.rst | 19 +++++++++++++++++++ factory/alchemy.py | 12 ++++++++++++ tests/test_alchemy.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+) diff --git a/docs/changelog.rst b/docs/changelog.rst index e45e4be1..34f3cac0 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -10,6 +10,8 @@ ChangeLog - :issue:`366`: Add :class:`factory.django.Password` to generate Django :class:`~django.contrib.auth.models.User` passwords. + - :issue:`304`: Add :attr:`~factory.alchemy.SQLAlchemyOptions.sqlalchemy_session_factory` to dynamically + create sessions for use by the :class:`~factory.alchemy.SQLAlchemyModelFactory`. - Add support for Django 3.2 - Add support for Django 4.0 - Add support for Python 3.10 diff --git a/docs/orms.rst b/docs/orms.rst index e464fc4e..3d255a31 100644 --- a/docs/orms.rst +++ b/docs/orms.rst @@ -369,6 +369,25 @@ To work, this class needs an `SQLAlchemy`_ session object affected to the :attr: SQLAlchemy session to use to communicate with the database when creating an object through this :class:`SQLAlchemyModelFactory`. + .. attribute:: sqlalchemy_session_factory + + .. versionadded:: 3.3.0 + + :class:`~collections.abc.Callable` returning a :class:`~sqlalchemy.orm.Session` instance to use to communicate + with the database. You can either provide the session through this attribute, or through + :attr:`~factory.alchemy.SQLAlchemyOptions.sqlalchemy_session`, but not both at the same time. + + .. code-block:: python + + from . import common + + class UserFactory(factory.alchemy.SQLAlchemyModelFactory): + class Meta: + model = User + sqlalchemy_session_factory = lambda: common.Session() + + username = 'john' + .. attribute:: sqlalchemy_session_persistence Control the action taken by ``sqlalchemy_session`` at the end of a create call. diff --git a/factory/alchemy.py b/factory/alchemy.py index ef7a591a..cf20b537 100644 --- a/factory/alchemy.py +++ b/factory/alchemy.py @@ -22,10 +22,18 @@ def _check_sqlalchemy_session_persistence(self, meta, value): (meta, VALID_SESSION_PERSISTENCE_TYPES, value) ) + @staticmethod + def _check_has_sqlalchemy_session_set(meta, value): + if value and meta.sqlalchemy_session: + raise RuntimeError("Provide either a sqlalchemy_session or a sqlalchemy_session_factory, not both") + def _build_default_options(self): return super()._build_default_options() + [ base.OptionDefault('sqlalchemy_get_or_create', (), inherit=True), base.OptionDefault('sqlalchemy_session', None, inherit=True), + base.OptionDefault( + 'sqlalchemy_session_factory', None, inherit=True, checker=self._check_has_sqlalchemy_session_set + ), base.OptionDefault( 'sqlalchemy_session_persistence', None, @@ -90,6 +98,10 @@ def _get_or_create(cls, model_class, session, args, kwargs): @classmethod def _create(cls, model_class, *args, **kwargs): """Create an instance of the model, and save it to the database.""" + session_factory = cls._meta.sqlalchemy_session_factory + if session_factory: + cls._meta.sqlalchemy_session = session_factory() + session = cls._meta.sqlalchemy_session if session is None: diff --git a/tests/test_alchemy.py b/tests/test_alchemy.py index 03410838..005fb0fa 100644 --- a/tests/test_alchemy.py +++ b/tests/test_alchemy.py @@ -264,6 +264,34 @@ def test_build_does_not_raises_exception_when_no_session_was_set(self): self.assertEqual(inst1.id, 1) +class SQLAlchemySessionFactoryTestCase(unittest.TestCase): + + def test_create_get_session_from_sqlalchemy_session_factory(self): + class SessionGetterFactory(SQLAlchemyModelFactory): + class Meta: + model = models.StandardModel + sqlalchemy_session = None + sqlalchemy_session_factory = lambda: models.session + + id = factory.Sequence(lambda n: n) + + SessionGetterFactory.create() + self.assertEqual(SessionGetterFactory._meta.sqlalchemy_session, models.session) + # Reuse the session obtained from sqlalchemy_session_factory. + SessionGetterFactory.create() + + def test_create_raise_exception_sqlalchemy_session_factory_not_callable(self): + message = "^Provide either a sqlalchemy_session or a sqlalchemy_session_factory, not both$" + with self.assertRaisesRegex(RuntimeError, message): + class SessionAndGetterFactory(SQLAlchemyModelFactory): + class Meta: + model = models.StandardModel + sqlalchemy_session = models.session + sqlalchemy_session_factory = lambda: models.session + + id = factory.Sequence(lambda n: n) + + class NameConflictTests(unittest.TestCase): """Regression test for `TypeError: _save() got multiple values for argument 'session'` From bba4823fd7f209e69399acbc480beb68ff183ba4 Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 11:16:23 +0200 Subject: [PATCH 4/9] Adds docs --- factory/django.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/factory/django.py b/factory/django.py index 3d60d92c..e1c9e6e8 100644 --- a/factory/django.py +++ b/factory/django.py @@ -24,7 +24,7 @@ DEFAULT_DB_ALIAS = 'default' # Same as django.db.DEFAULT_DB_ALIAS -DJANGO_22 = Version('2.2') <= Version(django_version) < Version('3.0') +DJANGO_22 = Version(django_version) < Version('3.0') _LAZY_LOADS = {} @@ -205,9 +205,18 @@ def create_batch(cls, size, **kwargs): @classmethod def _refresh_database_pks(cls, model_cls, objs): + """ + Before Django 3.0, there is an issue when bulk_insert. + + The issue is that if you create an instance of a model, + and reference it in another unsaved instance of a model. + When you create the instance of the first one, the pk/id + is never updated on the sub model that referenced the first. + """ if not DJANGO_22: return - fields = [f for f in model_cls._meta.get_fields() if isinstance(f, models.fields.related.ForeignObject)] + fields = [f for f in model_cls._meta.get_fields() + if isinstance(f, models.fields.related.ForeignObject)] if not fields: return for obj in objs: From 94e0c91cc3769c0b20dc6fb9f7208307fd6b87cb Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 11:16:48 +0200 Subject: [PATCH 5/9] Removes signales from bulk_insert --- factory/django.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/factory/django.py b/factory/django.py index e1c9e6e8..b7370b6e 100644 --- a/factory/django.py +++ b/factory/django.py @@ -231,12 +231,8 @@ def _bulk_create(cls, size, **kwargs): collector.sort() for model_cls, objs in collector.data.items(): manager = cls._get_manager(model_cls) - for instance in objs: - models.signals.pre_save.send(model_cls, instance=instance, created=False) cls._refresh_database_pks(model_cls, objs) manager.bulk_create(objs) - for instance in objs: - models.signals.post_save.send(model_cls, instance=instance, created=True) return models_to_create @classmethod From 2235ec50f55b88ba88905dd992ee8c9c1fc88041 Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 11:32:07 +0200 Subject: [PATCH 6/9] Minor clean up on Collector --- factory/django.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/factory/django.py b/factory/django.py index b7370b6e..bd26abe1 100644 --- a/factory/django.py +++ b/factory/django.py @@ -340,18 +340,9 @@ def _make_data(self, params): class Collector: - def __init__(self, using): - self.using = using + def __init__(self): # Initially, {model: {instances}}, later values become lists. self.data = defaultdict(list) - # {model: {(field, value): {instances}}} - self.field_updates = defaultdict(functools.partial(defaultdict, set)) - # {model: {field: {instances}}} - self.restricted_objects = defaultdict(functools.partial(defaultdict, set)) - # fast_deletes is a list of queryset-likes that can be deleted without - # fetching the objects into memory. - self.fast_deletes = [] - # Tracks deletion-order dependency for databases without transactions # or ability to defer constraint checks. Only concrete model classes # should be included, as the dependencies exist only between actual @@ -361,7 +352,7 @@ def __init__(self, using): def add(self, objs, source=None, nullable=False, reverse_dependency=False): """ - Add 'objs' to the collection of objects to be deleted. If the call is + Add 'objs' to the collection of objects to be inserted in order. If the call is the result of a cascade, 'source' should be the model that caused it, and 'nullable' should be set to True if the relation can be null. Return a list of all objects that were not already collected. @@ -377,16 +368,12 @@ def add(self, objs, source=None, nullable=False, reverse_dependency=False): continue if id(obj) not in lookup: new_objs.append(obj) - # import ipdb; ipdb.sset_trace() instances.extend(new_objs) # Nullable relationships can be ignored -- they are nulled out before # deleting, and therefore do not affect the order in which objects have # to be deleted. if source is not None and not nullable: self.add_dependency(source, model, reverse_dependency=reverse_dependency) - # if not nullable: - # import ipdb; ipdb.sset_trace() - # self.add_dependency(source, model, reverse_dependency=reverse_dependency) return new_objs def add_dependency(self, model, dependency, reverse_dependency=False): @@ -403,11 +390,7 @@ def collect( objs, source=None, nullable=False, - collect_related=True, - source_attr=None, reverse_dependency=False, - keep_parents=False, - fail_on_restricted=True, ): """ Add 'objs' to the collection of objects to be deleted as well as all @@ -434,7 +417,6 @@ def collect( if not new_objs: return - # import ipdb; ipdb.sset_trace() model = new_objs[0].__class__ def get_candidate_relations(opts): @@ -454,7 +436,6 @@ def get_candidate_relations(opts): collected_objs.append(val) for name, _ in factory_cls._meta.post_declarations.as_dict().items(): - for obj in new_objs: val = getattr(obj, name, None) if isinstance(val, models.Model): @@ -466,10 +447,15 @@ def get_candidate_relations(opts): ) def sort(self): + """ + Sort the model instances by the least dependecies to the most dependencies. + + We want to insert the models with no dependencies first, and continue inserting + using the models that the higher models depend on. + """ sorted_models = [] concrete_models = set() models = list(self.data) - # import ipdb; ipdb.sset_trace() while len(sorted_models) < len(models): found = False for model in models: @@ -481,6 +467,7 @@ def sort(self): concrete_models.add(model._meta.concrete_model) found = True if not found: + logger.debug('dependency order could not be determined') return self.data = {model: self.data[model] for model in sorted_models} From f386342529015ee820223def4656577e49f7f3c5 Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 12:44:24 +0200 Subject: [PATCH 7/9] Removes reverse_dependency --- factory/django.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/factory/django.py b/factory/django.py index bd26abe1..6485f81c 100644 --- a/factory/django.py +++ b/factory/django.py @@ -226,7 +226,7 @@ def _refresh_database_pks(cls, model_cls, objs): @classmethod def _bulk_create(cls, size, **kwargs): models_to_create = cls.build_batch(size, **kwargs) - collector = Collector(cls._meta.database) + collector = Collector() collector.collect(cls, models_to_create) collector.sort() for model_cls, objs in collector.data.items(): @@ -350,7 +350,7 @@ def __init__(self): # parent. self.dependencies = defaultdict(set) # {model: {models}} - def add(self, objs, source=None, nullable=False, reverse_dependency=False): + def add(self, objs, source=None, nullable=False): """ Add 'objs' to the collection of objects to be inserted in order. If the call is the result of a cascade, 'source' should be the model that caused it, @@ -373,12 +373,10 @@ def add(self, objs, source=None, nullable=False, reverse_dependency=False): # deleting, and therefore do not affect the order in which objects have # to be deleted. if source is not None and not nullable: - self.add_dependency(source, model, reverse_dependency=reverse_dependency) + self.add_dependency(source, model) return new_objs - def add_dependency(self, model, dependency, reverse_dependency=False): - if reverse_dependency: - model, dependency = dependency, model + def add_dependency(self, model, dependency): self.dependencies[model._meta.concrete_model].add( dependency._meta.concrete_model ) @@ -390,7 +388,6 @@ def collect( objs, source=None, nullable=False, - reverse_dependency=False, ): """ Add 'objs' to the collection of objects to be deleted as well as all @@ -400,10 +397,6 @@ def collect( If the call is the result of a cascade, 'source' should be the model that caused it and 'nullable' should be set to True, if the relation can be null. - If 'reverse_dependency' is True, 'source' will be deleted before the - current model, rather than after. (Needed for cascading to parent - models, the one case in which the cascade follows the forwards - direction of an FK rather than the reverse direction.) If 'keep_parents' is True, data of parent model's will be not deleted. If 'fail_on_restricted' is False, error won't be raised even if it's prohibited to delete such objects due to RESTRICT, that defers @@ -412,7 +405,7 @@ def collect( can be deleted. """ new_objs = self.add( - objs, source, nullable, reverse_dependency=reverse_dependency + objs, source, nullable ) if not new_objs: return @@ -443,7 +436,7 @@ def get_candidate_relations(opts): if collected_objs: new_objs = self.collect( - factory_cls=factory_cls, objs=collected_objs, source=model, reverse_dependency=False + factory_cls=factory_cls, objs=collected_objs, source=model ) def sort(self): From 1f74911367747cb4ae44278b1d4b7ae9fee3f51d Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 12:48:48 +0200 Subject: [PATCH 8/9] Simpler code --- factory/django.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/factory/django.py b/factory/django.py index 6485f81c..4181ec9f 100644 --- a/factory/django.py +++ b/factory/django.py @@ -412,23 +412,20 @@ def collect( model = new_objs[0].__class__ - def get_candidate_relations(opts): - # The candidate relations are the ones that come from N-1 and 1-1 relations. - # N-N (i.e., many-to-many) relations aren't candidates for deletion. - return ( - f - for f in opts.get_fields(include_hidden=True) - if isinstance(f, models.ForeignKey) - ) + # The candidate relations are the ones that come from N-1 and 1-1 relations. + candidate_relations = ( + f for f in model._meta.get_fields(include_hidden=True) + if isinstance(f, models.ForeignKey) + ) collected_objs = [] - for field in get_candidate_relations(model._meta): + for field in candidate_relations: for obj in new_objs: val = getattr(obj, field.name) if isinstance(val, models.Model): collected_objs.append(val) - for name, _ in factory_cls._meta.post_declarations.as_dict().items(): + for name, in factory_cls._meta.post_declarations.as_dict().keys(): for obj in new_objs: val = getattr(obj, name, None) if isinstance(val, models.Model): From d111d1a0d3ed189ac3c6638c8385674f4ee97906 Mon Sep 17 00:00:00 2001 From: kingbuzzman Date: Thu, 19 May 2022 13:29:51 +0200 Subject: [PATCH 9/9] Adds basic test for collector --- factory/django.py | 4 ++-- tests/test_django.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/factory/django.py b/factory/django.py index 4181ec9f..cffe1b42 100644 --- a/factory/django.py +++ b/factory/django.py @@ -226,7 +226,7 @@ def _refresh_database_pks(cls, model_cls, objs): @classmethod def _bulk_create(cls, size, **kwargs): models_to_create = cls.build_batch(size, **kwargs) - collector = Collector() + collector = DependencyInsertOrderCollector() collector.collect(cls, models_to_create) collector.sort() for model_cls, objs in collector.data.items(): @@ -339,7 +339,7 @@ def _make_data(self, params): return thumb_io.getvalue() -class Collector: +class DependencyInsertOrderCollector: def __init__(self): # Initially, {model: {instances}}, later values become lists. self.data = defaultdict(list) diff --git a/tests/test_django.py b/tests/test_django.py index 3d59522e..fa576b28 100644 --- a/tests/test_django.py +++ b/tests/test_django.py @@ -175,6 +175,16 @@ class Meta: level_2 = factory.SubFactory(Level2Factory) +class DependencyInsertOrderCollector(django_test.TestCase): + + def test_empty(self): + collector = factory.django.DependencyInsertOrderCollector() + collector.collect(Level2Factory, []) + collector.sort() + + self.assertEqual(collector.data, {}) + + @unittest.skipIf(SKIP_BULK_INSERT, "bulk insert not supported by current db.") class DjangoBulkInsert(django_test.TestCase):