New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API for performantly bulk-creating pages programmatically #11480
base: main
Are you sure you want to change the base?
Changes from 10 commits
798145f
f4cf75a
c3752db
20379e3
a7a67f4
66ea8cd
c7e07e9
5be4dff
cbe1649
abc1a91
4acfa08
8d70e88
4daf714
d189813
710c8b3
5a35316
725fc25
23d7e45
6b17ad2
4ddecb5
0fa99c9
ca2a774
8f0fe01
d1c6193
685f779
c5a7ea3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1299,6 +1299,164 @@ | |
def __str__(self):
    """Return the page title as the human-readable representation."""
    return self.title
|
||
def _process_sibling(self, nodes, parent):
    """
    Prepare ``nodes`` as sorted siblings of this node, honouring the
    ``node_order_by`` attribute.

    Each node is assigned this node's depth and a materialised path, and
    ``parent.numchild`` is incremented per node. Nothing is persisted
    here; the caller is responsible for saving the nodes.

    Returns the same ``nodes`` list, mutated in place.
    """
    siblings_so_far = self.get_siblings()
    for node in nodes:
        pos = self._prepare_pos_var_for_add_sibling("sorted-sibling")
        node.depth = self.depth
        # Siblings that should sort after this node, per node_order_by.
        siblings = self.get_sorted_pos_queryset(siblings_so_far, node)
        try:
            newpos = siblings.all()[0]._get_lastpos_in_path()
        except IndexError:
            # Nothing sorts after this node: append it as the last sibling.
            newpos = None
            pos = "last-sibling"
        _, newpath = self.reorder_nodes_before_add_or_move(
            pos, newpos, self.depth, self, siblings, None, False
        )
        parent.numchild += 1
        node.path = newpath
        # Fold the processed siblings back in so subsequent nodes are
        # positioned relative to everything placed so far.
        siblings_so_far = siblings_so_far.union(siblings)
    return nodes
|
||
def _process_leaf(self, nodes):
    """
    Prepare ``nodes`` as the first children of this (currently leaf) node.

    Sets each node's depth and path, increments ``self.numchild``, caches
    ``self`` as each node's parent, then saves the updated parent. The
    nodes themselves are not saved here.

    Raises:
        ValidationError: if a generated path exceeds ``path.max_length``.
    """
    max_length = self.__class__._meta.get_field("path").max_length
    # enumerate() provides the 1-based step used to build each child path.
    for step, node in enumerate(nodes, start=1):
        node.depth = self.depth + 1
        node.path = self.__class__._get_path(self.path, node.depth, step)
        if len(node.path) > max_length:
            # Implicit concatenation avoids the whitespace runs a
            # backslash-continued string literal would embed.
            raise ValidationError(
                "The new node is too deep in the tree, try "
                "increasing the path.max_length property "
                "and UPDATE your database"
            )
        self.numchild += 1
        node._cached_parent_obj = self
    self.save()
|
||
def _process_unordered_children(self, nodes):
    """
    Prepare ``nodes`` as children appended after this node's last child,
    without any specific ordering.

    Sets each node's depth and path (incrementing from the previous
    child's path), increments ``self.numchild``, caches ``self`` as the
    parent, then saves the updated parent. The nodes themselves are not
    saved here. Callers must ensure this node already has children
    (``get_last_child()`` must not return None).

    Raises:
        ValidationError: if a generated path exceeds ``path.max_length``.
    """
    last_child = self.get_last_child()
    max_length = self.__class__._meta.get_field("path").max_length
    for node in nodes:
        node.depth = self.depth + 1
        # Each new path is the increment of the previously placed child.
        node.path = last_child._inc_path()
        last_child = node
        if len(node.path) > max_length:
            # Implicit concatenation avoids the whitespace runs a
            # backslash-continued string literal would embed.
            raise ValidationError(
                "The new node is too deep in the tree, try "
                "increasing the path.max_length property "
                "and UPDATE your database"
            )
        self.numchild += 1
        node._cached_parent_obj = self
    self.save()
|
||
def _process_child_nodes(self, nodes):
    """
    Set paths and depths on ``nodes`` and update the parent's numchild,
    dispatching to the appropriate strategy:

    - sorted siblings when ``node_order_by`` is set and children exist,
    - first-children setup when this node is currently a leaf,
    - plain append after the last child otherwise.
    """
    # Evaluate once; the original called is_leaf() twice.
    is_leaf = self.is_leaf()
    if self.node_order_by and not is_leaf:
        return self.get_last_child()._process_sibling(nodes, self)
    if is_leaf:
        return self._process_leaf(nodes)
    return self._process_unordered_children(nodes)
|
||
def _check_unique(self, children, existing_pages=None):
    """
    Validate and assign slugs for ``children``, and default their locale.

    Children without a slug get one derived from their title, suffixed
    with ``-2``, ``-3``, ... until unique among both the new children and
    ``existing_pages``.

    Raises:
        ValidationError: if explicit slugs collide with each other or
            with the parent's existing pages.
    """
    if not existing_pages:
        existing_pages = []
    # Slugs already claimed by the new children and the existing pages.
    slugs = [
        child.slug
        for child in children + list(existing_pages)
        if hasattr(child, "slug") and child.slug
    ]
    used_slugs = set(slugs)
    if len(slugs) != len(used_slugs):
        raise ValidationError(
            {
                "slug": _(
                    "Duplicate slugs in use within the parent page at '%(parent_url_path)s'"
                )
                % {
                    # url_path (not the treebeard materialised path)
                    # matches what the message promises.
                    "parent_url_path": self.url_path,
                }
            }
        )
    for child in children:
        if not hasattr(child, "slug") or not child.slug:
            base_slug = slugify(child.title, allow_unicode=True)
            candidate_slug = base_slug
            suffix = 1
            # Always suffix from the original base slug so collisions
            # don't compound into "title-2-3".
            while candidate_slug in used_slugs:
                suffix += 1
                candidate_slug = "%s-%d" % (base_slug, suffix)
            child.slug = candidate_slug
            used_slugs.add(candidate_slug)
        if child.locale_id is None:
            child.locale = self.get_default_locale()
|
||
@transaction.atomic
def bulk_add_children(self, children):
    """
    Add multiple new pages as children of this page with one bulk insert.

    This calls ``bulk_create`` on the Page model, so ``Page.save`` (and
    its signals/validation hooks) are bypassed.

    Returns the list of created pages.

    Raises:
        ValueError: if any child already exists in the database.
        ValidationError: on duplicate slugs or paths that are too deep.
    """
    # Only brand-new (unsaved) instances may be bulk-added.
    for child in children:
        if not child._state.adding:
            raise ValueError(
                "Attempted to add a tree node that is already in the "
                "database. bulk_add_children can only be used to add "
                "new pages, not to update existing pages"
            )
    # Existing children are needed to detect duplicate slugs.
    existing_pages = self.get_children()
    # Let ValidationError propagate directly; re-raising it added nothing.
    self._check_unique(children, existing_pages)
    # Assign tree paths/depths and update this page's numchild.
    self._process_child_nodes(children)
    # Insert all children in a single query.
    return Page.objects.bulk_create(children)
|
||
@property | ||
def revisions(self): | ||
# Always use the specific page instance when querying for revisions as | ||
|
@@ -3456,7 +3614,8 @@ | |
|
||
def user_can_access_editor(self, obj, user):
    """
    Return True if a user who would not normally be able to access the
    editor for the object should be able to while the object is on this
    task. Returning False does not remove permissions from users who
    would otherwise have them.
    """
    return False
|
||
def locked_for_user(self, obj, user): | ||
|
@@ -3468,12 +3627,14 @@ | |
|
||
def user_can_lock(self, obj, user):
    """
    Return True if a user who would not normally be able to lock the
    object should be able to while the object is on this task. Returning
    False does not remove permissions from users who would otherwise
    have them.
    """
    return False
|
||
def user_can_unlock(self, obj, user):
    """
    Return True if a user who would not normally be able to unlock the
    object should be able to while the object is on this task. Returning
    False does not remove permissions from users who would otherwise
    have them.
    """
    return False
|
||
def get_actions(self, obj, user): | ||
|
@@ -4288,7 +4449,8 @@ | |
@transaction.atomic | ||
def cancel(self, user=None, resume=False, comment=""): | ||
"""Cancel the task state and update the workflow state. If ``resume`` is set to True, then upon update the workflow state | ||
is passed the current task as ``next_task``, causing it to start a new task state on the current task if possible""" | ||
is passed the current task as ``next_task``, causing it to start a new task state on the current task if possible | ||
""" | ||
self.status = self.STATUS_CANCELLED | ||
self.finished_at = timezone.now() | ||
self.comment = comment | ||
|
@@ -4305,7 +4467,8 @@ | |
|
||
def copy(self, update_attrs=None, exclude_fields=None): | ||
"""Copy this task state, excluding the attributes in the ``exclude_fields`` list and updating any attributes to values | ||
specified in the ``update_attrs`` dictionary of ``attribute``: ``new value`` pairs""" | ||
specified in the ``update_attrs`` dictionary of ``attribute``: ``new value`` pairs | ||
""" | ||
exclude_fields = ( | ||
self.default_exclude_fields_in_copy | ||
+ self.exclude_fields_in_copy | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could use
enumerate()
here to handle thestep
value for you