Mercurial > hg > trac > jungle > src > mercurial-plugin
changeset 2:bbb6e28dc488 0.13
Add documentation to `MercurialNode.find_dirctx`.
As this is the heart of the optimizations for browsing speed,
it's better to make it easier to grasp (did I really write that code?).
Also make the method private, as it's really an implementation
detail and shouldn't be part of the public API.
author | Christian Boos <christian.boos@free.fr> |
---|---|
date | Sun, 10 Jun 2012 00:36:16 +0200 |
parents | b30690538379 |
children | 8af21bda2b3e |
files | tracext/hg/backend.py |
diffstat | 1 files changed, 33 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/tracext/hg/backend.py Sat Jun 09 19:34:03 2012 +0200
+++ b/tracext/hg/backend.py Sun Jun 10 00:36:16 2012 +0200
@@ -884,8 +884,8 @@
             if not dirctx:
                 # we need to find the most recent change for a file below dir
                 str_dir = str_path + '/'
-                dirctxs = self.find_dirctx(changectx.rev(), [str_dir,],
-                                           {str_dir: str_entries})
+                dirctxs = self._find_dirctx(changectx.rev(), [str_dir,],
+                                            {str_dir: str_entries})
                 dirctx = dirctxs.values()[0]

         if not kind:
@@ -906,13 +906,13 @@
         self.created_rev = created_rev
         self.data = None

-    def find_dirctx(self, max_rev, str_dirnames, str_entries):
+    def _find_dirctx(self, max_rev, str_dirnames, str_entries):
         """Find most recent modification for each given directory path.

         :param max_rev: find no revision more recent than this one
         :param str_dirnames: directory paths to consider
-                             (as `str` ending with '/')
-        :param str_entries: optionally maps directories to their file content
+                             (list of `str` ending with '/')
+        :param str_entries: maps each directory to the files it contains

         :return: a `dict` with `str_dirnames` as keys, `changectx` as values

@@ -929,13 +929,14 @@
           each directory; this is much faster but can still be slow if
           some folders are only modified in the distant past

-        It is possible to combine both approach, and this can yield
-        excellent results in some cases (e.g. browsing the Linux repos
-        @ 118733 takes several minutes with the first approach, 11s
-        with the second, but only 1.2s with the hybrid approach)
+        It is possible to combine both approaches, and this can
+        produce excellent results in some cases, for example browsing
+        the root of the Hg mirror of the Linux repository (at revision
+        118733) takes several minutes with the first approach, 11s
+        with the second, but only 1.2s with the hybrid approach.

         Note that the specialized scan of the changelog we do below is
-        more efficient than the general cmdutil.walkchangerevs here.
+        more efficient than the general cmdutil.walkchangerevs.
         """
         str_dirctxs = {}
         repo = self.repos.repo
@@ -946,16 +947,27 @@
             for str_file in ctx.files():
                 for str_dir in str_dirnames[:]:
                     if str_file.startswith(str_dir):
+                        # rev for str_dir was found using first strategy
                         str_dirctxs[str_dir] = ctx
                         str_dirnames.remove(str_dir)
-                        if not str_dirnames: # if nothing left to find
+                        if not str_dirnames: # nothing left to find
                             return str_dirctxs
-            # in parallel, try the filelog strategy (the 463, 2, 40
+
+            # In parallel, try the filelog strategy (the 463, 2, 40
             # values below look a bit like magic numbers; actually
             # they were selected by testing the plugin on the Linux
             # and NetBeans repositories)
-            if r % 463 == 0:
-                k = max(2, 40 / len(str_dirnames))
+
+            # only use the filelog strategy every `n` revs
+            n = 463
+
+            # k, the number of files to examine per directory,
+            # will be comprised between `min_files` and `max_files`
+            min_files = 2
+            max_files = 40 # (will be the max if there's only one dir left)
+
+            if r % n == 0:
+                k = max(min_files, max_files / len(str_dirnames))
                 for str_dir in str_dirnames[:]:
                     str_files = str_entries[str_dir]
                     dr = str_dirctxs.get(str_dir, 0)
@@ -966,9 +978,13 @@
                             pass # that file was not on this revision `r`
                     str_files = str_files[k:]
                     if str_files:
+                        # not all files for str_dir seen yet,
+                        # store max rev found so far
                         str_entries[str_dir] = str_files
                         str_dirctxs[str_dir] = dr
                     else:
+                        # all files for str_dir were examined,
+                        # rev found using filelog strategy
                         str_dirctxs[str_dir] = repo[dr]
                         str_dirnames.remove(str_dir)
                         if not str_dirnames:
@@ -1032,14 +1048,13 @@

         # pre-computing the changectx for the last change in each sub-directory
         if str_dirnames:
-            dirctxs = self.find_dirctx(self.created_rev, str_dirnames,
-                                       str_entries)
+            dirctxs = self._find_dirctx(self.created_rev, str_dirnames,
+                                        str_entries)
         else:
             dirctxs = {}

         for str_entry in str_entries:
-            yield self.subnode(str_entry.rstrip('/'),
-                               dirctxs.get(str_entry, None))
+            yield self.subnode(str_entry.rstrip('/'), dirctxs.get(str_entry))

     def get_history(self, limit=None):
         repo = self.repos.repo
```