#!/usr/bin/env python2
#
# reposurgeon - a repository surgeon.
#
# By ESR, October 2010.  BSD terms apply.
#
# Requires Python 2.7.2 or newer.
#
from __future__ import print_function #unicode_literals

import sys, os, cmd, tempfile, subprocess, glob, hashlib, cProfile
import re, sre_constants, signal, shutil, copy, shlex, collections, uuid
import cgi, bz2, codecs, time, calendar, unittest, itertools, operator
import functools, filecmp, datetime, difflib, commands
import email.message, email.parser

# This import only works on Unixes.  The intention is to enable
# Ctrl-P, Ctrl-N, and friends in Cmd.
try:
    import readline
except ImportError:
    pass

# This import will only work if python-hglib is installed.
# It is used by the HgExtractor to improve performance.
try:
    import hglib
except ImportError:
    hglib = None

# Release number of this program.
version = "3.29"

#
# This code is intended to be hackable to support special-purpose or
# custom operations, though it's even better if you can come up with a new
# surgical primitive general enough to ship with the stock version.  For
# either case, here's a guide to the architecture.
#
# The core classes are largely about deserializing and reserializing import
# streams.  In between these two operations the repo state lives in a
# fairly simple Python object, Repository. The main part of Repository
# is just a list of events - Commits, Blobs, Tags, Resets, and Passthroughs.
# These are straightforward representations of the command types in an
# import stream, with Passthrough as a way of losslessly conveying lines
# the parser does not recognize.
#
#  +-------------+    +---------+    +-------------+
#  | Deserialize |--->| Operate |--->| Reserialize |
#  +-------------+    +---------+    +-------------+
#
# The general theory of reposurgeon is: you deserialize, you do stuff
# to the event list that preserves correctness invariants, you
# reserialize.  The "do stuff" is mostly not in the core classes, but
# there is one major exception.  The primitive to delete a commit and
# squash its fileops forwards or backwards is seriously intertwined
# with the core classes and actually makes up almost 50% of Repository
# by line count.
#
# The rest of the surgical code lives outside the core classes. Most
# of it lives in the RepoSurgeon class (the command interpreter) or
# the RepositoryList class (which encapsulates by-name access to a list
# of repositories and also hosts surgical operations involving
# multiple repositories). A few bits, like the repository reader and
# builder, have enough logic that's independent of these
# classes to be factored out of it.
#
# In designing new commands for the interpreter, try hard to keep them
# orthogonal to the selection-set code. As often as possible, commands
# should all have a similar form with a (single) selection set argument.
#
# VCS is not a core class.  The code for manipulating actual repos is bolted
# onto the ends of the pipeline, like this:
#
#  +--------+    +-------------+    +---------+    +-----------+    +--------+
#  | Import |--->| Deserialize |--->| Operate |--->| Serialize |--->| Export |
#  +--------+    +-------------+ A  +---------+    +-----------+    +--------+
#       +-----------+            |
#       | Extractor |------------+
#       +-----------+
#
# The Import and Export boxes call methods in VCS.
#
# Extractor classes build the deserialized internal representation directly.
# Each extractor class is a set of VCS-specific methods to be used by the
# RepoStreamer driver class.  Key detail: when a repository is recognized by
# an extractor it sets the repository type to point to the corresponding
# VCS instance.
#

class VCS:
    "Class representing a version-control system."
    def __init__(self, name,
                 subdirectory,
                 exporter,
                 styleflags,
                 properties,
                 initializer,
                 lister,
                 importer,
                 checkout,
                 preserve,
                 authormap,
                 ignorename,
                 dfltignores,
                 cookies,
                 project,
                 notes):
        # Every argument is stored verbatim under an attribute of the
        # same name; no validation or copying is done here.
        self.name = name                  # short name of the VCS
        self.subdirectory = subdirectory  # its characteristic subdirectory
        self.exporter = exporter          # command to export to a stream
        self.styleflags = styleflags      # collection of import/export style flags
        self.properties = properties      # handles per-commit properties?
        self.initializer = initializer    # command to initialize a new repo
        self.lister = lister              # command to list files under control
        self.importer = importer          # command to import from a stream
        self.checkout = checkout          # command to check out working copies
        self.preserve = preserve          # collection of paths to preserve
        self.authormap = authormap        # likely location of an authormap file
        self.ignorename = ignorename      # name of the ignore file, if any
        self.dfltignores = dfltignores    # newline-separated default ignores
        self.cookies = cookies            # regexps (presumably matching revision IDs in text)
        self.project = project            # project URL
        self.notes = notes                # free-text notes
    # Regexp fragments shared by the cookie patterns of several systems.
    bare_numeric = r"[0-9]+"
    dotted_numeric = r"[0-9]+(?:\.[0-9]+)+"
    nobug = r"(?<!bug )"        # Feeble, but lookbehinds must be fixed-width.
    def __str__(self):
        """Return a multi-line, human-readable report on this VCS.

        Lines of dfltignores beginning with "# " are left out of the
        Ignores field; the remaining lines are joined with single spaces.
        Style flags and the preserve set are rendered comma-separated
        inside literal braces.
        """
        # Native string literals are used throughout: __str__ has to
        # return str, and under Python 2 they are the same type as the
        # b"" literals used elsewhere in this file.
        realignores = [line for line in self.dfltignores.split('\n')
                       if not line.startswith("# ")]
        # The separator is ", ": the former b", b" was a search-and-replace
        # casualty that glued a stray "b" onto every item after the first.
        return "         Name: {self.name}\n" \
               " Subdirectory: {self.subdirectory}\n" \
               "     Exporter: {self.exporter}\n" \
               " Export-Style: {{{styleflags}}}\n" \
               "   Properties: {self.properties!r}\n" \
               "  Initializer: {self.initializer}\n" \
               "       Lister: {self.lister}\n" \
               "     Importer: {self.importer}\n" \
               "     Checkout: {self.checkout}\n" \
               "     Preserve: {{{preserve}}}\n" \
               "    Authormap: {self.authormap}\n" \
               "   Ignorename: {self.ignorename}\n" \
               "      Ignores: {{{ignores}}}\n" \
               "      Project: {self.project}\n" \
               "        Notes: {{{notes}}}\n".format(
                       self = self,
                       styleflags = ", ".join(self.styleflags),
                       preserve = ", ".join(self.preserve),
                       ignores = " ".join(realignores).strip(),
                       notes = self.notes.strip())

# Most knowledge about specific version-control systems lives in the
# following class list. Exceptions: there's a git-specific hook in the
# repo reader; also see the extractor classes; also see the dump method
# in the Blob() class.
# The members are, respectively:
#
# * Name of its characteristic subdirectory.
# * Command to export from the VCS to the interchange format
# * Import/export style flags.
#     "no-nl-after-commit" = no extra NL after each commit
#     "nl-after-comment" = inserts an extra NL after each comment
#     "export-progress" = exporter generates its own progress messages,
#                         no need for baton prompt.
#     "import-defaults" = Import sets default ignores
# * Flag specifying whether it handles per-commit properties on import
# * Command to initialize a new repo
# * Command to import from the interchange format
# * Command to check out working copies of the repo files.
# * Default preserve set (e.g. config & hook files; parts can be directories).
# * Likely location for an importer to drop an authormap file
# * Command to list files under repository control.
#
# Note that some of the commands used here are plugins or extensions
# that are not part of the basic VCS. Thus these may fail when called;
# we need to be prepared to cope with that.
#
# %(tempfile)s in a command gets substituted with the name of a
# tempfile that the calling code will know to read or write from as
# appropriate after the command is done.  If your exporter can simply
# dump to stdout, or your importer read from stdin, leave out the
# %(tempfile)s; reposurgeon will popen(3) the command, and it will
# actually be slightly faster (especially on large repos) because it
# won't have to wait for the tempfile I/O to complete.
#
# %(basename) is replaced with the basename of the repo directory.
#
# Table of known version-control systems, one VCS instance per system.
# Style flags are always a set (possibly empty) so that every entry
# supports the same operations.
vcstypes = [
    VCS(name=b"git",
        subdirectory=b".git",
        exporter=b"git fast-export --signed-tags=verbatim --tag-of-filtered-object=drop --all",
        styleflags=set(),
        properties=False,
        initializer=b"git init --quiet",
        importer=b"git fast-import --quiet",
        checkout=b"git checkout",
        lister=b"git ls-files",
        preserve=set([b'.git/config', b'.git/hooks']),
        authormap=b".git/cvs-authors",
        ignorename=b".gitignore",
        dfltignores=b"",        # Has none
        cookies=(r"\b[0-9a-f]{6}\b", r"\b[0-9a-f]{40}\b",),
        project=b"http://git-scm.com/",
        notes=b"The authormap is not required, but will be used if present."),
    #
    VCS(name=b"bzr",
        subdirectory=b".bzr",
        exporter=b"bzr fast-export --no-plain %(basename)s",
        styleflags={b"export-progress", b"no-nl-after-commit", b"nl-after-comment"},
        properties=True,
        initializer=None,
        lister=None,
        importer=b"bzr fast-import -",
        checkout=b"bzr checkout",
        preserve=set(),
        authormap=None,
        project=b"http://bazaar.canonical.com/en/",
        ignorename=b".bzrignore",
        dfltignores=b"""
# A simulation of bzr default ignores, generated by reposurgeon.
*.a
*.o
*.py[co]
*.so
*.sw[nop]
*~
.#*
[#]*#
__pycache__
bzr-orphans
# Simulated bzr default ignores end here
""",
        cookies=(VCS.nobug + VCS.bare_numeric,),
        notes=b"Requires the bzr-fast-import plugin."),
    VCS(name=b"hg",
        subdirectory=b".hg",
        exporter=None,
        styleflags={b"import-defaults", b"nl-after-comment", b"export-progress"},
        properties=False,
        initializer=b"hg init",
        lister=b"hg status -macn",
        importer=b"hg fastimport %(tempfile)s",
        checkout=b"hg checkout",
        preserve={b".hg/hgrc"},
        authormap=None,
        ignorename=b".hgignore",
        dfltignores=b"",
        cookies=(VCS.bare_numeric, r"\b[0-9a-f]{40}\b"),
        project=b"http://mercurial.selenic.com/",
        notes=b"""The hg fastimport method is not part of stock Mercurial.

If there is no branch named 'master' in a repo when it is read, the hg 'default'
branch is renamed to 'master'.

The extractor for the 'read' command will be much faster if the 'hglib' Python
library is installed.  See <https://mercurial.selenic.com/wiki/PythonHglib>.
"""),
    # Styleflags may need tweaking for round-tripping
    VCS(name=b"darcs",
        subdirectory=b"_darcs",
        exporter=b"darcs fastconvert export",
        styleflags=set(),
        properties=False,
        initializer=None,
        lister=b"darcs show files",
        importer=b"darcs fastconvert import",
        checkout=None,
        preserve=set(),
        authormap=None,
        ignorename=b"_darcs/prefs/boring",
        dfltignores=b"""
# A simulation of darcs default ignores, generated by reposurgeon.
# haskell (ghc) interfaces
*.hi
*.hi-boot
*.o-boot
# object files
*.o
*.o.cmd
# profiling haskell
*.p_hi
*.p_o
# haskell program coverage resp. profiling info
*.tix
*.prof
# fortran module files
*.mod
# linux kernel
*.ko.cmd
*.mod.c
*.tmp_versions
# *.ko files aren't boring by default because they might
# be Korean translations rather than kernel modules
# *.ko
# python, emacs, java byte code
*.py[co]
*.elc
*.class
# objects and libraries; lo and la are libtool things
*.obj
*.a
*.exe
*.so
*.lo
*.la
# compiled zsh configuration files
*.zwc
# Common LISP output files for CLISP and CMUCL
*.fas
*.fasl
*.sparcf
*.x86f
### build and packaging systems
# cabal intermediates
*.installed-pkg-config
*.setup-config
# standard cabal build dir, might not be boring for everybody
# dist
# autotools
autom4te.cache
config.log
config.status
# microsoft web expression, visual studio metadata directories
*.\\_vti_cnf
*.\\_vti_pvt
# gentoo tools
*.revdep-rebuild.*
# generated dependencies
.depend
### verion control
# darcs
_darcs
.darcsrepo
*.darcs-temp-mail
-darcs-backup[[:digit:]]+
# gnu arch
+
,
vssver.scc
*.swp
MT
{arch}
*.arch-ids
# bitkeeper
BitKeeper
ChangeSet
### miscellaneous
# backup files
*~
*.bak
*.BAK
# patch originals and rejects
*.orig
*.rej
# X server
..serverauth.*
# image spam
\\#
Thumbs.db
# vi, emacs tags
tags
TAGS
# core dumps
core
# partial broken files (KIO copy operations)
*.part
# mac os finder
.DS_Store
# Simulated darcs default ignores end here
""",
        cookies=[],
        project=b"http://darcs.net/",
        notes=b"Assumes no boringfile preference has been set."),
    # Styleflags may need tweaking for round-tripping
    VCS(name=b"mtn",
        subdirectory=b"_MTN",
        exporter=b"mtn git_export",
        styleflags=set(),
        properties=False,
        initializer=None,
        lister=b"mtn list known",
        importer=None,
        checkout=None,
        preserve=set(),
        authormap=None,
        ignorename=b".mtn_ignore",      # Assumes default hooks
        dfltignores=b"""
*.a
*.so
*.o
*.la
*.lo
^core
*.class
*.pyc
*.pyo
*.g?mo
*.intltool*-merge*-cache
*.aux
*.bak
*.orig
*.rej
%~
*.[^/]**.swp
*#[^/]*%#
*.scc
^*.DS_Store
/*.DS_Store
^desktop*.ini
/desktop*.ini
autom4te*.cache
*.deps
*.libs
*.consign
*.sconsign
CVS
*.svn
SCCS
_darcs
*.cdv
*.git
*.bzr
*.hg
""",
        cookies=[],
        project=b"http://www.monotone.ca/",
        notes=b"Exporter is buggy, occasionally emitting negative timestamps."),
    # Export is experimental and doesn't round-trip
    VCS(name=b"svn",
        subdirectory=b"locks",
        exporter=b"svnadmin dump .",
        styleflags={b"import-defaults", b"export-progress"},
        properties=False,
        initializer=b"svnadmin create .",
        importer=b"svnadmin load .",
        checkout=None,
        lister=None,
        preserve={b"hooks"},
        authormap=None,
        ignorename=None,
        # Note dangerous hack here: the leading slashes are there to mark
        # lines which should *not* be anchored, and will be removed later
        # in processing.
        dfltignores=b"""\
# A simulation of Subversion default ignores, generated by reposurgeon.
/*.o
/*.lo
/*.la
/*.al
/*.libs
/*.so
/*.so.[0-9]*
/*.a
/*.pyc
/*.pyo
/*.rej
/*~
/*.#*
/.*.swp
/.DS_store
# Simulated Subversion default ignores end here
""",
        cookies=(r"\b(?:SVN|svn|Subversion|subversion|rev|version)\s+" + VCS.bare_numeric,
                 "r" + VCS.bare_numeric,
                 VCS.nobug + r"\w" + VCS.bare_numeric + r"\w"),
        project=b"http://subversion.apache.org/",
        notes=b"Run from the repository, not a checkout directory."),
    VCS(name=b"cvs",
        subdirectory=b"Attic",
        exporter=b"find . -name '*,v' -print | cvs-fast-export --reposurgeon",
        styleflags={b"import-defaults", b"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        dfltignores=b"""\
# A simulation of cvs default ignores, generated by reposurgeon.
tags
TAGS
.make.state
.nse_depinfo
*~
#*
.#*
,*
_$*
*$
*.old
*.bak
*.BAK
*.orig
*.rej
.del-*
*.a
*.olb
*.o
*.obj
*.so
*.exe
*.Z
*.elc
*.ln
core
# Simulated cvs default ignores end here
""",
        cookies=(r"(?:CVS|cvs|rev|version)\s*" + VCS.dotted_numeric,
                 VCS.dotted_numeric),
        project=b"http://www.catb.org/~esr/cvs-fast-export",
        notes=b"Requires cvs-fast-export."),
    VCS(name=b"rcs",
        subdirectory=b"RCS",
        exporter=b"find . -name '*,v' -print | cvs-fast-export -k --reposurgeon",
        styleflags={b"export-progress"},
        properties=False,
        initializer=None,
        importer=None,
        checkout=None,
        lister=None,
        preserve=set(),
        authormap=None,
        ignorename=None,
        dfltignores=b"",        # Has none
        cookies=(r"(?:RCS|rcs|rev|version)\s*" + VCS.dotted_numeric,
                 VCS.dotted_numeric),
        project=b"http://www.catb.org/~esr/cvs-fast-export",
        notes=b"Requires cvs-fast-export."),
    VCS(name=b"src",
        subdirectory=b".src",
        exporter=b"src fast-export",
        styleflags=set(),
        properties=False,
        initializer=b"src init",
        importer=None,
        checkout=None,
        lister=b"src ls",
        preserve=set(),
        authormap=None,
        ignorename=None,
        dfltignores=b"",        # Has none
        cookies=(VCS.nobug + VCS.bare_numeric,),
        project=b"http://catb.org/~esr/src",
        notes=b""),
    ]

# Import and export filter methods for VCSes that use magic files rather
# than magic directories. So far there is only one of these.
#
# Each entry maps a read/write option to an (importer, exporter) pair.
# The input filter must be an *exporter from* that takes an alien file
# and emits a fast-import stream on standard output.  The exporter
# must be an *importer to* that takes an import stream on standard input
# and produces a named alien file.
#
# Keyed by read/write option name; each value is an
# (importer, exporter) pair of command templates in which %s stands
# for the name of the alien file.
file_filters = dict(
    fossil=(b"fossil export --git %s", b"fossil import --git %s"),
)

# How to write extractor classes:
#
# Clone one of the existing ones and mutate.
#
# Significant fact: None of the get_* methods for extracting information about
# a revision is called until after checkout has been called on that revision.
#
# Most methods take a native revision ID as argument. The value and type of the
# ID don't matter to any of the code that will call the extractor, except that
# IDs must be hashable so they can be dictionary keys.
#
# The 'name', 'subdirectory', and 'visible' members must be set. The
# subdirectory member is how an extractor recognizes what repositories
# it can consume.  If the visible member is false, the 'read' command
# will ignore the existence of the extractor.
#
# The strings returned by get_committer() and get_authors() should look like
#
# J. Random User <random@foobar> 2011-11-29T10:13:32Z
#
# that is, a free text name followed by an email ID followed by a date.
# The date specification can be anything Attribution() can parse; in
# particular, RFC3339 dates are good, so are RFC822 (email) dates,
# and so is git's native integer-Unix-timestamp/timezone pairs.

class Extractor(object):
    "Base class for repository extractors."
    # Written around commonalities between git and hg; extending further
    # might need structural changes.
    name = "extractor-base"
    subdirectory = None # for use in detecting corresponding VCS
    visible = False # report in list of extractors; use even if not preferred
    properties = False # does this VCS support commit properties?
    ignorename = None
    def __init__(self):
        self.revlist = [] # commit identifiers, oldest first
        self.parents = {} # commit -> [parent-commit, ...]
        self.meta = {} # commit -> {'ci':committer, 'ai':author, 'branch':color}
        self.refs = {} # 'refs/class/name' -> commit
        self.tags = [] # Tag objects (annotated tags only)
    def analyze(self, baton):
        """Analyze a repository for streaming.

        Runs the four analysis passes in order (revision IDs, commit
        data, references, branch coloring), twirling the baton after
        each one."""
        self.find_revision_ids(baton)
        baton.twirl()
        self.find_commit_data(baton)
        baton.twirl()
        self.find_all_references(baton)
        baton.twirl()
        self.color_branches(baton)
        baton.twirl()
    def find_revision_ids(self, baton):
        "Get the topologically-ordered list of revisions and parents"
        # Subclass hook; an override should:
        # fill in self.revlist
        # fill in self.parents
        assert baton is not None # pacify pylint
    def find_commit_data(self, baton):
        "Get all other per-commit data except branch IDs"
        # Subclass hook; an override should:
        # fill in self.meta[]['ci']
        # fill in self.meta[]['ai']
        assert baton is not None # pacify pylint
    def find_all_references(self, baton):
        "Find all branch heads and tags"
        # Subclass hook; an override should:
        # fill in self.refs
        # fill in self.tags
        assert baton is not None # pacify pylint
    def color_branches(self, baton):
        """Color branches in the order the tips occur.  Emulate the
        git-export order.

        Every revision reachable from a ref gets that ref's name as its
        'branch' attribute unless an earlier ref already claimed it.
        Raises Fatal if any revision in the revlist ends up uncolored."""
        # items() rather than iteritems(): the ref table is small, and
        # this spelling does not depend on a Python-2-only dict method.
        for refname, refobj in sorted(self.refs.items(),
                                      key=lambda ref: self.revlist.index(ref[1])):
            self.__branch_color(refobj, refname)
        # The key must be spelled exactly as __branch_color and
        # get_branch spell it (b'branch').
        uncolored = [revision for revision in self.revlist if b'branch' not in self.meta[revision]]
        if uncolored:
            if verbose >= 1:
                raise Fatal(b"missing branch attribute for: %s" % uncolored)
            else:
                raise Fatal(b"some branches do not have local ref names.")
        assert baton is not None # pacify pylint
    def __branch_color(self, rev, color):
        "Paint rev and its not-yet-colored ancestors with the given color."
        # Values beginning with "ref" are not revision IDs; skip them.
        if rev.startswith(b"ref"):
            return
        while b'branch' not in self.meta[rev]:
            self.meta[rev][b'branch'] = color
            parents = self.get_parents(rev)
            if not parents:
                break
            elif len(parents) == 1:
                # This case avoids blowing Python's stack by recursing
                # too deep on large repos.
                rev = parents[0]
            else:
                for parent in parents:
                    self.__branch_color(parent, color)
                break
    def pre_extract(self, repo):
        "Hook for any setup actions required before streaming."
        assert repo is not None  # Pacify pylint
    def post_extract(self, repo):
        """Hook for any cleanup actions required after streaming.

        When the extractor does not support commit properties, every
        commit in the repo has its properties reset to an empty
        OrderedDict."""
        if not self.properties:
            for event in repo.commits():
                event.properties = collections.OrderedDict()
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return True
    def get_revlist(self):
        "Return the list of commit IDs, oldest first."
        return self.revlist
    def get_taglist(self):
        "Return the list of Tag objects (annotated tags only)."
        return self.tags
    def iter_resets(self):
        "Return an iterator yielding (reset name, revision) pairs."
        # Refs with "/tags/" in their name are left out.
        return (item for item in self.refs.items() if "/tags/" not in item[0])
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision, return a manifest."
        assert filemap is not None # pacify pylint
        assert rev is not None # pacify pylint
        return []
    def cleanup(self, rev, issued):
        "Cleanup after checkout."
        assert rev and (issued is not None) # Pacify pylint
    def get_parents(self, rev):
        "Return the list of commit IDs of a commit's parents."
        return self.parents[rev]
    def get_branch(self, rev):
        "Return the branch color assigned to a commit by color_branches()."
        return self.meta[rev][b'branch']
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        assert rev is not None # pacify pylint
        return None
    def get_committer(self, rev):
        "Return the committer's ID/date as a string."
        return self.meta[rev][b'ci']
    def get_authors(self, rev):
        "Return a one-element list holding the author's ID/date string."
        return [self.meta[rev][b'ai']]
    def get_properties(self, rev):
        """Return an ordered mapping of properties for the commit.

        The base class can only answer for systems without properties;
        an extractor that sets properties = True must override this."""
        assert rev is not None # Pacify pylint
        if self.properties:
            raise NotImplementedError()
        else:
            return collections.OrderedDict()

class GitExtractor(Extractor):
    "Repository extractor for the git version-control system."
    # Regardless of what revision and branch was current at start,
    # after the git extractor runs the head revision on the master branch
    # will be checked out.
    #
    # The git extractor does not attempt to recover N ops,
    # symbolic links, gitlinks, or directory fileops.
    #
    # To be streamed, a git repo must have <emphasis>local</emphasis>
    # refs to all branches - in particular, local tracking branches
    # corresponding to all remotes.
    #
    # Some of these limitations could be fixed, but the git extractor
    # is not intended to replace git-fast-export; it only exists as a
    # test for the generic RepoStreamer code and a model for future
    # extractors.
    name = "git-extractor"
    vcstype = next(vcstype for vcstype in vcstypes if vcstype.name == "git")
    subdirectory = ".git"
    visible = False
    properties = False
    ignorename = ".gitignore"
    def find_revision_ids(self, baton):
        "Fill in self.revlist and self.parents from 'git log', oldest first."
        assert baton is not None # pacify pylint
        with popen_or_die(b"git log --all --topo-order --reverse --format='%H %P'") as fp:
            for line in fp:
                # Each line is the commit hash followed by its parent hashes.
                fields = line.strip().split()
                self.revlist.append(fields[0])
                self.parents[fields[0]] = fields[1:]
    def find_commit_data(self, baton):
        "Fill in the committer ('ci') and author ('ai') strings for each commit."
        assert baton is not None # pacify pylint
        with popen_or_die(b"git log --all --reverse --date=raw --format='%H|%cn <%ce> %cd|%an <%ae> %ad'") as fp:
            for line in fp:
                # NOTE: a '|' inside a committer or author name yields
                # more than three fields and makes this unpacking fail.
                (h, ci, ai) = line.strip().split(b'|')
                self.meta[h] = {b'ci':ci, b'ai':ai}
    def find_all_references(self, baton):
        """Find all branch heads and tags.

        Every file under .git/refs becomes an entry in self.refs.  Each
        tag reported by 'git tag -l' is then resolved: its refs entry is
        pointed at the tagged object, and an annotated tag additionally
        gets exactly one Tag object appended to self.tags."""
        for root, dirs, files in os.walk(b".git/refs"):
            for leaf in files:
                assert dirs is not None  # Pacify pylint
                ref = os.path.join(root, leaf)
                with open(ref, b"rb") as fp:
                    # ref[5:] drops the leading ".git/"
                    self.refs[ref[5:]] = fp.read().strip()
        baton.twirl()
        with popen_or_die(b"git tag -l") as fp:
            for line in fp:
                tag = line.strip()
                # The inner pipes get their own names so they do not
                # shadow the tag-list pipe and loop variable above.
                with popen_or_die(b"git rev-parse %s" % tag) as hp:
                    taghash = hp.read().strip()
                # Annotated tags are first-class objects with their
                # own hashes.  The hash of a lightweight tag is just
                # the commit it points to. Handle both cases.
                objecthash = taghash
                comment = None
                tagger = None
                with popen_or_die(b"git cat-file -p %s" % tag) as tp:
                    for tagline in tp:
                        tagline = tagline.strip()
                        if comment is not None:
                            # Past the blank separator everything is
                            # message text, even a line that happens to
                            # begin with a header keyword.
                            comment += tagline + b"\n"
                        elif not tagline:
                            # First blank line ends the header section.
                            comment = b""
                        elif tagline.startswith(b"tagger "):
                            tagger = tagline[len(b"tagger "):]
                        elif tagline.startswith(b"object "):
                            objecthash = tagline.split()[1]
                # Build the Tag once, after the whole annotation has been
                # read.  Doing it per comment line produced one Tag per
                # line, each carrying a truncated comment.
                if objecthash != taghash:
                    # committish isn't a mark; we'll fix that later
                    self.tags.append(Tag(None,
                                         name=tag,
                                         tagger=Attribution(tagger),
                                         comment=comment or b"",
                                         committish=objecthash))
                self.refs["refs/tags/" + tag] = objecthash
    def __metadata(self, rev, fmt):
        "Return 'git log -1' output for rev in the given format, minus its last character."
        with popen_or_die(b"git log -1 --format='%s' %s" % (fmt, rev)) as fp:
            return fp.read()[:-1]
    def post_extract(self, repo):
        "Do the base-class cleanup, then check out master."
        super(GitExtractor, self).post_extract(repo)
        os.system(b"git checkout --quiet master")
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return not capture(b"git ls-files --modified")
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision, return a manifest."
        assert filemap is not None # pacify pylint
        os.system(b"git checkout --quiet %s" % rev)
        # NOTE: splitting on whitespace breaks up paths containing spaces.
        manifest = capture(b"git ls-files").split()
        return manifest
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        return self.__metadata(rev, b"%B")
class HgExtractor(Extractor):
    "Repository extractor for the hg version-control system."
    # Regardless of what revision and branch was current at start,
    # after the hg extractor runs the tip (most recent revision on any branch)
    # will be checked out.
    name = "hg-extractor"
    vcstype = next(vcstype for vcstype in vcstypes if vcstype.name == "hg")
    subdirectory = ".hg"
    visible = True
    properties = False
    ignorename = ".hgignore"
    def __init__(self):
        super(HgExtractor, self).__init__()
        # hglib client; set by analyze() when hglib is importable and
        # cleared again by post_extract().
        self.hgclient = None
    class _hg_or_die:
        "Context manager yielding the output of an hg command run through hglib."
        def __init__(self, client, *cmdline):
            self.client = client
            # *cmdline arrives as a tuple, which has no pop(); copy it to
            # a list so a leading "hg" word can actually be dropped.
            self.cmdline = list(cmdline)
            if self.cmdline[0] == b"hg":
                self.cmdline.pop(0)
            self.fp = hglib.util.BytesIO(self.client.rawcommand(self.cmdline))
        def __enter__(self):
            return self.fp
        def __exit__(self, extype, value, traceback_unused):
            if extype:
                if verbose:
                    complain(b"fatal exception in _hg_or_die.")
            if self.fp.close() is not None and not extype:
                raise Fatal(b"%s returned error." % (self.cmdline,))
            # Never swallow an exception raised inside the with block.
            return False
    def hg_or_die(self, *cmdline):
        """Return a context manager yielding the output of an hg command.

        The command goes through the hglib client when one is open,
        otherwise through a shell pipeline."""
        if hglib is not None and self.hgclient is not None:
            return self._hg_or_die(self.hgclient, *cmdline)
        else:
            # We know this quoting is sufficient, because the only variables
            # we ever pass into this function are revision IDs, which are
            # shell-safe.  It'd be nice to use pipes.quote or shlex.quote, but
            # pipes.quote is deprecated in 2.7 and shlex.quote doesn't appear
            # until 3.3.
            return popen_or_die(b"hg "+b" ".join([b"'%s'"%(s,) for s in cmdline]))
    def hg_capture(self, *cmdline):
        "Run an hg command and return everything it wrote as one string."
        with self.hg_or_die(*cmdline) as fp:
            output = fp.read()
        return output
    def analyze(self, baton):
        "Open an hglib client on the current directory if possible, then analyze."
        if hglib is not None:
            self.hgclient = hglib.open(os.curdir)
        super(HgExtractor, self).analyze(baton)
    def find_revision_ids(self, baton):
        "Get the topologically-ordered list of revisions and parents"
        assert baton is not None # pacify pylint
        # hg changesets can only have up to two parents
        # we have to use short (12-nibble) hashes because that's all "hg tags"
        # and "hg branches" give us.  Hg's CLI is rubbish
        with self.hg_or_die(b"log", b"--template", b"{node|short} {p1node|short} {p2node|short}\\n") as fp:
            for line in fp:
                fields = line.strip().split()
                self.revlist.append(fields[0])
                # non-existent parents are given all-0s hashes.
                # Did I mention that Hg's CLI is rubbish?
                self.parents[fields[0]] = [f for f in fields[1:] if f != '0'*12]
        # The list is reversed after reading so that revlist comes out
        # oldest first.
        self.revlist = list(reversed(self.revlist))
    def find_commit_data(self, baton):
        "Get all other per-commit data except branch IDs"
        assert baton is not None # pacify pylint
        with self.hg_or_die(b"log", b"--template", b"{node|short}|{author} {date|rfc822date}\\n") as fp:
            for line in fp:
                # Split on the first '|' only: the hash cannot contain
                # one, but the free-text author field can.
                (h, ci) = line.strip().split(b'|', 1)
                # Because hg doesn't store separate author and committer info,
                # we just use the committer for both.
                self.meta[h] = {b'ci':ci, b'ai':ci}
    def find_all_references(self, baton):
        "Find all branch heads and tags"
        assert baton is not None # pacify pylint
        # both branches and tags output "name      num:hash" lines
        # branches may also append " (inactive)"
        ref_re = re.compile(r'(\S+)\s+\d+:([0-9a-fA-F]+)(?: \(inactive\))?')
        with self.hg_or_die(b"branches") as fp:
            for line in fp:
                m = ref_re.match(line)
                if m is None:
                    raise Recoverable(b"Unreadable 'hg branches' line: %r" % line)
                n, h = m.groups()
                self.refs['refs/heads/%s'%n] = h
        with self.hg_or_die(b"tags") as fp:
            for line in fp:
                m = ref_re.match(line)
                if m is None:
                    raise Recoverable(b"Unreadable 'hg tags' line: %r" % line)
                n, h = m.groups()
                if n == 'tip': # pseudo-tag for most recent commit
                    continue # We don't want it
                self.refs['refs/tags/%s'%n] = h
        # We have no annotated tags, so self.tags = []
        # Conceivably it might be better to treat the commit message that
        # creates the tag as an annotation, but that's a job for the surgeon
        # later, not the extractor now.
    def post_extract(self, repo):
        """Clean up after streaming.

        Updates the working directory to tip, renames the 'default'
        branch to 'master' when the repo has no master branch, and
        closes the hglib client if one is open."""
        super(HgExtractor, self).post_extract(repo)
        self.hg_capture(b"update", b"-C", b"tip")
        if not "refs/heads/master" in repo.branchset():
            for event in repo:
                if isinstance(event, Commit):
                    if event.branch == "refs/heads/default":
                        event.set_branch("refs/heads/master")
                elif isinstance(event, Reset):
                    if event.ref == "refs/heads/default":
                        event.ref = "refs/heads/master"
        if self.hgclient is not None:
            # make sure we don't pick it up again later, since the cwd may change
            self.hgclient.close()
            self.hgclient = None
    def isclean(self):
        "Return True if repo has no unsaved changes."
        return not self.hg_capture(b"status", b"--modified")
    def checkout(self, rev, filemap):
        "Check the directory out to a specified revision, return a manifest."
        assert filemap is not None # pacify pylint
        self.hg_capture(b"update", b"-C", rev)
        # NOTE: splitting on whitespace breaks up paths containing spaces.
        manifest = self.hg_capture(b"manifest").split()
        return manifest
    def get_comment(self, rev):
        "Return a commit's change comment as a string."
        return self.hg_capture(b"log", b"-r", rev, b"--template", b"{desc}\\n")

# More extractors go here

# One instance of each extractor class, git first.
extractors = [
    GitExtractor(),
    HgExtractor(),
]

# No user-serviceable parts below this line

class Fatal(Exception):
    """Unrecoverable error.

    The message stays available as the msg attribute.  It is also handed
    to Exception so that str() of the exception, its args and any
    traceback show the message instead of an empty string.
    """
    def __init__(self, msg):
        Exception.__init__(self, msg)
        self.msg = msg

# Current verbosity level.  debug_enable(level) reports True when this
# is at least the given DEBUG_* threshold below.
verbose         = 0
DEBUG_SVNDUMP   = 2    # Debug Subversion dumping
DEBUG_TOPOLOGY  = 2    # Debug repo-extractor logic (coarse-grained)
DEBUG_EXTRACT   = 2    # Debug repo-extractor logic (fine-grained)
DEBUG_FILEMAP   = 3    # Debug building of filemaps
DEBUG_DELETE    = 3    # Debug canonicalization after deletes
DEBUG_IGNORES   = 3    # Debug ignore generation
DEBUG_SVNPARSE  = 4    # Lower-level Subversion parsing details
DEBUG_EMAILIN   = 4    # Debug event round-tripping through mailbox_{out|in}
DEBUG_SHUFFLE   = 4    # Debug file and directory handling
DEBUG_COMMANDS  = 5    # Show commands as they are executed
DEBUG_UNITE     = 5    # Debug mark assignments in merging
DEBUG_LEXER     = 6    # Debug selection-language parsing
quiet = False

# Option settings by name; code in this file looks up the
# "compressblobs" and "canonicalize" keys.
global_options = {}

def whoami():
    """Deduce the user's name and email address from local configuration.

    Asks git for user.name and user.email first; failing that, looks for
    a "Name <address>" setting in ~/.hgrc and then ~/.lynxrc.

    Returns a (name, email) pair.  Raises Fatal when no source yields one.
    """
    # Git version-control system
    (nameerr, nameout) = commands.getstatusoutput("git config user.name")
    (emailerr, emailout) = commands.getstatusoutput("git config user.email")
    if nameerr == 0 and nameout and emailerr == 0 and emailout:
        return (nameout, emailout)
    # Various random configs
    for (fn, mine) in (
        ("~/.hgrc", r"username\s*=\s*(.*)\s<([^>]*)>"),                # Mercurial
        ("~/.lynxrc", r"personal_mail_address\s*=\s*(.*)\s<([^>]*)>")  # Lynx
        ):
        fn = os.path.expanduser(fn)
        if os.path.exists(fn):
            # Use a context manager so the file is closed even when we
            # return from the middle of the scan.
            with open(fn) as fp:
                for line in fp:
                    m = re.search(mine, line)
                    if m:
                        return (m.group(1), m.group(2))
    # Out of alternatives
    raise Fatal("can't deduce user identity!")

def screenwidth():
    "Report the terminal width: the second field printed by 'stty size'."
    with popen_or_die(b'stty size', b"rb") as tp:
        dimensions = tp.read().split()
        return int(dimensions[1])

def debug_enable(level):
    "Tell whether the current verbosity reaches the given debug level."
    return level <= verbose

def nuke(directory, legend):
    "Delete a (possibly large) directory tree, twirling a progress baton."
    with Baton(legend, enable=debug_enable(DEBUG_SHUFFLE)) as baton:
        # Bottom-up walk: a directory's contents are gone before its rmdir.
        for (root, dirs, files) in os.walk(directory, topdown=False):
            for (remover, names) in ((os.remove, files), (os.rmdir, dirs)):
                for name in names:
                    remover(os.path.join(root, name))
                    baton.twirl()
    # Removing the top directory itself is best-effort.
    try:
        os.rmdir(directory)
    except OSError:
        pass

def rfc3339(t):
    "RFC3339 string from Unix time."
    return time.strftime(b"%Y-%m-%dT%H:%M:%SZ", time.gmtime(t))

def complain(msg):
    "Write msg to stderr with a 'reposurgeon: ' prefix, flushing stdout first."
    sys.stdout.flush()
    errstream = sys.stderr
    errstream.write(b"reposurgeon: %s\n" % msg)
    errstream.flush()

def announce(msg):
    "Write msg to stdout with a 'reposurgeon: ' prefix."
    line = b"reposurgeon: %s\n" % msg
    sys.stdout.write(line)

def pacify_pylint(_unused):
    "Do nothing; lets a caller 'use' a name pylint would flag as unused."
    return None

def memoize_iterator(iterator_f, mem_attr = None):
    """From a class method returning an iterator, create
       one which caches the iterator results and replays
       them later. Arguments:
        - iterator_f: the *unbound* class method
        - mem_attr:   the name of the attribute on the class
                      instance that stores the cache
                      (default: _mem_attr_<function name>)
       Returns the caching method, carrying iterator_f's name and
       docstring where the runtime allows setting them.
    """
    if mem_attr is None:
        mem_attr = "_mem_attr_" + iterator_f.__name__
    # Define the caching iterator
    def f(self):
        # Obtain the store or create a new one
        # The cache is
        #    - cache: a list containing all values already
        #             yielded by the iterator,
        #    - it:    the iterator, ready to yield the next
        #             uncached value, or already at its end.
        try:
            cache, it = getattr(self, mem_attr, None)
        except (TypeError, ValueError):
            # No store yet (None does not unpack), or not a 2-sequence.
            cache = []; it = iterator_f(self)
            setattr(self, mem_attr, (cache, it))
        # Yield values from the list, enlarging the latter
        # if necessary. We use an infinite loop over all
        # integers and stop when the underlying iterator
        # has no more values to enlarge the list with.
        for pos in itertools.count():
            if len(cache) <= pos:
                try:
                    cache.append(next(it))
                except StopIteration:
                    # Finish explicitly.  Letting StopIteration escape a
                    # generator body is turned into RuntimeError by
                    # PEP 479; a plain return ends the iteration the
                    # same way on every Python version.
                    return
            yield cache[pos]
    # update_wrapper ensures that f gets all interesting
    # attributes of iterator_f (especially the docstring)
    try:
        return functools.update_wrapper(f, iterator_f)
    except AttributeError:
        # Cython doesn't support setting name or docstring
        return f

def memoized_iterator(mem_attr = None):
    """Decorator-factory form of memoize_iterator.

    memoized_iterator(A)(f) is equivalent to memoize_iterator(f, A).
    Function decorators may take only the function as argument, so the
    cache attribute name is curried in here first.
    """
    decorator = functools.partial(memoize_iterator, mem_attr = mem_attr)
    return decorator

class Baton:
    "Ship progress indications to stdout."
    def __init__(self, prompt, endmsg='done', enable=False):
        self.prompt = prompt
        self.endmsg = endmsg
        self.countfmt = None
        self.counter = 0
        if enable:
            self.stream = sys.stdout
        else:
            self.stream = None
        self.count = 0
        self.time = 0
    def __enter__(self):
        if self.stream:
            self.stream.write(self.prompt + "...")
            if os.isatty(self.stream.fileno()):
                self.stream.write(b" \b")
            self.stream.flush()
        self.count = 0
        self.time = time.time()
        return self
    def startcounter(self, countfmt, initial=1):
        self.countfmt = countfmt
        self.counter = initial
    def bumpcounter(self):
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if self.countfmt:
                update = self.countfmt % self.counter
                self.stream.write(update + (b"\b" * len(update)))
                self.stream.flush()
            else:
                self.twirl()
        self.counter = self.counter + 1
    def endcounter(self):
        if self.stream:
            w = len(self.countfmt % self.count)
            self.stream.write((b" " * w) + (b"\b" * w))
            self.stream.flush()
        self.countfmt = None
    def twirl(self, ch=None):
        "One twirl of the baton."
        if self.stream is None:
            return
        if os.isatty(self.stream.fileno()):
            if ch:
                self.stream.write(ch)
                self.stream.flush()
                return
            else:
                update = "-/|\\"[self.count % 4]
                self.stream.write(update + (b"\b" * len(update)))
                self.stream.flush()
        self.count = self.count + 1
    def __exit__(self, extype, value_unused, traceback_unused):
        if extype == KeyboardInterrupt:
            self.endmsg = "interrupted"
        if extype == Fatal:
            self.endmsg = "aborted by error"
        if self.stream:
            self.stream.write(b"...(%2.2f sec) %s.\n" \
                              % (time.time() - self.time, self.endmsg))
        return False

class RepoSurgeonEmail(email.message.Message, object):
    "Email message whose unixfrom line is a row of 78 dashes."
    Divider = 78 * "-"
    __hash__ = None
    def __init__(self, **kwargs):
        email.message.Message.__init__(self, **kwargs)
        self.set_unixfrom(RepoSurgeonEmail.Divider)
    @staticmethod
    def readmsg(fp):
        "Read the text of one message from fp, up to the next divider or EOF; None at EOF."
        opener = fp.readline()
        if not opener:
            return None
        chunks = []
        # A leading divider is dropped; anything else is message text.
        if not opener.startswith(RepoSurgeonEmail.Divider):
            chunks.append(opener)
        while True:
            line = fp.readline()
            if not line or line.startswith(RepoSurgeonEmail.Divider):
                break
            chunks.append(line)
        return ''.join(chunks)
    def __str__(self):
        text = super(RepoSurgeonEmail, self).as_string(unixfrom=True)
        return text.replace(b"\n--", b"\n.--").replace(b"\n>From", b"\nFrom")

class Date(object):
    "A time/date in UTC. Preserves the original TZ information and uses it to convert back when formatting."
    # timestamp is seconds since the Unix epoch (UTC), tz_offset is the
    # zone offset in seconds east of UTC, orig_tz_string is the offset
    # as originally written.
    __slots__ = ("timestamp", "tz_offset", "orig_tz_string")
    __hash__ = None
    date_re = re.compile(r"[0-9]+\s*[+-][0-9]+$")
    subsecond_re = re.compile(r"\.[0-9]+Z")
    offset_re = re.compile(r"^([-+]?)([0-9]{2})([0-9]{2})$")
    def __init__(self, text, error=Fatal):
        """Recognize date formats that exporters or email programs might emit.

        text may be None (meaning the current time, with a +0000 offset),
        git's "<seconds> <offset>" form, an RFC822 date, or an RFC3339
        date in Zulu time.  Anything else raises error (Fatal by default).
        """
        # Special case: we may want current time
        if text is None:
            self.timestamp = int(time.time())
            self.tz_offset = 0
            self.orig_tz_string = "+0000"
            return
        # Otherwise, look for git's preferred format, which is a timestamp
        # in UTC followed by an offset to be used as a hint for what
        # timezone to display the date in when converting to other
        # formats
        text = text.strip()
        if Date.date_re.match(text):
            (self.timestamp, self.orig_tz_string) = text.split()
            self.tz_offset = Date.secondsFromOffsetString(self.orig_tz_string)
            self.timestamp = int(self.timestamp)
            return
        # If that didn't work, look for an RFC822 date, which git also
        # accepts. Note, there could be edge cases that Python's parser
        # handles but git doesn't.
        try:
            dt = email.utils.parsedate_tz(text)
            self.tz_offset = dt[9]
            self.timestamp = int(calendar.timegm(dt) - self.tz_offset)
            self.orig_tz_string = text.split()[5]
            return
        except TypeError:
            # parsedate_tz returns None for text it cannot parse, and
            # the offset is None when the date carries no timezone;
            # either way we end up here and try the next format.
            pass
        # Also accept RFC3339 dates in Zulu time, just because I like them.
        try:
            # Discard subsecond precision, import-stream format can't use it.
            text = re.sub(Date.subsecond_re, b"Z", text)
            rfc3339date = time.strptime(text, b"%Y-%m-%dT%H:%M:%SZ")
            self.timestamp = calendar.timegm(rfc3339date)
            self.orig_tz_string = "+0000"
            self.tz_offset = 0
            return
        except ValueError:
            # time.strptime() throws this
            # "time data 'xxxxxx' does not match format '%Y-%m-%dT%H:%M:%S'"
            pass
        # Date format not recognized
        raise error(b"'%s' is not a valid timestamp" % text)
    @staticmethod
    def secondsFromOffsetString(text):
        """Convert a [+-]HHMM offset string to seconds east of UTC.

        Complains about offsets outside the range in real-world use and
        about unparseable ones; the latter are treated as +0000.
        """
        m = re.match(Date.offset_re, text)
        if m is not None:
            sign = -1 if m.group(1) == "-" else 1
            hours = int(m.group(2))
            mins = int(m.group(3))
            # Zone offsets in actual use run from UTC-12:00 to UTC+14:00.
            # The range check has to be made on the signed value; the
            # digits alone are never negative.
            offset_mins = sign * (hours * 60 + mins)
            if offset_mins < -12 * 60 or offset_mins > 14 * 60 or mins > 59:
                complain(b"dubious UTC offset '%s'." % text)
            return (hours * 60 + mins) * 60 * sign
        else:
            complain(b"invalid UTC offset '%s', assuming +0000 instead." % text)
            return 0
    def rfc3339(self):
        "Format as an RFC3339 timestamp in Zulu time."
        return rfc3339(self.timestamp)
    def rfc822(self):
        "Format as an RFC822 timestamp."
        return time.strftime(b"%a %d %b %Y %H:%M:%S", time.gmtime(self.timestamp + self.tz_offset)) + " " + self.orig_tz_string
    def delta(self, other):
        "Seconds from this date to other (positive when other is later)."
        return other.timestamp - self.timestamp
    @staticmethod
    def tzresolve(tz, tm):
        """Hacky way to beat the Unix timezone database into resolving TZ names.

        Returns tz unchanged when it is already a signed offset; otherwise
        returns the [+-]HHMM offset that zone tz has at Unix time tm.
        """
        if tz[0] in "+-":
            return tz
        oldtz = os.getenv(b"TZ")
        try:
            os.putenv(b"TZ", tz)
            time.tzset()
            localdate = datetime.datetime.fromtimestamp(tm)
            gmdate = datetime.datetime.utcfromtimestamp(tm)
        finally:
            # Put the process timezone back exactly as we found it.  An
            # unset TZ must become unset again: an empty TZ is not the
            # same thing and would leave later local-time conversions
            # running in UTC.
            if oldtz is None and hasattr(os, "unsetenv"):
                os.unsetenv(b"TZ")
            else:
                os.putenv(b"TZ", oldtz or b"")
            time.tzset()
        if localdate < gmdate:
            sgn = "-"
            seconds = (gmdate - localdate).seconds
        else:
            sgn = "+"
            seconds = (localdate - gmdate).seconds
        # Split into whole hours and whole minutes; the leftover seconds
        # must be converted to minutes so that e.g. +05:30 is "+0530".
        (hours, leftover) = divmod(seconds, 3600)
        return sgn + (b"%02d%02d" % (hours, leftover // 60))
    def __str__(self):
        "Format as a git timestamp."
        return str(self.timestamp) + " " + self.orig_tz_string
    # Comparisons look at the UTC timestamp only, not the zone.
    def __eq__(self, other):
        return self.timestamp == other.timestamp
    def __ne__(self, other):
        return self.timestamp != other.timestamp
    def __lt__(self, other):
        return self.timestamp < other.timestamp

class DateTests(unittest.TestCase):
    "Unit tests for the Date class."
    def test_conversion(self):
        "Each accepted input format renders identically in every output format."
        expected = { b'rfc3339': b"2010-10-27T18:43:32Z",
                     b'rfc822': b"Wed 27 Oct 2010 18:43:32 +0000",
                     b'__str__': b"1288205012 +0000" }
        for init in (b'2010-10-27T18:43:32Z',
                     b'1288205012 +0000',
                     b'Wed 27 Oct 2010 18:43:32 +0000'):
            date = Date(init)
            for (func, result) in expected.items():
                self.assertEqual(getattr(date, func)(), result)
    def test_equality(self):
        "The same instant compares equal across formats; zone names resolve to offsets."
        dates = [Date(b'2010-10-27T18:43:32Z'),
                 Date(b'1288205012 +0000'),
                 Date(b'Wed 27 Oct 2010 18:43:32 +0000')]
        for (i, left) in enumerate(dates):
            for right in dates[i:]:
                self.assertEqual(left, right)
        now = int(time.time())
        self.assertEqual(Date.tzresolve(b"EST", now), b"-0500")
        self.assertEqual(Date.tzresolve(b"-0500", now), b"-0500")
        self.assertIn(Date.tzresolve(b"Europe/Warsaw", now), (b"+0100", b"+0200"))
    def test_inequality(self):
        "Dates order by instant, whatever format they were parsed from."
        def check_earlier(earlier, later):
            self.assertNotEqual(earlier, later)
            self.assertTrue(earlier < later)
            self.assertTrue(later > earlier)
        d1 = Date(b'Wed 27 Oct 2010 18:43:32 +0000')
        d2 = Date(b'Wed 27 Oct 2010 18:43:33 +0000')
        d3 = Date(b'Wed 27 Oct 2010 18:43:32 +0100')
        check_earlier(d1, d2)
        check_earlier(d3, d1)
        check_earlier(d3, d2)
        check_earlier(Date(b'2010-10-27T18:43:32Z'),
                      Date(b'2010-10-27T18:43:33Z'))
        check_earlier(Date(b'1288205012 +0000'),
                      Date(b'1288205013 +0000'))
    def test_deltas(self):
        "delta() is the signed number of seconds from one date to another."
        d1 = Date(b'Wed 27 Oct 2010 18:43:32 +0000')
        d2 = Date(b'Wed 27 Oct 2010 18:43:33 +0000')
        d3 = Date(b'Wed 27 Oct 2010 18:43:32 +0100')
        for (start, end, expected) in ((d1, d2, 1),
                                       (d2, d3, -3601),
                                       (d3, d1, 3600),
                                       (d1, d1, 0)):
            self.assertEqual(start.delta(end), expected)

class Attribution(object):
    "Represents an attribution of a repo action to a person and time."
    __slots__ = ("name", "email", "date")
    __hash__ = None
    attribution_re = re.compile(r"([^<]*\s*)<([^>]*)>(\s*.*)")
    @staticmethod
    def parseaddr(line):
        """Split 'Name <address> trailer' into a (name, address, trailer) tuple.

        Name and trailer are stripped of surrounding whitespace.  Raises
        Fatal when line does not contain an <address> part.
        """
        m = Attribution.attribution_re.match(line)
        if m:
            return (m.group(1).strip(), m.group(2), m.group(3).strip())
        else:
            raise Fatal(b"malformed attribution '%s'" % line)
    def __init__(self, operson=None):
        """Parse an attribution line, or create an empty one if operson is unset.

        Raises Fatal when the line or the date in it is malformed.
        """
        self.name = self.email = self.date = None
        if operson:
            # Deal with a cvs2svn artifact
            person = operson.replace(b"(no author)", b"no-author")
            (self.name, self.email, self.date) = Attribution.parseaddr(person)
            try:
                self.date = Date(self.date)
            except (ValueError, IndexError):
                raise Fatal(b"malformed attribution date '%s' in '%s'" \
                            % (self.date, operson))
    def email_out(self, _modifiers, msg, hdr):
        "Update an RFC822 message object with a representation of this."
        msg[hdr] = self.name + " <" + self.email + ">"
        msg[hdr + "-Date"] = self.date.rfc822()
    def remap(self, authors):
        """Remap the attribution name.

        authors maps a local name to a (name, mail, timezone) triple.  The
        first entry matching our email address (or our name, if we have
        no address) replaces name and email, and its timezone, if any,
        replaces the date's zone string.
        """
        for (local, (name, mail, timezone)) in authors.iteritems():
            if self.email.lower().startswith(local + "@") or self.email.lower() == local or (not self.email and self.name.lower() == local):
                self.name = name
                self.email = mail
                if timezone:
                    self.date.orig_tz_string = Date.tzresolve(timezone, self.date.timestamp)
                break
    def action_stamp(self):
        "Return the RFC3339 date and the email address joined by '!'."
        return self.date.rfc3339() + b"!" + self.email
    def __eq__(self, other):
        "Compare attributions after canonicalization."
        return (self.name == other.name
                and self.email == other.email
                and self.date == other.date)
    def __ne__(self, other):
        "Inverse of __eq__."
        # Python 2 does not derive != from ==; without this, != would
        # compare object identity and disagree with ==.
        return not self == other
    def who(self):
        "Return 'Name <address>' without the date."
        return self.name + b" <" + self.email + b">"
    def __str__(self):
        return self.name + b" <" + self.email + b"> " + str(self.date)

class Blob(object):
    "A detached blob of data, referenced by a mark."
    __slots__ = ("repo", "mark", "pathlist", "colors",
                 "cookie", "start", "size", "deletehook")
    __hash__ = None
    def __init__(self, repo=None):
        self.repo = repo
        self.mark = None
        self.pathlist = []      # In-repo paths associated with this blob
        self.colors = []
        self.cookie = None
        self.start = None       # Offset of the content in repo.seekstream, if any
        self.size = 0           # Content length
        self.deletehook = None
    def id_me(self):
        "Human-readable identification of this blob."
        return b"blob@%s" % self.mark
    def paths(self, _pathtype=None):
        "Return the path list; same call shape as commits and fileops."
        return self.pathlist
    def blobfile(self, create=False):
        "Path of the file where the content lives; with create, make its directories."
        # The path is derived from the identity of this object.
        stem = repr(id(self))
        parts = (b"blobs", stem[:3], stem[3:6], stem[6:])
        if create:
            for depth in range(1, len(parts)):
                partial = os.path.join(self.repo.subdir(), *parts[:depth])
                if not os.path.exists(partial):
                    os.mkdir(partial)
        return os.path.join(self.repo.subdir(), *parts)
    def hasfile(self):
        "Is the content in its own file rather than inside the repo's seekstream?"
        return not (self.repo.seekstream and self.start is not None)
    def materialize(self):
        "Make sure the content is in a file of its own; return that file's path."
        if not self.hasfile():
            self.set_content(self.get_content())
        return self.blobfile()
    def get_content(self):
        "Return the blob content as a string."
        if not self.hasfile():
            self.repo.seekstream.seek(self.start)
            return self.repo.seekstream.read(self.size)
        opener = bz2.BZ2File if global_options["compressblobs"] else open
        with opener(self.blobfile(), b"rb") as rfp:
            return rfp.read()
    def set_mark(self, mark):
        "Assign the mark and register this blob under it in the repo."
        self.mark = mark
        self.repo._mark_to_object[mark] = self
        return mark
    def forget(self):
        "Detach this blob from its repo."
        self.repo = None
    def set_content(self, text, tell=None):
        "Set the blob content from a string; tell is its offset in the seekstream."
        self.start = tell
        self.size = len(text)
        if not self.hasfile():
            return None
        if global_options["compressblobs"]:
            with bz2.BZ2File(self.blobfile(create=True), b"wb") as wfp:
                return wfp.write(text)
        with open(self.blobfile(create=True), b"wb") as wfp:
            wfp.write(text)
        return None
    def moveto(self, repo):
        "Associate this blob with another repo, moving its file along."
        if self.hasfile():
            oldloc = self.blobfile()
            self.repo = repo
            newloc = self.blobfile(create=True)
            if debug_enable(DEBUG_SHUFFLE):
                announce(b"blob rename calls os.rename(%s, %s)" % (oldloc, newloc))
            os.rename(oldloc, newloc)
        return self
    def clone(self, repo):
        "Return a copy belonging to repo whose file is a hard link to ours."
        c = copy.copy(self)
        c.repo = repo
        c.colors = []
        if self.hasfile():
            if debug_enable(DEBUG_SHUFFLE):
                announce(b"blob clone for %s (%s) calls os.link(): %s -> %s" % (self.mark, self.pathlist, self.blobfile(), c.blobfile()))
            os.link(self.blobfile(), c.blobfile(create=True))
        return c
    def dump(self, vcs=None, options=None, realized=None, internals=None):
        "Render this blob in import-stream format; empty if its file is gone."
        for unused in (options, realized, internals):
            pacify_pylint(unused)
        if self.hasfile() and not os.path.exists(self.blobfile()):
            return ''
        content = self.get_content()
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        return "blob\nmark %s\ndata %d\n%s\n" % (self.mark, len(content), content)
    def __str__(self):
        return self.dump()

class Tag(object):
    """Represents an annotated tag.

    A tag refers to its commit by mark (committish); once the commit
    object itself is known it is kept as target, and the tag is listed
    in that commit's attachments.
    """
    __slots__ = ("repo", "name", "color", "committish",
                 "target", "tagger", "comment", "deletehook")
    __hash__ = None
    def __init__(self, repo=None,
                 name=None, committish=None, target=None, tagger=None, comment=None):
        self.repo = None
        self.name = name
        self.color = None
        self.committish = None
        self.target = None
        self.remember(repo, committish=committish, target=target)
        self.tagger = tagger
        self.comment = comment
        self.deletehook = None
    def remember(self, repo, committish=None, target=None):
        "Remember an attachment to a repo and commit."
        self.repo = repo
        if target is not None:
            self.target = target
            self.committish = target.mark
        else:
            # Only a mark was given; look the commit up if we have a repo.
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        if self.target:
            self.target.attachments.append(self)
    def forget(self):
        "Forget this tag's attachment to its commit and repo."
        if self.target:
            try:
                self.target.attachments.remove(self)
            except ValueError:
                # Already detached from the commit; nothing to undo.
                pass
            self.target = None
        self.repo = None
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this tag for humans."
        # A tag has no mark of its own (there is no such slot), so
        # identify it by the mark of the commit it points at.
        return "tag@%s (%s)" % (self.committish, self.name)
    def tags(self, _modifiers, eventnum, _cols):
        "Enable do_tags() to report tags."
        return "%6d\ttag\t%s" % (eventnum+1, self.name)
    def email_out(self, modifiers, eventnum, filter_regexp=None):
        """Enable do_mailbox_out() to report tag metadata.

        Returns the tag as message text.  When filter_regexp is given,
        only headers whose 'Name:' form matches it are kept.
        """
        msg = RepoSurgeonEmail()
        msg["Event-Number"] = str(eventnum+1)
        msg["Tag-Name"] = self.name
        if self.tagger:
            self.tagger.email_out(modifiers, msg, b"Tagger")
        msg.set_payload(self.comment)
        if self.comment and not self.comment.endswith(b"\n"):
            complain(b"in tag %s, comment was not LF-terminated." % self.name)
        if filter_regexp:
            for key in msg.keys():
                if not filter_regexp.match(key + ":"):
                    del msg[key]
        return str(msg)
    def email_in(self, msg, fill=False):
        """Update this Tag from a parsed email message.

        Returns True if anything changed (always True with fill, which
        also supplies a missing tagger date and identity).  Raises Fatal
        on a message without Tag-Name or with a bad Tagger field.
        """
        if "Tag-Name" not in msg:
            raise Fatal(b"update to tag %s is malformed" % self.name)
        modified = False
        newname = msg["Tag-Name"]
        if self.name != newname:
            if debug_enable(DEBUG_EMAILIN):
                announce(b"in tag %d, Tag-Name is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.name), repr(newname)))
            self.name = newname
            modified = True
        if "Tagger" in msg:
            try:
                (newname, newemail, _extra) = Attribution.parseaddr(msg["Tagger"])
            except ValueError:
                raise Fatal(b"malformed Tagger field")
            if not newname or not newemail:
                raise Fatal(b"can't recognize address in Tagger: %s" % msg[b'Tagger'])
            else:
                if self.tagger.name != newname or self.tagger.email != newemail:
                    (self.tagger.name, self.tagger.email) = (newname, newemail)
                    if debug_enable(DEBUG_EMAILIN):
                        announce(b"in tag %d, Tagger is modified" \
                              % (int(msg["Event-Number"])))
                    modified = True
            if "Tagger-Date" in msg:
                date = Date(msg["Tagger-Date"])
                olddate = self.tagger.date
                if olddate is None or date != olddate:
                    # Yes, display this unconditionally
                    if self.repo:
                        if olddate is None:
                            # There was no previous date, hence no delta
                            # to compute or report.
                            announce(b"in %s, Tagger-Date is set to '%s'" \
                                 % (self.id_me(), date))
                        else:
                            announce(b"in %s, Tagger-Date is modified '%s' -> '%s' (delta %d)" \
                                 % (self.id_me(),
                                    olddate, date,
                                    olddate.delta(date)))
                    self.tagger.date = date
                    modified = True
        newcomment = msg.get_payload()
        if global_options["canonicalize"]:
            newcomment = newcomment.strip().replace(b"\r\n", b"\n") + b'\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce(b"in tag %d, comment is modified %s -> %s" \
                      % (int(msg["Event-Number"]), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        if fill:
            modified = True
            if self.tagger.date is None:
                self.tagger.date = Date(None)
            if self.tagger.name is None:
                (self.tagger.name, self.tagger.email) = whoami()
        return modified
    def undecodable(self, codec=b"utf-8"):
        "Does this tag have undecodable i18n sequences in it?"
        try:
            self.name.decode(codec, b"strict")
            # Tagger and comment are optional (dump() copes with both
            # being unset), so only check the ones that are present.
            if self.tagger:
                self.tagger.name.decode(codec, b"strict")
            if self.comment:
                self.comment.decode(codec, b"strict")
            return False
        except UnicodeError:
            return True
    @staticmethod
    def branchname(tagname):
        "Return the full branch reference corresponding to a tag."
        fulltagname = tagname
        if tagname.count(b"/") == 0:
            fulltagname = "tags/" + fulltagname
        if not fulltagname.startswith(b"refs/"):
            fulltagname = "refs/" + fulltagname
        return fulltagname
    def dump(self, vcs=None, options=None, realized=None, internals=None):
        "Dump this tag in import-stream format."
        pacify_pylint(vcs)
        pacify_pylint(options)
        pacify_pylint(realized)
        pacify_pylint(internals)
        parts = ["tag %s\nfrom %s\n" % (self.name, self.committish)]
        if self.tagger:
            parts.append(b"tagger %s\n" % self.tagger)
        parts.append(b"data %d\n%s\n" % (len(self.comment or b""), self.comment or b""))
        return b"".join(parts)
    def __str__(self):
        return self.dump()

class Reset(object):
    "A reset event: sets a branch ref, optionally to a given commit."
    __slots__ = ("repo", "ref", "committish", "target", "deletehook", "color")
    __hash__ = None
    def __init__(self, repo, ref=None, committish=None, target=None):
        self.repo = None
        self.ref = ref
        self.committish = None
        self.target = None
        self.remember(repo, committish=committish, target=target)
        self.deletehook = None
        self.color = None
    def remember(self, repo, committish=None, target=None):
        "Attach to a repo and to a commit, given as an object or as a mark."
        self.repo = repo
        if target is None:
            # Only a mark was supplied; resolve it if we have a repo.
            self.committish = committish
            if self.repo:
                self.target = self.repo.objfind(self.committish)
        else:
            self.target = target
            self.committish = target.mark
        if self.target:
            self.target.attachments.append(self)
    def forget(self):
        "Detach this reset from its commit and from its repo."
        commit = self.target
        if commit:
            try:
                commit.attachments.remove(self)
            except ValueError:
                pass
            self.target = None
        self.repo = None
    def moveto(self, repo):
        "Associate this reset with a different repo."
        self.repo = repo
    def tags(self, _modifiers, eventnum, _cols):
        "Report line for do_tags(): 1-origin event number, kind and ref."
        return "%6d\treset\t%s" % (eventnum+1, self.ref)
    def dump(self, vcs=None, options=None, realized=None, internals=None):
        "Render this reset in import-stream format, noting the branch in realized."
        for unused in (vcs, options, internals):
            pacify_pylint(unused)
        if realized is not None:
            # Record the branch part, without any '^' suffix.
            branch = self.ref.split(b"^")[0] if '^' in self.ref else self.ref
            realized[branch] = True
        header = "reset %s\n" % self.ref
        if self.committish:
            return header + "from %s\n\n" % self.committish
        return header
    def __str__(self):
        return self.dump()

class FileOp(object):
    "Represent a primitive operation on a file."
    # sourcedelete and targetdelete are declared here but not set by
    # __init__; reading them before something assigns them raises
    # AttributeError.
    __slots__ = ("repo", "op", "committish", "source", "target",
                 "mode", "path", "ref", "inline",
                 "sourcedelete", "targetdelete")
    __hash__ = None
    modify_re = re.compile(r"(M) ([0-9]+) (\S+) (.*)")
    # The character just after "/" in collation order; see sortkey().
    sortkey_sentinel = chr(ord(b"/") + 1)
    def __init__(self, repo):
        "Create an empty fileop belonging to repo; fill it with construct() or parse()."
        self.repo = repo
        self.op = None          # "M", "D", "N", "R", "C" or "deleteall"
        self.committish = None  # used by N ops
        self.source = None      # used by R and C ops
        self.target = None      # used by R and C ops
        self.mode = None        # used by M ops
        self.path = None        # used by M and D ops
        self.ref = None         # used by M and N ops
        self.inline = None      # data carried when ref is "inline"
    def setOp(self, op):
        "Set the operation type."
        self.op = op
    @staticmethod
    def sortkey(fileop):
        "Compute a key suited for sorting FileOps as git fast-export does."
        # As it says, 'Handle files below a directory first, in case they are
        # all deleted and the directory changes to a file or symlink.'
        # First sort the renames last, then sort lexicographically
        # We append a sentinel to make sure "a/b/c" < "a/b" < "a".
        return (fileop.op == "R",
                (fileop.path or fileop.source or b"") + \
                        fileop.sortkey_sentinel)
    @staticmethod
    def _unquote(path):
        "Strip one pair of enclosing double quotes from a path, if present."
        # The length check keeps an empty path from raising IndexError and
        # keeps a lone quote character from being mistaken for a pair.
        if len(path) >= 2 and path[0] == '"' and path[-1] == '"':
            return path[1:-1]
        return path
    def construct(self, *opargs):
        """Fill in this fileop from already-separated fields; return self.

        The first argument is the op type and decides how many further
        arguments are expected: M takes mode, ref, path (an integer mode
        is converted to six-digit octal text); D takes path; N takes ref,
        committish; R and C take source, target; deleteall takes nothing.
        Raises Fatal on any other op type.
        """
        if opargs[0] == b"M":
            (self.op, self.mode, self.ref, self.path) = opargs
            if isinstance(self.mode, int):
                self.mode = "%06o" % self.mode
        elif opargs[0] == b"D":
            (self.op, self.path) = opargs
        elif opargs[0] == b"N":
            (self.op, self.ref, self.committish) = opargs
        elif opargs[0] in (b"R", b"C"):
            (self.op, self.source, self.target) = opargs
        elif opargs[0] == b"deleteall":
            self.setOp(b"deleteall")
        else:
            raise Fatal(b"unexpected fileop %s" % opargs[0])
        return self
    def parse(self, opline):
        """Fill in this fileop from one line of an import stream; return self.

        Raises Fatal when the line is empty, is not a recognized fileop,
        or is a recognized one with the wrong shape.
        """
        # Dispatch with startswith() rather than opline[0] so that an
        # empty line falls through to the Fatal at the bottom instead of
        # raising IndexError.
        if opline.startswith(b"M"):
            m = FileOp.modify_re.match(opline)
            if not m:
                raise Fatal(b"bad format of M line: %s" % repr(opline))
            (self.op, self.mode, self.ref, self.path) = m.groups()
            # The pattern admits an empty path, which is kept as-is.
            self.path = FileOp._unquote(self.path)
        elif opline.startswith(b"N"):
            try:
                # Escape single quotes so shlex does not treat them as quoting.
                opline = opline.replace(b"'", r"\'")
                (self.op, self.ref, self.committish) = shlex.split(opline)
            except ValueError:
                raise Fatal(b"ill-formed fileop %s" % repr(opline))
        elif opline.startswith(b"D"):
            path = opline[2:].strip()
            if not path:
                raise Fatal(b"ill-formed fileop %s" % repr(opline))
            (self.op, self.path) = (b"D", FileOp._unquote(path))
        elif opline.startswith((b"R", b"C")):
            try:
                opline = opline.replace(b"'", r"\'")
                (self.op, self.source, self.target) = shlex.split(opline)
            except ValueError:
                raise Fatal(b"ill-formed fileop %s" % repr(opline))
        elif opline == "deleteall":
            self.op = "deleteall"
        else:
            raise Fatal(b"unexpected fileop %s while parsing" % opline)
        return self
    def paths(self, pathtype=None):
        """Return the set of all paths touched by this file op.

        pathtype, if given, is a collection of op types; an op whose type
        is not in it contributes no paths.  The default covers M, D, R,
        C and N.
        """
        if not pathtype:
            pathtype = set(('M', 'D', 'R', 'C', 'N'))
        if self.op not in pathtype:
            return set()
        if self.op in (b"M", b"D"): return {self.path}
        if self.op in (b"R", b"C"): return {self.source, self.target}
        # Ugh...this isn't right for deleteall, but since we don't expect
        # to see that except at branch tips we'll ignore it for now.
        if self.op in (b"N", b"deleteall"): return set()
        raise Fatal(b"unknown fileop type")
    def relevant(self, other):
        "Do two fileops touch the same file(s)?"
        # A deleteall touches everything, so it is relevant to any op.
        # Otherwise the result is the (possibly empty) set of shared paths.
        if self.op == "deleteall" or other.op == "deleteall":
            return True
        else:
            return self.paths() & other.paths()
    def dump(self, vcs=None, options=None):
        """Dump this fileop in import-stream format.

        Paths containing whitespace are wrapped in double quotes for M
        and D; R and C always quote both paths.  Inline data is appended
        for M and N ops whose ref is 'inline'.  Raises Fatal on an
        unknown op type.  The vcs and options arguments are not used.
        """
        pacify_pylint(vcs)
        pacify_pylint(options)
        if self.op == b"M":
            showmode = self.mode
            if isinstance(self.mode, int):
                showmode = "%06o" % self.mode
            parts = [" ".join((self.op, showmode, self.ref)), b" "]
            if len(self.path.split()) > 1:
                parts.extend((b'"', self.path, b'"'))
            else:
                parts.append(self.path)
            if self.ref == 'inline':
                parts.append(b"\ndata %d\n%s" % (len(self.inline), self.inline))
        elif self.op == b"N":
            parts = [" ".join((self.op, self.ref, self.committish)), b"\n"]
            if self.ref == 'inline':
                parts.append(b"data %d\n%s" % (len(self.inline), self.inline))
        elif self.op == b"D":
            parts = [b"D "]
            if len(self.path.split()) > 1:
                parts.extend((b'"', self.path, b'"'))
            else:
                parts.append(self.path)
        elif self.op in (b"R", b"C"):
            parts = [b'%s "%s" "%s"' %  (self.op, self.source, self.target)]
        elif self.op == "deleteall":
            parts = [self.op]
        else:
            raise Fatal(b"unexpected fileop %s while writing" % self.op)
        return b"".join(parts)
    def __str__(self):
        "The string form is the dump for the owning repo's VCS."
        return self.dump(self.repo.vcs)

class Callout(object):
    "Stand-in for a mark that is called out of an incomplete repository segment."
    __slots__ = ("mark", "branch", "_child_nodes")
    __hash__ = None
    def __init__(self, mark):
        "Wrap mark; the stub starts with no branch and no children."
        self.mark, self.branch = mark, None
        self._child_nodes = []
    def callout(self):
        "The callout text for a stub is simply the mark it wraps."
        return self.mark

class Commit(object):
    "Generic commit object."
    __slots__ = ("repo", "mark", "authors", "committer", "comment",
                 "branch", "fileops", "properties", "filemap", "color",
                 "legacy_id", "common", "splits", "deletehook", "attachments",
                 "_parent_nodes", "_child_nodes", "_pathset")
    __hash__ = None
    def __init__(self, repo=None):
        "Create an empty commit, optionally owned by repo."
        self.repo = repo
        self.mark = None             # Mark name of commit (may be None)
        self.authors = []            # Authors of commit
        self.committer = None        # Person responsible for committing it.
        self.comment = None          # Commit comment
        self.branch = None           # branch name
        self.fileops = []            # blob and file operation list
        self.properties = collections.OrderedDict()         # commit properties (extension)
        self.filemap = None          # Cached manifest; see _manifest()
        self.color = None
        self.legacy_id = None        # Commit's ID in an alien system
        self.common = None           # Used only by the Subversion parser
        self.splits = None           # split command increments this
                                     # to avoid creating multiple new commits
                                     # with duplicate marks
        self.deletehook = None       # Hook used during deletion operations
        self.attachments = []        # Tags pointing at this commit
        self._parent_nodes = []      # list of parent nodes
        self._child_nodes = []       # list of child nodes
        self._pathset = None         # Cached unfiltered result of paths()
    def index(self):
        "Our 0-origin index in our repo."
        return self.repo.index(self)
    def id_me(self):
        "ID this commit for humans."
        myid = "commit@%s" % self.mark
        if self.legacy_id:
            myid += "=<%s>" % self.legacy_id
        return myid
    def when(self):
        "Imputed timestamp for sorting after unites."
        return self.committer.date.timestamp
    def set_branch(self, branch):
        "Set the commit's branch field, optimizing for fast comparisons."
        self.branch = intern(branch)
    def operations(self):
        "Fileops associated with this commit; hides how this is represented."
        return self.fileops
    def set_operations(self, ops):
        "Replace the set of fileops associated with this commit."
        self.fileops = ops
    def append_operation(self, op):
        "Append to the set of fileops associated with this commit."
        self.fileops.append(op)
    def prepend_operation(self, op):
        "Prepend to the set of fileops associated with this commit."
        self.fileops.insert(0, op)
    def sort_operations(self):
        "Sort fileops the same way git-fast-export does."
        self.fileops.sort(key=FileOp.sortkey)
    def clone(self, repo=None):
        "Clone this commit, without its fileops, color and children."
        # The copy is shallow: apart from the fields reset below, mutable
        # members such as properties and attachments are shared with the
        # original.
        c = copy.copy(self)
        c.committer = copy.deepcopy(self.committer)
        c.authors = copy.deepcopy(self.authors)
        c.set_operations([])
        c.filemap = None
        c._pathset = None
        c.color = None
        if repo is not None:
            c.repo = repo
        c._child_nodes = []
        # use the encapsulation to set parents instead of relying
        # on the copy, so that Commit can do its bookkeeping.
        c._parent_nodes = [] # avoid confusing set_parents()
        c.set_parents(list(self.parents()))
        return c
    def showlegacy(self):
        "Show a legacy ID in the expected form for the ancestral system."
        if not self.legacy_id:
            return None
        # Special case for Subversion
        if self.repo and self.repo.vcs and self.repo.vcs.name == b"svn":
            return b"r" + self.legacy_id
        else:
            return self.legacy_id
    def lister(self, _modifiers, eventnum, cols):
        "Enable do_list() to report commits."
        topline = self.comment.split(b"\n")[0]
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        if self.legacy_id:
            legacy = "<%s>" % self.legacy_id
            summary += "%6s " % legacy
        report = (summary + topline)
        # A nonzero cols truncates the report to that width.
        if cols:
            report = report[:cols]
        return report
    def tip(self, _modifiers, eventnum, cols):
        "Enable do_tip() to report deduced branch tips."
        summary = "%6d %s %6s " % \
                      (eventnum+1, self.committer.date.rfc3339(), self.mark)
        report = (summary + self.head())
        if cols:
            report = report[:cols]
        return report
    def stamp(self, _modifiers, _eventnum, cols):
        "Enable do_stamp() to report action stamps."
        report = b"<" + self.action_stamp() + b"> " + self.comment.split(b"\n")[0]
        if cols:
            report = report[:cols]
        return report
    def tags(self, _modifiers, eventnum, _cols):
        "Enable do_tags() to report tag tip commits."
        if not self.branch or not b"/tags/" in self.branch:
            return
        if self.has_children():
            # Not a tip if the only branch continuing from us (through
            # children that have us as first parent) is our own.
            successor_branches = {child.branch for child in self.children() if child.parents()[0] == self}
            if len(successor_branches) == 1 and successor_branches.pop() == self.branch:
                return
        return b"%6d\tcommit\t%s" % (eventnum+1, self.branch)
    def email_out(self, modifiers, eventnum, filter_regexp=None):
        """Enable do_mailbox_out() to report commit metadata.

        Returns the commit rendered as a message string.  If filter_regexp
        is given, only headers for which it matches "Name:" are kept.
        """
        msg = RepoSurgeonEmail()
        msg[b"Event-Number"] = str(eventnum+1)
        msg[b"Event-Mark"] = self.mark
        msg[b"Branch"] = self.branch
        msg[b"Parents"] = b" ".join(self.parent_marks())
        if self.authors:
            self.authors[0].email_out(modifiers, msg, b"Author")
            # Co-authors get numbered headers: Author2, Author3, ...
            # They take the same arguments as the first author.
            for (i, coauthor) in enumerate(self.authors[1:]):
                coauthor.email_out(modifiers, msg, b"Author" + repr(2+i))
        self.committer.email_out(modifiers, msg, b"Committer")
        if self.legacy_id:
            msg[b"Legacy-ID"] = self.legacy_id
        for (name, value) in self.properties.iteritems():
            hdr = "-".join(s.capitalize() for s in name.split(b"-"))
            # Keep each property on one header line; email_in() undoes this.
            value = value.replace(b"\n", r"\n")
            value = value.replace(b"\t", r"\t")
            msg["Property-" + hdr] = value
        msg.set_payload(self.comment)
        if not self.comment.endswith(b"\n"):
            complain(b"in commit %s, comment was not LF-terminated." % self.mark)
        if filter_regexp:
            for key in msg.keys():
                if not filter_regexp.match(key + ":"):
                    del msg[key]
        return str(msg)
    def action_stamp(self):
        "Control how a commit stamp is made."
        # Prefer the author stamp because that doesn't change when patches
        # are replayed onto a repository, while the commit stamp will.
        if self.authors:
            return self.authors[0].action_stamp()
        return self.committer.action_stamp()
    def email_in(self, msg, fill=False):
        """Update this commit from a parsed email message.

        Returns True if anything was modified.  With fill set, the result
        is always True and a missing committer date or name is filled in
        from Date(None) and whoami().  Raises Fatal on a malformed or
        unrecognizable Committer or Author field.
        """
        modified = False
        if b"Branch" in msg:
            if self.branch != msg[b"Branch"]:
                modified = True
            self.set_branch(msg[b"Branch"])
        if b"Parents" in msg:
            if self.parent_marks() != msg[b"Parents"].split():
                modified = True
            self.set_parent_marks(msg[b"Parents"].split())
        if b"Committer" in msg:
            try:
                (newname, newemail, _extra) = Attribution.parseaddr(msg[b"Committer"])

            except ValueError:
                raise Fatal(b"malformed Committer field")
            if not newemail:
                raise Fatal(b"can't recognize address in Committer: %s" % msg[b"Committer"])
            else:
                if self.committer.name != newname or self.committer.email != newemail:
                    (self.committer.name, self.committer.email) = (newname, newemail)
                    # Yes, display this unconditionally
                    if self.repo:
                        announce(b"in %s, Committer is modified" % self.id_me())
                    modified = True
        if "Committer-Date" in msg:
            date = Date(msg[b"Committer-Date"])
            olddate = self.committer.date
            if olddate is None or date != olddate:
                # Yes, display this unconditionally
                if self.repo:
                    if olddate is None:
                        # No previous date, so there is no delta to report.
                        announce(b"in %s, Committer-Date is set to '%s'" \
                              % (self.id_me(), date))
                    else:
                        announce(b"in %s, Committer-Date is modified '%s' -> '%s' (delta %d)" \
                              % (self.id_me(),
                                 olddate, date,
                                 olddate.delta(date)))
                self.committer.date = date
                modified = True
        if b"Author" in msg:
            author_re = re.compile(b"Author[0-9]*$")
            # Potential minor bug here if > 10 authors;
            # lexicographic sort order doesn't match numeric
            # msg is *not* a dict so the .keys() is correct
            authorkeys = sorted(filter(author_re.match, msg.keys()))
            # Grow the author list to match the number of Author headers.
            for i in range(len(authorkeys) - len(self.authors)):
                self.authors.append(Attribution())
            # Another potential minor bug: permuting the set of authors
            # will look like a modification, as old and new authors are
            # compared pairwise rather than set equality being checked.
            # Possibly a feature if one thinks order is significant, but
            # I just did it this way because it was easier.
            for (i, hdr) in enumerate(authorkeys):
                try:
                    (newname, newemail, _extra) = Attribution.parseaddr(msg[hdr])
                except ValueError:
                    raise Fatal(b"malformed Author field")
                if not newemail:
                    raise Fatal(b"can't recognize address in %s: %s" % (hdr, msg[hdr]))
                else:
                    if self.authors[i].name != newname or self.authors[i].email != newemail:
                        (self.authors[i].name, self.authors[i].email) = (newname, newemail)
                        if debug_enable(DEBUG_EMAILIN):
                            announce(b"in commit %s, Author #%d is modified" \
                                  % (msg[b"Event-Number"], i+1))
                        modified = True
                if hdr + b"-Date" in msg:
                    date = Date(msg[hdr + b"-Date"])
                    if date != self.authors[i].date:
                        # Yes, display this unconditionally
                        if self.repo:
                            announce(b"in event %s, %s-Date #%d is modified" \
                                     % (msg[b"Event-Number"], hdr, i+1))
                        self.authors[i].date = date
                        modified = True
        if b"Legacy-ID" in msg:
            if msg[b"Legacy-ID"] != self.legacy_id:
                modified = True
                self.legacy_id = msg[b"Legacy-ID"]
        # Properties are replaced wholesale by whatever the message carries.
        newprops = collections.OrderedDict()
        for prophdr in msg.keys():
            if not prophdr.startswith(b"Property-"): continue
            propkey = prophdr[9:].lower()
            propval = msg[prophdr]
            if propval == b"True":
                propval = True
            elif propval == b"False":
                propval = False
            else:
                # Undo the escaping applied by email_out().
                propval = propval.replace(r"\n", b"\n")
                propval = propval.replace(r"\t", b"\t")
            newprops[propkey] = propval
        modified |= (newprops != self.properties)
        self.properties = newprops
        newcomment = msg.get_payload()
        if global_options[b"canonicalize"]:
            newcomment = newcomment.strip() + b'\n'
        if newcomment != self.comment:
            if debug_enable(DEBUG_EMAILIN):
                announce(b"in %s, comment is modified %s -> %s" \
                      % (self.id_me(), repr(self.comment), repr(newcomment)))
            modified = True
            self.comment = newcomment
        if fill:
            modified = True
            if self.committer.date is None:
                self.committer.date = Date(None)
            if self.committer.name is None:
                (self.committer.name, self.committer.email) = whoami()
        return modified
    def set_mark(self, mark):
        "Set the commit's mark."
        self.mark = mark
        self.repo._mark_to_object[mark] = self
        return mark
    def forget(self):
        "De-link this commit from its parents."
        self.set_parents([])
        for fileop in self.operations():
            if fileop.op == b'N':
                self.repo.inlines -=1
        self.repo = None
    def moveto(self, repo):
        "Change the repo this commit is associated with."
        for fileop in self.operations():
            fileop.repo = repo
            # Keep both repos' counts of N ops in step with the move.
            if fileop.op == b'N':
                self.repo.inlines -=1
                repo.inlines += 1
        self.repo = repo
    # Hide the parent list behind an interface, so that we can memoize
    # the computation, which is very expensive and frequently
    # performed.
    def parents(self):
        "Get a list of this commit's parents."
        return self._parent_nodes
    def parent_marks(self):
        "Get the marks of this commit's parents, in parent order."
        return [x.mark for x in self._parent_nodes]
    def set_parent_marks(self, marks):
        "Replace the parent list with the objects the marks resolve to."
        self.set_parents([self.repo.objfind(x) for x in marks])
    def set_parents(self, parents):
        "Replace the parent list, keeping the child lists of old and new parents in step."
        for parent in self._parent_nodes:
            # remove all occurrences of self in old parent's children cache
            parent._child_nodes = [n for n in parent._child_nodes if n is not self]
        self._parent_nodes = parents
        assert all(self._parent_nodes)
        for parent in self._parent_nodes:
            parent._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def add_parent(self, mark):
        "Append a parent, given either a Commit or a mark to resolve; Fatal if unresolvable."
        if isinstance(mark, Commit):
            newparent = mark
        else:
            newparent = self.repo.objfind(mark)
        if not newparent:
            raise Fatal(b"ill-formed stream: cannot resolve %s" % mark)
        self._parent_nodes.append(newparent)
        newparent._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def callout(self):
        "Generate a callout for this commit."
        return self.action_stamp()
    @staticmethod
    def is_callout(mark):
        "Is the specified mark field a callout?"
        return b"!" in mark
    def add_callout(self, mark):
        "Append a Callout stub for mark to the parent list."
        self._parent_nodes.append(Callout(mark))
    def insert_parent(self, idx, mark):
        "Insert the object mark resolves to at position idx of the parent list."
        newparent = self.repo.objfind(mark)
        assert(newparent)
        self._parent_nodes.insert(idx, newparent)
        newparent._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def remove_parent(self, event):
        "Remove event from our parents and ourselves from its children."
        # remove *all* occurrences of event in parents
        self._parent_nodes = [n for n in self._parent_nodes if n is not event]
        # and all occurrences of self in events children
        event._child_nodes = [n for n in event._child_nodes if n is not self]
        self.repo.invalidate_manifests()
    def replace_parent(self, e1, e2):
        "Replace the first occurrence of parent e1 with e2."
        self._parent_nodes[self._parent_nodes.index(e1)] = e2
        e1._child_nodes.remove(self)
        e2._child_nodes.append(self)
        self.repo.invalidate_manifests()
    def has_parents(self):
        "Predicate - does this commit have parents?"
        return bool(self._parent_nodes)
    def has_callouts(self):
        "Predicate - is any of this commit's parents a Callout?"
        return bool([c for c in self._parent_nodes if isinstance(c, Callout)])
    def children(self):
        "Get a list of this commit's children."
        return self._child_nodes
    def child_marks(self):
        "Get the marks of this commit's children."
        return [x.mark for x in self._child_nodes]
    def has_children(self):
        "Predicate - does this commit have children?"
        return bool(self._child_nodes)
    def first_child(self):
        "Get the first child of this commit, or None if not has_children()."
        if not self._child_nodes:
            return None
        return self._child_nodes[0]
    def descended_from(self, other):
        "Is this commit a descendent of the specified other?"
        # A commit dated before other is ruled out without walking further.
        if not self.has_parents() or self.committer.date < other.committer.date:
            return False
        elif other in self.parents():
            return True
        else:
            return any(parent.descended_from(other) \
                        for parent in self.parents())
    def cliques(self):
        "Return a dictionary mapping filenames to associated M cliques."
        # Each value is the list of fileop indices of M ops on that path.
        cliques = collections.defaultdict(list)
        for (i, fileop) in enumerate(self.operations()):
            if fileop.op == b"M": cliques[fileop.path].append(i)
        return cliques
    def fileop_dump(self):
        "Dump file ops without data or inlines; used for debugging only."
        print(b"commit %d, mark %s:" % (self.repo.find(self.mark)+1, self.mark))
        for (i, op) in enumerate(self.operations()):
            if op is not None:
                print(b"%d: %-20s" % (i, str(op)))
    def paths(self, pathtype=None):
        """Return the set of all paths touched by this commit.

        pathtype, if given, is passed to each fileop to restrict which op
        types contribute.  Only the unrestricted answer is cached.
        """
        if pathtype:
            # A filtered query must neither read nor pollute the cache,
            # which holds the unfiltered answer.
            filtered = set()
            for fileop in self.operations():
                filtered |= fileop.paths(pathtype)
            return filtered
        if self._pathset is None:
            self._pathset = set()
            for fileop in self.operations():
                self._pathset |= fileop.paths(pathtype)
        return self._pathset
    def invalidate_pathset_cache(self):
        "Force a rebuild on the next call to paths()."
        self._pathset = None
    def visible(self, path):
        """Is the specified path modified and not deleted in the ancestors?

        Walks back along first parents and returns the nearest ancestor
        that creates the path with an M op, or as the target of an R or
        C op.  Returns None if a D op on the path is met first or the
        ancestry runs out.
        """
        ancestor = self
        while True:
            parents = ancestor.parents()
            if not parents:
                return None
            ancestor = parents[0]
            for fileop in ancestor.operations():
                if fileop.op == b"D" and fileop.path == path:
                    # Deleted here, so nothing older can still be visible.
                    return None
                elif fileop.op == b"M" and fileop.path == path:
                    return ancestor
                elif fileop.op in (b"R", b"C") and fileop.target == path:
                    return ancestor
    def manifest(self):
        "Return a map from paths to marks for files existing at this commit."
        self.repo._has_manifests = True
        # _manifest() recurses along first parents, so allow a depth of
        # at least twice the number of events in the repo.
        sys.setrecursionlimit(max(
                sys.getrecursionlimit(),
                len(self.repo.events) * 2))
        return self._manifest()
    def _manifest(self):
        "Compute, cache and return the manifest; see manifest()."
        if self.filemap is not None:
            return self.filemap
        # Get the first parent manifest, or an empty one.
        try:
            ancestors = self.parents()[0]._manifest().snapshot()
        except IndexError:
            ancestors = PathMap()
        # Take own fileops into account.
        for fileop in self.operations():
            if fileop.op == b'M':
                ancestors[fileop.path] = (fileop.mode, fileop.ref, fileop.inline)
            elif fileop.op == b'D':
                if fileop.path in ancestors:
                    del ancestors[fileop.path]
            elif fileop.op == b'C':
                ancestors[fileop.target] = ancestors[fileop.source]
            elif fileop.op == b'R':
                ancestors[fileop.target] = ancestors[fileop.source]
                if fileop.source in ancestors:
                    del ancestors[fileop.source]
            elif fileop.op == b'deleteall':
                ancestors = PathMap()
        self.filemap = ancestors
        return ancestors
    def canonicalize(self):
        "Replace fileops by a minimal set of D and M with the same result."
        # If last fileop is a deleteall, only keep that.
        try:
            lastop = self.operations()[-1]
        except IndexError:
            return
        else:
            if lastop.op == b"deleteall":
                self.set_operations([lastop])
                return
        # Fetch the tree state before us...
        try:
            parent = self.parents()[0]
        except IndexError:
            parent = PathMap()
        else:
            parent = parent.manifest()
        # ... and after our file operations have been applied.
        current = self.manifest()
        # Get paths touched by non-deleteall operations.
        paths = self.paths()
        # Generate needed D fileops.
        if any(op.op == b"deleteall" for op in self.operations()):
            # Any file in the parent tree might disappear.
            check_delete = parent
        else:
            # Only files touched by non-deleteall ops might disappear.
            check_delete = paths
        new_ops = []
        self.set_operations(new_ops)
        for path in check_delete:
            if path in parent and path not in current:
                fileop = FileOp(self.repo)
                fileop.construct(b"D", path)
                new_ops.append(fileop)
        # Generate needed M fileops.
        # Only paths touched by non-deleteall ops can be changed.
        for path in paths:
            try:
                mode, mark, inline = current[path]
            except TypeError:
                # Nothing to unpack for this path in the current tree.
                continue
            if (mode, mark, inline) != parent[path]:
                fileop = FileOp(self.repo)
                fileop.construct(b"M", mode, mark, path)
                if mark == "inline":
                    fileop.inline = inline
                new_ops.append(fileop)
        # Finishing touches:
        self.sort_operations()
        self._pathset = None
    def alldeletes(self, killset=None):
        "Is this an all-deletes commit?"
        # killset is the set of op types that count as deletions.
        if killset is None:
            killset = {b"D", b"deleteall"}
        return all(fileop.op in killset for fileop in self.operations())
    def checkout(self, directory=None):
        """Make a directory with links to files in a specified checkout.

        The directory defaults to one named after our mark under the
        repo's subdir().  Returns the directory; raises Recoverable if an
        OSError occurs while populating it.
        """
        if not directory:
            directory = os.path.join(self.repo.subdir(), self.mark)
        try:
            if not os.path.exists(directory):
                os.mkdir(directory)
            for (path, (_, mark, inline)) in self.manifest().iteritems():
                fullpath = os.path.join(directory, path)
                fulldir = os.path.dirname(fullpath)
                if not os.path.exists(fulldir):
                    os.makedirs(fulldir)

                if mark == "inline":
                    with open(fullpath, b"wb") as wfp:
                        wfp.write(inline)
                else:
                    blob = self.repo.objfind(mark)
                    # Hard-link the blob's file when it has one,
                    # otherwise write its content out.
                    if blob.hasfile():
                        os.link(blob.blobfile(), fullpath)
                    else:
                        with open(fullpath, b"wb") as wfp:
                            wfp.write(blob.get_content())
        except OSError:
            raise Recoverable(b"could not create checkout directory or files.")
        return directory
    def head(self):
        "Return the branch to which this commit belongs."
        if self.branch.startswith(b"refs/heads/") or not self.has_children():
            return self.branch
        # Otherwise follow a child on our own branch, or failing that an
        # only child, and use its answer.
        rank = 0; child = None # pacify pylint
        for rank, child in enumerate(self.children()):
            if child.branch == self.branch:
                return child.head()
        if rank == 0:
            return child.head() # there was only one child
        raise Recoverable(b"can't deduce a branch head for %s" % self.mark)
    def references(self, mark):
        "Does this commit reference a specified blob mark?"
        for fileop in self.operations():
            if fileop.op == b'M' and fileop.ref == mark:
                return True
        return False
    def blob_by_name(self, name):
        "Look up file content by name; False if no M op here touches it."
        for fileop in self.operations():
            if fileop.op == b'M' and fileop.path == name:
                return self.repo.objfind(fileop.ref).get_content()
        return False
    def undecodable(self, codec=b"utf-8"):
        "Does this commit have undecodable i18n sequences in it?"
        # Checks the committer name, each author name and the comment.
        try:
            self.committer.name.decode(codec, b"strict")
            for author in self.authors:
                author.name.decode(codec, b"strict")
            self.comment.decode(codec, b"strict")
            return False
        except UnicodeError:
            return True
    def delete(self, policy=None):
        "Delete this commit from its repository."
        self.repo.delete([self.index()], policy)
    def dump(self, vcs=None, options=None, realized=None, internals=None):
        """Dump this commit in import-stream format.

        realized, if given, is a map in which our branch is marked True.
        internals, if given, is the collection of marks inside the segment
        being dumped; parents outside it are written as callouts.
        """
        pacify_pylint(options)
        if vcs is None and self.repo.vcs and self.repo.vcs.importer:
            vcs = self.repo.vcs
        parts = []
        options = options or []
        incremental = False
        if '--noincremental' not in options:
            if realized is not None and self.has_parents():
                if self.branch not in realized and self.parents()[0].branch not in realized:
                    incremental = True
        if incremental:
            parts.append(b"reset %s^0\n\n" % self.branch)
        parts.append(b"commit %s\n" % self.branch)
        if self.legacy_id:
            parts.append(b"#legacy-id %s\n" % self.legacy_id)
        if realized is not None:
            realized[self.branch] = True
        if self.mark:
            parts.append(b"mark %s\n" % self.mark)
        if self.authors:
            for author in self.authors:
                parts.append(b"author %s\n" % author)
        if self.committer:
            parts.append(b"committer %s\n" % self.committer)
        if self.comment is not None:
            comment = self.comment
            if options and b"--legacy" in options and self.legacy_id:
                comment += "\nLegacy-ID: %s\n" % self.legacy_id
            parts.append(b"data %d\n%s" % (len(comment), comment))
        if b"nl-after-comment" in self.repo.export_style():
            parts.append(b"\n")
        parents = self.parents()
        if parents:
            ancestor = parents[0]
            # Guard the membership test: internals may be None here when
            # the dump is incremental.
            if (not internals and not incremental) or (internals and ancestor.mark in internals):
                parts.append(b"from %s\n" % ancestor.mark)
            elif b'--callout' in options:
                parts.append(b"from %s\n" % ancestor.callout())
        for ancestor in parents[1:]:
            if not internals or ancestor.mark in internals:
                nugget = ancestor.mark
            else:
                nugget = ancestor.callout()
            parts.append(b"merge %s\n" % nugget)
        if vcs and vcs.properties:
            for (name, value) in self.properties.iteritems():
                # Boolean properties are written as a bare name when
                # true and omitted when false.
                if value in (True, False):
                    if value:
                        parts.append(b"property %s\n" % name)
                else:
                    parts.append(b"property %s %d %s\n" % (name, len(str(value)), str(value)))
        parts.extend(op.dump(vcs) + b"\n" for op in self.operations())
        if not "no-nl-after-commit" in self.repo.export_style():
            parts.append(b"\n")
        return b"".join(parts)
    def __str__(self):
        "The string form is the import-stream dump with default arguments."
        return self.dump()

class Passthrough(object):
    "A stream line kept as text and emitted again unchanged."
    __slots__ = ("text", "deletehook", "color")
    __hash__ = None
    def __init__(self, line):
        "Wrap line; the delete hook and the color start out unset."
        self.text = line
        self.color = None
        self.deletehook = None
    def email_out(self, _modifiers, eventnum, _filter_regexp=None):
        "Render as a message block so do_mailbox_out() can report it."
        envelope = RepoSurgeonEmail()
        # Event numbers are presented one-origin
        envelope["Event-Number"] = str(eventnum+1)
        envelope.set_payload(self.text)
        return str(envelope)
    def email_in(self, msg):
        "Replace the text with the payload of msg."
        self.text = msg.get_payload()
    def dump(self, vcs=True, options=None, realized=None, internals=None):
        "Return the text in import-stream format; the arguments are unused."
        for unused in (vcs, options, realized, internals):
            pacify_pylint(unused)
        return self.text
    def __str__(self):
        "Same text as dump() called with its defaults."
        return self.dump()

# Generic extractor code begins here

class signature:
    """A file signature - file path, hash value of content and permissions.

    For a directory only the path is recorded; hashval and perms stay None.
    """
    def __init__(self, path):
        "Compute the signature of the filesystem object at path."
        self.path = path
        self.hashval = None
        self.perms = None
        if not os.path.isdir(path):
            with open(path, "rb") as fp:
                self.hashval = hashlib.sha1(fp.read()).hexdigest()
            self.perms = os.stat(path).st_mode
            # Map to the restricted set of modes that are allowed in
            # the stream format.  A mode matching neither test is kept
            # exactly as stat() reported it.
            if self.perms & 0o100700 == 0o100700:
                self.perms = 0o100755
            elif self.perms & 0o100600 == 0o100600:
                self.perms = 0o100644
    def __eq__(self, other):
        "Equal when other is a signature with the same path, hash and mode."
        #if debug_enable(DEBUG_EXTRACT):
        #    announce(b"%s == %s -> %s" % (str(self),
        #                                 str(other),
        #                                 self.__dict__ == other.__dict__))
        # Anything that is not a signature (None, say) simply compares
        # unequal instead of blowing up on a missing __dict__.
        if not isinstance(other, signature):
            return False
        return self.__dict__ == other.__dict__
    def __ne__(self, other):
        "Negation of __eq__."
        return not signature.__eq__(self, other)
    def __str__(self):
        "Return '<path:mode:hash-prefix>'; unset fields are shown as dashes."
        # Directory signatures carry neither a mode nor a hash.
        if self.perms is None:
            perms = "------"
        else:
            perms = "%6o" % self.perms
        if self.hashval is None:
            digest = "----"
        else:
            digest = self.hashval[:4]
        return "<%s:%s:%s>" % (self.path, perms, digest)

def capture(command):
    "Execute a shell command line and hand back its standard output."
    if debug_enable(DEBUG_COMMANDS):
        stamp = rfc3339(time.time())
        announce(b"%s: capturing %s" % (stamp, command))
    try:
        output = subprocess.check_output(command, shell=True)
    except (subprocess.CalledProcessError, OSError) as failure:
        # Nonzero exit status or a failed launch both end up here
        raise Fatal(b"execution of '%s' failed: %s" % (command, failure))
    else:
        if debug_enable(DEBUG_COMMANDS):
            sys.stderr.write(output)
        return output

class PathMap(object):
    """Represent the set of filenames visible in a Subversion
    revision, using copy-on-write to keep the size of the structure in
    line with the size of the Subversion repository metadata."""
    __slots__ = ("shared", "maxid", "snapid", "store")
    __hash__ = None
    # Sentinel used as a store key: _split_path() returns it as the
    # basename for a non-str path, and _elts_items() never yields it.
    _self_value = object()
    def __init__(self, other = None):
        "Create an empty map, or a new view on the store of PathMap other."
        # The instance may be a child of several other PathMaps if |shared|
        # is True. |snapid| is an integer unique among related PathMaps,
        # and |maxid| is a list (for reference sharing) whose only value is
        # the maximum |snapid| of the collection. |store| is a dict mapping
        # single-component names to lists of values indexed by snapids. The
        # values can be other PathMaps (for directories) or anything
        # except PathMaps and None (for files).
        if not isinstance(other, PathMap):
            self.store = {}
            self.maxid = [0]
            self.snapid = 0
        else:
            # Share the store and the id counter, and claim the next snapid.
            self.store = other.store
            self.maxid = other.maxid
            self.snapid = self.maxid[0] = self.maxid[0] + 1
        self.shared = False
    def snapshot(self):
        "Return a copy-on-write snapshot of the set."
        r = PathMap(self)
        if self.snapid < r.snapid - 1:
            # Late snapshot of an "old" PathMap. Restore values which may
            # have changed since. This is uncommon, don't over-optimize.
            for component in self.store: # _elts_items() would skip None
                r._elts_set(component, self._elts_get(component))
        # Subdirectories visible in the snapshot are flagged shared;
        # _insert() and __delitem__() snapshot a shared child before
        # descending into it to modify it.
        for _, v in r._elts_items():
            if isinstance(v, PathMap):
                v.shared = True
        return r
    def copy_from(self, target_path, source_pathset, source_path):
        "Insert, at target_path, a snapshot of source_path in source_pathset."
        source_obj = source_pathset._find(source_path)
        if source_obj is None:
            # Nothing at source_path: the copy is silently a no-op
            return
        if source_obj is source_pathset:
            # Do not share toplevel instances, only inner ones
            source_obj = source_obj.snapshot()
        elif isinstance(source_obj, PathMap):
            source_obj.shared = True
        self._insert(target_path, source_obj)
    def ls_R(self, path):
        """Return an iterator over the file paths below directory path,
        relative to that directory; empty if path is not a directory
        in the set."""
        elt = self._find(path)
        if isinstance(elt, PathMap):
            return iter(elt)
        return iter(()) # empty iterator
    def __contains__(self, path):
        "Return true if path is present in the set as a file."
        elt = self._find(path)
        return not isinstance(elt, PathMap) and elt is not None
    def __getitem__(self, path):
        "Return the value associated with a specified path."
        elt = self._find(path)
        if elt is None or isinstance(elt, PathMap):
            # This is not quite like indexing, which would throw IndexError
            return None
        return elt
    def __setitem__(self, path, value):
        "Add a filename to the set, with associated value (not None)."
        assert value is not None
        self._insert(path, value)
    def __delitem__(self, path):
        """Remove a filename, or all descendents of a directory name,
        from the set."""
        basename, components = self._split_path(path)
        assert(not self.shared)
        # self is rebound at each step to walk down the directory chain
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                # An intermediate directory is missing: nothing to delete
                return
            if nxt.shared:
                # Replace a shared child with a private snapshot first
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        # Set value to None since PathMap doesn't tell None and absence apart
        self._elts_set(basename, None)
    def __nonzero__(self):
        "Return true if any filenames are present in the set."
        # Python 2 truth-value hook
        return any(v for _, v in self._elts_items())
    def __len__(self):
        "Return the number of files in the set."
        # Directories contribute the count of the files below them
        return sum(len(v) if isinstance(v, PathMap) else 1
                for _, v in self._elts_items())
    def iteritems(self):
        """Yield a (path, value) pair for each file in the set, walking
        each level in sorted order and joining nested names with
        os.path.join()."""
        for (name, value) in sorted(self._elts_items()):
            if isinstance(value, PathMap):
                for path, v in value.iteritems():
                    yield (os.path.join(name, path), v)
            elif value is not None:
                yield (name, value)
    def __iter__(self):
        "Iterate over the file paths, in the order iteritems() yields them."
        return itertools.imap(operator.itemgetter(0), self.iteritems())
    def __str__(self):
        "Return '<PathMap: ...>' listing the file paths separated by spaces."
        return '<PathMap: {}>'.format(b' '.join(self))
    # Return the current value associated with the component in the store
    def _elts_get(self, component):
        snaplist = self.store.get(component) or [None]
        # A snapid past the end of the list reads the last recorded value
        return snaplist[min(self.snapid, len(snaplist) - 1)]
    # Set the current value associated with the component in the store
    def _elts_set(self, component, value):
        snaplist = self.store.setdefault(component, [None])
        # Before overwriting our own slot, pad the list (repeating its
        # last value) far enough to cover this snapid and, when a later
        # snapid exists, the one right after it.
        needed = min(self.maxid[0], self.snapid + 1) + 1
        if len(snaplist) < needed:
            last = snaplist[-1]
            snaplist.extend(last for _ in range(len(snaplist), needed))
        snaplist[self.snapid] = value
        return value
    # Iterate through (component, current values) pairs, leaving out the
    # _self_value slot and components whose current value is None
    def _elts_items(self):
        snapid = self.snapid
        for component, snaplist in self.store.iteritems():
            if component is self._self_value: continue
            val = snaplist[min(snapid, len(snaplist) - 1)]
            if val is not None: yield (component, val)
    # Insert obj at the location given by components.
    def _insert(self, path, obj):
        basename, components = self._split_path(path)
        if not basename:
            # No final component to store under
            return
        assert(not self.shared)
        # self is rebound at each step to walk down the directory chain
        for component in components:
            nxt = self._elts_get(component)
            if not isinstance(nxt, PathMap):
                # Missing (or non-directory) level: put a fresh map there
                nxt = self._elts_set(component, PathMap())
            elif nxt.shared:
                # Replace a shared child with a private snapshot first
                nxt = self._elts_set(component, nxt.snapshot())
            self = nxt
        self._elts_set(basename, obj)
    # Return the object at the location given by components--either
    # the associated value if it's present as a filename, or a PathMap
    # containing the descendents if it's a directory name.  Return
    # None if the location does not exist in the set.
    def _find(self, path):
        basename, components = self._split_path(path)
        if not basename:
            # A path with no components designates this map itself
            return self
        for component in components:
            self = self._elts_get(component)
            if not isinstance(self, PathMap):
                return None
        return self._elts_get(basename)
    # Return (basename, components).  For a str path, basename is the
    # final component (None when the normalized path has none) and
    # components lists the leading ones in order.  For any other path,
    # basename is the _self_value sentinel and components come from
    # path[0].  The pop() relies on filter() returning a list, as it
    # does under Python 2.
    @staticmethod
    def _split_path(path):
        if isinstance(path, str):
            components = filter(None, os.path.normpath(path).split(os.sep))
            return (components.pop() if components else None, components)
        else:
            return (PathMap._self_value,
                    filter(None, os.path.normpath(path[0]).split(os.sep)))

class RepoStreamer:
    "Repository factory driver class for all repo analyzers."
    def __init__(self, extractor):
        "Prepare to build a repository from what extractor reports."
        self.markseq = 0
        self.tagseq = 0
        self.commits = {}
        # revision ID -> Commit built for it
        self.commit_map = {}
        # revision ID -> {path: signature} for the files of that revision
        self.filemap = {}
        # content hash -> mark of the blob holding that content
        self.hash_to_mark = {}
        self.baton = None
        self.extractor = extractor
    def __newmark(self):
        "Allocate and return the next unused mark."
        self.markseq += 1
        mark = ":" + str(self.markseq)
        return mark
    def extract(self, repo, progress=True):
        """Populate repo from the extractor's revisions and return it.

        Every revision is checked out and becomes a Commit whose fileops
        are derived by comparing the signatures of the files present
        with those inherited from the parent revisions; content not seen
        before is stored as a Blob event.  Resets and tags reported by
        the extractor are appended after the commits.  Raises
        Recoverable if the extractor says the directory is not clean.
        """
        if not self.extractor.isclean():
            raise Recoverable(b"directory %s has unsaved changes." % os.getcwd())
        repo.makedir()
        with Baton(prompt=b"Extracting", enable=progress) as self.baton:
            repo.addEvent(Passthrough("#reposurgeon sourcetype %s\n" \
                                      % self.extractor.vcstype.name), where=0)
            self.extractor.analyze(self.baton)
            self.extractor.pre_extract(repo)
            #saved_umask = os.umask(0)
            # Walk a copy so the extractor's own revision list is untouched.
            for revision in copy.copy(self.extractor.get_revlist()):
                commit = Commit(repo)
                self.baton.twirl()
                present = self.extractor.checkout(revision, self.filemap)
                parents = self.extractor.get_parents(revision)
                commit.committer = Attribution(self.extractor.get_committer(revision))
                commit.authors = [Attribution(a) \
                                  for a in self.extractor.get_authors(revision)]
                commit.set_parents([self.commit_map[rev] for rev in parents])
                commit.set_branch(self.extractor.get_branch(revision))
                commit.comment = self.extractor.get_comment(revision)
                if debug_enable(DEBUG_EXTRACT):
                    msg = commit.comment
                    if msg is None:
                        msg = b""
                    announce(b"r%s: comment '%s'" % (revision, msg.strip()))
                # Start from the union of the parents' file maps.
                manifest = self.filemap[revision] = {}
                for rev in parents:
                    manifest.update(self.filemap[rev])
                if present:
                    removed = set(manifest) - set(present)
                    for path in present:
                        if os.path.isdir(path):
                            continue
                        if not os.path.exists(path):
                            announce(b"r%s: expected path %s does not exist!" % \
                                     (revision, path))
                            continue
                        newsig = signature(path)
                        if newsig.hashval in self.hash_to_mark:
                            #if debug_enable(DEBUG_EXTRACT):
                            #    announce(b"r%s: %s has old hash" \
                            #             % (revision, path))
                            # The file's hash corresponds to an existing
                            # blob; generate modify, copy, or rename as
                            # appropriate.
                            if path not in manifest \
                                   or manifest[path]!=newsig:
                                if debug_enable(DEBUG_EXTRACT):
                                    announce(b"r%s: update for %s" % (revision, path))
                                # Iterating through dict items (with
                                # iteritems() or itemsview() for
                                # instance) while mutating the
                                # underlying dict is not supported by
                                # Python. The following loop thus uses
                                # items(), which returns a new
                                # independent list containing the
                                # (key,value) pairs.
                                # NOTE(review): signature equality also
                                # compares paths, so a match under a
                                # different old path looks unreachable
                                # as written - confirm.
                                for (oldpath, oldsig) in manifest.items():
                                    if oldsig == newsig:
                                        if oldpath in removed:
                                            op = FileOp(repo)
                                            op.construct(b'R', oldpath, path)
                                            commit.append_operation(op)
                                            del manifest[oldpath]
                                            # The rename accounts for the
                                            # old path; leaving it in
                                            # removed would make the delete
                                            # loop below fail on a path
                                            # that is already gone.
                                            removed.discard(oldpath)
                                        elif oldpath != path:
                                            # Use the repo argument: this
                                            # class has no repo attribute.
                                            op = FileOp(repo)
                                            op.construct(b'C', oldpath, path)
                                            commit.append_operation(op)
                                        break
                                else:
                                    op = FileOp(repo)
                                    op.construct(b'M',
                                                 newsig.perms,
                                                 self.hash_to_mark[newsig.hashval],
                                                 path)
                                    commit.append_operation(op)
                        else:
                            # Content hash doesn't match any existing blobs
                            if debug_enable(DEBUG_EXTRACT):
                                announce(b"r%s: %s has new hash" \
                                         % (revision, path))
                            blobmark = self.__newmark()
                            self.hash_to_mark[newsig.hashval] = blobmark
                            # Actual content enters the representation
                            blob = Blob(repo)
                            blob.set_mark(blobmark)
                            shutil.copyfile(path, blob.blobfile(create=True))
                            blob.pathlist.append(path)
                            repo.addEvent(blob)
                            # Its new fileop is added to the commit
                            op = FileOp(repo)
                            op.construct(b'M', newsig.perms, blobmark, path)
                            commit.append_operation(op)
                        manifest[path] = newsig
                    for tbd in removed:
                        op = FileOp(repo)
                        op.construct(b'D', tbd)
                        commit.append_operation(op)
                        del manifest[tbd]
                self.extractor.cleanup(revision, True)
                # A parentless commit off master gets a reset ahead of it.
                if not parents and commit.branch != "refs/heads/master":
                    reset = Reset(repo)
                    reset.ref = commit.branch
                    repo.addEvent(reset)
                commit.sort_operations()
                commit.legacy_id = revision
                commit.properties.update(self.extractor.get_properties(revision))
                commit.set_mark(self.__newmark())
                self.commit_map[revision] = commit
                if debug_enable(DEBUG_EXTRACT):
                    announce(b"r%s: gets mark %s (%d ops)" % (revision, commit.mark, len(commit.operations())))
                repo.addEvent(commit)
            # Now append reset objects
            for (resetname, revision) in sorted(self.extractor.iter_resets(),
                                                key=operator.itemgetter(1)):
                # FIXME: what if revision is unknown ? keep previous behavior for now
                reset = Reset(repo, target=self.commit_map[revision])
                reset.ref = resetname
                repo.addEvent(reset)
            # Last, append tag objects.
            for tag in sorted(self.extractor.get_taglist(),
                              key=operator.attrgetter(b"tagger.date")):
                # Hashes produced by the GitExtractor are turned into proper
                # committish marks here.
                c = self.commit_map.get(tag.committish)
                if c is None:
                    # FIXME: we should probably error here, keep previous
                    # behavior for now
                    tag.remember(repo, committish=None)
                else:
                    tag.remember(repo, target=c)
                repo.addEvent(tag)
            self.extractor.post_extract(repo)
        repo.vcs = self.extractor.vcstype
        return repo

# Stream parsing
#
# The Subversion dumpfile format is documented at
#
# https://svn.apache.org/repos/asf/subversion/trunk/notes/dump-load-format.txt

# Use numeric codes rather than (un-interned) strings
# to reduce working-set size.
# Node kinds
SD_NONE, SD_FILE, SD_DIR = range(3)
# Node actions
SD_ADD, SD_DELETE, SD_CHANGE, SD_REPLACE = range(4)

class StreamParser:
    "Parse a fast-import stream or Subversion dump to populate a Repository."
    class NodeAction(object):
        "State recorded for one node of a Subversion dump revision."
        __slots__ = ("revision", "path", "kind", "action",
                     "from_rev", "from_path", "content_hash",
                     "from_hash", "blob", "props",
                     "from_set", "blobmark", "generated")
        # If these don't match the constants above, havoc will ensue
        ActionValues = (b"add", b"delete", b"change", b"replace")
        PathTypeValues = (b"none", b"file", b"dir", b"ILLEGAL-TYPE")
        def __init__(self):
            "Start with every field unset."
            # These are set during parsing
            self.revision = self.path = None
            self.kind = SD_NONE
            self.action = None
            self.from_rev = self.from_path = None
            self.content_hash = self.from_hash = None
            self.blob = self.props = None
            # These are set during the analysis phase
            self.from_set = self.blobmark = None
            self.generated = False
        def __str__(self):
            "Summarize the node on one line; unset optional parts are left out."
            cls = StreamParser.NodeAction
            # Prefer dict's repr() to OrderedDict's verbose one
            if isinstance(self.props, dict):
                render = dict.__repr__
            else:
                render = repr
            if self.action is None:
                action = b"ILLEGAL-ACTION"
            else:
                action = cls.ActionValues[self.action]
            # Any falsy kind, SD_NONE included, selects the last entry
            kind = cls.PathTypeValues[self.kind or -1]
            origin = sources = flag = properties = b""
            if self.from_rev:
                origin = b" from=%s~%s" % (self.from_rev, self.from_path)
            if self.from_set:
                sources = b" sources=%s" % self.from_set
            if self.generated:
                flag = b" generated"
            if self.props:
                properties = b" properties=%s" % render(self.props)
            return b"<NodeAction: r{rev} {action} {kind} '{path}'" \
                    b"{from_rev}{from_set}{generated}{props}>".format(
                    rev = self.revision,
                    action = action,
                    kind = kind,
                    path = self.path,
                    from_rev = origin,
                    from_set = sources,
                    generated = flag,
                    props = properties)
    class RevisionRecord(object):
        "Record for one revision: its nodes and its props."
        __slots__ = ("nodes", "props")
        def __init__(self, nodes, props):
            "Store both values as given."
            self.nodes, self.props = nodes, props
    # Subversion properties listed here are suppressed; other native
    # ones, svn:externals for instance, are not.  The point of the
    # suppression is to avoid a huge volume of junk file properties -
    # cvs2svn in particular generates them like mad - while letting
    # through other properties that might carry useful information.
    IgnoreProperties = {
        # We special-case these three elsewhere
        b"svn:executable",
        b"svn:ignore",
        b"svn:special",
        # Don't want to suppress this one, but cvs2svn floods it.
        b"svn:eol-style",
        # The rest
        b"svn:keywords",
        b"svn:mime-type",
        b"svn:needs-lock",
        }
    # Each pattern captures the last single-quoted name in the comment
    cvs2svn_tag_re = re.compile(
        b"This commit was manufactured by cvs2svn to create tag.*'([^']*)'")
    cvs2svn_branch_re = re.compile(
        b"This commit was manufactured by cvs2svn to create branch.*'([^']*)'")
    SplitSep = '.'
    def __init__(self, repo):
        self.repo = repo
        self.fp = None
        self.import_line = 0
        self.markseq = 0
        self.ccount = 0
        self.linebuffers = []
        self.warnings = []
        # Everything below here is Subversion-specific
        self.branches = {}
        self.branchlink = {}
        self.branchdeletes = set()
        self.branchcopies = set()
        self.generated_deletes = []
        self.revisions = collections.OrderedDict()
        self.copycounts = collections.OrderedDict()
        self.hashmap = {}
        self.permissions = {}
        self.fileop_branchlinks  = set()
        self.directory_branchlinks  = set()
        self.active_gitignores = {}
    def error(self, msg):
        "Abort parsing: raise Fatal carrying msg and the current line number."
        where = repr(self.import_line)
        raise Fatal(msg + b" at line " + where)
    def warn(self, msg):
        "Complain about msg, citing the line number when it is nonzero."
        if not self.import_line:
            complain(msg)
            return
        complain(msg + b" at line " + repr(self.import_line))
    def gripe(self, msg):
        "Complain at once when verbose is 2 or more, otherwise queue msg."
        if verbose >= 2:
            complain(msg)
        else:
            self.warnings.append(msg)
    def __newmark(self):
        self.markseq += 1
        mark = b":" + str(self.markseq)
        return mark
    def readline(self):
        if self.linebuffers:
            line = self.linebuffers.pop()
        else:
            line = self.fp.readline()
        self.ccount += len(line)
        self.import_line += 1
        return line
    def tell(self):
        "Return the current read offset in the source stream."
        try:
            return self.fp.tell()
        except IOError:
            return None
    def pushback(self, line):
        self.ccount -= len(line)
        self.import_line -= 1
        self.linebuffers.append(line)
    # Helpers for import-stream files
    def fi_readline(self):
        "Read a line, stashing comments as we go."
        while True:
            line = self.readline()
            if line and line.startswith(b"#"):
                self.repo.addEvent(Passthrough(line))
                if line.startswith(b"#reposurgeon"):
                    # Extension command generated by some exporter's
                    # --reposurgeon mode.
                    fields = line.split()
                    if fields[1] == "sourcetype" and len(fields) == 3:
                        self.repo.hint(fields[2], strong=True)
                continue
            else:
                return line
    def fi_read_data(self, line=None):
        "Read a fast-import data section."
        if not line:
            line = self.fi_readline()
        if line.startswith(b"data <<"):
            delim = line[7:]
            data = b""
            start = self.tell()
            while True:
                dataline = self.readline()
                if dataline == delim:
                    break
                elif not dataline:
                    raise Fatal(b"EOF while reading blob")
                else:
                    data += dataline
        elif line.startswith(b"data"):
            try:
                count = int(line[5:])
                start = self.tell()
                data = self.fp.read(count)
            except ValueError:
                self.error(b"bad count in data")
        elif line.startswith(b"property"):
            line = line[9:]                     # Skip this token
            line = line[line.index(b" "):]      # Skip the property name
            nextws = line.index(b" ")
            count = int(line[:nextws-1])
            start = self.tell()
            data = line[nextws:] + self.fp.read(count)
        else:
            self.error(b"malformed data header %s" % repr(line))
        line = self.readline()
        if line != b'\n':
            self.pushback(line) # Data commands optionally end with LF
        return (data, start)
    def fi_parse_fileop(self, fileop):
        # Read a fast-import fileop
        if fileop.ref[0] == b':':
            pass
        elif fileop.ref == 'inline':
            fileop.inline = self.fi_read_data()[0]
        else:
            self.error(b"unknown content type in filemodify")
    # Helpers for Subversion dumpfiles
    @staticmethod
    def sd_body(line):
        # Parse the body from a Subversion header line
        return line.split(b":", 1)[1].strip()
    def sd_require_header(self, hdr):
        # Consume a required header line
        line = self.readline()
        self.ccount += len(line)
        if not line.startswith(hdr):
            self.error(b'required %s header missing' % hdr)
        return StreamParser.sd_body(line)
    def sd_require_spacer(self):
        line = self.readline()
        if line.strip():
            self.error(b'found %s expecting blank line' % repr(line))
    def sd_read_blob(self, length):
        # Read a Subversion file-content blob.
        content = self.fp.read(length)
        if self.fp.read(1) != b'\n':
            self.error(b"EOL not seen where expected, Content-Length incorrect")
        self.import_line += content.count(b'\n') + 1
        self.ccount += len(content) + 1
        return content
    def sd_read_props(self, target, checklength):
        """Parse a Subversion properties section, return as an OrderedDict.

        target names what the properties belong to and is used only in
        debug output.  checklength is the declared length of the
        section: ccount is reset to zero and input is consumed until it
        reaches checklength or a PROPS-END line is seen.  Each 'K'
        record must be followed by a 'V' record; lines of any other
        kind are skipped.  Raises Fatal if PROPS-END arrives early, a
        value record is missing, or the input ends inside the section.
        """
        props = collections.OrderedDict()
        self.ccount = 0
        while self.ccount < checklength:
            line = self.readline()
            if debug_enable(DEBUG_SVNPARSE):
                announce(b"readprops, line %d: %s" % \
                         (self.import_line, repr(line)))
            if not line:
                # End of input never advances ccount, so without this
                # check a truncated dump would loop here forever.
                self.error(b"unexpected EOF in properties section")
            elif line.startswith(b"PROPS-END"):
                # This test should be !=, but I get random off-by-ones from
                # real dumpfiles - I don't know why.
                if self.ccount < checklength:
                    self.error(b"expected %d property chars, got %d"\
                               % (checklength, self.ccount))
                break
            elif not line.strip():
                continue
            elif line.startswith(b"K"):
                key = self.sd_read_blob(int(line.split()[1]))
                line = self.readline()
                # startswith() also copes with an empty line at EOF
                if not line.startswith(b'V'):
                    self.error(b"property value garbled")
                value = self.sd_read_blob(int(line.split()[1]))
                props[key] = value
                if debug_enable(DEBUG_SVNPARSE):
                    announce(b"readprops: on %s, setting %s = %s"\
                             % (target, key, repr(value)))
        return props
    #
    # The main event
    #
    def fast_import(self, fp, options, progress=False):
        "Initialize the repo from a fast-import stream or Subversion dump."
        self.repo.makedir()
        self.repo.timings = [(b"start", time.time())]
        try:
            self.fp = fp
            # Optimization: if we're reading from a plain file,
            # no need to clone all the blobs.
            if os.path.isfile(self.fp.name):
                # We can't just pass the input file object here, it
                # leads to bad results when fast_import is called
                # within a with clause.  Alas, this is a potential
                # resource leak.
                self.repo.seekstream = open(self.fp.name)
            with Baton(b"reposurgeon: from %s" % os.path.relpath(fp.name), enable=progress) as baton:
                self.import_line = self.repo.legacy_count = 0
                self.linebuffers = []
                # First, determine the input type
                line = self.readline()
                if line.startswith(b"SVN-fs-dump-format-version: "):
                    if StreamParser.sd_body(line) not in (b"1", b"2"):
                        raise Fatal(b"unsupported dump format version %s" \
                                    % StreamParser.sd_body(line))
                    # Beginning of Subversion dump parsing
                    while True:
                        line = self.readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith(b"UUID:"):
                            self.repo.uuid = StreamParser.sd_body(line)
                        elif line.startswith(b"Revision-number: "):
                            # Begin Revision processing
                            baton.twirl()
                            if debug_enable(DEBUG_SVNPARSE):
                                announce(b"revision parsing, line %d: begins" % \
                                     (self.import_line))
                            revision = StreamParser.sd_body(line)
                            plen = int(self.sd_require_header(b"Prop-content-length"))
                            self.sd_require_header(b"Content-length")
                            self.sd_require_spacer()
                            props = self.sd_read_props(b"commit", plen)
                            # Parsing of the revision header is done
                            node = None # pacify pylint
                            nodes = []
                            in_header = False
                            plen = tlen = -1
                            # Node list parsing begins
                            while True:
                                line = self.readline()
                                if debug_enable(DEBUG_SVNPARSE):
                                    announce(b"node list parsing, line %d: %s" % \
                                             (self.import_line, repr(line)))
                                if not line:
                                    break
                                elif not line.strip():
                                    if not in_header:
                                        continue
                                    else:
                                        if plen > -1:
                                            node.props = self.sd_read_props(node.path, plen)
                                        if tlen > -1:
                                            start = self.tell()
                                            # This is a crock. It is
                                            # justified only by the fact that
                                            # we get None back from self.tell()
                                            # only when the parser input is
                                            # coming from an inferior process
                                            # rather than a file. In this case
                                            # the start offset can be any random
                                            # garbage, because we'll never try
                                            # to use it for seeking blob
                                            # content.
                                            if start is None: start = 0
                                            text = self.sd_read_blob(tlen)
                                            node.blob = Blob(self.repo)
                                            # Ugh - cope with strange
                                            # undocumented Subversion format
                                            # for storing links.  Apparently the
                                            # dumper puts 'link ' in front of
                                            # the path and the loader (or at
                                            # least git-svn) removes it.
                                            if node.props and "svn:special" in node.props:
                                                if text.startswith(b"link "):
                                                    node.blob.set_content(
                                                        text[5:], start+5)
                                                else:
                                                    # Don't know if this will
                                                    # ever happen.  Best to fail
                                                    # loudly...
                                                    self.error(b"unexpected link prefix in %s" % repr(text))
                                            else:
                                                node.blob.set_content(text, start)
                                        node.revision = revision
                                        nodes.append(node)
                                        in_header = False
                                elif line.startswith(b"Revision-number: "):
                                    self.pushback(line)
                                    break
                                # Node processing begins
                                elif line.startswith(b"Node-path: "):
                                    node = StreamParser.NodeAction()
                                    node.path = StreamParser.sd_body(line)
                                    plen = tlen = -1
                                    in_header = True
                                elif line.startswith(b"Node-kind: "):
                                    node.kind = StreamParser.sd_body(line)
                                    node.kind = StreamParser.NodeAction.PathTypeValues.index(node.kind)
                                    if node.kind is None:
                                        self.error(b"unknown kind %s"%node.kind)
                                elif line.startswith(b"Node-action: "):
                                    node.action = StreamParser.sd_body(line)
                                    node.action = StreamParser.NodeAction.ActionValues.index(node.action)

                                    if node.action is None:
                                        self.error(b"unknown action %s" \
                                                   % node.action)
                                elif line.startswith(b"Node-copyfrom-rev: "):
                                    node.from_rev = StreamParser.sd_body(line)
                                elif line.startswith(b"Node-copyfrom-path: "):
                                    node.from_path = StreamParser.sd_body(line)
                                elif line.startswith(b"Text-copy-source-md5: "):
                                    node.from_hash = StreamParser.sd_body(line)
                                elif line.startswith(b"Text-content-md5: "):
                                    node.content_hash = StreamParser.sd_body(line)
                                elif line.startswith(b"Text-content-sha1: "):
                                    continue
                                elif line.startswith(b"Text-content-length: "):
                                    tlen = int(StreamParser.sd_body(line))
                                elif line.startswith(b"Prop-content-length: "):
                                    plen = int(StreamParser.sd_body(line))
                                elif line.startswith(b"Content-length: "):
                                    continue
                                else:
                                    if debug_enable(DEBUG_SVNPARSE):
                                        announce(b"node list parsing, line %d: uninterpreted line %s" % \
                                             (self.import_line, repr(line)))
                                    continue
                                # Node processing ends
                            # Node list parsing ends
                            self.revisions[revision] = StreamParser.RevisionRecord(nodes, props)
                            self.repo.legacy_count += 1
                            if debug_enable(DEBUG_SVNPARSE):
                                announce(b"revision parsing, line %d: ends" % \
                                         (self.import_line))
                            # End Revision processing
                    # End of Subversion dump parsing
                    self.repo.timings.append((b"parsing", time.time()))
                    self.svn_process(options, baton)
                    elapsed = time.time() - baton.time
                    baton.twirl(b"%d svn revisions (%d/s)" %
                                 (self.repo.legacy_count,
                                  int(self.repo.legacy_count/elapsed)))
                else:
                    self.pushback(line)
                    # Beginning of fast-import stream parsing
                    commitcount = 0
                    while True:
                        line = self.fi_readline()
                        if not line:
                            break
                        elif not line.strip():
                            continue
                        elif line.startswith(b"blob"):
                            blob = Blob(self.repo)
                            line = self.fi_readline()
                            if line.startswith(b"mark"):
                                blob.set_mark(line[5:].strip())
                                (blobcontent, blobstart) = self.fi_read_data()
                                # Parse CVS and Subversion $-headers
                                # There'd better not be more than one of these.
                                for m in re.finditer(r"\$Id *:[^$]+\$",
                                                     blobcontent):
                                    fields = m.group(0).split()
                                    if len(fields) < 2:
                                        self.gripe(b"malformed $-cookie '%s'" % m.group(0))
                                    else:
                                        # Save file basename and CVS version
                                        if fields[1].endswith(b",v"):
                                            # CVS revision
                                            blob.cookie = (fields[1][:-2], fields[2])
                                            self.repo.hint(("$Id$", "cvs"))
                                        else:
                                            # Subversion revision
                                            blob.cookie = fields[1]
                                            if self.repo.hint(("$Id$", "svn")):
                                                announce("$Id$ header hints at svn.")
                                for m in re.finditer(r"\$Revision *: *([^$]*)\$",
                                                     blobcontent):
                                    rev = m.group(0).strip()
                                    if b'.' not in rev:
                                        # Subversion revision
                                        blob.cookie = rev
                                        if self.repo.hint(("$Revision$", "svn")):
                                            announce("$Revision$ header hints at svn.")
                                blob.set_content(blobcontent, blobstart)
                            else:
                                self.error(b"missing mark after blob")
                            self.repo.addEvent(blob)
                            baton.twirl()
                        elif line.startswith(b"data"):
                            self.error(b"unexpected data object")
                        elif line.startswith(b"commit"):
                            baton.twirl()
                            commitbegin = self.import_line
                            commit = Commit(self.repo)
                            commit.set_branch(line.split()[1])
                            while True:
                                line = self.fi_readline()
                                if not line:
                                    break
                                elif line.startswith(b"#legacy-id"):
                                    # reposurgeon extension, expected to
                                    # be immediately after "commit" if present
                                    commit.legacy_id = line.split()[1]
                                    if self.repo.vcs:
                                        self.repo.legacy_map[self.repo.vcs.name.upper() + ":" + commit.legacy_id] = commit
                                    else:
                                        self.repo.legacy_map[commit.legacy_id] = commit
                                elif line.startswith(b"mark"):
                                    commit.set_mark(line[5:].strip())
                                elif line.startswith(b"author"):
                                    try:
                                        commit.authors.append(Attribution(line[7:]))
                                    except ValueError:
                                        self.error(b"malformed author line")
                                elif line.startswith(b"committer"):
                                    try:
                                        commit.committer = Attribution(line[10:])
                                    except ValueError:
                                        self.error(b"malformed committer line")
                                elif line.startswith(b"property"):
                                    fields = line.split(b" ")
                                    if len(fields) < 3:
                                        self.error(b"malformed property line")
                                    elif len(fields) == 3:
                                        commit.properties[fields[1]] = True
                                    else:
                                        name = fields[1]
                                        length = int(fields[2])
                                        value = " ".join(fields[3:])
                                        if len(value) < length:
                                            value += fp.read(length-len(value))
                                            if fp.read(1) != b'\n':
                                                self.error(b"trailing junk on property value")
                                        elif len(value) == length + 1:
                                            value = value[:-1] # Trim b'\n'
                                        else:
                                            value += self.fp.read(length - len(value))
                                            assert self.fp.read(1) == b'\n'
                                        commit.properties[name] = value
                                        # Generated by cvs-fast-export
                                        if name == "cvs-revisions":
                                            self.repo.hint("cvs",strong=True)
                                            announce("cvs_revisions property hints at CVS.")
                                            for line in value.split(b'\n'):
                                                if line:
                                                    self.repo.legacy_map["CVS:"+line] = commit
                                elif line.startswith(b"data"):
                                    commit.comment = self.fi_read_data(line)[0]
                                    if global_options[b"canonicalize"]:
                                        commit.comment = commit.comment.strip().replace(b"\r\n", b"\n") + b'\n'
                                elif line.startswith(b"from") or line.startswith(b"merge"):
                                    mark = line.split()[1]
                                    if Commit.is_callout(mark):
                                        commit.add_callout(mark)
                                    else:
                                        commit.add_parent(mark)
                                # Handling of file ops begins.
                                elif line[0] in (b"C", b"D", b"R"):
                                    commit.append_operation(FileOp(self.repo).parse(line))
                                elif line == b"deleteall\n":
                                    commit.append_operation(FileOp(self.repo).parse(b"deleteall"))
                                elif line[0] == b"M":
                                    fileop = FileOp(self.repo).parse(line)
                                    if fileop.ref != 'inline':
                                        try:
                                            self.repo.objfind(fileop.ref).pathlist.append(fileop.path)
                                        except AttributeError:
                                            # Crap out on anything but a
                                            # submodule link.
                                            if fileop.mode != b"160000":
                                                self.error(b"ref %s could not be resolved" % fileop.ref)
                                    commit.append_operation(fileop)
                                    if fileop.mode == b"160000":
                                        # This is a submodule link.  The ref
                                        # field is a SHA1 hash and the path
                                        # is an external reference name.
                                        # Don't try to collect data, just pass
                                        # it through.
                                        self.warn(b"submodule link")
                                    else:
                                        # 100644, 100755, 120000.
                                        self.fi_parse_fileop(fileop)
                                elif line[0] == b"N":
                                    fileop = FileOp(self.repo).parse(line)
                                    commit.append_operation(fileop)
                                    self.fi_parse_fileop(fileop)
                                    self.repo.inlines += 1
                                # Handling of file ops ends.
                                elif line.isspace():
                                    # This handles slightly broken
                                    # exporters like the bzr-fast-export
                                    # one that may tack an extra LF onto
                                    # the end of data objects.  With it,
                                    # we don't drop out of the
                                    # commit-processing loop until we see
                                    # a *nonblank* line that doesn't match
                                    # a commit subpart.
                                    continue
                                else:
                                    # Dodgy bzr autodetection hook...
                                    if not self.repo.vcs:
                                        if b"branch-nick" in commit.properties:
                                            self.repo.hint("bzr", strong=True)
                                    self.pushback(line)
                                    break
                            if not (commit.mark and commit.committer):
                                self.import_line = commitbegin
                                self.error(b"missing required fields in commit")
                            if commit.mark is None:
                                self.warn(b"unmarked commit")
                            self.repo.addEvent(commit)
                            commitcount += 1
                            baton.twirl()
                        elif line.startswith(b"reset"):
                            reset = Reset(self.repo)
                            reset.ref = line[6:].strip()
                            line = self.fi_readline()
                            if line.startswith(b"from"):
                                reset.remember(self.repo, committish=line[5:].strip())
                            else:
                                self.pushback(line)
                            self.repo.addEvent(reset)
                            baton.twirl()
                        elif line.startswith(b"tag"):
                            tagger = None
                            tagname = line[4:].strip()
                            line = self.fi_readline()
                            if line.startswith(b"from"):
                                referent = line[5:].strip()
                            else:
                                self.error(b"missing from after tag")
                            line = self.fi_readline()
                            if line.startswith(b"tagger"):
                                try:
                                    tagger = Attribution(line[7:])
                                except ValueError:
                                    self.error(b"malformed tagger line")
                            else:
                                self.warn(b"missing tagger after from in tag")
                                self.pushback(line)
                            self.repo.addEvent(Tag(repo = self.repo,
                                                   name = tagname,
                                                   committish = referent,
                                                   tagger = tagger,
                                                   comment = self.fi_read_data()[0]))
                            baton.twirl()
                        else:
                            # Simply pass through any line we don't understand.
                            self.repo.addEvent(Passthrough(line))
                    # End of fast-import parsing
                    self.repo.timings.append((b"parsing", time.time()))
                    if self.repo.stronghint:
                        baton.twirl("%d %s commits" % (commitcount, self.repo.vcs.name))
                    else:
                        baton.twirl("%d commits" % commitcount)
                self.import_line = 0
                if not self.repo.events:
                    raise Recoverable(b"ignoring empty repository")
            if self.warnings:
                for warning in self.warnings:
                    complain(warning)
        except KeyboardInterrupt:
            nuke(self.repo.subdir(), b"reposurgeon: import interrupted, removing %s" % self.repo.subdir())
            raise KeyboardInterrupt
    #
    # The rendezvous between parsing and object building for import
    # streams is pretty trivial and best done inline in the parser
    # because reposurgeon's internal structures are designed to match
    # those entities. For Subversion dumpfiles, on the other hand,
    # there's a fair bit of impedance-matching required.  That happens
    # in the following functions.
    #
    @staticmethod
    def node_permissions(node):
        "Pick the fileop mode implied by a node's Subversion properties."
        properties = node.props
        # A node without a (non-empty) property section is a plain file.
        if not properties:
            return 0o100644
        # Checked in priority order: the executable bit wins over the
        # special marker.  A special node is mapped to a git symlink,
        # which behaves the same way; the blob content is then the path
        # the link should resolve to.
        for (propname, mode) in ((b"svn:executable", 0o100755),
                                 (b"svn:special", 0o120000)):
            if propname in properties:
                return mode
        return 0o100644
    def branchpath(self, path):
        "Return path with its leading branch-root prefix removed."
        # Only try to strip when some branches are known and the path
        # actually has a directory component; otherwise hand the path
        # back untouched.
        if self.branches and path.count(os.sep) != 0:
            # Take the first known branch root that prefixes the path.
            candidates = (root for root in self.branches
                          if path.startswith(root))
            prefix = next(candidates, None)
            if prefix is None:
                raise Fatal(b"couldn't assign %s to a branch in %s" \
                            % (path, self.branches.keys()))
            return path[len(prefix):]
        return path
    def svn_process(self, options, baton):
        "Subversion actions to import-stream commits."
        self.repo.addEvent(Passthrough("#reposurgeon sourcetype svn\n"))
        # Find all copy sources and compute the set of branches
        if debug_enable(DEBUG_EXTRACT):
            announce(b"Pass 1")
        nobranch = b'--nobranch' in options
        copynodes = []
        for (revision, record) in self.revisions.iteritems():
            for node in record.nodes:
                if node.from_path is not None:
                    copynodes.append(node)
                    if debug_enable(DEBUG_EXTRACT):
                        announce(b"copynode at %s" % node)
                if node.action == SD_ADD and node.kind == SD_DIR and not node.path+os.sep in self.branches and not nobranch:
                    for trial in global_options[b'svn_branchify']:
                        if b'*' not in trial and trial == node.path:
                            self.branches[node.path+os.sep] = None
                        elif trial.endswith(os.sep + b'*') \
                                 and os.path.dirname(trial) == os.path.dirname(node.path) \
                                 and not node.path + os.sep + b'*' in global_options[b'svn_branchify']:
                            self.branches[node.path+os.sep] = None
                        elif trial == b'*' and not node.path + os.sep + b'*' in global_options[b'svn_branchify'] and node.path.count(os.sep) < 1:
                            self.branches[node.path+os.sep] = None
                    if node.path+os.sep in self.branches and debug_enable(DEBUG_TOPOLOGY):
                        announce(b"%s recognized as a branch" % node.path+os.sep)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        copynodes.sort(key=operator.attrgetter(b"from_rev"))
        self.repo.timings.append([b"copynodes", time.time()])
        baton.twirl()
        # Build filemaps.
        if debug_enable(DEBUG_EXTRACT):
            announce(b"Pass 2")
        filemaps = {}
        filemap = PathMap()
        for (revision, record) in self.revisions.iteritems():
            for node in record.nodes:
                # Mutate the filemap according to copies
                if node.from_rev:
                    assert int(node.from_rev) < int(revision)
                    filemap.copy_from(node.path, filemaps[node.from_rev],
                                      node.from_path)
                    if debug_enable(DEBUG_FILEMAP):
                        announce(b"r%s~%s copied to %s" \
                                 % (node.from_rev, node.from_path, node.path))
                # Mutate the filemap according to adds/deletes/changes
                if node.action == SD_ADD and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce(b"r%s~%s added" % (node.revision, node.path))
                elif node.action == SD_DELETE:
                    if node.kind == SD_NONE:
                        node.kind = SD_FILE if node.path in filemap else SD_DIR
                    # Snapshot the deleted paths before removing them.
                    node.from_set = PathMap()
                    node.from_set.copy_from(node.path, filemap, node.path)
                    del filemap[node.path]
                    if debug_enable(DEBUG_FILEMAP):
                        announce(b"r%s~%s deleted" \
                                 % (node.revision, node.path))
                elif node.action in (SD_CHANGE, SD_REPLACE) and node.kind == SD_FILE:
                    filemap[node.path] = node
                    if debug_enable(DEBUG_FILEMAP):
                        announce(b"r%s~%s changed" % (node.revision, node.path))
            filemaps[revision] = filemap.snapshot()
            baton.twirl()
        del filemap
        self.repo.timings.append([b"filemaps", time.time()])
        baton.twirl()
        # Blows up huge on large repos...
        #if debug_enable(DEBUG_FILEMAP):
        #    announce(b"filemaps %s" % filemaps)
        # Build from sets in each directory copy record.
        if debug_enable(DEBUG_EXTRACT):
            announce(b"Pass 3")
        for copynode in copynodes:
            if debug_enable(DEBUG_FILEMAP):
                announce(b"r%s copynode filemap is %s" \
                         % (copynode.from_rev, filemaps[copynode.from_rev]))
            copynode.from_set = PathMap()
            copynode.from_set.copy_from(copynode.from_path,
                                        filemaps[copynode.from_rev],
                                        copynode.from_path)
            # Sanity check: if the directory node has no from set, but
            # there are files underneath it, this means the directory
            # structure implied by the filemaps is not consistent with
            # what's in the parsed Subversion nodes.  This should never
            # happen.
            if not copynode.from_set and \
                    any(filemaps[copynode.revision].ls_R(node.path)):
                self.gripe(b"inconsistently empty from set for %s" % copynode)
            baton.twirl()
        self.repo.timings.append([b"copysets", time.time()])
        baton.twirl()
        # Build commits
        # This code can eat your processor, so we make it give up
        # its timeslice at reasonable intervals. Needed because
        # it doesn't hit the disk.
        if debug_enable(DEBUG_EXTRACT):
            announce(b"Pass 4")
        split_commits = {}
        def last_relevant_commit(max_rev, path,
                                 getbranch = operator.attrgetter(b"branch")):
            # Make path look like a branch
            if path[0] == b"/": path = path[1:]
            if path[-1] != os.sep: path = path + os.sep
            # If the revision is split, try from the last split commit
            try:
                max_rev = split_commits[max_rev]
            except KeyError:
                pass
            # Find the commit object...
            try:
                obj = self.repo.legacy_map["SVN:%s" % max_rev]
            except KeyError:
                return None
            # ...then iterate backwards from there...
            past_events = (self.repo.events[i] for i in
                    range(self.repo.index(obj), -1, -1))
            # ... finding branches of commits...
            commit_branch = ((e, getbranch(e))
                             for e in past_events
                             if isinstance(e, Commit))
            # ...to find one on the right branch.
            return next((e for (e, branch) in commit_branch
                         if branch and path.startswith(branch)),
                        None)
        previous = None
        for (revision, record) in self.revisions.iteritems():
            if debug_enable(DEBUG_EXTRACT):
                announce(b"Revision %s:" % revision)
            for node in record.nodes:
                # if node.props is None, no property section.
                # if node.blob is None, no text section.
                try:
                    assert node.action in (SD_CHANGE, SD_ADD, SD_DELETE, SD_REPLACE)
                    assert node.blob is not None or \
                           node.props is not None or \
                           node.from_rev or \
                           node.action in (SD_ADD, SD_DELETE)
                    assert (node.from_rev is None) == (node.from_path is None)
                    assert node.kind in (SD_FILE, SD_DIR)
                    assert node.kind != SD_NONE or node.action == SD_DELETE
                    assert node.action in (SD_ADD, SD_REPLACE) or not node.from_rev
                except AssertionError:
                    raise Fatal(b"forbidden operation in dump stream at r%s: %s" \
                                % (revision, node))
            commit = Commit(self.repo)
            try:
                ad = record.props.pop(b"svn:date")
            except KeyError as key:
                self.error(b"missing required %s" % key)
            if "svn:author" in record.props:
                au = record.props.pop(b"svn:author")
            else:
                au = b"no-author"
            if b"svn:log" in record.props:
                commit.comment = record.props.pop(b"svn:log")
                if not commit.comment.endswith(b"\n"):
                    commit.comment += b"\n"
            if '--use-uuid' in options:
                attribution = b"%s <%s@%s> %s" % (au, au, self.repo.uuid, ad)
            else:
                attribution = b"%s <%s> %s" % (au, au, ad)
            commit.committer = Attribution(attribution)
            commit.properties.update(record.props)
            # Zero revision is never interesting - no operations, no
            # comment, no author, it's just a start marker for a
            # non-incremental dump.
            if revision == "0":
                continue
            expanded_nodes = []
            has_properties = set()
            for (n, node) in enumerate(record.nodes):
                if debug_enable(DEBUG_EXTRACT):
                    announce(b"r%s:%d: %s" % (revision, n+1, node))
                elif node.kind == SD_DIR \
                         and node.action != SD_CHANGE \
                         and debug_enable(DEBUG_TOPOLOGY):
                    announce(str(node))
                # Handle per-path properties.
                if node.props is not None:
                    if b"cvs2svn:cvs-rev" in node.props:
                        cvskey = b"CVS:%s:%s" % (node.path,
                                                node.props["cvs2svn:cvs-rev"])
                        self.repo.legacy_map[cvskey] = commit
                        del node.props[b"cvs2svn:cvs-rev"]
                    # Remove blank lines from svn:ignore property values.
                    if b"svn:ignore" in node.props:
                        old_ignore = node.props[b"svn:ignore"]
                        ignore_lines = [line for line in old_ignore.splitlines(True) if line != b"\n"]
                        new_ignore = b"".join(ignore_lines)
                        if new_ignore == b"":
                            del node.props[b"svn:ignore"]
                        else:
                            node.props[b"svn:ignore"] = new_ignore
                    if not "--ignore-properties" in options:
                        prop_items = ((prop, val) \
                                        for (prop,val) in node.props.iteritems() \
                                        if ((prop not in StreamParser.IgnoreProperties) and not (prop == "svn:mergeinfo" and node.kind == SD_DIR)))
                        try:
                            first = next(prop_items)
                        except StopIteration:
                            if node.path in has_properties:
                                self.gripe(b"r%s~%s: properties cleared." \
                                             % (node.revision, node.path))
                                has_properties.discard(node.path)
                        else:
                            self.gripe(b"r%s~%s properties set:" \
                                                   % (node.revision, node.path))
                            for prop, val in itertools.chain((first,), prop_items):
                                self.gripe(b"\t%s = '%s'" % (prop, val))
                            has_properties.add(node.path)
                if node.kind == SD_FILE:
                    expanded_nodes.append(node)
                elif node.kind == SD_DIR:
                    # os.sep is appended to avoid collisions with path
                    # prefixes.
                    node.path += os.sep
                    if node.from_path:
                        node.from_path += os.sep
                    if node.action in (SD_ADD, SD_CHANGE):
                        if node.path in self.branches:
                            if not node.props: node.props = {}
                            startwith = next(vcs.dfltignores for vcs in vcstypes if vcs.name == "svn")
                            try:
                                ignore = startwith + \
                                         b"# The contents of the svn:ignore " \
                                         b"property on the branch root.\n" + \
                                         node.props["svn:ignore"]
                            except KeyError:
                                ignore = startwith
                            node.props[b"svn:ignore"] = ignore
                    elif node.action in (SD_DELETE, SD_REPLACE):
                        if node.path in self.branches:
                            self.branchdeletes.add(node.path)
                            expanded_nodes.append(node)
                            # The deleteall will also delete .gitignore files
                            for ignorepath in list(gi
                                        for gi in self.active_gitignores
                                        if gi.startswith(node.path)):
                                del self.active_gitignores[ignorepath]
                        else:
                            # A delete or replace with no from set
                            # can occur if the directory is empty.
                            # We can just ignore this case.
                            if node.from_set is not None:
                                for child in node.from_set:
                                    if debug_enable(DEBUG_EXTRACT):
                                        announce(b"r%s: deleting %s" \
                                                 % (revision, child))
                                    newnode = StreamParser.NodeAction()
                                    newnode.path = child
                                    newnode.revision = revision
                                    newnode.action = SD_DELETE
                                    newnode.kind = SD_FILE
                                    newnode.generated = True
                                    expanded_nodes.append(newnode)
                            # Emit delete actions for the .gitignore files we
                            # have generated. Note that even with a directory
                            # with no files from SVN, we might have added
                            # .gitignore files we now must delete.
                            for ignorepath in list(gi
                                        for gi in self.active_gitignores
                                        if gi.startswith(node.path)):
                                newnode = StreamParser.NodeAction()
                                newnode.path = ignorepath
                                newnode.revision = revision
                                newnode.action = SD_DELETE
                                newnode.kind = SD_FILE
                                newnode.generated = True
                                expanded_nodes.append(newnode)
                                del self.active_gitignores[ignorepath]
                    # Handle directory copies.  If this is a copy
                    # between branches, no fileop should be issued
                    # until there is an actual file modification on
                    # the new branch. Instead, remember that the
                    # branch root inherits the tree of the source
                    # branch and should not start with a deleteall.
                    # Exception: If the target branch has been
                    # deleted, perform a normal copy and interpret
                    # this as an ad-hoc branch merge.
                    if node.from_path:
                        branchcopy = node.from_path in self.branches \
                                         and node.path in self.branches \
                                         and node.path not in self.branchdeletes
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce(b"r%s: directory copy to %s from " \
                                     b"r%s~%s (branchcopy %s)" \
                                     % (revision,
                                        node.path,
                                        node.from_rev,
                                        node.from_path,
                                        branchcopy))
                        # Update our .gitignore list so that it includes those
                        # in the newly created copy, to ensure they correctly
                        # get deleted during a future directory deletion.
                        l = len(node.from_path)
                        for sourcegi, value in list((gi,v) for (gi,v) in
                                    self.active_gitignores.iteritems()
                                    if gi.startswith(node.from_path)):
                            destgi = node.path + sourcegi[l:]
                            self.active_gitignores[destgi] = value
                        if branchcopy:
                            self.branchcopies.add(node.path)
                        else:
                            self.branchdeletes.discard(node.path)
                            # Generate copy ops for generated .gitignore files
                            # to match the copy of svn:ignore props on the
                            # Subversion side. We use the just updated
                            # active_gitignores dict for that purpose.
                            if '--user-ignores' not in options:
                                for gipath, ignore in list(
                                            (gi,v) for (gi,v) in
                                            self.active_gitignores.iteritems()
                                            if gi.startswith(node.path)):
                                    blob = Blob(self.repo)
                                    blob.set_content(ignore)
                                    subnode = StreamParser.NodeAction()
                                    subnode.path = gipath
                                    subnode.revision = revision
                                    subnode.action = SD_ADD
                                    subnode.kind = SD_FILE
                                    subnode.blob = blob
                                    subnode.content_hash = \
                                            hashlib.md5(ignore).hexdigest()
                                    subnode.generated = True
                                    expanded_nodes.append(subnode)
                            # Now generate copies for all files in the source
                            for source in node.from_set:
                                lookback = filemaps[node.from_rev][source]
                                if lookback is None:
                                    raise Fatal(b"r%s: can't find ancestor %s" \
                                             % (revision, source))
                                subnode = StreamParser.NodeAction()
                                subnode.path = node.path + \
                                        source[len(node.from_path):]
                                subnode.revision = revision
                                subnode.from_path = lookback.path
                                subnode.from_rev = lookback.revision
                                subnode.from_hash = lookback.content_hash
                                subnode.action = SD_ADD
                                subnode.kind = SD_FILE
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce(b"r%s: generated copy r%s~%s -> %s" \
                                             % (revision,
                                                subnode.from_rev,
                                                subnode.from_path,
                                                subnode.path))
                                subnode.generated = True
                                expanded_nodes.append(subnode)
                    # Property settings can be present on either
                    # SD_ADD or SD_CHANGE actions.
                    if node.props is not None:
                        if debug_enable(DEBUG_EXTRACT):
                            announce(b"r%s: setting properties %s on %s" \
                                     % (revision, node.props, node.path))
                        # svn:ignore gets handled here,
                        if '--user-ignores' not in options:
                            if node.path == os.sep:
                                gitignore_path = b".gitignore"
                            else:
                                gitignore_path = os.path.join(node.path,
                                                              b".gitignore")
                            # There are no other directory properties that can
                            # turn into fileops.
                            ignore = node.props.get(b"svn:ignore")
                            if ignore is not None:
                                # svn:ignore properties are nonrecursive
                                # to lower directories, but .gitignore
                                # patterns are recursive.  Thus we need to
                                # anchor the translated pattern with
                                # leading / in order to render the
                                # Subversion behavior accurately.  However,
                                # if done naively this clobbers the branch-root
                                # defaults, so we need to have protected those
                                # with a leading slash and reverse the transform.
                                ignore = re.sub(b"\n(?!#)", b"\n/", b"\n" + ignore)
                                ignore = ignore.replace("\n//", "\n")
                                ignore = ignore[1:]
                                if ignore.endswith(b"/"):
                                    ignore = ignore[:-1]
                                blob = Blob(self.repo)
                                blob.set_content(ignore)
                                newnode = StreamParser.NodeAction()
                                newnode.path = gitignore_path
                                newnode.revision = revision
                                newnode.action = SD_ADD
                                newnode.kind = SD_FILE
                                newnode.blob = blob
                                newnode.content_hash = \
                                        hashlib.md5(ignore).hexdigest()
                                if debug_enable(DEBUG_IGNORES):
                                    announce(b"r%s: queuing up %s generation with:\n%s." % (revision, newnode.path, node.props["svn:ignore"]))
                                # Must append rather than simply performing.
                                # Otherwise when the property is unset we
                                # won't have the right thing happen.
                                newnode.generated = True
                                expanded_nodes.append(newnode)
                                self.active_gitignores[gitignore_path] = ignore
                            elif gitignore_path in self.active_gitignores:
                                newnode = StreamParser.NodeAction()
                                newnode.path = gitignore_path
                                newnode.revision = revision
                                newnode.action = SD_DELETE
                                newnode.kind = SD_FILE
                                if debug_enable(DEBUG_IGNORES):
                                    announce(b"r%s: queuing up %s deletion." % (revision, newnode.path))
                                newnode.generated = True
                                expanded_nodes.append(newnode)
                                del self.active_gitignores[gitignore_path]
            # Lift .cvsignore files, which we can assume are legacies
            # from a bygone era and happen to have syntax upward-compatible
            # with that of .gitignore
            for node in expanded_nodes:
                if node.path.endswith(b".cvsignore"):
                    node.path = node.path[:-len(b".cvsignore")] + ".gitignore"
            # Ugh.  Because cvs2svn is brain-dead and issues D/M pairs
            # for identical paths in generated commits, we have to remove those
            # D ops here.  Otherwise later on when we're generating ops, if
            # the M node happens to be missing its hash it will be seen as
            # unmodified and only the D will be issued.
            seen = set()
            for node in reversed(expanded_nodes):
                if node.action == SD_DELETE and node.path in seen:
                    node.action = None
                seen.add(node.path)
            # Create actions corresponding to both
            # parsed and generated nodes.
            actions = []
            ancestor_nodes = {}
            for node in expanded_nodes:
                if node.action is None: continue
                if node.kind == SD_FILE:
                    if node.action == SD_DELETE:
                        assert node.blob is None
                        fileop = FileOp(self.repo)
                        fileop.construct(b"D", node.path)
                        actions.append((node, fileop))
                        ancestor_nodes[node.path] = None
                    elif node.action in (SD_ADD, SD_CHANGE, SD_REPLACE):
                        # Try to figure out who the ancestor of
                        # this node is.
                        if node.from_path or node.from_hash:
                            # Try first via from_path
                            ancestor = filemaps[node.from_rev][node.from_path]
                            if debug_enable(DEBUG_TOPOLOGY):
                                if ancestor:
                                    announce(b"r%s~%s -> %s (via filemap)" % \
                                             (node.revision, node.path, ancestor))
                                else:
                                    announce(b"r%s~%s has no ancestor (via filemap)" % \
                                             (node.revision, node.path))
                            # Fallback on the first blob that had this hash
                            if node.from_hash and not ancestor:
                                ancestor = self.hashmap[node.from_hash]
                                if debug_enable(DEBUG_TOPOLOGY):
                                    announce(b"r%s~%s -> %s (via hashmap)" % \
                                         (node.revision, node.path, ancestor))
                            if not ancestor and not node.path.endswith(b".gitignore"):
                                self.gripe(b"r%s~%s: missing filemap node." \
                                          % (node.revision, node.path))
                        elif node.action != SD_ADD:
                            # Ordinary inheritance, no node copy.  For
                            # robustness, we don't assume revisions are
                            # consecutive numbers.
                            try:
                                ancestor = ancestor_nodes[node.path]
                            except KeyError:
                                ancestor = filemaps[previous][node.path]
                        else:
                            ancestor = None
                        # Time for fileop generation
                        if node.blob is not None:
                            if node.content_hash in self.hashmap:
                                # Blob matches an existing one -
                                # node was created by a
                                # non-Subversion copy followed by
                                # add.  Get the ancestry right,
                                # otherwise parent pointers won't
                                # be computed properly.
                                ancestor = self.hashmap[node.content_hash]
                                node.from_path = ancestor.from_path
                                node.from_rev = ancestor.from_rev
                                node.blobmark = ancestor.blobmark
                            else:
                                # An entirely new blob
                                node.blobmark = node.blob.set_mark(self.__newmark())
                                self.repo.addEvent(node.blob)
                                # Blobs generated by reposurgeon
                                # (e.g .gitignore content) have no
                                # content hash.  Don't record
                                # them, otherwise they'll all
                                # collide :-)
                                if node.content_hash:
                                    self.hashmap[node.content_hash] = node
                        elif ancestor:
                            node.blobmark = ancestor.blobmark
                        else:
                            # No ancestor, no blob. Has to be a
                            # pure property change.  There's no
                            # way to figure out what mark to use
                            # in a fileop.
                            if not node.path.endswith(b".gitignore"):
                                self.gripe(b"r%s~%s: permission information may be lost." \
                                           % (node.revision, node.path))
                            continue
                        ancestor_nodes[node.path] = node
                        assert node.blobmark
                        # Time for fileop generation
                        if ancestor:
                            perms = oldperms = self.permissions.get(ancestor.path,
                                                                    0o100644)
                        else:
                            perms = oldperms = 0o100644
                        if node.props is not None:
                            perms = self.node_permissions(node)
                        new_content = (node.blob is not None)
                        # Ignore and complain about explicit .gitignores
                        # created, e.g., by git-svn.  In an ideal world we
                        # would merge these with svn:ignore properties, but
                        # this would be hairy and bug-prone. So we give
                        # the user a heads-up and expect these to be
                        # merged by hand.
                        if new_content \
                           and not node.generated \
                           and '--user-ignores' not in options \
                           and node.path.endswith(b".gitignore"):
                            self.gripe(b"r%s~%s: user-created .gitignore ignored." \
                                       % (node.revision, node.path))
                            continue
                        # This ugly nasty guard is critically important.
                        # We need to generate a modify if:
                        # 1. There is new content.
                        # 2. This node was generated as an
                        # expansion of a directory copy.
                        # 3. The node was produced by an explicit
                        # Subversion file copy (not a directory copy)
                        # in which case it has an MD5 hash that points
                        # back to a source.
                        # 4. The permissions for this path have changed;
                        # we need to generate a modify with an old mark
                        # but new permissions.
                        generated_file_copy = node.generated
                        subversion_file_copy = (node.from_hash is not None)
                        permissions_changed = (perms != oldperms)
                        if (new_content or
                            generated_file_copy or
                            subversion_file_copy or
                            permissions_changed):
                            assert perms
                            fileop = FileOp(self.repo)
                            fileop.construct(b"M",
                                             perms,
                                             node.blobmark,
                                             node.path)
                            actions.append((node, fileop))
                            self.repo.objfind(fileop.ref).pathlist.append(node.path)
                        elif debug_enable(DEBUG_EXTRACT):
                            announce(b"r%s~%s: unmodified" % (node.revision, node.path))
                        self.permissions[node.path] = perms
                # These are directory actions.
                elif node.action in (SD_DELETE, SD_REPLACE):
                    if debug_enable(DEBUG_EXTRACT):
                        announce(b"r%s: deleteall %s" % (revision,node.path))
                    fileop = FileOp(self.repo)
                    fileop.construct(b"deleteall", node.path[:-1])
                    actions.append((node, fileop))
            # Time to generate commits from actions and fileops.
            if debug_enable(DEBUG_EXTRACT):
                announce(b"r%s: %d actions" % (revision, len(actions)))
            # First, break the file operations into branch cliques
            cliques = collections.defaultdict(list)
            lastbranch = None
            for (node, fileop) in actions:
                # Try last seen branch first
                if lastbranch and node.path.startswith(lastbranch):
                    cliques[lastbranch].append(fileop)
                    continue
                for branch in self.branches:
                    if node.path.startswith(branch):
                        cliques[branch].append(fileop)
                        lastbranch = branch
                        break
                else:
                    cliques[b""].append(fileop)
            # Make two operation lists from the cliques, sorting cliques
            # containing only branch deletes from other cliques.
            deleteall_ops = []
            other_ops = []
            for (branch, ops) in cliques.iteritems():
                if len(ops) == 1 and ops[0].op == b"deleteall":
                    deleteall_ops.append((branch, ops))
                else:
                    other_ops.append((branch, ops))
            oplist = itertools.chain(other_ops, deleteall_ops)
            # Create all commits corresponding to the revision
            newcommits = []
            commit.legacy_id = revision
            if len(other_ops) <= 1:
                # In the ordinary case, we can assign all non-deleteall fileops
                # to the base commit.
                self.repo.legacy_map[b"SVN:%s" % commit.legacy_id] = commit
                try:
                    commit.common, stage = next(oplist)
                    commit.set_operations(stage)
                    commit.invalidate_pathset_cache()
                except StopIteration:
                    commit.common = os.path.commonprefix([node.path for node in record.nodes])
                commit.set_mark(self.__newmark())
                if debug_enable(DEBUG_EXTRACT):
                    announce(b"r%s gets mark %s" % (revision, commit.mark))
                newcommits.append(commit)
            # If the commit is mixed, or there are deletealls left over,
            # handle that.
            oplist = sorted(oplist, key=operator.itemgetter(0))
            for (i, (branch, fileops)) in enumerate(oplist):
                split = commit.clone()
                split.common = branch
                # Sequence numbers for split commits are 1-origin
                split.legacy_id += StreamParser.SplitSep + str(i + 1)
                self.repo.legacy_map["SVN:%s" % split.legacy_id] = split
                split.comment += "\n[[Split portion of a mixed commit.]]\n"
                split.set_mark(self.__newmark())
                split.set_operations(fileops)
                split.invalidate_pathset_cache()
                newcommits.append(split)
            # The revision is truly mixed if there is more than one clique
            # not consisting entirely of deleteall operations.
            if len(other_ops) > 1:
                # Store the last used split id
                split_commits[revision] = split.legacy_id
            # Sort fileops according to git rules
            for newcommit in newcommits:
                newcommit.sort_operations()
            # Deduce links between branches on the basis of copies. This
            # is tricky because a revision can be the target of multiple
            # copies.  Humans don't abuse this because tracking multiple
            # copies is too hard to do in a slow organic brain, but tools
            # like cvs2svn can generate large sets of them. cvs2svn seems
            # to try to copy each file and directory from the commit
            # corresponding to the CVS revision where the file was last
            # changed before the copy (which may be substantially earlier
            # than the CVS revision corresponding to the
            # copy). Fortunately, we can resolve such sets by the simple
            # expedient of picking the *latest* revision in them!
            # No code uses the result if branch analysis is turned off.
            if not nobranch:
                for newcommit in newcommits:
                    if commit.mark in self.branchlink: continue
                    copies = [node for node in record.nodes \
                              if node.from_rev is not None \
                              and node.path.startswith(newcommit.common)]
                    if copies and debug_enable(DEBUG_TOPOLOGY):
                        announce(b"r%s: copy operations %s" %
                                     (newcommit.legacy_id, copies))
                    # If the copies include one for the directory, use that as
                    # the first parent: most of the files in the new branch
                    # will come from that copy, and that might well be a full
                    # branch copy where doing it that way is needed because the
                    # fileop for the copy didn't get generated and the commit
                    # tree would be wrong if we didn't.
                    latest = next((node for node in copies
                                    if node.kind == SD_DIR and
                                       node.from_path and
                                       node.path == newcommit.common),
                                  None)
                    if latest is not None:
                        self.directory_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce(b"r%s: directory copy with %s" \
                                     % (newcommit.legacy_id, copies))
                    # The user may have botched a branch creation by doing a
                    # non-Subversion directory copy followed by a bunch of
                    # Subversion adds. Blob hashes will match existing files,
                    # but from_rev and from_path won't be set at parse time.
                    # Our code detects this case and makes file
                    # backlinks, but can't deduce the directory copy.
                    # Thus, we have to treat multiple file copies as
                    # an instruction to create a gitspace branch.
                    #
                    # This guard filters out copy op sets that are
                    # *single* file copies. We're making an assumption
                    # here that multiple file copies should always
                    # trigger a branch link creation.  This assumption
                    # could be wrong, which is why we emit a warning
                    # message later on for branch links detected this
                    # way
                    #
                    # Even with this filter you'll tend to end up with lots
                    # of little merge bubbles with no commits on one side;
                    # these have to be removed by a debubbling pass later.
                    # I don't know what generates these things - cvs2svn, maybe.
                    #
                    # The second conjunct of this guard filters out the case
                    # where the user actually did do a previous Subversion file
                    # copy to start the branch, in which case we want to link
                    # through that.
                    elif len(copies) > 1 \
                             and newcommit.common not in self.directory_branchlinks:
                        self.fileop_branchlinks.add(newcommit.common)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce(b"r%s: making branch link %s" %
                                     (newcommit.legacy_id, newcommit.common))
                        # Use max() on the reversed iterator since max returns
                        # the first item with the max key and we want the last
                        latest = max(reversed(copies),
                                     key=lambda node: int(node.from_rev))
                    if latest is not None:
                        prev = last_relevant_commit(
                                latest.from_rev, latest.from_path,
                                operator.attrgetter(b"common"))
                        if prev is not None:
                            self.branchlink[newcommit.mark] = (newcommit, prev)
                            if debug_enable(DEBUG_TOPOLOGY):
                                announce(b"r%s: link %s (%s) back to %s (%s, %s)" % \
                                         (newcommit.legacy_id,
                                          newcommit.mark,
                                          newcommit.common,
                                          latest.from_rev,
                                          prev.mark,
                                          prev.common
                                          ))
                        else:
                            if debug_enable(DEBUG_TOPOLOGY):
                                complain(b"lookback for %s failed" % latest)
                            raise Fatal(b"couldn't find a branch root for the copy of %s at r%s." % (latest.path, latest.revision))
            # We're done, add all the new commits
            self.repo.events += newcommits
            self.repo.declare_sequence_mutation()
            # Report progress, and give up our scheduler slot
            # so as not to eat the processor.
            baton.twirl()
            time.sleep(0)
            previous = revision
        # Filemaps are no longer needed
        del filemaps
        # Warn about dubious branch links
        self.fileop_branchlinks.discard(b"trunk" + os.sep)
        if self.fileop_branchlinks - self.directory_branchlinks:
            self.gripe(b"branch links detected by file ops only: %s" % " ".join(self.fileop_branchlinks - self.directory_branchlinks))
        self.repo.timings.append([b"commits", time.time()])
        if debug_enable(DEBUG_EXTRACT):
            announce(b"at post-parsing time:")
            for commit in self.repo.commits():
                msg = commit.comment
                if msg == None:
                    msg = b""
                announce(b"r%-4s %4s %2d %2d '%s'" % \
                         (commit.legacy_id, commit.mark,
                          len(commit.operations()),
                          len(commit.properties),
                          msg.strip()[:20]))
        baton.twirl()
        # First, turn the root commit into a tag
        if self.repo.events and not self.repo.earliest_commit().operations():
            try:
                initial, second = itertools.islice(self.repo.commits(), 2)
                self.repo.tagify(initial,
                                 b"root",
                                 second,
                                 "[[Tag from root commit at Subversion r%s]]\n" % initial.legacy_id)
            except ValueError: # self.repo has less than two commits
                self.gripe(b"could not tagify root commit.")
        # Now, branch analysis.
        branchroots = []
        if not self.branches or nobranch:
            last = None
            for commit in self.repo.commits():
                commit.set_branch(os.path.join(b"refs", b"heads", b"master") + os.sep)
                if last is not None: commit.set_parents([last])
                last = commit
        else:
            # Instead, determine a branch for each commit...
            if debug_enable(DEBUG_EXTRACT):
                announce(b"Branches: %s" % (self.branches,))
            lastbranch = None
            for commit in self.repo.commits():
                if lastbranch is not None \
                        and commit.common.startswith(lastbranch):
                    branch = lastbranch
                else:
                    branch = next((b for b in self.branches
                                  if commit.common.startswith(b)),
                                  None)
                if branch is not None:
                    commit.set_branch(branch)
                    for fileop in commit.operations():
                        if fileop.op in (b"M", b"D"):
                            fileop.path = fileop.path[len(branch):]
                        elif fileop.op in (b"R", b"C"):
                            fileop.source = fileop.source[len(branch):]
                            fileop.target = fileop.target[len(branch):]
                    commit.invalidate_pathset_cache()
                else:
                    commit.set_branch(b"root")
                    self.branches[b"root"] = None
                lastbranch = branch
                baton.twirl()
            self.repo.timings.append([b"branches", time.time()])
            baton.twirl()
            # ...then rebuild parent links so they follow the branches
            for commit in self.repo.commits():
                if self.branches[commit.branch] is None:
                    branchroots.append(commit)
                    commit.set_parents([])
                else:
                    commit.set_parents([self.branches[commit.branch]])
                self.branches[commit.branch] = commit
                # Per-commit spinner disabled because this pass is fast
                #baton.twirl()
            self.repo.timings.append([b"parents", time.time()])
            baton.twirl()
            # The root branch is special. It wasn't made by a copy, so
            # we didn't get the information to connect it to trunk in the
            # last phase.
            try:
                commit = next(c for c in self.repo.commits()
                              if c.branch == b"root")
            except StopIteration:
                pass
            else:
                earliest = self.repo.earliest_commit()
                if commit != earliest:
                    self.branchlink[commit.mark] = (commit, earliest)
            self.repo.timings.append([b"root", time.time()])
            baton.twirl()
            # Add links due to Subversion copy operations
            if debug_enable(DEBUG_EXTRACT):
                announce(b"branch roots: [{roots}], links {{{links}}}".format(
                    roots = b", ".join(c.mark for c in branchroots),
                    links = b", ".join(b"{l[0].mark}: {l[1].mark}".format(l=l)
                                      for l in self.branchlink.itervalues())))
            for (child, parent) in self.branchlink.itervalues():
                if not parent.repo is self.repo:
                    # The parent has been deleted since, don't add the link;
                    # it can only happen if parent was the now tagified root.
                    continue
                if not child.has_parents() \
                        and not child.branch in self.branchcopies:
                    # The branch wasn't created by copying another branch and
                    # is instead populated by fileops. Prepend a deleteall to
                    # ensure that it starts with a clean tree instead of
                    # inheriting that of its soon to be added first parent.
                    # The deleteall is put on the first commit of the branch
                    # which has fileops or more than one child.
                    commit = child
                    while len(commit.children()) == 1 and not commit.operations():
                        commit = commit.first_child()
                    if commit.operations() or commit.has_children():
                        fileop = FileOp(self.repo)
                        fileop.construct(b"deleteall")
                        commit.prepend_operation(fileop)
                        self.generated_deletes.append(commit)
                if parent not in child.parents():
                    child.add_parent(parent)
            for root in branchroots:
                if getattr(commit.branch, b"fileops", None) \
                        and root.branch != (b"trunk" + os.sep):
                    self.gripe(b"r%s: can't connect nonempty branch %s to origin" \
                                % (root.legacy_id, root.branch))
            self.repo.timings.append(["branchlinks", time.time()])
            baton.twirl()
            # Add links due to svn:mergeinfo properties
            mergeinfo = PathMap()
            mergeinfos = {}
            for (revision, record) in self.revisions.iteritems():
                for node in record.nodes:
                    if node.kind != SD_DIR: continue
                    # Mutate the mergeinfo according to copies
                    if node.from_rev:
                        assert int(node.from_rev) < int(revision)
                        mergeinfo.copy_from(
                                node.path,
                                mergeinfos.get(node.from_rev) or PathMap(),
                                node.from_path)
                        if debug_enable(DEBUG_EXTRACT):
                            announce(b"r%s~%s mergeinfo copied to %s" \
                                % (node.from_rev, node.from_path, node.path))
                    # Mutate the filemap according to current mergeinfo.
                    # The general case is multiline: each line may describe
                    # multiple spans merging to this revision; we only consider
                    # the end revision of each span.
                    # Because svn:mergeinfo will persist like other properties,
                    # we need to compare with the already present mergeinfo and
                    # only take new entries into account when creating merge
                    # links. Also, since merging will also inherit the
                    # mergeinfo entries of the source path, we also need to
                    # gather and ignore those.
                    existing_merges = set(mergeinfo[(node.path,)] or [])
                    own_merges = set()
                    try:
                        info = node.props[b'svn:mergeinfo']
                    except (AttributeError, TypeError, KeyError):
                        pass
                    else:
                        for line in info.split(b'\n'):
                            try:
                                from_path, ranges = line.split(b":", 1)
                            except ValueError:
                                continue
                            for span in ranges.split(b","):
                                # Ignore single-rev fields, they are cherry-picks.
                                # TODO: maybe we should even test if min_rev
                                # corresponds to some from_rev + 1 to ensure no
                                # commit has been skipped.
                                try:
                                    min_rev, from_rev = span.split(b"-", 1)
                                except ValueError:
                                    min_rev = from_rev = None
                                if (not min_rev) or (not from_rev): continue
                                # Import mergeinfo from merged branches
                                try:
                                    past_merges = mergeinfos[from_rev][(from_path,)]
                                except KeyError:
                                    pass
                                else:
                                    if past_merges:
                                        existing_merges.update(past_merges)
                                # Svn doesn't fit the merge range to commits on
                                # the source branch; we need to find the latest
                                # commit between min_rev and from_rev made on
                                # that branch.
                                from_commit = last_relevant_commit(
                                                    from_rev, from_path)
                                if from_commit is not None and \
                                        int(from_commit.legacy_id.split(b".",1)[0]) \
                                            >= int(min_rev):
                                    own_merges.add(from_commit.mark)
                                else:
                                    self.gripe(b"cannot resolve mergeinfo "
                                               b"source from revision %s for "
                                               b"path %s." % (from_rev,
                                                             node.path))
                    mergeinfo[(node.path,)] = own_merges
                    new_merges = own_merges - existing_merges
                    if not new_merges: continue
                    # Find the correct commit in the split case
                    commit = last_relevant_commit(revision, node.path)
                    if commit is None or \
                            not commit.legacy_id.startswith(revision):
                        # The reverse lookup went past the target revision
                        self.gripe(b"cannot resolve mergeinfo destination "
                                   b"to revision %s for path %s."
                                   % (revision, node.path))
                        continue
                    # Alter the DAG to express merges.
                    for mark in new_merges:
                        parent = self.repo.objfind(mark)
                        if parent not in commit.parents():
                            commit.add_parent(parent)
                        if debug_enable(DEBUG_TOPOLOGY):
                            announce(b"processed new mergeinfo from r%s "
                                     b"to r%s." % (parent.legacy_id,
                                                  commit.legacy_id))
                mergeinfos[revision] = mergeinfo.snapshot()
                baton.twirl()
            del mergeinfo, mergeinfos
            self.repo.timings.append(["mergeinfo", time.time()])
            baton.twirl()
            if debug_enable(DEBUG_EXTRACT):
                announce(b"after branch analysis")
                for commit in self.repo.commits():
                    try:
                        ancestor = commit.parents()[0]
                    except IndexError:
                        ancestor = '-'
                    announce(b"r%-4s %4s %4s %2d %2d '%s'" % \
                             (commit.legacy_id,
                              commit.mark, ancestor,
                              len(commit.operations()),
                              len(commit.properties),
                              commit.branch))
        baton.twirl()
        # Code controlled by --nobranch option ends.
        # Canonicalize all commits to ensure all ops actually do something.
        for commit in self.repo.commits():
            commit.canonicalize()
            baton.twirl()
        self.repo.timings.append([b"canonicalize", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce(b"after canonicalization")
        # Now clean up junk commits generated by cvs2svn.
        # We need a list copy since commits are deleted in the loop
        for commit in list(self.repo.commits()):
            # It is possible for commit.comment to be None if
            # the repository has been dumpfiltered and has empty commits.
            # If that's the case it can't very well have CVS artifacts in it.
            if commit.comment is None:
                self.gripe("r%s has no comment" % commit.legacy_id)
                continue
            # Things that cvs2svn created as tag surrogates
            # get turned into actual tags.
            m = StreamParser.cvs2svn_tag_re.search(commit.comment)
            if m and not commit.has_children():
                fulltag = os.path.join(b"refs", b"tags", m.group(1))
                self.repo.events.append(Reset(self.repo, ref=fulltag,
                                              target=commit.parents()[0]))
                commit.delete([b"--tagback"])
            # Childless generated branch commits carry no information,
            # and just get removed.
            m = StreamParser.cvs2svn_branch_re.search(commit.comment)
            if m and not commit.has_children():
                commit.delete([b"--tagback"])
            baton.twirl()
        self.repo.timings.append([b"junk", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce(b"after cvs2svn artifact removal")
        # Now we need to tagify all other commits without fileops, because git
        # is going to just discard them when we build a live repo and they
        # might possibly contain interesting metadata.
        # * Commits from tag creation often have no fileops since they come
        #   from a directory copy in Subversion. The annotated tag name is the
        #   basename of the SVN tag directory.
        # * Same for branch-root commits. The tag name is the basename of the
        #   branch directory in SVN, with "-root" appended to distinguish them
        #   from SVN tags.
        # * Commits at a branch tip that consist only of deleteall are also
        #   tagified: their fileops aren't worth saving; the comment metadata
        #   just might be.
        # * All other commits without fileops get turned into an annotated tag
        #   with name "emptycommit-<revision>".
        rootmarks = {root.mark for root in branchroots} # empty if nobranch
        rootskip = {b"trunk"+os.sep, b"root"}
        def tagname(commit):
            "Return a special tag name for a branch or tag root, else None."
            # Only commits recorded as branch roots get a special name;
            # everything else falls back on the standard rules.
            if commit.mark not in rootmarks:
                return None
            basename = os.path.basename(commit.branch)
            # "trunk" and "root" do not come from a regular branch copy.
            if basename in rootskip:
                return None
            # Roots under refs/tags keep the bare name; other roots get
            # "-root" appended to distinguish them from SVN tags.
            if commit.branch.startswith(b"refs/tags"):
                return basename
            return basename + b"-root"
        def taglegend(commit):
            "Return the legend text for a tag made from COMMIT, or an empty string."
            # Tipdelete commits and branch roots don't get any legend.
            if commit.operations():
                return b""
            if commit.mark in rootmarks \
                    and os.path.basename(commit.branch) not in rootskip:
                return b""
            # Otherwise, generate one for inspection.
            pieces = [b"[[Tag from zero-fileop commit at Subversion r%s" \
                             % commit.legacy_id]
            # This guard can fail on a split commit
            if commit.legacy_id in self.revisions:
                nodes = self.revisions[commit.legacy_id].nodes
                if nodes:
                    # List every node of the revision, one per line.
                    pieces.append(b":\n")
                    for node in nodes:
                        pieces.append(str(node)+b"\n")
            pieces.append(b"]]\n")
            return b"".join(pieces)
        #Pre compile the regex mappings for the next step
        def compile_regex(mapping):
            "Turn a (pattern, replacement) pair into (compiled pattern, replacement)."
            pattern, replacement = mapping
            compiled = re.compile(pattern)
            return (compiled, replacement)
        compiled_mapping = map(compile_regex, global_options[b"svn_branchify_mapping"])
        # Now pretty up the branch names
        for commit in self.repo.commits():
            matched = False
            for regex, replace in compiled_mapping:
                result, substitutions = regex.subn(replace,commit.branch)
                if substitutions == 1:
                    matched = True
                    commit.set_branch(os.path.join(b"refs",result))
                    break
            if matched:
                continue
            if commit.branch == "root":
                commit.set_branch(os.path.join(b"refs", b"heads", b"root"))
            elif commit.branch.startswith(b"tags" + os.sep):
                branch = commit.branch
                if branch.endswith(os.sep):
                    branch = branch[:-1]
                commit.set_branch(os.path.join(b"refs", b"tags",
                                              os.path.basename(branch)))
            elif commit.branch == b"trunk" + os.sep:
                commit.set_branch(os.path.join(b"refs", b"heads", b"master"))
            else:
                commit.set_branch(os.path.join(b"refs", b"heads",
                                              os.path.basename(commit.branch[:-1])))
            baton.twirl()
        ##self.repo.timings.append([b"polishing", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce(b"after branch name mapping")
        self.repo.tagify_empty(tipdeletes = True,
                               canonicalize = False,
                               name_func = tagname,
                               legend_func = taglegend,
                               gripe = self.gripe)
        self.repo.timings.append(["tagifying", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce(b"after tagification")
        # cvs2svn likes to crap out sequences of deletes followed by
        # filecopies on the same node when it's generating tag commits.
        # There are lots of examples of this in the nut.svn test load.
        # These show up as redundant (D, M) fileop pairs.
        for commit in self.repo.commits():
            if any(fileop is None for fileop in commit.operations()):
                raise Fatal(b"Null fileop at r%s" % commit.legacy_id)
            for i in range(len(commit.operations())-1):
                if commit.operations()[i].op == b'D' and commit.operations()[i+1].op == b'M':
                    if commit.operations()[i].path == commit.operations()[i+1].path:
                        commit.operations()[i].op = None
            commit.set_operations([fileop for fileop in commit.operations() if fileop.op is not None])
            baton.twirl()
        self.repo.timings.append(["canonicalizing", time.time()])
        baton.twirl()
        if debug_enable(DEBUG_EXTRACT):
            announce(b"after delete/copy canonicalization")
        # Remove spurious parent links caused by random cvs2svn file copies.
        #baton.twirl(b"debubbling")
        for commit in self.repo.commits():
            try:
                a, b = commit.parents()
            except ValueError:
                pass
            else:
                if a is b:
                    self.gripe(b"r%s: duplicate parent marks" % commit.legacy_id)
                elif a.branch == b.branch == commit.branch:
                    if b.committer.date < a.committer.date:
                        (a, b) = (b, a)
                    if b.descended_from(a):
                        commit.remove_parent(a)
            # Per-commit spinner disabled because this pass is fast
            #baton.twirl()
        self.repo.timings.append([b"debubbling", time.time()])
        baton.twirl()
        self.repo.renumber(baton=baton)
        baton.twirl()
        self.repo.timings.append([b"renumbering", time.time()])
        self.repo.write_legacy = True
        # Look for tag and branch merges that mean we may want to undo a
        # tag or branch creation
        ignore_deleteall = set(commit.mark
                               for commit in self.generated_deletes)
        for commit in self.repo.commits():
            if commit.operations() and commit.operations()[0].op == b'deleteall' \
                    and commit.has_children() \
                    and commit.mark not in ignore_deleteall:
                self.gripe(b"mid-branch deleteall on %s at <%s>." % \
                        (commit.branch, commit.legacy_id))
        self.repo.timings.append(["linting", time.time()])
        # Treat this in-core state as though it was read from an SVN repo
        self.repo.hint("svn", strong=True)

class SubversionDumper:
    "Respository to Subversion stream dump."
    def __init__(self, repo, nobranch=False):
        self.repo = repo
        self.nobranch = nobranch
        self.pathmap = {}
        self.mark_to_revision = {}
        self.branches_created = []
        self.tag_latch = False
    class FlowState:
        "Bookkeeping record stored per SVN path in the dumper's pathmap."
        def __init__(self, rev, props=None):
            # Revision associated with the path.
            self.rev = rev
            # Property dictionary; a fresh one unless a non-empty dict is given.
            self.props = props if props else {}
            # Directory bookkeeping, filled in by the dumper.
            self.subfiles = 0
            self.is_directory = False
    @staticmethod
    def svnprops(pdict):
        return b"".join(b"K %d\n%s\nV %d\n%s\n" % (len(key), key, len(val), val)
                        for key, val in sorted(pdict.iteritems()) if val)
    @staticmethod
    def dump_revprops(fp, revision, date, author=None, log=None, parents=None):
        "Emit a Revision-number record describing unversioned properties."
        fp.write(b"Revision-number: %d\n" % revision)
        parts = []
        parts.append(SubversionDumper.svnprops({b"svn:log": log}))
        parts.append(SubversionDumper.svnprops({b"svn:author": author}))
        # Ugh.  Subversion apparently insists on those decimal places
        parts.append(SubversionDumper.svnprops({b"svn:date": date.rfc3339()[:-1]+b".000000Z"}))
        # Hack merge links into mergeinfo properties.  This is a kluge
        # - the Subversion model is really like cherrypicking rather
        # than branch merging - but it's better than nothing, and
        # should at least round-trip with the logic in the Subversion
        # dump parser.
        if len(parents or []) > 1:
            parents = iter(parents)
            next(parents) # ignore main parent
            ancestral = ".".join(itertools.imap(str, sorted(parents)))
            parts.append(SubversionDumper.svnprops({b"svn:mergeinfo": ancestral}))
        parts.append(b"PROPS-END\n")
        parts.append(b"\n")
        revprops = b"".join(parts)
        fp.write(b"Prop-content-length: %d\n" % (len(revprops)-1))
        fp.write(b"Content-length: %d\n\n" % (len(revprops)-1))
        fp.write(revprops)
    @staticmethod
    def dump_node(fp, path, kind, action, content=b"",
                  from_rev=None, from_path=None,
                  props=None):
        "Emit a Node record describing versioned properties and content."
        fp.write(b"Node-path: %s\n" % path)
        fp.write(b"Node-kind: %s\n" % kind)
        fp.write(b"Node-action: %s\n" % action)
        if from_rev:
            fp.write(b"Node-copyfrom-rev: %s\n" % from_rev)
        if from_path:
            fp.write(b"Node-copyfrom-path: %s\n" % from_path)
        nodeprops = SubversionDumper.svnprops(props or {}) + "PROPS-END\n"
        fp.write(b"Prop-content-length: %d\n" % len(nodeprops))
        if content:
            fp.write(b"Text-content-length: %d\n" % len(content))
            # Checksum validation in svnload works if we do sha1 but
            # not if we try md5.  It's unknown why - possibly svn load
            # is simply ignoring sha1.
            #fp.write(b"Text-content-md5: %s\n" % hashlib.md5(content).hexdigest())
            fp.write(b"Text-content-sha1: %s\n" % hashlib.sha1(content).hexdigest())
        fp.write(b"Content-length: %d\n\n" % (len(nodeprops) + len(content)))
        fp.write(nodeprops)
        if content:
            fp.write(content)
        fp.write(b"\n\n")
    @staticmethod
    def svnbranch(branch):
        "The branch directory corresponding to a specified git branch."
        # refs/heads/master maps to trunk, refs/heads/<name> to
        # branches/<name> and refs/tags/<name> to tags/<name>.  Any
        # other shape of ref raises Recoverable.
        segments = branch.split(os.sep)
        assert segments[0] == b"refs"
        if tuple(segments) == (b"refs", b"heads", b"master"):
            return b"trunk"
        # The length is tested first so that a ref with fewer than two
        # segments is reported as unmappable instead of dying with an
        # IndexError on segments[1].
        if len(segments) != 3 or segments[1] not in (b"tags", b"heads"):
            raise Recoverable(b"%s can't be mapped to Subversion." % branch)
        svnbase = segments[2]
        # Names ending in "trunk" get a "-git" suffix, presumably so they
        # cannot be confused with the real trunk - TODO confirm.
        if svnbase.endswith(b"trunk"):
            svnbase += b"-git"
        if segments[1] == b"tags":
            return os.path.join(b"tags", svnbase)
        return os.path.join(b"branches", svnbase)
    def svnize(self, branch, path=b""):
        "Return SVN path corresponding to a specified gitspace branch and path."
        if self.nobranch:
            return path
        return os.path.join(SubversionDumper.svnbranch(branch), path)
    def filedelete(self, fp, branch, path):
        "Emit the dump-stream records required to delete a file."
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"filedelete%s" % repr((branch, path)))
        victim = self.svnize(branch, path)
        fp.write(b"Node-path: %s\n" % victim)
        fp.write(b"Node-action: delete\n\n\n")
        del self.pathmap[victim]
        # Walk up through the enclosing directories, charging each one
        # for the lost entry and deleting those whose count drops to zero.
        #
        # Stopping at a created branch directory is a spasmodic twitch
        # in the direction of respecting Subversion's notion of a
        # "flow".  We refrain from deleting branch directories so
        # they'll have just one flow throughout the life of the
        # repository.
        container = os.path.dirname(victim)
        while container and container not in self.branches_created:
            flow = self.pathmap[container]
            flow.subfiles -= 1
            if flow.subfiles == 0:
                fp.write(b"Node-path: %s\n" % container)
                fp.write(b"Node-action: delete\n\n\n")
                del self.pathmap[container]
            container = os.path.dirname(container)
    def directory_create(self, fp, revision, branch, path, parents=None):
        "Emit the directory nodes that must exist before PATH can be written."
        # fp is the output stream and revision the revision being
        # generated.  branch is the gitspace branch and path the
        # branch-relative file path.  parents, when non-empty, is the
        # list of parent commits; a newly created branch directory is
        # then made as a copy of the first parent's branch.
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"directory_create%s" % repr((revision, branch, path)))
        creations = []
        # Branch creation may be required
        svnout = SubversionDumper.svnbranch(branch)
        if svnout not in self.branches_created:
            if not svnout.startswith(b"tags") and b"branches" not in self.branches_created:
                self.branches_created.append(b"branches")
                creations.append((b"branches", None, None))
            self.branches_created.append(svnout)
            if parents:
                # A plain value, not a 1-tuple: the stray trailing comma
                # that used to follow this expression only worked because
                # %-formatting unpacks a single-element tuple.
                from_rev = self.mark_to_revision[parents[0].mark]
                from_branch = SubversionDumper.svnbranch(parents[0].branch)
                creations.append((svnout, from_rev, from_branch))
                # Everything below the source branch now also exists
                # below the new one.  The loop adds entries to pathmap,
                # so it walks a snapshot of the keys.
                prefix = from_branch + os.sep
                for key in list(self.pathmap):
                    if key.startswith(prefix):
                        counterpart = svnout + key[len(from_branch):]
                        self.pathmap[counterpart] = SubversionDumper.FlowState(revision)
            else:
                creations.append((svnout, None, None))
        # Create all directory segments required
        # to get down to the level where we can
        # create the file.
        parts = os.path.dirname(path).split(os.sep)
        if parts[0]:
            for depth in range(len(parts)):
                # pathmap is keyed by full SVN path, so the existence
                # test has to use the full path as well; testing the
                # branch-relative name re-added existing directories.
                fullpath = os.path.join(svnout, os.sep.join(parts[:depth+1]))
                if fullpath not in self.pathmap:
                    creations.append((fullpath, None, None))
        # NOTE(review): subfiles is only counted when a directory is
        # created here, not for later additions to an existing directory.
        for (dirpath, from_rev, from_path) in creations:
            SubversionDumper.dump_node(fp,
                                       path=dirpath,
                                       kind=b"dir",
                                       action=b"add",
                                       from_rev=from_rev,
                                       from_path=from_path)
            self.pathmap[dirpath] = SubversionDumper.FlowState(revision)
            self.pathmap[dirpath].is_directory = True
            self.pathmap[dirpath].subfiles += 1
    def filemodify(self, fp, revision, branch, mode, ref, path, parents):
        "Emit the dump-stream records required to add or modify a file."
        # fp is the output stream and revision the revision being
        # generated.  branch and path locate the file in gitspace; mode
        # is the git file mode string; ref is looked up in the
        # repository to obtain the content; parents is the list of
        # parent commits, passed on to directory_create().
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"filemodify%s" % repr((revision, branch, mode, ref, path,
                                            [event.mark for event in parents])))
        # Branch and directory creation may be required.
        # This has to be called early so copy can update the filemap.
        self.directory_create(fp, revision, branch, path, parents)
        svnpath = self.svnize(branch, path)
        if svnpath in self.pathmap:
            svnop = b"change"
            self.pathmap[svnpath].rev = revision
        else:
            svnop = b"add"
            self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"Generating %s %s" % (svnpath, svnop))
        content = self.repo.objfind(ref).get_content()
        # Subversion treats the mere presence of svn:executable as
        # "executable", whatever its value, so the bit is cleared by
        # dropping the property rather than by storing "false".
        props = self.pathmap[svnpath].props
        if mode == b'100755':
            props[b"svn:executable"] = b"true"
        elif mode == b'100644':
            props.pop(b"svn:executable", None)
        #if mode == "120000":
        #    changeprops = {b"svn:special":b"*"}
        #    content = b"link " + content
        # The actual content.  dump_node() always writes a property
        # block, and a non-delta block replaces the node's properties
        # on load, so the tracked set is sent every time; otherwise an
        # unchanged executable would lose its bit on its next change.
        SubversionDumper.dump_node(fp,
                  path=svnpath,
                  kind=b"file",
                  action=svnop,
                  props=props,
                  content=content)
    def filecopy(self, fp, revision, branch, source, target):
        """Emit the dump-stream record for copying a file within a branch.

        fp is the output stream; revision is the Subversion revision
        being generated; source and target are paths on branch.  The
        target node is written as an add with copy-from information
        taken from the source's recorded flow state.

        Raises Fatal if no flow state is recorded for the source path.
        """
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"filecopy%s" % repr((revision, branch, source, target)))
        svnsource = self.svnize(branch, source)
        try:
            flow = self.pathmap[svnsource]
        except KeyError:
            # Only a missing path means "no flow information"; anything
            # else (including a keyboard interrupt) should propagate.
            raise Fatal(b"couldn't retrieve flow information for %s" % source)
        self.directory_create(fp, revision, branch, target)
        svntarget = self.svnize(branch, target)
        # NOTE(review): source and target share one flow-state object
        # after this assignment, so later updates through either path
        # are visible through both -- confirm that is intended.
        self.pathmap[svntarget] = self.pathmap[svnsource]
        SubversionDumper.dump_node(fp,
                                   path=svntarget,
                                   kind=b"file",
                                   action=b"add",
                                   from_path=svnsource,
                                   from_rev=flow.rev)
    def make_tag(self, fp, revision, branch, name, log, author):
        """Emit a revision that records a tag as a directory copy.

        Writes revision properties for the new revision, creates the
        top-level tags directory the first time any tag is made, then
        adds tags/<name> as a copy of the branch directory as of the
        previous revision.
        """
        if debug_enable(DEBUG_SVNDUMP):
            announce(b"make_tag%s" % repr((revision, branch, name, log, str(author))))
        copy_source = self.svnize(branch)
        copy_target = os.path.join(b"tags", name)
        # Only the local part of the address is used as the author.
        SubversionDumper.dump_revprops(fp, revision,
                                       log=log,
                                       author=author.email.split(b"@")[0],
                                       date=author.date)
        first_tag = not self.tag_latch
        if first_tag:
            # Latch so the tags directory is only ever added once.
            self.tag_latch = True
            SubversionDumper.dump_node(fp,
                                       path=b"tags",
                                       kind=b"dir",
                                       action=b"add")
        SubversionDumper.dump_node(fp,
                                   path=copy_target,
                                   kind=b"dir",
                                   action=b"add",
                                   from_path=copy_source,
                                   from_rev=revision-1)
    def dump(self, selection, fp, progress=False):
        """Export the repository as a Subversion dumpfile.

        selection is an iterable of event indices; only the commits
        among them are dumped, each becoming one Subversion revision
        (numbered from 1 in selection order).  Tags attached to a commit
        consume an additional revision each.  fp is the output stream;
        progress enables the progress baton.

        Raises Fatal on an I/O error or an unsupported fileop type.
        """
        self.tag_latch = False
        tags = [event for event in self.repo.events if isinstance(event, Tag)]
        with Baton(b"reposurgeon: dumping", enable=progress) as baton:
            try:
                # Dumpfile header, then the property block for revision 0.
                fp.write(b"SVN-fs-dump-format-version: 2\n\n")
                fp.write(b"UUID: %s\n\n" % (self.repo.uuid or uuid.uuid4()))
                SubversionDumper.dump_revprops(fp,
                                               revision=0,
                                               date=Date(rfc3339(time.time())))
                baton.twirl()
                revision = 0
                for i in selection:
                    event = self.repo.events[i]
                    # Passthroughs are lost; there are no equivalents
                    # in Subversion's ontology.
                    if not isinstance(event, Commit):
                        continue
                    revision += 1
                    self.mark_to_revision[event.mark] = revision
                    # We must treat the gitspace committer attribute
                    # as the author: gitspace author information is
                    # lost.  So is everything but the local part of
                    # the committer name.
                    # Parent marks must already have been assigned
                    # revisions, i.e. parents precede children in the
                    # selection; otherwise this lookup fails.
                    backlinks = [self.mark_to_revision[mark]
                                 for mark in event.parent_marks()]
                    SubversionDumper.dump_revprops(fp, revision,
                                                   log=event.comment,
                                                   author=event.committer.email.split(b"@")[0],
                                                   date=event.committer.date,
                                                   parents=backlinks)
                    for fileop in event.operations():
                        if fileop.op == b"D":
                            # Deleting a .gitignore becomes clearing
                            # svn:ignore on a directory node.
                            # NOTE(review): the pathmap key here is the
                            # svnized path of the .gitignore file itself,
                            # whereas the M case below keys on the
                            # svnized directory -- confirm which is meant.
                            if fileop.path.endswith(b".gitignore"):
                                svnpath = self.svnize(event.head(), fileop.path)
                                self.pathmap[svnpath].props[b"svn:ignore"] = b""
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind=b"dir",
                                          action=b"change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filedelete(fp, event.head(), fileop.path)
                        elif fileop.op == b"M":
                            # A .gitignore modification becomes an
                            # svn:ignore property holding the blob text.
                            # NOTE(review): svnpath is already derived
                            # from the file's directory, yet the node is
                            # written for dirname(svnpath) -- confirm
                            # the intended target directory.
                            if fileop.path.endswith(b".gitignore"):
                                svnpath = self.svnize(event.head(),
                                                      os.path.dirname(fileop.path))
                                blob = self.repo.objfind(fileop.ref)
                                if svnpath not in self.pathmap:
                                    self.pathmap[svnpath] = SubversionDumper.FlowState(revision)
                                self.pathmap[svnpath].props[b"svn:ignore"] = blob.get_content()
                                SubversionDumper.dump_node(fp,
                                          path=os.path.dirname(svnpath),
                                          kind=b"dir",
                                          action=b"change",
                                          props = self.pathmap[svnpath].props)
                            else:
                                self.filemodify(fp,
                                                revision,
                                                event.head(),
                                                fileop.mode,
                                                fileop.ref,
                                                fileop.path,
                                                event.parents())
                        elif fileop.op == b"R":
                            # A rename is emitted as copy-then-delete.
                            # NOTE(review): the delete uses event.branch
                            # where every other call uses event.head() --
                            # confirm the two are interchangeable here.
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                            self.filedelete(fp, event.branch, fileop.source)
                        elif fileop.op == b"C":
                            self.filecopy(fp,
                                          revision,
                                          event.head(),
                                          fileop.source,
                                          fileop.target)
                        elif fileop.op == b"deleteall":
                            # Forget every tracked path under the branch
                            # directory, then delete the directory node.
                            branchdir = self.svnbranch(event.head())
                            # Here again the object is mutated, so a copy list must be used.
                            for path in self.pathmap.keys():
                                if path.startswith(branchdir + os.sep):
                                    del self.pathmap[path]
                            fp.write(b"Node-path: %s\n" % branchdir)
                            fp.write(b"Node-action: delete\n\n\n")
                        else:
                            raise Fatal(b"unsupported fileop type %s." \
                                        % fileop.op)
                    # Turn any annotated tag pointing at this commit into
                    # a directory copy.
                    # Only the first matching tag is converted; the
                    # break skips the lightweight-tag branch below.
                    for tag in tags:
                        if tag.target is event:
                            revision += 1
                            self.make_tag(fp,
                                          revision,
                                          event.head(),
                                          name=tag.name,
                                          log=tag.comment,
                                          author=tag.tagger)
                            break
                    else:
                        # Preserve lightweight tags, too.  Ugh, O(n**2).
                        # A commit that has children, none of which is
                        # on its own branch, is treated as a tag named
                        # after the basename of that branch.
                        if event.has_children():
                            for child in event.children():
                                if child.branch == event.branch:
                                    break
                            else:
                                revision += 1
                                self.make_tag(fp,
                                              revision,
                                              event.head(),
                                              name=os.path.basename(event.branch),
                                              log=b"",
                                              author=event.committer)
                    fp.flush()
            except IOError as e:
                raise Fatal(b"export error: %s" % e)

# Generic repository-manipulation code begins here

class Repository:
    "Generic repository object."
    def __init__(self, name=None):
        self.name = name
        self.readtime = time.time()
        self.vcs = None
        self.stronghint = False
        self.hintlist = []
        self.sourcedir = None
        self.seekstream = None
        self.events = []    # A list of the events encountered, in order
        self._commits = None
        self._mark_to_index = {}
        self._mark_to_object = {}
        self._namecache = {}
        self.preserve_set = set()
        self.case_coverage = set()
        self.basedir = os.getcwd()
        self.uuid = None
        self.write_legacy = False
        self.dollar_map = {}        # From dollar cookies in files
        self.legacy_map = {}    # From anything that doesn't survive rebuild
        self.legacy_count = None
        self.timings = []
        self.assignments = {}
        self._has_manifests = False
        self.inlines = 0
        self.uniqueness = None
    def cleanup(self):
        "Release blob files associated with this repo by nuking its subdirectory."
        scratch = self.subdir()
        nuke(scratch, b"reposurgeon: cleaning up %s" % scratch)
    def subdir(self, name=None):
        if name is None:
            name = self.name
        if not name:
            return os.path.join(self.basedir, b".rs" + repr(os.getpid()))
        else:
            return os.path.join(self.basedir, b".rs" + repr(os.getpid())+ "-" + name)
    def makedir(self):
        "Create this repo's working subdirectory unless it already exists."
        try:
            if debug_enable(DEBUG_SHUFFLE):
                announce(b"repository fast import creates " + self.subdir())
            workdir = self.subdir()
            if os.path.exists(workdir):
                return
            os.mkdir(workdir)
        except OSError:
            raise Fatal(b"can't create operating directory")
    def hint(self, clue, strong=False):
        """Hint what the source of this repository might be.

        clue is recorded in self.hintlist if it is new; when it is a
        string it also selects self.vcs by name from vcstypes.  Once a
        strong hint has been accepted, later hints no longer change the
        VCS type or the hint list, and a new strong hint is reported
        as a conflict.

        Returns False on a conflict; otherwise returns True exactly
        when the clue was new and no strong hint was already in force.
        """
        newhint = clue not in self.hintlist
        if newhint and self.stronghint and strong:
            announce("new hint %s conflicts with old %s" \
                              % (clue, self.hintlist[-1]))
            return False
        if not self.stronghint:
            if isinstance(clue, basestring):
                self.vcs = next(vcstype for vcstype in vcstypes if vcstype.name == clue)
            if newhint:
                self.hintlist.append(clue)
        # Compute the result before latching the strong flag, so the
        # first strong hint itself still counts as a notification.
        notify = newhint and not self.stronghint
        self.stronghint = self.stronghint or strong
        return notify
    def size(self):
        "Return the size of this import stream, for statistics display."
        return sum(len(str(e)) for e in self.events)
    def branchset(self):
        "Return a set of all branchnames appearing in this repo."
        branches = set()
        for e in self.events:
            if isinstance(e, Reset) and e.committish is not None:
                branches.add(e.ref)
            elif isinstance(e, Commit):
                branches.add(e.branch)
        return branches
    def branchmap(self):
        "Return a map of branchnames to terminal marks in this repo."
        brmap = {}
        for e in self.events:
            if isinstance(e, Reset):
                if e.committish is None:
                    brmap.pop(e.ref, None)
                else:
                    brmap[e.ref] = e.committish
            elif isinstance(e, Commit):
                brmap[e.branch] = e.mark
        return brmap
    def index(self, obj):
        "Index of the specified object."
        try:
            mark = obj.mark
        except AttributeError:
            for (ind, event) in enumerate(self.events):
                if event == obj: return ind
        else:
            ind = self.find(mark)
            if ind is not None: return ind
        raise Fatal(b"internal error: <%s> not matched "
                    b"in repository %s" % (obj.legacy_id, self.name))
    def find(self, mark):
        "Find an object index by mark"
        if not self._mark_to_index:
            for (ind, event) in enumerate(self.events):
                if hasattr(event, b"mark"):
                    self._mark_to_index[event.mark] = ind
        return self._mark_to_index.get(mark)
    def objfind(self, mark):
        "Find an object by mark"
        if not self._mark_to_object:
            for event in self.events:
                if hasattr(event, b"mark"):
                    self._mark_to_object[event.mark] = event
        return self._mark_to_object.get(mark)
    def all(self):
        "Return a set that selects the entire repository."
        return range(len(self.events))
    def __build_namecache(self):
        "Avoid repeated O(n**2) lookups."
        self._namecache = {}
        commitcount = 0
        for (i, event) in enumerate(self.events):
            if isinstance(event, Commit):
                commitcount += 1
                self._namecache["#%d" % commitcount] = [i]
            if isinstance(event, Tag):
                self._namecache[event.name] = [i]
            if hasattr(event, b"legacy_id") and event.legacy_id:
                self._namecache[event.legacy_id] = [i]
            if hasattr(event, b"committer"):
                key = event.committer.action_stamp()
                if key not in self._namecache:
                    self._namecache[key] = [i]
                elif i not in self._namecache[key]:
                    self._namecache[key].append(i)
            if hasattr(event, b"author"):
                key = event.author.action_stamp()
                if key not in self._namecache:
                    self._namecache[key] = [i]
                elif i not in self._namecache[key]:
                    self._namecache[key].append(i)
    def named(self, ref):
        """Resolve named reference in the context of this repository.

        Lookup order: the name cache, then branch basenames, then
        assignments, then an interpretation of ref as a date or action
        stamp ("<date>[!<email>][#<ordinal>]").

        The return type varies with the path taken: the cached list of
        indices, a one-element set for a branch match, whatever is
        stored in self.assignments, a set of indices for a date match,
        or None when nothing matches.  Raises Recoverable for a branch
        with no commits or a date that matches no events.
        """
        selection = set([])
        # For matches that require iterating across the entire event
        # sequence, building an entire name lookup table is not much
        # more expensive in time than doing a single lookup. Avoid
        # lots of O(n**2) searches by building a lookup cache, at the
        # expense of increased working set for the hash table.
        if not self._namecache:
            self.__build_namecache()
        if ref in self._namecache:
            return self._namecache[ref]
        # No hit in the cache? Then search branches.
        for symbol in sorted(self.branchset(),
                             key=len, reverse=True): # longest name first
            if ref == os.path.basename(symbol):
                loc = None
                # Find the last commit with this branchname
                for (i, event) in enumerate(self.events):
                    if isinstance(event, Commit):
                        if event.branch == symbol:
                            loc = i
                if loc is None:
                    raise Recoverable(b"branch name %s points to hyperspace" % symbol)
                else:
                    return set([loc])
        # Next, assignments
        lookup = self.assignments.get(ref)
        if lookup:
            return lookup
        # Might be a date or action stamp (though action stamps should
        # be in the name cache already)
        # A trailing "#<digits>" is split off as a 1-based ordinal used
        # to pick one event when several match.
        ordinal = None
        m = re.search(b"#[0-9]+$", ref)
        if m:
            try:
                ordinal = int(m.group(0)[1:])
                ref = ref[:-len(m.group(0))]
            except ValueError:
                raise Recoverable(b"ill-formed date")
        # Everything before the first '!' is the date part; what
        # follows it, if anything, is an email address to match.
        bang = ref.find(b'!')
        date_end = len(ref)
        if bang >= 0:
            date_end = min(bang, date_end)
        date = ref[:date_end]
        datematch = None
        # First try the date part as a full timestamp requiring an exact
        # match; failing that, as a YYYY-MM-DD day matching any time
        # within that 24-hour span.  The lambdas close over 'date',
        # which has been rebound to the parsed value by then.
        try:
            date = Date(date)
            datematch = lambda t: t == date
        except (Fatal, ValueError):
            try:
                date = calendar.timegm(time.strptime(date, b"%Y-%m-%d"))
                datematch = lambda t: t.timestamp >= date and t.timestamp < date + 24*60*60
            except ValueError:
                datematch = None
        email_id = None
        if date is not None and bang > -1:
            email_id = ref[bang+1:]
        matches = []
        if datematch:
            # Collect every event whose committer (or, lacking one,
            # tagger) matches the date and, if given, the email.
            for (ei, event) in enumerate(self.events):
                if hasattr(event, b'committer'):
                    if not datematch(event.committer.date):
                        continue
                    if email_id and event.committer.email != email_id:
                        continue
                    else:
                        matches.append(ei)
                elif hasattr(event, b'tagger'):
                    if not datematch(event.tagger.date):
                        continue
                    elif email_id and event.tagger.email!=email_id:
                        continue
                    else:
                        matches.append(ei)
            if len(matches) < 1:
                raise Recoverable(b"no events match %s" % ref)
            elif len(matches) > 1:
                # With several matches an in-range ordinal picks one;
                # a missing or too-large ordinal selects them all.
                if ordinal is not None and ordinal <= len(matches):
                    selection.add(matches[ordinal-1])
                else:
                    selection |= set(matches)
            else:
                selection.add(matches[0])
            if selection:
                return selection
        return None
    def invalidate_object_map(self):
        "Force an object-map rebuild on the next lookup."
        self._mark_to_object = {}
    def invalidate_manifests(self):
        if self._has_manifests:
            for c in self.commits():
                c.filemap = None
            self._has_manifests = False
    def read_authormap(self, selection, fp):
        "Read an author-mapping file and apply it to the repo."
        authormap = {}
        try:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                if line.startswith(b'#'):
                    continue
                (local, netwide) = line.strip().split(b'=')
                try:
                    (name, mail, timezone) = Attribution.parseaddr(netwide.strip())
                except ValueError:
                    raise Fatal(b"malformed author-map line")
                if not mail:
                    raise Fatal(b"can't recognize address in '%s'" % netwide)
                authormap[local.strip().lower()] = (name, mail, timezone)
        except IOError:
            raise Recoverable(b"couldn't open author-map file")
        except ValueError:
            raise Recoverable(b"bad author map syntax: %s" % repr(line))
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                event.committer.remap(authormap)
                for author in event.authors:
                    author.remap(authormap)
            elif isinstance(event, Tag):
                event.tagger.remap(authormap)
    def write_authormap(self, selection, fp):
        "List the identifiers we need."
        contributors = {}
        for ei in selection:
            event = self.events[ei]
            if isinstance(event, Commit):
                contributors[event.committer.name] = event.committer.who()
                for author in event.authors:
                    contributors[author.name] = author.who()
            elif isinstance(event, Tag):
                contributors[event.tagger.name] = event.tagger.who()
        for (name, cid) in contributors.iteritems():
            fp.write(b"%s = %s\n" % (name, cid))
    def read_legacymap(self, fp):
        """Read a legacy-references dump and initialize the repo's legacy map.

        Each line is "<legacy-id> <timestamp>!<email>[:<seq>]".  Commits
        are matched on (committer timestamp, committer email); the
        optional 1-based sequence number picks among commits sharing
        that pair.  Raises Recoverable on malformed input.
        """
        by_stamp = {}
        for commit in self.commits():
            stamp_key = (commit.committer.date.timestamp, commit.committer.email)
            by_stamp.setdefault(stamp_key, []).append(commit)
        try:
            matched = unmatched = 0
            for line in fp:
                (legacy, stamp) = line.split()
                (timefield, person) = stamp.split(b'!')
                seq = 0
                if ':' in person:
                    (person, seq) = person.split(b':')
                    seq = int(seq) - 1
                assert legacy and timefield and person
                when_who = (Date(timefield).timestamp, person)
                if when_who not in by_stamp:
                    unmatched += 1
                    continue
                hit = by_stamp[when_who][seq]
                self.legacy_map[legacy] = hit
                if legacy.startswith(b"SVN:"):
                    # Strip the "SVN:" prefix for the commit's own ID.
                    hit.legacy_id = legacy[4:]
                matched += 1
            if verbose >= 1:
                announce(b"%d matched, %d unmatched, %d total"\
                         % (matched, unmatched, matched+unmatched))
        except ValueError:
            raise Recoverable(b"bad syntax in legacy map.")
    def write_legacymap(self, fp):
        "Dump legacy references, ordered by committer timestamp and then cookie."
        def chronological(item):
            (cookie, commit) = item
            return (commit.committer.date.timestamp, cookie)
        for (cookie, commit) in sorted(self.legacy_map.iteritems(),
                                       key=chronological):
            serial = ''
            if "SVN" in cookie and StreamParser.SplitSep in cookie:
                serial = ':' + cookie.split(StreamParser.SplitSep)[1]
            # The objfind test is needed in case this repo is an expunge
            # fragment with a copied legacy map.  It's a simple substitute
            # for partitioning the map at expunge time.
            if not (self.objfind(commit.mark) and commit.legacy_id):
                continue
            stamp = commit.committer.date.rfc3339()
            fp.write(b"%s\t%s!%s%s\n" % (cookie,
                                       stamp,
                                       commit.committer.email,
                                       serial))
    def tagify(self, commit, name, target, legend=b"", delete=True):
        """Turn a commit into a tag.

        A Tag called name and pointing at target is added, taking the
        commit's committer as tagger and the commit comment followed by
        legend as its comment.  With delete set the commit is then
        deleted.  Raises Fatal if the commit still has fileops.
        """
        if debug_enable(DEBUG_EXTRACT):
            label = commit.mark
            if commit.legacy_id:
                label = label + b" <%s>" % commit.legacy_id
            announce(b"tagifying: %s -> %s" % (label, name))
        if commit.operations():
            raise Fatal(b"Attempting to tagify a commit with fileops.")
        prefix = (commit.comment + b"\n") if commit.comment else b""
        replacement = Tag(commit.repo,
                          name=name,
                          target=target,
                          tagger=commit.committer,
                          comment=prefix + legend)
        self.addEvent(replacement)
        if delete:
            commit.delete(["--tagback"])
    def tagify_empty(self, commits = None,
                           tipdeletes = False,
                           tagify_merges = False,
                           canonicalize = True,
                           name_func = lambda _: None,
                           legend_func = lambda _: b"",
                           gripe = complain
                          ):
        """Turn into tags commits without (meaningful) fileops.
            Arguments: * commits:       None, or an iterable of event indices
                                        tagify_empty() ignores non-commits
                       * tipdeletes:    whether tipdeletes should be tagified
                       * tagify_merges: whether commits with more than one
                                        parent may be tagified; otherwise
                                        they are left alone
                       * canonicalize:  whether to canonicalize fileops first
                       * name_func:     custom function for choosing the tag
                                        name; if it returns a False value like
                                        None, a default scheme is used
                       * legend_func:   custom function for choosing the legend
                                        of a tag; no fallback is provided. By
                                        default it always returns "".
                       * gripe:         callable used to report parentless
                                        commits, which are deleted rather
                                        than tagified"""
        # Default scheme for tag names
        def default_name(commit):
            if commit.operations():
                branch = commit.branch
                if branch[-1] == os.sep: branch = branch[:-1]
                return b"tipdelete-" + os.path.basename(branch)
            if commit.legacy_id:
                return b"emptycommit-" + commit.legacy_id
            elif commit.mark:
                return b"emptycommit-mark" + commit.mark[1:]
            else:
                # The index is not a string; convert it before
                # concatenating.
                return "emptycommit-index" + str(commit.index())
        # Use a separate loop because delete() invalidates manifests.
        if canonicalize:
            for _, commit in self.iterevents(commits, types=Commit):
                commit.canonicalize()
        # Tagify commits without fileops
        usednames = {e.name for e in self.events if isinstance(e, Tag)}
        if tipdeletes:
            is_tipdelete = lambda c: c.alldeletes(killset={b"deleteall"}) \
                                     and not c.has_children()
        else:
            is_tipdelete = lambda _: False
        deletia = []
        for index, commit in self.iterevents(commits, types=Commit):
            if (not commit.operations()) or is_tipdelete(commit):
                if commit.has_parents():
                    if len(commit.parents()) > 1 and not tagify_merges:
                        continue
                    name = name_func(commit) or default_name(commit)
                    # Find the first unused name, adding ".1", ".2", ...
                    # on collision.
                    for i in itertools.count():
                        suffix = b".{}".format(i) if i else b""
                        if name + suffix not in usednames: break
                    usednames.add(name + suffix)
                    legend = legend_func(commit)
                    # tagify() refuses commits with fileops, so a tip
                    # delete has its operations dropped first.
                    if commit.operations():
                        commit.set_operations([])
                    self.tagify(commit,
                                name + suffix,
                                commit.parents()[0],
                                legend,
                                delete = False)
                    deletia.append(index)
                else:
                    # Nothing to hang a tag on: report and delete.
                    msg = []
                    if commit.legacy_id:
                        msg.append(b"r%s:" % commit.legacy_id)
                    elif commit.mark:
                        msg.append(b"'%s':" % commit.mark)
                    msg.append(b"deleting parentless")
                    if commit.operations():
                        msg.append(b"tip delete of %s." % commit.branch)
                    else:
                        msg.append(b"zero-op commit on %s." % commit.branch)
                    gripe(b" ".join(msg))
                    deletia.append(index)
        # All deletions are done in one batch at the end.
        self.delete(deletia, [b"--tagback", "--tagify"])
    def fast_import(self, fp, options, progress=False):
        "Populate the repo from a stream file and record when it was read."
        parser = StreamParser(self)
        parser.fast_import(fp, options, progress)
        self.readtime = time.time()
    def parse_dollar_cookies(self):
        "Extract info about legacy references from CVS/SVN header cookies."
        if self.dollar_map:
            return
        # The goal here is to throw away CVS and Subversion header
        # information still fossilized into $Id$ and $Subversion$
        # headers after conversion to a later version. For each
        # cookie, all but the earliest blob containing it has it
        # as a fossil which should be removed.  Then, the earliest
        # commit referencing that blob gets a legacy property set;
        # later references will be branching artifacts.
        seen = set()
        for event in self.events:
            if isinstance(event, Blob) and event.cookie:
                if event.cookie in seen:
                    continue
                else:
                    # The first commit immediately after this blob
                    for ei in range(self.find(event.mark), len(self.events)):
                        if isinstance(self.events[ei], Commit):
                            commit = self.events[ei]
                            break
                    seen.add(event.cookie)
                    if "legacy" in commit.properties:
                        complain(b"legacy property of %s overwritten" \
                                 % commit.mark)
                    if isinstance(event.cookie, str):
                        svnkey = b"SVN:" + event.cookie
                        self.dollar_map[svnkey] = commit
                    else:
                        (basename, cvsref) = event.cookie
                        for fileop in commit.operations():
                            if fileop.op == 'M' and fileop.ref == event.mark:
                                if not os.path.basename(fileop.path).endswith(basename):
                                    # Usually the harmless result of a
                                    # file move or copy that cvs2svn or
                                    # git-svn didn't pick up on.
                                    complain(b"mismatched CVS header path '%s' in %s vs '%s' in %s"
                                             % (fileop.path, commit.mark, basename, event.mark))
                                cvskey = b"CVS:%s:%s" % (fileop.path, cvsref)
                                self.dollar_map[cvskey] = commit
    def check_uniqueness(self, verbosely, announcer=announce):
        """Audit the repository for uniqueness properties.

        Sets self.uniqueness to "committer_date" when no two commits
        share a commit time, to "committer_stamp" when times collide
        but action stamps do not, and leaves it None otherwise.
        Findings are reported through announcer.
        """
        self.uniqueness = None
        latest_at = {}
        collisions = {}
        for commit in self.commits():
            when = commit.when()
            if when in latest_at:
                collisions.setdefault(when, [latest_at[when]]).append(commit)
            latest_at[when] = commit
        if not collisions:
            self.uniqueness = "committer_date"
            if verbosely:
                announcer("All commit times in this repository are unique.")
            return
        announcer("These timestamps have multiple commits: %s" \
                 % " ".join(map(rfc3339, collisions.keys())))
        # Within each group of same-time commits, look for repeated
        # action stamps.
        colliding_marks = set()
        for clique in collisions.values():
            mark_for_stamp = {}
            for commit in clique:
                stamp = commit.action_stamp()
                if stamp in mark_for_stamp:
                    colliding_marks.add(mark_for_stamp[stamp])
                    colliding_marks.add(commit.mark)
                mark_for_stamp[stamp] = commit.mark
        if colliding_marks:
            announcer("These marks are in stamp collisions: %s" \
                      % " ".join(colliding_marks))
        else:
            self.uniqueness = "committer_stamp"
            announcer("All commit stamps in this repository are unique.")
    def export_style(self):
        "Return the style flags used to tune the export dump format."
        if not self.vcs:
            # No VCS is known, so fall back to git style.
            return (b"nl-after-commit",)
        return self.vcs.styleflags
    def fast_export(self, selection, fp, options, target=None, progress=False):
        "Write the selected events to fp as a Subversion dump or fast-export stream."
        if target and target.name == b"svn":
            SubversionDumper(self).dump(selection, fp, progress)
            return
        if selection != self.all():
            # A partial selection must also carry the blobs its commits
            # reference and the targets of tags attached to them.
            closure = set(selection)
            for idx in selection:
                candidate = self.events[idx]
                if not isinstance(candidate, Commit):
                    continue
                for op in candidate.operations():
                    if op.op == b'M' and op.ref != b"inline":
                        closure.add(self.find(op.ref))
                for tag in candidate.attachments:
                    closure.add(self.find(tag.committish))
            selection = sorted(closure)
        with Baton(b"reposurgeon: exporting", enable=progress) as baton:
            try:
                realized = {}
                selection_marks = []
                for idx in selection:
                    if hasattr(self.events[idx], b"mark"):
                        selection_marks.append(self.events[idx].mark)
                for idx in selection:
                    baton.twirl()
                    event = self.events[idx]
                    if debug_enable(DEBUG_UNITE) and hasattr(event, b"mark"):
                        announce(b"writing %d %s %s" % (idx, event.mark, event.__class__.__name__))
                    chunk = event.dump(target, options=options,
                                       realized=realized,
                                       internals=selection_marks)
                    fp.write(chunk)
            except IOError as e:
                raise Fatal(b"export error: %s" % e)
    def preserve(self, filename):
        "Register an existing path in the preserve set, to be copied back on rebuild."
        if not os.path.exists(filename):
            raise Recoverable(b"%s doesn't exist" % filename)
        self.preserve_set.add(filename)
    def unpreserve(self, filename):
        "Drop a path from the preserve set; complain if it is not a member."
        if filename not in self.preserve_set:
            raise Recoverable(b"%s doesn't exist" % filename)
        self.preserve_set.remove(filename)
    def preservable(self):
        "Hand back the set of paths registered for preservation."
        return self.preserve_set
    def rename(self, newname):
        "Give the repo a new name, moving its subdirectory to match."
        try:
            # os.rename can fail if the target directory exists.
            source, destination = self.subdir(), self.subdir(newname)
            if debug_enable(DEBUG_SHUFFLE):
                announce(b"repository rename %s->%s calls os.rename(%s, %s)" % (self.name, newname, repr(source), repr(destination)))
            os.rename(source, destination)
            self.name = newname
        except OSError as e:
            raise Fatal(b"repo rename %s -> %s failed: %s"
                                       % (self.subdir(), self.subdir(newname), e))
    def addEvent(self, event, where=None):
        "Append an event, or insert it at index where, then flag the mutation."
        if where is None:
            self.events.append(event)
        else:
            self.events.insert(where, event)
        self.declare_sequence_mutation()
    @memoized_iterator(b"_commits")
    def commits(self):
        "Yield the Commit objects of the repository in event order."
        sequence = self.events
        return (item for item in sequence if isinstance(item, Commit))
    def filter_assignments(self, f):
        """Re-index assignments for a pending deletion, warning if any goes empty.

        f is a predicate over events; events for which it is true are about
        to be removed from self.events.  Every assignment (a named collection
        of event indices) is rewritten as an ascending list of the indices
        its surviving members will have after the removal.  Indices that
        refer to removed events or lie outside the event list are dropped.
        """
        if not self.assignments:
            return
        # Evaluate the predicate once per event and record where each
        # survivor lands, rather than rescanning the whole event list (and
        # doing list membership tests) for every assignment.
        remap = {}
        dropped = 0
        for (i, e) in enumerate(self.events):
            if f(e):
                dropped += 1
            else:
                remap[i] = i - dropped
        for (name, values) in list(self.assignments.items()):
            newassigns = [remap[i] for i in sorted(set(values)) if i in remap]
            if values and not newassigns:
                announce(b"sequence modification left %s empty" % name)
            self.assignments[name] = newassigns
    def declare_sequence_mutation(self, warning=None):
        "Flag the event sequence as modified, dropping caches derived from it."
        self._commits = None
        self._mark_to_index = {}
        self._namecache = {}
        # Assignments are only discarded when the caller supplies a warning.
        if not (self.assignments and warning):
            return
        self.assignments = {}
        announce(b"assignments invalidated by " + warning)
    def earliest_commit(self):
        "Return the first commit in event order."
        first = next(self.commits())
        return first
    def earliest(self):
        "Return the committer date of the first commit in event order."
        first = next(self.commits())
        return first.committer.date
    def ancestors(self, ei):
        "Return first-parent ancestor indices of an event, nearest first."
        lineage = []
        here = self.events[ei]
        while here.has_parents():
            # Only the first parent is followed.
            ei = self.find(here.parent_marks()[0])
            lineage.append(ei)
            here = self.events[ei]
        return lineage
    #
    # Delete machinery begins here
    #
    def ancestor_count(self, event, path):
        "Count commits modifying a path along the first-parent chain; stops at 2."
        hits = 0
        while True:
            # Each commit contributes at most one hit, however many
            # modifications of the path it carries.
            if any(op and op.op == "M" and op.path == path
                   for op in event.operations()):
                hits += 1
            # Callers only distinguish 0, 1, and >1.
            if hits > 1:
                return hits
            try:
                event = event.parents()[0]
            except IndexError:
                return hits
    def __compose(self, event, left, right):
        """Compose two relevant fileops belonging to the same commit.

        left precedes right in the operation list of event.  Either operand
        may be modified in place (its path, target, or op type rewritten)
        before being handed back.

        Returns a 5-tuple whose fields mean:
          0: Was this a modification
          1: Op to replace the first with (None means delete)
          2: Op to replace the second with (None means delete)
          3: If not None, a warning to emit
          4: Case number, for coverage analysis

        Raises Fatal if no rule covers the pair.
        """
        pair = (left.op, right.op)
        #
        # First op M
        #
        if pair == (b"M", b"M"):
            # Leave these in place, they get handled later.
            return (False, left, right, None, 0)
        # M a + D a -> D a
        # Or, could reduce to nothing if M a was the only modify..
        elif left.op == b"M" and right.op == b"D":
            if self.ancestor_count(event, left.path) == 1:
                return (True, None, None, None, 1)
            else:
                return (True, right, None, None, 2)
        elif left.op == b"M" and right.op == b"R":
            # M a + R a b -> R a b M b, so R falls towards start of list
            if left.path == right.source:
                if self.ancestor_count(event, left.path) == 1:
                    # M a has no ancestors, preceding R can be dropped
                    left.path = right.target
                    return (True, left, None, None, 3)
                else:
                    # M a has ancestors, R is still needed
                    left.path = right.target
                    return (True, right, left, None, 4)
            # M b + R a b can't happen.  If you try to generate this with
            # git mv it throws an error.  An ordinary mv results in D b M a.
            elif left.path == right.target:
                return(True, right, None, b"M followed by R to the M operand?", -1)
            # Any other M + R pairing drops out of the chain and ends at
            # the Fatal below.
        # Correct reduction for this would be M a + C a b -> C a b + M a + M b,
        # that is we'd have to duplicate the modify. We'll leave it in place
        # for now.
        elif left.op == b"M" and right.op == b"C":
            return (False, left, right, None, 5)
        #
        # First op D or deleteall
        #
        # Delete followed by modify undoes delete, since M carries whole files.
        elif pair == (b"D", b"M"):
            return (True, None, right, None, 6)
        # But we have to leave deletealls in place, since they affect right ops
        elif pair == (b"deleteall", b"M"):
            return (False, left, right, None, 7)
        # These cases should be impossible.  But cvs2svn actually generates
        # adjacent deletes into Subversion dumpfiles which turn into (D, D).
        elif left.op == b"deleteall" and right.op != b"M":
            return (False, left, right,
                    "Non-M operation after deleteall?", -1)
        elif left.op == b"D" and right.op == b"D":
            return (True, left, None, None, -2)
        elif left.op == b"D" and right.op in (b"R", b"C"):
            if left.path == right.source:
                return (False, left, right,
                        "R or C of %s after deletion?" % left.path, -3)
            else:
                return (False, left, right, None, 8)
        #
        # First op R
        #
        elif pair == (b"R", b"D"):
            if left.target == right.path:
                # Rename followed by delete of target composes to source delete
                right.path = left.source
                return (True, None, right, None, 9)
            else:
                # On rename followed by delete of source discard the delete
                # but user should be warned.  The warning names the rename
                # target, i.e. where the deleted path went.
                return (False, left, None,
                        "delete of %s after renaming to %s?" % (right.path, left.target), -4)
        # Rename followed by deleteall shouldn't be possible
        elif pair == (b"R", b"deleteall") and left.target == right.path:
            return (False, None, right,
                    "rename before deleteall not removed?", -5)
        # Leave rename or copy followed by modify alone
        elif pair == (b"R", b"M") or pair == (b"C", b"M"):
            return (False, left, right, None, 10)
        # Compose renames where possible
        elif left.op == b"R" and right.op == b"R":
            if left.target == right.source:
                left.target = right.target
                return (True, left, None, None, 11)
            else:
                return (False, left, right,
                        "R %s %s is inconsistent with following operation" \
                        % (left.source, left.target), -6)
        # We could do R a b + C b c -> C a c + R a b, but why?
        elif left.op == b"R" and right.op == b"C":
            return (False, left, right, None, 12)
        #
        # First op C
        #
        elif pair == (b"C", b"D"):
            if left.source == right.path:
                # Copy followed by delete of the source is a rename.
                left.setOp(b"R")
                return (True, left, None, None, 13)
            elif left.target == right.path:
                # This delete undoes the copy
                return (True, None, None, None, 14)
        elif pair == (b"C", b"R"):
            if left.source == right.source:
                # No reduction
                return (False, left, right, None, 15)
            else:
                # Copy followed by a rename of the target reduces to single copy
                if left.target == right.source:
                    left.target = right.target
                    return (True, left, None, None, 16)
        elif pair == (b"C", b"C"):
            # No reduction
            return (False, left, right, None, 17)
        #
        # Case not covered
        #
        raise Fatal(b"can't compose op '%s' and '%s'" % (left, right))
    def canonicalize(self, commit):
        "Canonicalize the list of file operations in this commit."
        # Returns the set of __compose case numbers whose reductions were
        # actually applied, for coverage analysis.
        coverage = set()
        # Handling deleteall operations is simple
        lastdeleteall = None
        for (i, a) in enumerate(commit.operations()):
            if a.op == "deleteall":
                lastdeleteall = i
        if lastdeleteall is not None:
            if debug_enable(DEBUG_DELETE):
                announce(b"removing all before rightmost deleteall")
            # The slice keeps the rightmost deleteall itself as the new
            # first operation.
            commit.set_operations(commit.operations()[lastdeleteall:])
            commit.invalidate_pathset_cache()
        # Composition in the general case is trickier.
        while True:
            # Keep making passes until nothing mutates
            mutated = False
            for i in range(len(commit.operations())):
                for j in range(i+1, len(commit.operations())):
                    # Re-fetch both operands on every step: an earlier
                    # reduction in this pass may have replaced either slot,
                    # possibly with None.
                    a = commit.operations()[i]
                    b = commit.operations()[j]
                    if a is not None and b is not None and a.relevant(b):
                        (modified, newa, newb, warn, case) = self.__compose(commit, a, b)
                        if debug_enable(DEBUG_DELETE):
                            announce(b"Reduction case %d fired on %s" % (case, (i,j)))
                        # Results, warnings and coverage are only acted on
                        # when __compose reports a modification.
                        if modified:
                            mutated = True
                            # Slots are overwritten in place (None marks a
                            # deleted op) so indices stay valid for the rest
                            # of this pass.
                            commit.operations()[i] = newa
                            commit.operations()[j] = newb
                            if debug_enable(DEBUG_DELETE):
                                announce(b"During canonicalization:")
                                commit.fileop_dump()
                            if warn:
                                complain(warn)
                            coverage.add(case)
            if not mutated:
                break
            # Squeeze out the ops deleted during this pass before the next one.
            commit.set_operations([x for x in commit.operations() if x is not None])
            commit.invalidate_pathset_cache()
        return coverage
    def squash(self, selected, policy):
        """Delete a set of events, or rearrange it forward or backwards.

        selected is a collection of indices into self.events.  policy is a
        list of modifier strings drawn from --complain, --coalesce, --delete,
        --pushback, --pushforward, --tagify, --tagback, --tagforward and
        --quiet; anything else raises Recoverable.

        Unless --delete is given, the fileops of each selected commit are
        pushed forward to its first-parent children (the default) or, with
        --pushback, appended to its first parent, and the commits that
        received ops are canonicalized afterwards.  Tags attached to a
        selected commit move to its first child (--tagforward, the default
        when neither --delete nor --tagback is given) or to its first parent
        (--tagback); if no such target exists they are deleted with the
        commit.  Selected blobs are never deleted directly; selected tags,
        resets and passthroughs are deleted only with --delete.
        """
        if debug_enable(DEBUG_DELETE):
            announce(b"Deletion list is %s" % [x+1 for x in selected])
        for qualifier in policy:
            if qualifier not in [b"--complain",
                                 b"--coalesce",
                                 b"--delete",
                                 b"--pushback",
                                 b"--pushforward",
                                 b"--tagify",
                                 b"--tagback",
                                 b"--tagforward",
                                 b"--quiet"]:
                raise Recoverable(b"no such deletion modifier as " + qualifier)
        # Make sure we do deletions from greatest commit number to least
        selected = sorted(selected, reverse=True)
        dquiet = b"--quiet" in policy
        delete = b"--delete" in policy
        tagify = b"--tagify" in policy
        tagback = b"--tagback" in policy
        tagforward = b"--tagforward" in policy or (not delete and not tagback)
        pushback = b"--pushback" in policy
        pushforward = b"--pushforward" in policy or (not delete and not pushback)
        # Sanity checks
        if not dquiet:
            for ei in selected:
                event = self.events[ei]
                if  isinstance(event, Commit):
                    if delete:
                        speak = b"warning: commit %s to be deleted has " % event.mark
                        if '/' in event.branch and not b'/heads/' in event.branch:
                            complain(speak + b"non-head branch attribute %s" % event.branch)
                        if not event.alldeletes():
                            announce(speak + b"non-delete fileops.")
                    if not delete:
                        if pushback and not event.has_parents():
                            complain(b"warning: "
                                     b"pushback of parentless commit %s" \
                                     % event.mark)
                        if pushforward and not event.has_children():
                            complain(b"warning: "
                                     b"pushforward of childless commit %s" \
                                     % event.mark)
        # Commits that had fileops pushed onto them and need canonicalizing
        altered = []
        # Here are the deletions
        for e in self.events:
            e.deletehook = False
        for ei in selected:
            event = self.events[ei]
            if isinstance(event, Blob):
                # Never delete a blob except as a side effect of
                # deleting a commit.
                event.deletehook = False
            elif isinstance(event, (Tag, Reset, Passthrough)):
                event.deletehook = (b"--delete" in policy)
            elif isinstance(event, Commit):
                event.deletehook = True
                # Decide the new target for tags
                filter_only = True
                if tagforward and event.has_children():
                    filter_only = False
                    new_target = event.first_child()
                elif tagback and event.parents():
                    filter_only = False
                    new_target = event.parents()[0]
                # Reparent each child
                for child in list(event.children()):
                    # Insert event's parents in place of event in child's
                    # parent list. We keep existing duplicates in case they
                    # are wanted, but ensure we don't introduce new ones.
                    old_parents = list(child.parents())
                    event_pos = old_parents.index(event)
                    # Start with existing parents before us,
                    # including existing duplicates
                    new_parents = old_parents[:event_pos]
                    # Add our parents, with possible duplicates, but not if
                    # already present before.
                    to_add = [p for p in event.parents() if p not in new_parents]
                    new_parents.extend(to_add)
                    # Avoid duplicates due to event.parents() insertion.
                    new_parents.extend(
                            p
                            for p in itertools.islice(old_parents,
                                                      event_pos+1, None)
                            if p not in to_add)
                    # Prepend a copy of this event's file ops to
                    # all children with the event as their first
                    # parent, and mark each such child as needing
                    # resolution.
                    if pushforward and child.parents()[0] == event:
                        child.set_operations(copy.copy(event.operations()) + child.operations())
                        child.invalidate_pathset_cache()
                        altered.append(child)
                    # Really set the parents to the newly constructed list
                    child.set_parents(new_parents)
                    # If event was the first parent of child yet has no parents
                    # of its own, then child's first parent has changed.
                    # Prepend a deleteall to child's fileops to ensure it
                    # starts with an empty tree (as event does) instead of
                    # inheriting that of its new first parent.
                    if event_pos == 0 and not event.parents():
                        fileop = FileOp(self)
                        fileop.construct(b"deleteall")
                        child.prepend_operation(fileop)
                        child.invalidate_pathset_cache()
                        altered.append(child)
                # We might be trying to hand the event's fileops to its
                # primary parent.
                if pushback and event.has_parents():
                    # Append a copy of this event's file ops to its primary
                    # parent fileop list and mark the parent as needing
                    # resolution.
                    parent = event.parents()[0]
                    parent.set_operations(parent.operations() + copy.copy(event.operations()))
                    parent.invalidate_pathset_cache()
                    altered.append(parent)
                    # We need to ensure all fileop blobs are defined before the
                    # corresponding fileop, in other words ensure that the blobs
                    # appear before the primary parent in the stream.
                    earliest = parent.index()
                    swap_indices = set()
                    for fileop in event.operations():
                        if fileop.op == b'M':
                            blob_index = self.find(fileop.ref)
                            if blob_index > earliest: swap_indices.add(blob_index)
                    if swap_indices:
                        last = max(swap_indices)
                        neworder = itertools.chain(
                                swap_indices, # first take the blobs
                                # then all others
                                itertools.ifilterfalse(swap_indices.__contains__,
                                         range(earliest, last+1)) )
                        self.events[earliest:last+1] = map(
                                self.events.__getitem__, neworder)
                        self.declare_sequence_mutation(b"squash pushback")
                # Move tags and attachments
                if filter_only:
                    for e in event.attachments:
                        e.deletehook = True
                else:
                    if not tagify and event.branch and "/tags/" in event.branch \
                            and new_target.branch != event.branch:
                        # By deleting the commit, we would lose the fact that
                        # it moves its branch (to create a lightweight tag for
                        # instance): replace it by a Reset which will save this
                        # very information. The following loop will take care
                        # of moving the attachment to the new target.
                        reset = Reset(self, ref = event.branch,
                                            target = event)
                        self.events[ei] = reset
                    # use a copy of attachments since it will be mutated
                    for t in list(event.attachments):
                        t.forget()
                        t.remember(self, target=new_target)
                # And forget the deleted event
                event.forget()
        # Preserve assignments
        self.filter_assignments(lambda e: e.deletehook)
        # Do the actual deletions
        self.events = [e for e in self.events if not e.deletehook]
        self.declare_sequence_mutation()
        # Canonicalize all the commits that got ops pushed to them
        if not delete:
            for event in altered:
                if event.deletehook: continue
                if debug_enable(DEBUG_DELETE):
                    announce(b"Before canonicalization:")
                    event.fileop_dump()
                self.case_coverage |= self.canonicalize(event)
                if debug_enable(DEBUG_DELETE):
                    announce(b"After canonicalization:")
                    event.fileop_dump()
                # Now apply policy in the multiple-M case
                cliques = event.cliques()
                if (b"--coalesce" not in policy and not delete) \
                        or debug_enable(DEBUG_DELETE):
                    for (path, oplist) in cliques.iteritems():
                        if len(oplist) > 1:
                            complain(b"commit %s has multiple Ms for %s"
                                    % (event.mark, path))
                if "--coalesce" in policy:
                    # Only keep last M of each clique, leaving other ops alone
                    event.set_operations( \
                           [op for (i, op) in enumerate(event.operations())
                            if (op.op != b"M") or (i == cliques[op.path][-1])])
                    event.invalidate_pathset_cache()
                if debug_enable(DEBUG_DELETE):
                    # Report the position of the commit just processed, not
                    # the leftover index from the deletion loop above.
                    announce(b"Commit %d, after applying policy:" % (event.index() + 1,))
                    event.fileop_dump()
        # Cleanup
        for e in self.events:
            del e.deletehook
        if '--delete' in policy:
            self.gc_blobs()
    def delete(self, selected, policy=None):
        "Quietly delete a set of events, honoring any extra policy modifiers."
        extras = policy if policy else []
        self.squash(selected, [b"--delete", b"--quiet"] + extras)
    def gc_blobs(self):
        "Drop every blob that no commit's M fileop refers to any more."
        referenced = set()
        for event in self.events:
            if not isinstance(event, Commit):
                continue
            for fileop in event.operations():
                if fileop.op == b'M':
                    referenced.add(fileop.ref)
        def eligible(e):
            # A blob is garbage when nothing references its mark.
            return isinstance(e, Blob) and e.mark not in referenced
        self.filter_assignments(eligible)
        survivors = [e for e in self.events if not eligible(e)]
        self.events = survivors
        self.invalidate_manifests()     # Might not be needed
        self.declare_sequence_mutation()
    def __delitem__(self, index):
        # Container protocol support (this also keeps pylint happy):
        # deleting an item is a quiet squash with tags moved backwards.
        modifiers = [b"--delete", b"--quiet", b"--tagback"]
        self.squash([index], modifiers)
    #
    # Delete machinery ends here
    #
    def front_events(self):
        "Return the passthrough events that carry options or features."
        prefixes = (b"option", b"feature")
        found = []
        for candidate in self.events:
            if isinstance(candidate, Passthrough) \
                   and candidate.text.startswith(prefixes):
                found.append(candidate)
        return found
    def renumber(self, origin=1, baton=None):
        "Reassign the repo's marks to consecutive numbers beginning at origin."
        new_numbers = {}
        def remark(m, e):
            # Translate an old mark into its new ":N" spelling, or die.
            if m not in new_numbers:
                raise Fatal(b"unknown mark %s in %s cannot be renumbered!" % \
                            (m, e.id_me()))
            return b":" + repr(new_numbers[m])
        if baton:
            total = len(self.events)
            baton.startcounter(b" %%%dd of %s" % (len(str(total)), total))
        # First pass: hand out new numbers in event order.
        next_number = origin
        for event in self.events:
            if not hasattr(event, b"mark") or event.mark is None:
                continue
            if not event.mark.startswith(b":"):
                raise Fatal(b"field not in mark format")
            new_numbers[event.mark] = next_number
            next_number += 1
        # Second pass: rewrite defining marks and committish references.
        for event in self.events:
            for fld in (b"mark", b"committish"):
                try:
                    old = getattr(event, fld)
                    if old is None:
                        continue
                    new = remark(old, event)
                    if debug_enable(DEBUG_UNITE):
                        announce(b"renumbering %s -> %s in %s.%s" % (old, new,
                                                                    event.__class__.__name__,
                                                                    fld))
                    setattr(event, fld, new)
                except AttributeError:
                    continue
        # Third pass: rewrite blob references held by M fileops.
        for commit in self.commits():
            for fileop in commit.operations():
                if fileop.op != b"M" or not fileop.ref.startswith(b":"):
                    continue
                new = remark(fileop.ref, fileop)
                if debug_enable(DEBUG_UNITE):
                    announce(b"renumbering %s -> %s in fileop" % (fileop.ref, new))
                fileop.ref = new
            if baton:
                baton.bumpcounter()
        self.invalidate_object_map()
        self._mark_to_index = {}
        if baton:
            baton.endcounter()
    def uniquify(self, color, persist=None):
        "Make branch, tag, and mark names distinct by decorating them with color."
        # persist, when given, maps names to the color of the repo owning
        # them; names this repo owns are left undecorated.  It is returned.
        named_fields = ((Commit, b"branch"), (Reset, b"ref"), (Tag, b"name"))
        for event in self.events:
            for (kind, field) in named_fields:
                if not isinstance(event, kind):
                    continue
                current = getattr(event, field)
                if persist is not None:
                    if current not in persist:
                        # record name as belonging to this repo
                        persist[current] = color
                        continue
                    if persist.get(current) == color:
                        # name belongs here, do nothing
                        continue
                # Either we're not trying to preserve names, or this is a
                # collision with a name belonging to a different repo.
                if kind == Tag:
                    renamed = color + b"-" + current
                else:
                    renamed = current + b"-" + color
                if renamed:
                    setattr(event, field, renamed)
                    if debug_enable(DEBUG_UNITE):
                        announce(b"moving %s -> %s in %s.%s"
                                 % (current, renamed,
                                    kind.__name__,
                                    field))
                    if persist is not None:
                        persist[renamed] = color
            # Disambiguate defining marks.
            for field in (b"mark", b"committish"):
                if not hasattr(event, field):
                    continue
                old = getattr(event, field)
                if old is None:
                    continue
                if not old.startswith(b":"):
                    raise Fatal(b"field not in mark format")
                new = old + b"-" + color
                if debug_enable(DEBUG_UNITE):
                    announce(b"moving %s -> %s in %s.%s"
                             % (old, new,
                                event.__class__.__name__,
                                field))
                setattr(event, field, new)
            self.invalidate_object_map()
            # Now marks in fileops
            if not isinstance(event, Commit):
                continue
            for fileop in event.operations():
                if fileop.op == b"M" and fileop.ref.startswith(b":"):
                    new = fileop.ref + "-" + color
                    if debug_enable(DEBUG_UNITE):
                        announce(b"moving %s -> %s in fileop"
                                 % (fileop.ref, new))
                    fileop.ref = new
        return persist
    def absorb(self, other):
        """Move every event of another repository into this one.

        The preserve set and case coverage of other are merged into ours.
        Leading Passthrough events of other are inserted among this repo's
        own passthroughs; the remaining events are appended.  Events that
        have a moveto method are re-homed to this repo.  Afterwards other
        is left with no events and its cleanup method is called.
        """
        # Only vcstype, sourcedir, and basedir are not copied here
        self.preserve_set |= other.preserve_set
        self.case_coverage |= other.case_coverage
        # Strip feature events off the front, they have to stay in front.
        # The emptiness guard keeps an empty or passthrough-only repo from
        # running the loop off the end of its event list.
        while other.events and isinstance(other[0], Passthrough):
            lenfront = sum(1 for x in self.events if isinstance(x, Passthrough))
            self.events.insert(lenfront, other.events.pop(0))
        # Merge in the non-feature events and blobs
        self.events += other.events
        self.declare_sequence_mutation(b"absorb")
        # Transplant in fileops, blobs, and other impedimenta
        for event in other:
            if hasattr(event, b"moveto"):
                event.moveto(self)
        other.events = []
        other.cleanup()
        #del other
    def graft(self, graft_repo, graft_point, options):
        """Graft a repo on to this one at a specified point.

        graft_repo is absorbed into this repository.  graft_point is either
        None or the index in self.events of the commit that becomes a parent
        of the earliest commit of graft_repo; Recoverable is raised if that
        event is not a commit.  With "--prune" in options, a deleteall is
        prepended to the grafted root's fileops; when there is no graft
        point there is no grafted root and the option does nothing.
        Callout parents are then resolved by name (Recoverable if a name
        has no match or is ambiguous) and the result is renumbered.
        """
        if graft_point is None:
            persist = {}
        else:
            persist = None
            where = self.events[graft_point]
            if not isinstance(where, Commit):
                raise Recoverable(b"%s in %s is not a commit." % \
                                  (where.mark, self.name))
        # Errors aren't recoverable after this
        graft_repo.uniquify(graft_repo.name, persist)
        graftroot = None
        if graft_point is not None:
            # Must be fetched before absorb() empties graft_repo.
            graftroot = graft_repo.earliest_commit()
        self.absorb(graft_repo)
        # Test against None rather than truthiness: event index 0 is a
        # legitimate graft point.
        if graft_point is not None:
            graftroot.add_parent(where.mark)

        if "--prune" in options and graftroot is not None:
            # Prepend a deleteall. Roots have nothing upline to preserve.
            delop = FileOp(self)
            delop.construct(b"deleteall")
            graftroot.prepend_operation(delop)

        # Resolve all callouts
        for commit in graft_repo.commits():
            for (idx, parent) in enumerate(commit.parents()):
                if Commit.is_callout(parent.mark):
                    attach = self.named(parent.mark)
                    if len(attach) == 0:
                        raise Recoverable(b"no match for %s in %s" \
                                          % (parent.mark, graft_repo.name))
                    elif len(attach) >= 2:
                        raise Recoverable(b"%s is ambiguous in %s" \
                                          % (parent.mark, graft_repo.name))
                    else:
                        commit.remove_parent(parent)
                        newparent = self.events[list(attach)[0]]
                        commit.insert_parent(idx, newparent.mark)
        self.renumber()
    def __last_modification(self, commit, path):
        """Locate the last modification of the specified path before this commit.

        Walks the ancestry one generation at a time.  Returns a tuple
        (ancestor, index) naming the first modify fileop found for the
        path, or None when the ancestry is exhausted.  Rename fileops
        whose target is the path redirect the search to their source.
        """
        frontier = commit.parents()
        while frontier:
            older = []
            for forebear in frontier:
                # This is potential trouble if the file was renamed
                # down one side of a merge bubble but not the other.
                # Might cause an internal-error message, but no real
                # harm will be done.
                for (index, op) in enumerate(forebear.operations()):
                    if op.op == b'M':
                        if op.path == path:
                            return (forebear, index)
                    elif op.op == b'R' and op.target == path:
                        path = op.source
                # No hit in this ancestor: queue its parents for the
                # next generation.
                older.extend(forebear.parents())
            frontier = older
        return None
    def move_to_rename(self):
        """Make rename sequences from matched delete-modify pairs.

        For each delete fileop in a commit, find the last modification of
        the deleted path in the ancestry; if the same commit has a modify
        fileop with the same mode and ref as that modification, turn the
        delete into a rename to the modified path and drop the modify.

        Returns the number of renames made.  Raises Recoverable if no
        earlier modification of a deleted path can be found.
        """
        # TODO: Actually use this somewhere...
        rename_count = 0
        # pylint: disable=unpacking-non-sequence
        for commit in self.commits():
            renames = []
            for (d, op) in enumerate(commit.operations()):
                if op.op == b'D':
                    previous = self.__last_modification(commit, op.path)
                    if not previous:
                        raise Recoverable(b"internal error looking for renames of %s" % op.path)
                    (ancestor, i) = previous
                    earlier = ancestor.operations()[i]
                    for (m, op2) in enumerate(commit.operations()):
                        if op2.op == b'M' and \
                           earlier.mode == op2.mode and \
                           earlier.ref == op2.ref:
                            renames.append((d, m))
                            rename_count += 1
                            break
            if not renames:
                continue
            ops = commit.operations()
            # First rewrite every delete while all recorded indices are
            # still valid...
            for (d, m) in renames:
                ops[d].source = ops[d].path
                ops[d].target = ops[m].path
                del ops[d].path
                ops[d].op = b'R'
            # ...then drop the absorbed modifies from the highest index
            # down, so that no removal shifts an index still to be used.
            for m in sorted(set(m for (_, m) in renames), reverse=True):
                ops.pop(m)
            commit.invalidate_pathset_cache()
        return rename_count
    def path_walk(self, selection, hook=lambda path: path):
        """Apply a hook to all paths, returning the set of modified paths.

        selection is an iterable of event indices; only commits among
        them are touched.  The hook maps an old path to a new one.  The
        result is a sorted list of the new paths that differ from the
        paths they replaced.
        """
        touched = set()
        def rewrite(old):
            # Run the hook on one path and note the result if it changed.
            new = hook(old)
            if new != old:
                touched.add(new)
            return new
        for index in selection:
            candidate = self.events[index]
            if not isinstance(candidate, Commit):
                continue
            for op in candidate.operations():
                if op.op in (b"M", b"D"):
                    op.path = rewrite(op.path)
                elif op.op in (b"R", b"C"):
                    op.source = rewrite(op.source)
                    op.target = rewrite(op.target)
            candidate.invalidate_pathset_cache()
        return sorted(touched)
    def split_commit(self, where, splitfunc):
        """Split the commit at event index `where` in two.

        splitfunc receives the commit's fileops and returns a pair of
        fileop lists.  If either list is empty nothing is changed and
        False is returned.  Otherwise a clone is inserted right after the
        commit, becomes its only child and adopts its former children;
        the original keeps the first list, the clone gets the second, and
        True is returned.
        """
        original = self.events[where]
        # Fileop split happens here
        (kept, moved) = splitfunc(original.operations())
        if not (kept and moved):
            return False
        self.events.insert(where+1, original.clone())
        self.declare_sequence_mutation(b"commit split")
        twin = self.events[where+1]
        # need a new mark
        assert(original.mark == twin.mark)
        original.splits = 1 if original.splits is None else original.splits + 1
        twin.set_mark(b"%s.%s" % (original.mark, original.splits))
        self.invalidate_object_map()
        # Fix up parent/child relationships
        for child in list(original.children()):
            child.replace_parent(original, twin)
        twin.set_parents([original])
        # and then finalize the ops
        twin.set_operations(moved)
        twin.invalidate_pathset_cache()
        original.set_operations(kept)
        original.invalidate_pathset_cache()
        return True
    def split_commit_by_index(self, where, splitpoint):
        """Split the commit at event index `where` at a fileop index.

        The original (parent) commit keeps the fileops before splitpoint
        and the new child commit receives those from splitpoint onward,
        so the fileops are still applied in their original order.
        Returns what split_commit() returns.
        """
        return self.split_commit(where,
                                 lambda ops: (ops[:splitpoint],
                                              ops[splitpoint:]))
    def split_commit_by_prefix(self, where, prefix):
        """Split the commit at event index `where` by path prefix.

        Fileops whose path (or, when there is no path, whose target)
        starts with prefix go to the new child commit; every other fileop
        stays with the original.  Returns what split_commit() returns.
        """
        def matches(op):
            # Same test for both halves, so fileops without a path
            # cannot crash the split or fall out of both lists.
            where_to = op.path or op.target
            return bool(where_to) and where_to.startswith(prefix)
        return self.split_commit(where,
                                 lambda ops: ([op for op in ops if not matches(op)],
                                              [op for op in ops if matches(op)]))

    # Sequence emulation methods
    def __len__(self):
        "Return the number of events in the event list."
        return len(self.events)
    def __getitem__(self, i):
        "Return the event at index i of the event list."
        return self.events[i]
    def __setitem__(self, i, v):
        "Replace the event at index i of the event list with v."
        self.events[i] = v
    def iterevents(self, indices=None, types=None):
        """Iterate over events matching conditions.

        Yields (index, event) pairs.  With indices=None every event is
        considered, otherwise only those at the given indices.  If types
        is given, only events that are instances of it are yielded.
        Note that indices is traversed more than once when types is set.
        """
        if indices is None:
            def chosen():
                return self.events
            paired = enumerate(self.events)
        else:
            def chosen():
                return itertools.imap(self.events.__getitem__, indices)
            paired = itertools.izip(indices, chosen())
        if types is None:
            return paired
        # One boolean per candidate event, consumed in step with the pairs.
        flags = itertools.imap(isinstance, chosen(), itertools.repeat(types))
        return itertools.compress(paired, flags)
    def __del__(self):
        "Close the seek stream, if there is one, when the object is collected."
        stream = self.seekstream
        if stream:
            stream.close()

def read_repo(source, options, preferred):
    """Read a repository using fast-import.

    Looks under the directory `source` for the subdirectory of exactly
    one known VCS type or extractor (restricted to `preferred` when that
    is given), then loads it into a new Repository, either by running the
    VCS's exporter and parsing its output or by running the extractor.
    For a VCS the author map, the legacy map, the preserve set and (for
    git) a cvs-revisions map are also read when present.

    Returns the new Repository.  Raises Recoverable when no repository,
    or more than one, is found under `source`.  The working directory is
    changed to `source` during the read and restored afterwards.
    """
    if debug_enable(DEBUG_SHUFFLE):
        if preferred:
            announce(b"looking for a %s repo..." % preferred.name)
        else:
            announce(b"reposurgeon: looking for any repo at %s..." % \
                     os.path.abspath(source))
    hitcount = 0
    extractor = vcs = None
    for possible in vcstypes:
        if preferred and possible.name != preferred.name:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir) and possible.exporter:
            vcs = possible
            hitcount += 1
    for possible in extractors:
        if preferred and possible.name not in [preferred.name, preferred.name + "-extractor"]:
            continue
        subdir = os.path.join(source, possible.subdirectory)
        if os.path.exists(subdir) and os.path.isdir(subdir):
            if possible.visible or preferred \
                   and possible.name == preferred.name:
                extractor = possible
                hitcount += 1
    if hitcount == 0:
        raise Recoverable(b"couldn't find a repo under %s" % os.path.relpath(source))
    elif hitcount > 1:
        raise Recoverable(b"too many repos under %s" % os.path.relpath(source))
    elif debug_enable(DEBUG_SHUFFLE):
        announce(b"found %s repository" % getattr(vcs or extractor, b"name"))
    repo = Repository()
    repo.sourcedir = source
    if vcs:
        repo.hint(vcs.name, strong=True)
        # Take a copy: the preserve set is extended in place below, and
        # that must not leak into the VCS type's own preserve set.
        repo.preserve_set = set(vcs.preserve or ())
        showprogress = (verbose > 0) and not "export-progress" in repo.export_style()
        context = {b"basename": os.path.basename(repo.sourcedir)}
    # Bound before the try so the finally clause can always restore it.
    here = os.getcwd()
    try:
        os.chdir(repo.sourcedir)
        # We found a matching VCS type
        if vcs:
            if "%(tempfile)s" in repo.vcs.exporter:
                # Created outside the try so the cleanup below never
                # refers to names that were not bound.
                (tfdesc, tfname) = tempfile.mkstemp()
                try:
                    assert tfdesc > -1    # pacify pylint
                    context[b"tempfile"] = tfname
                    do_or_die(repo.vcs.exporter % context, b"repository export")
                    with open(tfname, b"rb") as tp:
                        repo.fast_import(tp, options, progress=showprogress)
                finally:
                    os.remove(tfname)
                    os.close(tfdesc)
            else:
                with popen_or_die(repo.vcs.exporter % context, b"repository export") as tp:
                    repo.fast_import(tp, options, progress=showprogress)
            if repo.vcs.authormap and os.path.exists(repo.vcs.authormap):
                announce(b"reading author map.")
                with open(repo.vcs.authormap, b"rb") as fp:
                    repo.read_authormap(range(len(repo.events)),fp)
            # NOTE(review): rebuild_repo() writes "legacy-map" (hyphen)
            # while this reads "legacy_map" (underscore) - confirm which
            # spelling is intended.
            legacy_map = os.path.join(vcs.subdirectory, b"legacy_map")
            if os.path.exists(legacy_map):
                with open(legacy_map, b"rb") as rfp:
                    repo.read_legacymap(rfp)
            if vcs.lister:
                def fileset(exclude):
                    # All files under the current directory, skipping
                    # the excluded directory names.
                    allfiles = []
                    for root, dirs, files in os.walk(b"."):
                        allfiles += [os.path.join(root, name)[2:] for name in files]
                        for exdir in exclude:
                            if exdir in dirs:
                                dirs.remove(exdir)
                    return set(allfiles)
                with popen_or_die(vcs.lister) as fp:
                    repofiles = set(fp.read().split())
                allfiles = fileset([vcs.subdirectory] + glob.glob(b".rs*"))
                # Anything present but not listed by the VCS is preserved.
                repo.preserve_set |= (allfiles - repofiles)
            # kluge: git-specific hook
            if repo.vcs.name == b"git":
                if os.path.exists(b".git/cvs-revisions"):
                    announce(b"reading cvs-revisions map.")
                    pathrev_to_hash = {}
                    # Pass 1: Get git's path/revision to hash mapping
                    with open(b".git/cvs-revisions", b"rb") as cfp:
                        for line in cfp:
                            (path, rev, hashv) = line.split()
                            pathrev_to_hash[(path, rev)] = hashv
                    # Pass 2: get git's hash to (time,person) mapping
                    hash_to_action = {}
                    # Remembers which hash first claimed each stamp, so
                    # that an ambiguous stamp can be withdrawn entirely.
                    stamp_to_hash = {}
                    with popen_or_die(b"git log --all --format='%H %ct %ce'", mode=b"rb") as fp:
                        for line in fp:
                            (hashv, ctime, cperson) = line.split()
                            stamp = (int(ctime), cperson)
                            if stamp in stamp_to_hash:
                                complain(b"more than one commit matches %s!%s (%s)" \
                                         % (rfc3339(int(ctime)), cperson, hashv))
                                # The stamp no longer identifies a single
                                # commit: forget the hash recorded for it.
                                hash_to_action.pop(stamp_to_hash[stamp], None)
                            else:
                                hash_to_action[hashv] = stamp
                                stamp_to_hash[stamp] = hashv
                        # Pass 3: build a (time,person) to commit mapping
                        action_to_mark = {}
                        for commit in repo.commits():
                            action_to_mark[(commit.committer.date.timestamp, commit.committer.email)] = commit
                        # Pass 4: use it to set commit properties
                        for ((path, rev), value) in pathrev_to_hash.iteritems():
                            if value in hash_to_action:
                                (ctime, cperson) = hash_to_action[value]
                                action_to_mark[(ctime, cperson)].legacy_id = "CVS:%s:%s" % (path, rev)
                        del pathrev_to_hash
                        del hash_to_action
                        del stamp_to_hash
        # We found a matching custom extractor
        if extractor:
            repo.stronghint=True
            streamer = RepoStreamer(extractor)
            streamer.extract(repo, progress=verbose>0)
    finally:
        os.chdir(here)
    return repo

class CriticalRegion:
    """Encapsulate operations to try and make us un-interruptible.

    Used as a context manager: on entry every signal below NSIG except
    SIGKILL and SIGSTOP is set to be ignored, and on exit the handlers
    that were in force before are put back.
    """
    # This number is magic. Python sets a much higher signal.NSIG
    # value, but under Linux the signal calls start to trigger
    # runtime errors at this value and above.
    NSIG = 32
    def __init__(self):
        self.handlers = None    # Pacifies pylint
    def __enter__(self):
        "Begin critical region."
        if debug_enable(DEBUG_COMMANDS):
            complain(b"critical region begins...")
        # Alas that we lack sigblock support
        self.handlers = [None]*(CriticalRegion.NSIG+1)
        for sig in range(1, CriticalRegion.NSIG):
            if not sig in (signal.SIGKILL, signal.SIGSTOP):
                self.handlers[sig] = signal.signal(sig, signal.SIG_IGN)
        return self
    def __exit__(self, extype_unused, value_unused, traceback_unused):
        "End critical region."
        for sig in range(1, CriticalRegion.NSIG):
            if not sig in (signal.SIGKILL, signal.SIGSTOP):
                previous = self.handlers[sig]
                # signal.signal() reports a handler that was not
                # installed from Python as None, and refuses None as a
                # handler; skip those so the remaining signals are still
                # restored.
                if previous is not None:
                    signal.signal(sig, previous)
        if debug_enable(DEBUG_COMMANDS):
            complain(b"critical region ends.")
        # Never suppress an exception raised inside the region.
        return False

def rebuild_repo(repo, target, options, preferred):
    """Rebuild a repository from the captured state.

    The repository is exported through the importer of `preferred` (or,
    failing that, of the repository's own VCS type) into `target`, which
    defaults to the repository's source directory.  If the target already
    exists the rebuild is done in a staging directory beside it; on
    success the old contents are moved to a numbered backup directory,
    the staged contents are moved in, and the files in the repository's
    preserve set are copied back from the backup.

    Raises Recoverable when there is no destination, no usable VCS type,
    or a needed directory or the legacy map cannot be created.
    """
    if not target and repo.sourcedir:
        target = repo.sourcedir
    if target:
        target = os.path.abspath(target)
    else:
        raise Recoverable(b"no default destination for rebuild")
    vcs = preferred or repo.vcs
    if not vcs:
        raise Recoverable(b"please prefer a repo type first")
    if not hasattr(vcs, b"exporter") or vcs.importer is None:
        # Name the type actually selected; preferred may be None here.
        raise Recoverable(b"%s repositories are supported for read only." \
                          % vcs.name)

    if not os.path.join(b"refs", b"heads", b"master") in repo.branchset():
        complain(b"repository has no branch named master. git will have no HEAD commit after the import; consider using the branch command to rename one of your branches to master.")

    # Create a new empty directory to do the rebuild in
    if not os.path.exists(target):
        staging = target
        try:
            os.mkdir(target)
        except OSError:
            raise Recoverable(b"target directory creation failed")
    else:
        staging = target + b"-stage" + str(os.getpid())
        assert(os.path.isabs(target) and os.path.isabs(staging))
        try:
            os.mkdir(staging)
        except OSError:
            raise Recoverable(b"staging directory creation failed")

    # Try the rebuild in the empty staging directory
    here = os.getcwd()
    try:
        os.chdir(staging)
        if vcs.initializer:
            do_or_die(vcs.initializer, b"repository initialization")
        parameters = {b"basename": os.path.basename(target)}
        if "%(tempfile)s" in vcs.importer:
            # Created outside the try so the cleanup below never refers
            # to names that were not bound.
            (tfdesc, tfname) = tempfile.mkstemp()
            try:
                assert tfdesc > -1    # pacify pylint
                with open(tfname, b"wb") as tp:
                    repo.fast_export(range(len(repo)), tp, options, progress=verbose>0, target=preferred)
                parameters[b"tempfile"] = tfname
                do_or_die(vcs.importer % parameters, b"import")
            finally:
                os.remove(tfname)
                os.close(tfdesc)
        else:
            with popen_or_die(vcs.importer % parameters, b"import", mode=b"wb") as tp:
                repo.fast_export(range(len(repo)), tp, options,
                                 target=preferred,
                                 progress=verbose>0)
        if repo.write_legacy:
            try:
                legacyfile = os.path.join(vcs.subdirectory, b"legacy-map")
                with open(legacyfile, b"wb") as wfp:
                    repo.write_legacymap(wfp)
            except IOError:
                raise Recoverable(b"legacy-map file %s could not be written." \
                                  % legacyfile)

        do_or_die(vcs.checkout, b"repository_checkout")
        if verbose:
            announce(b"rebuild is complete.")

        os.chdir(here)
        if staging != target:
            # Rebuild succeeded - make an empty backup directory
            backupcount = 1
            while True:
                savedir = target + (b".~%d~" % backupcount)
                if os.path.exists(savedir):
                    backupcount += 1
                else:
                    break
            # The renames below must not depend on the working directory.
            assert os.path.isabs(savedir)
            os.mkdir(savedir)

            # This is a critical region.  Ignore all signals until we're done.
            with CriticalRegion():
                # Move the unmodified repo contents in target to the
                # backup directory.  Then move the staging contents to the
                # target directory.  Finally, restore designated files
                # from backup to target.
                for sub in os.listdir(target):
                    os.rename(os.path.join(target, sub),
                              os.path.join(savedir, sub))
                if verbose:
                    announce(b"repo backed up to %s." % os.path.relpath(savedir))
                for sub in os.listdir(staging):
                    os.rename(os.path.join(staging, sub),
                              os.path.join(target, sub))
                if verbose:
                    announce(b"modified repo moved to %s." % os.path.relpath(target))
            if repo.preserve_set:
                for sub in repo.preserve_set:
                    src = os.path.join(savedir, sub)
                    dst = os.path.join(target, sub)
                    if os.path.exists(src):
                        # A preserved directory replaces whatever the
                        # rebuild put in its place.
                        if os.path.exists(dst) and os.path.isdir(dst):
                            shutil.rmtree(dst)
                        if os.path.isdir(src):
                            shutil.copytree(src, dst)
                        else:
                            shutil.copy2(src, dst)
                if verbose:
                    announce(b"preserved files restored.")
            elif verbose:
                announce(b"no preservations.")
    finally:
        os.chdir(here)
        if staging != target:
            nuke(staging, b"reposurgeon: removing staging directory")

def do_or_die(dcmd, legend=b""):
    """Either execute a command or raise a fatal exception.

    dcmd is run through the shell.  Fatal is raised if the child exits
    with a nonzero status, is killed by a signal, or cannot be started.
    legend, when given, is appended to the diagnostic messages.
    """
    suffix = (" "  + legend) if legend else legend
    if debug_enable(DEBUG_COMMANDS):
        announce(b"executing '%s'%s" % (dcmd, suffix))
    try:
        status = subprocess.call(dcmd, shell=True)
        # A negative status is the negated number of the killing signal.
        if status < 0:
            raise Fatal(b"child was terminated by signal %d." % -status)
        if status != 0:
            raise Fatal(b"child returned %d." % status)
    except (OSError, IOError) as why:
        raise Fatal(b"execution of %s%s failed: %s" % (dcmd, suffix, why))

class popen_or_die:
    """Read or write from a subordinate process.

    A context manager around os.popen(): entering it yields the pipe,
    leaving it closes the pipe.  Fatal is raised if the command cannot be
    started, or if it reports an error status on close while no other
    exception is in flight.
    """
    def __init__(self, command, legend=b"", mode=b"rb"):
        assert mode in (b"rb", b"wb")
        self.command = command
        self.mode = mode
        # A non-empty legend gets a leading space so it can be appended
        # directly to messages.
        self.legend = (b" "  + legend) if legend else legend
        self.fp = None
    def __enter__(self):
        if debug_enable(DEBUG_COMMANDS):
            if self.mode == "rb":
                template = b"%s: reading from '%s'%s"
            else:
                template = b"%s: writing to '%s'%s"
            announce(template % (rfc3339(time.time()), self.command, self.legend))
        try:
            self.fp = os.popen(self.command, self.mode)
        except (OSError, IOError) as oe:
            raise Fatal(b"execution of %s%s failed: %s" \
                                 % (self.command, self.legend, oe))
        return self.fp
    def __exit__(self, extype, value, traceback_unused):
        if extype and verbose:
            complain(b"fatal exception in popen_or_die.")
        # Always close the pipe; only report its status when no other
        # exception is already propagating.
        status = self.fp.close()
        if status is not None and not extype:
            raise Fatal(b"%s%s returned error." % (self.command, self.legend))
        return False

class Recoverable(Exception):
    "Exception carrying a message for the user in its msg attribute."
    def __init__(self, msg):
        # Hand the message to the base class as well, so that str() and
        # tracebacks of an uncaught instance are not empty.
        Exception.__init__(self, msg)
        self.msg = msg

class RepositoryList:
    "A repository list with selection and access by name."
    def __init__(self):
        self.repo = None
        self.repolist = []
        self.cut_index = None
    def chosen(self):
        "Return the currently chosen repository, or None if there is none."
        return self.repo
    def choose(self, repo):
        "Make repo the currently chosen repository."
        self.repo = repo
    def unchoose(self):
        "Leave no repository chosen."
        self.repo = None
    def reponames(self):
        "Return a list of the names of all repositories."
        return [r.name for r in self.repolist]
    def uniquify(self, name):
        "Uniquify a repo name in the repo list."
        if name.endswith(b".fi"):
            name = name[:-3]
        elif name.endswith(b".svn"):
            name = name[:-4]
        if name not in self.reponames():
            return name
        else:
            # repo "foo" is #1
            seq = 2
            while name + str(seq) in self.reponames():
                seq += 1
            return name + str(seq)
    def repo_by_name(self, name):
        "Retrieve a repo by name."
        try:
            return self.repolist[self.reponames().index(name)]
        except ValueError:
            raise Recoverable("no repository named %s is loaded." % name)
    def remove_by_name(self, name):
        "Remove a repo by name."
        if self.repo and self.repo.name == name:
            self.unchoose()
        self.repolist.pop(self.reponames().index(name))
    def cut_conflict(self, early, late):
        """Apply a graph-coloring algorithm to see if the repo can be split here.

        Removes the parent link from late to early (remembering its
        position in self.cut_index), colors early b"early" and late
        b"late", and then spreads colors along the remaining parent and
        child links of the chosen repository's commits.  Returns True if
        two linked commits end up with different colors, False otherwise.
        The link and the colors are left as they are; cut_clear() undoes them.
        """
        # Remember where the link was so it can be put back in place.
        self.cut_index = late.parent_marks().index(early.mark)
        late.remove_parent(early)
        def do_color(commit, color):
            # Color a commit and every blob its modify fileops refer to.
            commit.color = color
            for fileop in commit.operations():
                if fileop.op == b"M" and fileop.ref != b"inline":
                    blob = self.repo.find(fileop.ref)
                    assert isinstance(self.repo[blob], Blob)
                    self.repo[blob].colors.append(color)
        do_color(early, b"early")
        do_color(late, b"late")
        conflict = False
        keepgoing = True
        # Sweep over the commits until a sweep colors nothing new or a
        # conflict is found.
        while keepgoing and not conflict:
            keepgoing = False
            for event in self.repo.commits():
                if event.color:
                    for neighbor in itertools.chain(event.parents(), event.children()):
                        if neighbor.color == None:
                            do_color(neighbor, event.color)
                            keepgoing = True
                            # Only leaves the neighbor loop; the sweep
                            # over commits carries on.
                            break
                        elif neighbor.color != event.color:
                            conflict = True
                            break
        return conflict
    def cut_clear(self, early, late):
        """Undo a cut operation and clear all colors.

        Puts early back among late's parents at the position recorded in
        self.cut_index, then resets the color and colors attributes of
        every event in the chosen repository that has them.
        """
        late.insert_parent(self.cut_index, early.mark)
        for item in self.repo:
            # Single-colored events.
            if hasattr(item, b"color"):
                item.color = None
            # Events that can carry several colors at once.
            if hasattr(item, b"colors"):
                item.colors = []
    def cut(self, early, late):
        """Attempt to topologically cut the selected repo.

        early and late are two commits of the chosen repository joined by
        a parent link.  If removing that link does not separate the
        commit graph cleanly, the link is restored and False is returned.
        Otherwise the chosen repository is replaced in the repo list by
        two new ones named after it with "-early" and "-late" appended,
        and True is returned.
        """
        if self.cut_conflict(early, late):
            self.cut_clear(early, late)
            return False
        # Repo can be split, so we need to color tags
        # NOTE(review): this scans every event for every tag.
        for t in self.repo.events:
            if isinstance(t, Tag):
                for c in self.repo.events:
                    if isinstance(c, Commit):
                        if c is t.target:
                            t.color = c.color
        # Front events go with early segment, they'll be copied to late one.
        for event in self.repo.front_events():
            event.color = "early"
        assert all(hasattr(x, b"color") or hasattr(x, b"colors") or isinstance(x, Reset) for x in self.repo)
        # Resets are tricky.  One may have both colors.
        # Blobs can have both colors too, through references in
        # commits on both sides of the cut, but we took care
        # of that earlier.
        # Record which branches have commits on each side of the cut.
        trackbranches = {b"early": set(), b"late": set()}
        for commit in self.repo.commits():
            if commit.color is None:
                complain(b"%s is uncolored!" % commit.mark)
            else:
                trackbranches[commit.color].add(commit.branch)
        # Now it's time to do the actual partitioning
        # From here on early and late name the two new repositories, not
        # the commits that were passed in.
        early = Repository(self.repo.name + b"-early")
        os.mkdir(early.subdir())
        late = Repository(self.repo.name + b"-late")
        os.mkdir(late.subdir())
        for event in self.repo:
            if isinstance(event, Reset):
                # A reset goes to each side that has commits on its branch.
                if event.ref in trackbranches[b"early"]:
                    early.addEvent(copy.copy(event))
                if event.ref in trackbranches[b"late"]:
                    late.addEvent(copy.copy(event))
            elif isinstance(event, Blob):
                # A blob is cloned into each side that refers to it.
                if "early" in event.colors:
                    early.addEvent(event.clone(early))
                if "late" in event.colors:
                    late.addEvent(event.clone(late))
            else:
                # Everything else is moved to the side of its one color.
                if event.color == b"early":
                    if hasattr(event, b"moveto"):
                        event.moveto(early)
                    early.addEvent(event)
                elif event.color == b"late":
                    if hasattr(event, b"moveto"):
                        event.moveto(late)
                    late.addEvent(event)
                else:
                    # TODO: Someday, color passthroughs that aren't fronted.
                    raise Fatal(b"coloring algorithm failed on %s" % event)
        # Options and features may need to be copied to the late fragment.
        late.events = copy.copy(early.front_events()) + late.events
        late.declare_sequence_mutation(b"cut operation")
        # Add the split results to the repo list.
        self.repolist.append(early)
        self.repolist.append(late)
        self.repo.cleanup()
        self.remove_by_name(self.repo.name)
        return True
    def unite(self, factors, options):
        """Unite multiple repos into a union repo.

        factors is a list of loaded repositories; it is sorted in place
        by earliest().  Their events are absorbed into a new repository
        named by joining the factor names with "+", the factors are
        removed from the repo list, and the root of every factor but the
        first is given a parent in the union.  With "--prune" in options
        each such root first deletes every path in its new parent's
        manifest.  The union is added to the repo list and chosen.
        """
        factors.sort(key=operator.methodcaller(b"earliest"))
        roots = [x.earliest_commit() for x in factors]
        union = Repository(b"+".join(r.name for r in factors))
        os.mkdir(union.subdir())
        # Uniquify from the latest factor back to the earliest, passing
        # the persist state along.
        factors.reverse()
        persist = {}
        for factor in factors:
            persist = factor.uniquify(factor.name, persist)
        factors.reverse()
        for factor in factors:
            union.absorb(factor)
            self.remove_by_name(factor.name)
        # Renumber all events
        union.renumber()
        # Sort out the root grafts. The way we used to do this
        # involved sorting the union commits by timestamp, but this
        # fails because in real-world repos timestamp order may not
        # coincide with mark order - leading to "mark not defined"
        # errors from the importer at rebuild time. Instead we graft
        # each root just after the last commit in the dump sequence
        # with a date prior to it.  This method gives less intuitive
        # results, but at least means we never need to reorder
        # commits.
        for root in roots[1:]:
            most_recent = None
            def predicate(event):
                # most_recent is still None while takewhile() below is
                # running, so only the first test has any effect then.
                return root.when() > event.when() \
                        or (most_recent and event.when() > most_recent.when())
            # Get last commit such that it and all before satisfy predicate()
            # Never raises IndexError since union.earliest_commit() is root[0]
            # which satisfies predicate() thanks to factors sorting.
            most_recent = collections.deque(
                    itertools.takewhile(predicate, union.commits()),
                    maxlen = 1).pop()
            if most_recent.mark is None:
                # This should never happen either.
                raise Fatal(b"can't link to commit with no mark")
            root.add_parent(most_recent.mark)
            # We may not want files from the ancestral stock to persist
            # in the grafted branch unless they have modify ops in the branch
            # root.
            if "--prune" in options:
                deletes = []
                for path in most_recent.manifest():
                    # The fileop belongs to the union repository, not to
                    # this repository list.
                    fileop = FileOp(union)
                    fileop.construct(b"D", path)
                    deletes.append(fileop)
                root.set_operations(deletes + root.operations())
                root.canonicalize()
        # Put the result on the load list
        self.repolist.append(union)
        self.choose(union)
    def expunge(self, selection, matchers):
        """Expunge a set of files from the commits in the selection set.

        selection is a sequence of event indices into self.repo.
        matchers is a list of path tokens: a token delimited by slashes
        is taken as a regular expression, any other token as a literal
        path that must match in full (see digest() below).  The list is
        modified in place while renames and copies are followed.

        Matching fileops are removed from the selected events.  The
        removed M and D fileops (with the blobs the M ops refer to) are
        collected into a new repository named "<name>-expunges", which is
        appended to self.repolist.  Blobs that are no longer referenced
        are dropped from self.repo and emptied commits are tagified.

        Raises Recoverable if the combined regular expression is invalid.
        """
        def digest(toklist):
            # Combine all tokens into a single alternation regexp.
            digested = []
            for s in toklist:
                if s.startswith(b'/') and s.endswith(b'/'):
                    digested.append(b"(?:" + s[1:-1] + b")")
                else:
                    digested.append(b"^" + re.escape(s) + b"$")
            return re.compile(b"|".join(digested))
        try:
            # First pass: compute fileop deletions
            # alterations gets one list of fileop indices per selected event,
            # in the same order as selection (they are zipped together below).
            alterations = []
            expunge = digest(matchers)
            for ei in selection:
                event = self.repo[ei]
                deletia = []
                if hasattr(event, b"fileops"):
                    for (i, fileop) in enumerate(event.operations()):
                        if debug_enable(DEBUG_DELETE):
                            print(str(fileop))
                        if fileop.op in b"DM":
                            if expunge.search(fileop.path):
                                deletia.append(i)
                        elif fileop.op in b"RC":
                            # Remember which side(s) matched; the second
                            # pass reads these attributes back.
                            fileop.sourcedelete = expunge.search(fileop.source)
                            fileop.targetdelete = expunge.search(fileop.target)
                            if fileop.sourcedelete:
                                deletia.append(i)
                                announce(b"following %s of %s to %s" %
                                         (fileop.op,
                                          fileop.source,
                                          fileop.target))
                                # NOTE(review): the tokens removed/appended
                                # here and below carry their own ^...$
                                # anchors, but digest() escapes and
                                # re-anchors every token that is not wrapped
                                # in slashes, so these entries look like they
                                # can never match - confirm intent.
                                if fileop.op == b"R":
                                    try:
                                        matchers.remove(b"^" + fileop.source + b"$")
                                    except ValueError:
                                        pass
                                matchers.append(b"^" + fileop.target + b"$")
                                expunge = digest(matchers)
                            elif fileop.targetdelete:
                                # Only the target matched: a rename becomes
                                # a delete of its source in place, a copy is
                                # simply scheduled for removal.
                                if fileop.op == b"R":
                                    fileop.op = b"D"
                                elif fileop.op == b"C":
                                    deletia.append(i)
                                matchers.append(b"^" + fileop.target + b"$")
                                expunge = digest(matchers)
                alterations.append(deletia)
        except re.error:
            raise Recoverable(b"you confused the regexp processor!")
        # Second pass: perform actual fileop expunges
        expunged = Repository(self.repo.name + b"-expunges")
        expunged.seekstream = self.repo.seekstream
        expunged.makedir()
        # deletehook is a scratch attribute: it links an event in self.repo
        # to its clone in the expunged repository, if there is one.
        for event in self.repo:
            event.deletehook = None
        for (ei, deletia) in zip(selection, alterations):
            if not deletia: continue
            event = self.repo[ei]
            keepers = []
            blobs = []
            for i in deletia:
                fileop = event.operations()[i]
                if fileop.op == b'D':
                    keepers.append(fileop)
                    if verbose:
                        announce(b"at %d, expunging D %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op == b'M':
                    keepers.append(fileop)
                    if fileop.ref != b'inline':
                        bi = self.repo.find(fileop.ref)
                        blob = self.repo[bi]
                        assert(isinstance(blob, Blob))
                        blobs.append(blob)
                    if verbose:
                        announce(b"at %d, expunging M %s" \
                                 % (ei+1, fileop.path))
                elif fileop.op in (b"R", b"C"):
                    assert(fileop.sourcedelete or fileop.targetdelete)
                    # A rename/copy is kept for the expunged repository only
                    # when both of its ends matched.
                    if fileop.sourcedelete and fileop.targetdelete:
                        keepers.append(fileop)
            deletia = set(deletia) # To speed up the following
            event.set_operations([op for (i, op) in enumerate(event.operations())
                                  if i not in deletia])
            event.invalidate_pathset_cache()
            # If there are any keeper fileops, hang them them and
            # their blobs on keeps, cloning the commit() for them.
            if keepers:
                newevent = event.clone(expunged)
                newevent.set_operations(keepers)
                newevent.invalidate_pathset_cache()
                for blob in blobs:
                    blob.deletehook = blob.clone(expunged)
                event.deletehook = newevent
        # Build the new repo and hook it into the load list
        expunged.events = copy.copy(self.repo.front_events())
        expunged.declare_sequence_mutation(b"expunge operation")
        expunged_branches = expunged.branchset()
        for event in self.repo:
            if event.deletehook:
                expunged.addEvent(event.deletehook)
                event.deletehook = None
            elif isinstance(event, Reset):
                # A reset with a target follows that target into the
                # expunged repository; a targetless reset is copied only
                # if its ref is in expunged_branches.
                if event.target is not None:
                    if event.target.deletehook:
                        expunged.addEvent(copy.deepcopy(event))
                elif isinstance(event, Reset) and event.ref in expunged_branches:
                    newreset = copy.copy(event)
                    newreset.repo = expunged
                    expunged.addEvent(newreset)
            elif isinstance(event, Tag) and \
                    event.target is not None and \
                    event.target.deletehook:
                expunged.addEvent(copy.deepcopy(event))
        # Remove the scratch attribute from every event in both repositories.
        for event in itertools.chain(self.repo.events, expunged.events):
            if hasattr(event, b"deletehook"):
                delattr(event, b"deletehook")
        expunged_marks = set(event.mark for event in expunged.events if hasattr(event, b"mark"))
        for event in expunged.events:
            if hasattr(event, b"parents"):
                # Parents still are Commits in the non-expunged repository
                # We use set_parent_marks so that the correct parents are
                # searched in the expunged repository.
                event.set_parent_marks(m for m in event.parent_marks()
                                         if m in expunged_marks)
        keeper_marks = set(event.mark for event in self.repo.events if hasattr(event, b"mark"))
        for event in self.repo.events:
            if hasattr(event, b"parents"):
                event.set_parents([e for e in event.parents() if e.mark in keeper_marks])
        # Count how many M fileops in the surviving commits still refer to
        # each blob mark.
        backreferences = collections.Counter()
        for event in self.repo.events:
            if isinstance(event, Commit):
                for fileop in event.operations():
                    if fileop.op == b'M':
                        backreferences[fileop.ref] += 1
        # Now remove commits that no longer have fileops, and released blobs.
        # Announce events that will be deleted.
        if debug_enable(DEBUG_DELETE):
            to_delete = [i+1 for i,e in enumerate(self.repo.events)
                    if (isinstance(e, Blob) and not backreferences[e.mark])
                    or (isinstance(e, Commit) and not e.operations())]
            if not to_delete:
                announce(b"deletion set is empty.")
            else:
                announce(b"deleting blobs and empty commits %s" % to_delete)
            del to_delete
        # First delete the blobs.
        self.repo.events = [e for e in self.repo.events
                              if (not isinstance(e, Blob))
                              or backreferences[e.mark]]
        # Then tagify empty commits.
        self.repo.tagify_empty(canonicalize = False)
        # And tell we changed the manifests and the event sequence.
        self.repo.invalidate_manifests()
        self.repo.declare_sequence_mutation(b"expunge cleanup")
        # At last, add the expunged repository to the loaded list.
        self.repolist.append(expunged)

class RepoSurgeon(cmd.Cmd, RepositoryList):
    "Repository surgeon command interpreter."
    # Table of (option name, help text) pairs.  __init__ seeds
    # global_options with every name listed here set to False.
    OptionFlags = (
        (b"canonicalize", b"""\
    If set, import stream reads and mailbox_in and edit will canonicalize
comments by replacing CR-LF with LF, stripping leading and trailing whitespace,
and then appending a LF.
"""),
        (b"compressblobs", b"""\
    Use compression for on-disk copies of blobs. Accepts an increase
in repository read and write time in order to reduce the amount of
disk space required while editing; this may be useful for large
repositories. No effect if the edit input was a dump stream; in that
case, reposurgeon doesn't make on-disk blob copies at all (it points
into sections of the input stream instead).

"""),
        )
    # Matches (with .match) text whose first line is immediately followed
    # by another non-empty line; the =L visibility letter applies it to
    # commit comments.
    unclean = re.compile(b"[^\n]*\n[^\n]")
    class LineParse:
        """Parse a command line implementing shell-like syntax.

        Used as a context manager.  On entry the line is scanned for an
        input redirect (<file), an output redirect (>file or >>file to
        append), --options, and a lone "-"; each recognized piece is
        removed from self.line.  Redirect targets other than "-" are
        opened and exposed as self.stdin / self.stdout; they are closed
        again on exit.

        capabilities is a list that may contain "stdin" and/or "stdout";
        a redirect the command did not declare raises Recoverable.
        """
        def __init__(self, line, capabilities=None):
            self.line = line
            self.capabilities = capabilities or []
            self.stdin = sys.stdin
            self.infile = None
            self.stdout = sys.stdout
            self.outfile = None
            self.redirected = False
            self.options = set([])
            # Files we opened ourselves and therefore must close.
            self.closem = []
        def __enter__(self):
            # The with statement does not call __exit__ when __enter__
            # raises, so a file opened for one redirect would leak if a
            # later step of the parse failed.  Close it ourselves.
            parsed = False
            try:
                self._parse()
                parsed = True
            finally:
                if not parsed:
                    self._close_files()
            return self
        def _parse(self):
            "Strip redirects and options from self.line, opening files."
            # Input redirection
            m = re.search(r"<\S+", self.line)
            if m:
                if "stdin" not in self.capabilities:
                    raise Recoverable(b"no support for < redirection")
                self.infile = m.group(0)[1:]
                if self.infile and self.infile != b'-':
                    try:
                        self.stdin = open(self.infile, b"r")
                        self.closem.append(self.stdin)
                    except (IOError, OSError):
                        raise Recoverable(b"can't open %s for read" \
                                          % self.infile)
                # The +1 also swallows the separator after the token.
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
                self.redirected = True
            # Output redirection
            m = re.search(r">>?\S+", self.line)
            if m:
                if "stdout" not in self.capabilities:
                    raise Recoverable(b"no support for > redirection")
                self.outfile = m.group(0)[m.group(0).count('>'):]
                if self.outfile and self.outfile != b'-':
                    if os.path.exists(self.outfile) and not os.path.isfile(self.outfile):
                        raise Recoverable(b"not a plain file")
                    try:
                        # ">>" appends, ">" truncates.
                        if m.group(0).count('>') > 1:
                            mode = b"ab"
                        else:
                            mode = b"wb"
                        self.stdout = open(self.outfile, mode)
                        self.closem.append(self.stdout)
                    except (IOError, OSError):
                        raise Recoverable(b"can't open %s for write" \
                                          % self.outfile)
                self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
                self.redirected = True
            # Options
            while True:
                m = re.search(r"--\S+", self.line)
                if not m:
                    break
                else:
                    self.options.add(m.group(0).strip())
                    self.line = self.line[:m.start(0)] + self.line[m.end(0)+1:]
            # Dash redirection
            if not self.redirected and self.line.strip() == b'-':
                if "stdin" not in self.capabilities and "stdout" not in self.capabilities:
                    raise Recoverable(b"no support for - redirection")
                else:
                    self.line = b""
                    self.redirected = True
            self.line = self.line.strip()
        def _close_files(self):
            "Close every file this parse opened."
            for fp in self.closem:
                fp.close()
        def __exit__(self, extype_unused, value_unused, traceback_unused):
            self._close_files()
        def tokens(self):
            "Return the argument token list after the parse for redirects."
            return self.line.split()
    def __init__(self):
        "Set up interpreter state and the default global options."
        cmd.Cmd.__init__(self)
        RepositoryList.__init__(self)
        # Console behaviour.
        self.use_rawinput = True
        self.echo = 0
        self.prompt = "reposurgeon% "
        self.prompt_format = "reposurgeon%% "
        # Per-session settings.
        self.preferred = None
        self.ignorename = None
        # Selection-parser state.
        self.selection = []
        self.line = b""
        # Command bookkeeping.
        self.history = []
        self.callstack = []
        self.definitions = {}
        self.profile_log = None
        self.capture = None
        self.start_time = time.time()
        # Every documented option flag starts out off.
        for (flagname, _helptext) in RepoSurgeon.OptionFlags:
            global_options[flagname] = False
        global_options[b'svn_branchify'] = [b'trunk', b'tags/*', b'branches/*', b'*']
        global_options[b'svn_branchify_mapping'] = []
    #
    # Housekeeping hooks.
    #
    def onecmd(self, line):
        """Execute one command, fielding interrupts for recoverable exceptions.

        Returns whatever cmd.Cmd.onecmd() returns (the handler's stop
        flag), so the result is not silently discarded.  If the command
        raises Recoverable the message is reported through complain() and
        None is returned.
        """
        try:
            return cmd.Cmd.onecmd(self, line)
        except Recoverable as e:
            complain(e.msg)
            return None
    def postcmd(self, unused, line):
        """Post-command hook: refresh the prompt and detect end of input.

        The prompt is rebuilt from self.prompt_format, which is
        %-formatted against a mapping with the single key "chosen" (the
        name of the chosen repository, or a false value if there is
        none).  Returns True when the line was "EOF", otherwise None.
        """
        assert unused is not []   # pacify pylint
        try:
            self.prompt = self.prompt_format % {"chosen":self.chosen() and self.chosen().name}
        except (ValueError, KeyError, TypeError):
            # A user-supplied format can fail in several ways: a stray %
            # raises ValueError, an unknown %(key)s raises KeyError, and a
            # positional or mistyped conversion raises TypeError.
            announce("bad prompt format - remember, literal % must be doubled.")
        if line == "EOF":
            return True
    def emptyline(self):
        "Do nothing when an empty line is entered."
        return None
    def precmd(self, line):
        "Pre-command hook."
        # While a capture is active every line is swallowed: a line
        # starting with "}" ends the capture, anything else is recorded.
        if self.capture is not None:
            if not line.startswith(b"}"):
                self.capture.append(line)
            else:
                self.capture = None
            return b""
        trimmed = line.rstrip()
        self.history.append(trimmed)
        if self.echo:
            sys.stdout.write(trimmed+b"\n")
        self.selection = None
        # Whole-line comments produce no command at all.
        if line.startswith(b"#"):
            return b""
        # Drop a trailing comment introduced by whitespace and '#'.
        line = re.compile(r"\s+#").split(line)[0]
        # This is the only place in the implementation that knows
        # whether the syntax is VSO or SVO.
        if not self.chosen():
            return line
        try:
            return self.set_selection_set(line)
        except Recoverable as e:
            complain(e.msg)
            return b""
    def do_shell(self, line):
        "Execute a shell command."
        # Push out our own pending output before the child writes.
        for stream in (sys.stdout, sys.stderr):
            stream.flush()
        status = os.system(line)
        if status:
            raise Recoverable(b"'shell %s' returned error." % line)
    def do_EOF(self, unused):
        "Terminate reposurgeon."
        assert unused is not None   # pacify pylint
        # Emit an empty line, then tell the command loop to stop.
        print(b"")
        return True
    def cleanup(self):
        "Tell all the repos we're holding to clean up."
        if debug_enable(DEBUG_SHUFFLE):
            announce(b"interpreter cleanup called.")
        # Each loaded repository does its own cleanup.
        for held in self.repolist:
            held.cleanup()
    def selected(self, types=None):
        "Iterate over the selection set."
        repo = self.chosen()
        return repo.iterevents(indices=self.selection, types=types)
    #
    # The selection-language parsing code starts here.
    #
    def set_selection_set(self, line):
        "Implement object-selection syntax."
        # Returns the line with the selection removed
        self.selection = None
        if not self.chosen():
            return line
        self.line = line
        # If the whole line is a known name, treat it as a <name> reference.
        try:
            is_name = self.chosen().named(self.line)
        except Recoverable:
            is_name = False
        if is_name:
            self.line = b"<" + self.line + b">"
        picked = list(self.eval_expression(set(self.chosen().all())))
        if self.line.lstrip() != line.lstrip():
            # TODO: We probably want to stop doing this
            picked.sort()
            self.selection = picked
        else:
            # Nothing was consumed, so no selection was given.
            self.selection = None
        return self.line.lstrip()
    def peek(self):
        "Return the first character of self.line without consuming it."
        # An empty line is handed back as-is.
        if not self.line:
            return self.line
        return self.line[0]
    def pop(self):
        "Consume and return the first character of self.line ('' if empty)."
        if self.line:
            head, self.line = self.line[0], self.line[1:]
            return head
        return ''
    def eval_expression(self, preselection):
        "Evaluate a disjunct followed by any number of '?' neighbor operators."
        if debug_enable(DEBUG_LEXER):
            announce("eval_expression(%s)" % self.line)
        self.line = self.line.lstrip()
        value = self.eval_disjunct(preselection)
        # Each trailing '?' widens the set computed so far.
        while self.peek() == '?':
            self.pop()
            additions = set()
            removals = set()
            for ei in value:
                event = self.chosen().events[ei]
                if isinstance(event, Commit):
                    # A commit pulls in its parents and children.
                    for parent in event.parents():
                        additions.add(self.chosen().find(parent.mark))
                    for child in event.children():
                        additions.add(self.chosen().find(child.mark))
                elif isinstance(event, Blob):
                    removals.add(ei) # Don't select the blob itself
                    # A blob is replaced by the preselected commits
                    # that have an M fileop referring to it.
                    for i in preselection:
                        candidate = self.chosen().events[i]
                        if not isinstance(candidate, Commit):
                            continue
                        if any(fileop.op == 'M' and fileop.ref==event.mark
                               for fileop in candidate.operations()):
                            additions.add(i)
                elif isinstance(event, (Tag, Reset)):
                    # Tags and resets pull in their target, if any.
                    if event.target:
                        additions.add(event.target.index())
            value |= additions
            value -= removals
        self.line = self.line.lstrip()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_expression(), left = %s" % (value, repr(self.line)))
        return value
    def eval_disjunct(self, preselection):
        "Evaluate a disjunctive expression (| has lowest precedence)"
        if debug_enable(DEBUG_LEXER):
            announce("eval_disjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        # Track what no alternative has claimed yet; each alternative is
        # evaluated only against these leftovers.
        remaining = set(preselection)
        while True:
            conjunct = self.eval_conjunct(remaining)
            if conjunct is None:
                break
            remaining -= conjunct
            self.line = self.line.lstrip()
            if self.peek() != '|':
                break
            self.pop()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_disjunct(), left = %s" % (conjunct, repr(self.line)))
        return preselection - remaining
    def eval_conjunct(self, preselection):
        "Evaluate a conjunctive expression (& has higher precedence)"
        if debug_enable(DEBUG_LEXER):
            announce("eval_conjunct(%s)" % self.line)
        self.line = self.line.lstrip()
        # Start from everything offered and narrow it term by term.
        narrowed = set(preselection)
        while True:
            term = self.eval_term(narrowed)
            if term is None:
                break
            narrowed = narrowed & term
            self.line = self.line.lstrip()
            if self.peek() != '&':
                break
            self.pop()
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_conjunct(), left = %s" % (narrowed, repr(self.line)))
        return narrowed
    def eval_term(self, preselection):
        """Evaluate one term of a selection expression.

        A term is a negation (~expression), a parenthesized expression,
        or one of the primitive forms, tried in this order: visibility
        spec, polyrange, text search, path set, function call.

        Returns a set of event indices, or whatever the last primitive
        evaluator returned if none of them recognized the input.
        Raises Recoverable if a parenthesized expression is not closed.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_term(%s)" % self.line)
        self.line = self.line.lstrip()
        if self.peek() == '~':
            # Negation is taken relative to all events, not preselection.
            self.pop()
            allevents = set(self.chosen().all())
            return allevents - set(self.eval_expression(allevents))
        elif self.peek() == '(':
            self.pop()
            term = self.eval_expression(preselection)
            self.line = self.line.lstrip()
            if self.peek() != ')':
                raise Recoverable("trailing junk on inner expression")
            else:
                self.pop()
        else:
            # Each evaluator returns None when the input is not in its
            # syntax; the first one that recognizes it wins.
            term = None
            for evaluator in (self.eval_visibility,
                              self.eval_polyrange,
                              self.eval_textsearch,
                              self.eval_pathset,
                              self.eval_funcall):
                term = evaluator(preselection)
                if term is not None:
                    break
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_term(), left = %s" % (term, repr(self.line)))
        return term
    def has_reference(self, event):
        "Does an event contain something that looks like a legacy reference?"
        self.chosen().parse_dollar_cookies()
        # Search the comment if the event has one, else its text.
        for attrname in ("comment", "text"):
            if hasattr(event, attrname):
                text = getattr(event, attrname)
                break
        else:
            return False
        # Without a VCS type or cookie patterns nothing can match.
        if self.chosen().vcs is None or not self.chosen().vcs.cookies:
            return False
        return any(re.search(pattern, text)
                   for pattern in self.chosen().vcs.cookies)
    def eval_visibility(self, preselection):
        "Parse a visibility spec."
        if debug_enable(DEBUG_LEXER):
            announce("eval_visibility(%s)" % self.line)
        self.line = self.line.lstrip()
        # None tells the caller this was not a visibility spec.
        visibility = None
        if self.peek() == "=":
            # Available: AEGJKQSVWXY
            typeletters = {
                "B" : lambda e: isinstance(e, Blob),
                "C" : lambda e: isinstance(e, Commit),
                "T" : lambda e: isinstance(e, Tag),
                "R" : lambda e: isinstance(e, Reset),
                "P" : lambda e: isinstance(e, Passthrough),
                "H" : lambda e: isinstance(e, Commit) and not e.has_children(),
                "O" : lambda e: isinstance(e, Commit) and not e.has_parents(),
                "U" : lambda e: isinstance(e, Commit) and e.has_callouts(),
                "Z" : lambda e: isinstance(e, Commit) and not e.operations(),
                "M" : lambda e: isinstance(e, Commit) and len(e.parents()) > 1,
                "F" : lambda e: isinstance(e, Commit) and len(e.children()) > 1,
                "L" : lambda e: isinstance(e, Commit) and RepoSurgeon.unclean.match(e.comment),
                "I" : lambda e: hasattr(e, b'undecodable') and e.undecodable(),
                "D" : lambda e: hasattr(e, b'alldeletes') and e.alldeletes(),
                "N" : lambda e: self.has_reference(e)
                }

            self.pop()
            # Collect one predicate per type letter following the '='.
            predicates = set()
            while self.peek() in typeletters:
                predicates.add(typeletters[self.pop()])
            # We need a special check here because these expressions
            # could otherwise run onto the text part of the command.
            if self.peek() not in "()|& ":
                raise Recoverable("garbled type mask at %s" % repr(self.line))
            if debug_enable(DEBUG_LEXER):
                announce("visibility set is %s with %s left" % ([x.__name__ for x in predicates], repr(self.line)))
            # An event is visible if any requested predicate accepts it.
            visibility = set()
            for i in preselection:
                event = self.chosen().events[i]
                if any(accepts(event) for accepts in predicates):
                    visibility.add(i)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_visibility(), left = %s" % (visibility, repr(self.line)))
        return visibility
    def eval_polyrange(self, _preselection):
        """Parse a polyrange specification (list of intervals).

        Recognized location forms are 1-origin event numbers, :mark
        references, $ for the last event, and <name> references resolved
        through the chosen repository's named().  Locations may be
        separated by commas; ".." between two locations denotes the
        inclusive span between them.

        Returns the set of selected 0-origin event indices, or None if
        the input does not start a polyrange.  Raises Recoverable on a
        malformed or unresolvable location, an incomplete span, or an
        event number out of range.
        """
        # preselection is not used since it is perfectly legal to have range
        # bounds be outside of the reduced set.
        if debug_enable(DEBUG_LEXER):
            announce("eval_polyrange(%s)" % self.line)
        self.line = self.line.lstrip()
        polyrange_initials = (b":",b"0",b"1",b"2",b"3",b"4",b"5",b"6",b"7",b"8",b"9","$", "<")
        if not self.peek() in polyrange_initials:
            polyrange = None
        # Avoid having an input redirect mistaken for the start of a literal.
        # This might break if a command can ever have both input and output
        # redirects.
        elif self.peek() == b"<" and b">" not in self.line:
            polyrange = None
        else:
            selection = []
            while self.peek() in polyrange_initials + (b".", b","):
                # First, literal command numbers (1-origin)
                match = re.match(b"[0-9]+", self.line)
                if match:
                    number = match.group()
                    selection.append(int(number)-1)
                    self.line = self.line[len(number):]
                    continue
                # Next, mark references
                match = re.match(b":[0-9]+", self.line)
                if match:
                    markref = match.group()
                    self.line = self.line[len(markref):]
                    for (i, event) in enumerate(self.chosen()):
                        if hasattr(event, b"mark") and event.mark == markref:
                            selection.append(i)
                            break
                        elif hasattr(event, "committish") and event.committish == markref:
                            selection.append(i)
                            break
                    else:
                        raise Recoverable("mark %s not found." % markref)
                    continue
                elif self.peek() == b':':
                    raise Recoverable("malformed mark")
                # $ means last commit, a la ed(1).
                if self.peek() == b"$":
                    selection.append(len(self.chosen())-1)
                    self.pop()
                    continue
                # Comma just delimits a location spec
                if self.peek() == b",":
                    self.pop()
                    continue
                # Following ".." means a span
                if self.line[:2] == b"..":
                    if selection:
                        selection.append(b"..")
                        self.line = self.line[2:]
                        continue
                    else:
                        raise Recoverable("start of span is missing")
                if self.peek() == b"<":
                    self.pop()
                    closer = self.line.find(b'>')
                    if closer == -1:
                        raise Recoverable("reference improperly terminated. '%s'" % self.line)
                    ref = self.line[:closer]
                    self.line = self.line[closer+1:]
                    lookup = self.chosen().named(ref)
                    if lookup:
                        # Choose to include *all* commits matching the date.
                        # Alas, results in unfortunate behavior when a date
                        # with multiple commits ends a span.
                        selection += list(lookup)
                    else:
                        raise Recoverable("couldn't match a name at <%s>" % ref)
                    continue
                # Getting here means no form above consumed any input,
                # which happens for a single "." that is not part of "..".
                # Fail instead of spinning on the same character forever.
                raise Recoverable("malformed span")
            if debug_enable(DEBUG_LEXER):
                announce("location list is %s with %s left" % (selection, repr(self.line)))
            # Resolve spans
            resolved = []
            spanning = last = 0
            for elt in selection:
                if elt == '..':
                    spanning = True
                else:
                    if spanning:
                        # The span start is already in resolved, so
                        # begin one past it.
                        resolved.extend(range(last+1, elt+1))
                        spanning = False
                    else:
                        resolved.append(elt)
                    last = elt
            selection = resolved
            if debug_enable(DEBUG_LEXER):
                announce(b"resolved list is %s with %s left" % (selection, repr(self.line)))
            # Sanity checks
            if spanning:
                raise Recoverable(b"incomplete range expression.")
            for elt in selection:
                if elt < 0 or elt > len(self.chosen())-1:
                    raise Recoverable("event number %s out of range" % (elt+1))
            polyrange = set(selection)
        if debug_enable(DEBUG_LEXER):
            announce("%s <- eval_polyrange(), left = %s" % (polyrange, repr(self.line)))
        return polyrange
    def eval_textsearch(self, preselection):
        """Parse a text search specification.

        The syntax is /regexp/ optionally followed by flag letters that
        restrict which attributes are searched: b (branch), c (comment),
        C (committer), r (committish), p (passthrough text), t (tagger),
        n (name), a (commit authors) and B (blob content).  Without flag
        letters every attribute in the b/c/C/r/p/t/n table is searched.

        Returns the set of indices from preselection whose event
        matched, or None if the input does not start with a slash.
        Raises Recoverable if the closing slash is missing or the
        regular expression does not compile.
        """
        if debug_enable(DEBUG_LEXER):
            announce("eval_textsearch(%s)" % self.line)
        self.line = self.line.lstrip()
        if not self.peek() == b'/':
            return None
        elif '/' not in self.line[1:]:
            raise Recoverable("malformed text search specifier")
        else:
            # Consume the opening slash.  This must not be wrapped in an
            # assert: asserts are removed under python -O, which would
            # leave the slash in place and derail the parse.
            self.pop()
            endat = self.line.index(b'/')
            try:
                search = re.compile(self.line[:endat]).search
            except re.error:
                raise Recoverable("invalid regular expression")
            self.line = self.line[endat+1:]
            matchers = set()
            searchable_attrs = {b"b":"branch",          # commit
                                b"c":"comment",         # commit or tag
                                b"C":"committer",       # commit
                                b"r":"committish",      # tag or reset
                                b"p":"text",            # passthrough
                                b"t":"tagger",          # tag
                                b"n":"name"             # tag
                                }
            search_in = searchable_attrs.values()
            extractor_lambdas = {
                b"branch": lambda x: x.branch,
                b"comment": lambda x: x.comment,
                b"committer": lambda x: x.committer.who(),
                b"committish": lambda x: x.committish,
                b"text": lambda x: x.text,
                b"tagger": lambda x: x.tagger.who(),
                b"name": lambda x: x.name,
                }
            check_authors = False
            check_blobs = False
            check_branch = False
            if self.line and self.line[0].isalpha():
                # Explicit flags replace the default attribute list.
                search_in = []
                while self.line and (self.line[0] in searchable_attrs or self.line[0] in (b'a', b'B')):
                    flag = self.line[0]
                    if flag == b'a':
                        check_authors = True
                    elif flag == b'B':
                        check_blobs = True
                    else:
                        search_in.append(searchable_attrs[flag])
                        if flag == b'b':
                            check_branch = True
                    self.line = self.line[1:]
            for i in preselection:
                e = self.chosen().events[i]
                if check_branch:
                    # For a branch search, examine the commit behind a
                    # tag or blob rather than the tag or blob itself.
                    if isinstance(e, Tag):
                        e = e.target
                    elif isinstance(e, Blob):
                        events = self.chosen().events
                        for ci in range(i, len(events)):
                            possible = events[ci]
                            if isinstance(possible, Commit) and possible.references(e.mark):
                                # FIXME: Won't find multiple references
                                e = possible
                                break
                for searchable in search_in:
                    if hasattr(e, searchable):
                        key = extractor_lambdas[searchable](e)
                        if key is not None and search(key):
                            matchers.add(i)
                if check_authors and isinstance(e, Commit):
                    if any(search(str(author)) for author in e.authors):
                        matchers.add(i)
                if check_blobs and isinstance(e, Blob) and search(e.get_content()):
                    matchers.add(i)
            if debug_enable(DEBUG_LEXER):
                announce("%s <- eval_textsearch(), left = %s" % (matchers, repr(self.line)))
            return matchers
    def eval_pathset(self, preselection):
        "Resolve a path name to the set of commits that refer to it."
        chosen = self.chosen()
        if self.peek() != "[":
            return None
        self.pop()
        # Find the bracket that closes the one just consumed, allowing
        # for nested bracket pairs inside the matcher.
        nesting = 1
        for (pos, ch) in enumerate(self.line):
            if ch == b'[':
                nesting += 1
            elif ch == b']':
                nesting -= 1
            if nesting == 0:
                matcher = self.line[:pos]
                self.line = self.line[pos+1:]
                break
        else:
            raise Recoverable("malformed path matcher")
        if not matcher.startswith(b'/'):
            # Literal path: exact membership in each event's path set.
            return {i for (i, event) in chosen.iterevents(
                                preselection, types=(Commit, Blob))
                      if matcher in event.paths()}
        # Regexp matcher: peel trailing flag letters off the end.
        flags = set()
        while matcher[-1] in ("a", "c", "D", "M", "R", "C", "N"):
            flags.add(matcher[-1])
            matcher = matcher[:-1]
        if matcher[-1] != b'/':
            raise Recoverable("regexp matcher missing trailing /")
        try:
            search = re.compile(matcher[1:-1]).search
        except re.error:
            raise Recoverable("invalid regular expression")
        if "c" in flags:
            return self.eval_pathset_full(search,
                                          preselection,
                                          b"a" in flags)
        # 'a' requires every path to match; otherwise any one suffices.
        if b"a" in flags:
            all_or_any = all
        else:
            all_or_any = any
        # 'a' is not passed on to paths(); 'c' cannot be present here.
        flags.discard("a")
        hits = set([])
        for (i, event) in chosen.iterevents(
                        preselection, types=(Commit, Blob)):
            if all_or_any(itertools.imap(search, event.paths(flags))):
                hits.add(i)
        return hits
    def eval_pathset_full(self, match_condition,
                                preselection,
                                match_all):
        """Select commits by matching a predicate against checkout trees.

        match_condition is a callable applied to path strings.  For each
        commit only the part of the tree that matches is remembered; in
        most cases that is a lot less memory and CPU hungry than running
        regexes on the full commit manifests.  In the match_all case the
        opposite condition is tracked instead, and commits whose tracked
        tree is empty are selected.

        Returns a set of event indices no greater than max(preselection);
        an empty preselection yields an empty set.
        """
        if not preselection:
            # Nothing to bound the walk with; max() would raise ValueError.
            return set()
        match = match_condition
        if match_all:
            match = lambda p: not match_condition(p)
        match_trees = {}
        result = set()
        last_event = max(preselection)
        for (i, event) in self.chosen().iterevents(types=Commit):
            if i > last_event:
                break
            try:
                parent = event.parents()[0]
            except IndexError:
                tree = PathMap()
            else:
                # Start from the first parent's tracked tree.
                tree = match_trees[parent.mark].snapshot()
            for fileop in event.operations():
                if fileop.op == b'M':
                    if match(fileop.path):
                        tree[fileop.path] = True
                elif fileop.op in (b'C', b'R'):
                    # A rename must drop its source even when the target
                    # matches too; the old elif chain skipped the removal
                    # in that case and left a stale entry behind.
                    if fileop.op == b'R' and match(fileop.source):
                        del tree[fileop.source]
                    if match(fileop.target):
                        tree[fileop.target] = True
                elif fileop.op == b'D':
                    if match(fileop.path):
                        del tree[fileop.path]
                elif fileop.op == b'deleteall':
                    tree = PathMap()
            match_trees[event.mark] = tree
            if (not tree) == match_all:
                result.add(i)
        return result
    def eval_funcall(self, preselection):
        "Parse an @name(...) function call and return its handler's result."
        if self.peek() != b"@":
            return None
        self.pop()
        # Collect the function name: letters and underscores only.
        funname = b""
        while True:
            if not (self.peek().isalpha() or self.peek() == b'_'):
                break
            funname += self.pop()
        if not (funname and self.peek() == '('):
            return None
        # Because the starting "(" is still there, eval_expression()
        # will eat the ending ")" itself through eval_term().
        subarg = self.eval_expression(preselection)
        handler_name = funname + b"_handler"
        try:
            func = getattr(self, handler_name)
        except AttributeError:
            raise Recoverable("no such function as @%s()" % funname)
        return func(subarg)
    def min_handler(self, subarg):
        "Minimum member of a selection set."
        # The docstring above is printed by 'help functions'.
        try:
            least = min(subarg)
        except ValueError:
            raise Recoverable("cannot take minimum of empty set")
        return {least}
    def max_handler(self, subarg):
        "Maximum member of a selection set."
        # The docstring above is printed by 'help functions'.
        try:
            greatest = max(subarg)
        except ValueError:
            raise Recoverable("cannot take maximum of empty set")
        return {greatest}
    def amp_handler(self, subarg):
        "Amplify - map empty set to empty, nonempty set to all."
        # An empty argument is handed back as-is (the same object).
        if not subarg:
            return subarg
        return set(self.chosen().all())
    def chn_handler(self, subarg):
        "All children of commits in the selection set."
        # One hop only: recursion is switched off.
        get_children = operator.methodcaller("children")
        return self._accumulate_commits(subarg, get_children, recurse=False)
    def dsc_handler(self, subarg):
        "All descendants of a selection set, recursively."
        # Uses the accumulator's default recursive traversal.
        get_children = operator.methodcaller("children")
        return self._accumulate_commits(subarg, get_children)
    def par_handler(self, subarg):
        "All parents of a selection set."
        # One hop only: recursion is switched off.
        get_parents = operator.methodcaller("parents")
        return self._accumulate_commits(subarg, get_parents, recurse=False)
    def anc_handler(self, subarg):
        "All ancestors of a selection set, recursively."
        # Uses the accumulator's default recursive traversal.
        get_parents = operator.methodcaller("parents")
        return self._accumulate_commits(subarg, get_parents)
    def pre_handler(self, subarg):
        "Predecessors function; all events previous to argument set."
        if not subarg:
            return set()
        first = min(subarg)
        if first == 0:
            # Nothing can precede event zero.
            return set()
        return set(range(0, first))
    def suc_handler(self, subarg):
        "Successors function; all events following argument set."
        if not subarg:
            return set()
        event_count = len(self.chosen().events)
        last = max(subarg)
        if last >= event_count - 1:
            # The selection already reaches the final event.
            return set()
        return set(range(last + 1, event_count))
    def _accumulate_commits(self, subarg, operation, recurse=True):
        """Collect event indices reachable from subarg through operation.

        operation maps a commit to an iterable of related commits.  With
        recurse false only one hop is taken and the starting set is not
        included; otherwise the result is subarg plus everything reached
        by applying operation repeatedly.
        """
        repo = self.chosen()
        selected = repo.iterevents(subarg, types=Commit)
        found = set()
        if not recurse:
            for _, commit in selected:
                found.update(repo.index(relative)
                             for relative in operation(commit))
            return found
        found |= subarg
        # Seed the work queue with the selected commits themselves.
        pending = collections.deque(commit for _, commit in selected)
        # Breadth-first walk; an index already in found is never re-queued.
        while pending:
            current = pending.popleft()
            for relative in operation(current):
                where = repo.index(relative)
                if where in found:
                    continue
                found.add(where)
                pending.append(relative)
        return found

    #
    # Helpers
    #
    def report_select(self, line, method, optargs=()):
        """Write a report built by calling a named method on selected events.

        Every selected event that has an attribute called method gets it
        called with (parse, index) followed by optargs; non-empty return
        values are written, newline-terminated, to the parsed stdout.
        """
        if not self.chosen():
            complain("no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if self.selection is None:
                if parse.line.strip():
                    # Remaining text is handed to the selection parser.
                    parse.line = self.set_selection_set(parse.line)
                else:
                    self.selection = self.chosen().all()
            for i, event in self.selected():
                if not hasattr(event, method):
                    continue
                call_args = (parse, i,) + optargs
                summary = getattr(event, method)(*call_args)
                if summary:
                    parse.stdout.write(summary + b"\n")
    @staticmethod
    def pop_token(line):
        "Grab a whitespace-delimited token from the front of the line."
        tok = b""
        line = line.lstrip()
        while True:
            if not line or line[0].isspace():
                break
            else:
                tok += line[0]
                line = line[1:]
        line = line.lstrip()
        return (tok, line)
    def edit(self, selection, line):
        """Mailboxize and edit the non-blobs in the selection.

        line names the editor command; if it is blank, $EDITOR is used.
        A selection consisting of one blob is edited in place through its
        materialized file.  Otherwise every selected event that has an
        email_out method is dumped to a temporary file, the editor is run
        on it, and the result is read back with do_mailbox_in.

        Raises Recoverable if the temporary file cannot be written or the
        editor returns a failure status.
        """
        # Assumes that self.chosen() and selection are not None
        editor = line.strip() or os.getenv("EDITOR")
        if not editor:
            complain("you have not specified an editor and $EDITOR is not set")
            return
        # Special case: user selected a single blob
        if len(selection) == 1:
            singleton = self.chosen()[selection[0]]
            if isinstance(singleton, Blob):
                def find_successor(event, path):
                    # Marks of all descendants that modify the same path.
                    here = []
                    for child in event.children():
                        for fileop in child.operations():
                            if fileop.op == "M" and fileop.path == path:
                                here.append(child.mark)
                        here += find_successor(child, path)
                    return here
                for event in self.chosen().commits():
                    for fileop in event.operations():
                        if fileop.op == 'M' and fileop.ref == singleton.mark:
                            if len(find_successor(event, fileop.path)) > 0:
                                complain("beware: not the last 'M %s' on its branch" % fileop.path)
                            break
                os.system(editor + " " + singleton.materialize())
                return
            # Fall through
        (tfdesc, tfname) = tempfile.mkstemp()
        try:
            # Write through the descriptor mkstemp() already opened, so it
            # is closed on every path instead of leaking when we bail out.
            with os.fdopen(tfdesc, "wb") as tfp:
                for i in selection:
                    event = self.chosen()[i]
                    if hasattr(event, "email_out"):
                        tfp.write(event.email_out([], i))
        except IOError:
            raise Recoverable("write of editor tempfile failed")
        if os.system(editor + " " + tfname):
            raise Recoverable("%s returned a failure status" % editor)
        self.do_mailbox_in(b"<" + tfname)
        # No try/finally here - we want the tempfile to survive on fatal error
        # because it might have megabytes of metadata edits in it.
        os.remove(tfname)

    def data_traverse(self, prompt, hook, attributes, safety):
        """Filter commit metadata (and possibly blobs) through a specified hook.

        prompt labels the progress baton and the closing report.  hook is
        called with the old text (plus a path string when transforming
        blob or inline content) and returns the replacement.  attributes
        selects which commit fields are filtered: "c" comment,
        "C" committer, "a" authors.  If safety is true, a selection that
        mixes blobs and non-blobs raises Recoverable.
        """
        repo = self.chosen()
        blobs = any(isinstance(repo.events[i], Blob)
                    for i in self.selection)
        nonblobs = any(not isinstance(repo.events[i], Blob)
                       for i in self.selection)
        # Try to prevent user from shooting self in foot
        if safety and blobs and nonblobs:
            raise Recoverable("cannot transform blobs and nonblobs in same command")
        # If user is transforming blobs, transform all inlines within the range.
        # This is an expensive step because of the sort; avoid doing it
        # when possible.
        if blobs and repo.inlines > 0:
            # Add each inline-bearing event at most once.  It used to be
            # appended once per inline fileop, and again even if already
            # selected, so the hook ran on it repeatedly.
            already = set(self.selection)
            for ei in range(self.selection[0], self.selection[-1]):
                if ei in already:
                    continue
                event = repo.events[ei]
                # NOTE(review): operations() is also called on Tag here,
                # as in the original; confirm that Tag provides it.
                if isinstance(event, (Commit, Tag)):
                    if any(fileop.inline is not None
                           for fileop in event.operations()):
                        self.selection.append(ei)
                        already.add(ei)
            self.selection.sort()
        with Baton(prompt=prompt, enable=(verbose == 1)) as baton:
            altered = 0
            for _, event in self.selected():
                if isinstance(event, Tag):
                    if nonblobs:
                        oldcomment = event.comment
                        event.comment = hook(event.comment)
                        anychanged = (oldcomment != event.comment)
                        oldtagger = event.tagger.who()
                        newtagger = hook(oldtagger)
                        if oldtagger != newtagger:
                            # Rebuild the attribution with its old date.
                            newtagger += " " + str(event.tagger.date)
                            event.tagger = Attribution(newtagger)
                            anychanged = True
                        if anychanged:
                            altered += 1
                elif isinstance(event, Commit):
                    if nonblobs:
                        anychanged = False
                        if b"c" in attributes:
                            oldcomment = event.comment
                            event.comment = hook(event.comment)
                            if oldcomment != event.comment:
                                anychanged = True
                        if b"C" in attributes:
                            oldcommitter = event.committer.who()
                            newcommitter = hook(oldcommitter)
                            if oldcommitter != newcommitter:
                                newcommitter += " " + str(event.committer.date)
                                event.committer = Attribution(newcommitter)
                                anychanged = True
                        if b"a" in attributes:
                            for i, author in enumerate(event.authors):
                                oldauthor = author.who()
                                newauthor = hook(oldauthor)
                                if oldauthor != newauthor:
                                    newauthor += " " + str(author.date)
                                    event.authors[i] = Attribution(newauthor)
                                    anychanged = True
                        if anychanged:
                            altered += 1
                    if blobs:
                        for fileop in event.operations():
                            if fileop.inline is not None:
                                oldinline = fileop.inline
                                # The path belongs to the fileop; this used
                                # to read event.path on the commit.
                                fileop.inline = hook(fileop.inline, fileop.path)
                                altered += int(fileop.inline != oldinline)
                elif isinstance(event, Blob):
                    content = event.get_content()
                    modified = hook(content, " ".join(event.paths()))
                    if content != modified:
                        event.set_content(modified)
                    altered += (content != modified)
                baton.twirl()
        announce("%d items modified by %s." % (altered, prompt.lower()))

    def help_selection(self):
        print("""
A quick example-centered reference for selection-set syntax.

First, these ways of constructing singleton sets:

123        event numbered 123 (1-origin)
:345       event with mark 345
<456>      commit with legacy-ID 456 (probably a Subversion revsion)
<foo>      the tag named 'foo', or failing that the tip commmit of branch foo

You can select commits and tags by date, or by date and committer:

<2011-05-25>                  all commits and tags with this date
<2011-05-25!esr>              all with this date and committer
<2011-05-25T07:30:37Z>        all commits and tags with this date and time
<2011-05-25T07:30:37Z!esr>    all with this date and time and committer
<2011-05-25T07:30:37Z!esr#2>  event #2 (1-origin) in the above set

More ways to construct event sets:

/foo/      all commits and tags containing the string 'foo' in text or metadata
           suffix letters: a=author, b=branch, c=comment in commit or tag,
                           C=committer, r=committish, p=text, t=tagger, n=name,
                           B=blob content in blobs.
           A 'b' search also finds blobs and tags attached to commits on
           matching branches.
[foo]      all commits and blobs touching the file named 'foo'.
[/bar/]    all commits and blobs touching a file matching the regexp 'bar'. Suffix
           flags: a=all fileops must match other selectors, not just any one;
           c=match against checkout paths, DMRCN=match only against given
           fileop types (no-op when used with 'c').
=C         all commits
=H         all head (branch tip) commits
=T         all tags
=B         all blobs
=R         all resets
=P         all passthroughs
=O         all orphan (parentless) commits
=U         all commits with callouts as parents
=Z         all commits with no fileops
=M         all merge commits
=F         all fork (multiple-child) commits
=L         all commits with unclean multi-line comments
=I         all commits not decodable to UTF-8
=D         all commits in which every fileop is a D or deleteall
=N         all commits and tags matching a cookie (legacy-ID) format.

@min()     create singleton set of the least element in the argument
@max()     create singleton set of the greatest element in the argument

Other special functions are available: do 'help functions; for more. 

You can compose sets as follows:

:123,<foo>     the event marked 123 and the event referenced by 'foo'.
:123..<foo>    the range of events from mark 123 to the reference 'foo'

Sets can be composed with | (union) and & (intersection). | has lower
precedence than &, but set expressions can be grouped with ( ). Postfixing
a ? to a selection expression widens it to include all immediate neighbors
of the selection; you can do this repeatedly for effect. Do set negation with
prefix ~; it has higher precedence than & | but lower than ?.
""")

    def help_syntax(self):
        print(b"""
Commands are distinguished by a command keyword.  Most take a selection set
immediately before it; see 'help selection' for details.  Some
commands take additional modifier arguments after the command keyword.

Most report-generation commands support output redirection. When
arguments for these are parsed, any argument beginning with '>' is
extracted and interpreted as the name of a file to which command
output should be redirected.  Any remaining arguments are available to
the command logic.

Some commands support input redirection. When arguments for these are
parsed, any argument beginning with '<' is extracted and interpreted
as the name of a file from which command output should be taken.  Any
remaining arguments are available to the command logic.
""")

    def help_functions(self):
        "List each selection function along with its handler's docstring."
        print(b"The following special selection functions are available:")
        suffix = b"_handler"
        handler_names = [name for name in RepoSurgeon.__dict__.keys()
                         if name.endswith(suffix)]
        for name in sorted(handler_names):
            summary = getattr(RepoSurgeon, name).__doc__
            print(b"@%s()\t%s" % (name[:-len(suffix)], summary))
    ##
    ## Command implementation begins here
    ##
    #
    # On-line help and instrumentation
    #
    def help_help(self):
        "Explain how to ask for help on a command."
        text = b"Show help for a command. Follow with space and the command name."
        print(text)
    def help_verbose(self):
        print(b"""
Without an argument, this command requests a report of the verbosity
level.  'verbose 1' enables progress messages, 'verbose 0' disables
them. Higher levels of verbosity are available but intended for
developers only.
""")
    def do_verbose(self, line):
        "Set the verbosity level, or report it when no argument is given."
        global verbose
        if not line:
            # Bare command: just report the current level.
            announce(b"verbose %d" % verbose)
            return
        try:
            verbose = int(line)
        except ValueError:
            complain(b"verbosity value must be an integer")
        # After an attempted change, report only when the level is nonzero.
        if verbose:
            announce(b"verbose %d" % verbose)

    def help_quiet(self):
        print(b"""
Without an argument, this command requests a report of the quiet
boolean; with the argument 'on' or 'off' it is changed.  When quiet is
on, time-varying report fields which would otherwise cause spurious
failures in regression testing are suppressed.
""")
    def do_quiet(self, line):
        "Set the quiet flag from 'on'/'off', or report it when no argument is given."
        global quiet
        if not line:
            announce(b"quiet %s" % (b"on" if quiet else "off"))
        elif line == b"on":
            quiet = True
        elif line == b"off":
            quiet = False
        # Any other argument leaves the flag untouched.

    def help_echo(self):
        print(b"""
Set or clear echoing of commands before processing.
""")
    def do_echo(self, line):
        "Set or clear echoing commands before processing."
        try:
            level = int(line)
        except ValueError:
            announce(b"echo value must be an integer")
        else:
            self.echo = level
        # The current setting is reported whenever verbosity is on,
        # whether or not the argument parsed.
        if verbose:
            announce(b"echo %d" % self.echo)

    def help_print(self):
        print(b"""
Print a literal string.
""")
    def do_print(self, line):
        "Echo the argument text back to the user."
        # The rest of the command line is emitted unmodified.
        print(line)

    def help_resolve(self):
        print(b"""
Does nothing but resolve a selection-set expression
and report the resulting event-number set to standard
output. The remainder of the line after the command is used
as a label for the output.

Implemented mainly for regression testing, but may be useful
for exploring the selection-set language.
""")
    def do_resolve(self, line):
        "Print the 1-origin event numbers of the current selection, optionally labeled."
        selection = self.selection
        if selection is None:
            print(b"No selection")
            return
        if not isinstance(selection, list):
            complain(b"resolve didn't expect a selection of %s" % selection)
            return
        if line:
            # The rest of the command line serves as a label.
            sys.stdout.write(b"%s: " % line)
        print([x+1 for x in selection])

    def help_assign(self):
        print(b"""

Compute a leading selection set and assign it to a symbolic name,
which must follow the assign keyword. It is an error to assign to a
name that is already assigned, or to any existing branch name.
Assignments may be cleared by some sequence mutations (though not by
ordinary deletion); you will see a warning when this occurs.

With no selection set and no argument, list all assignments.

Use this to optimize out location and selection computations
that would otherwise be performed repeatedly, e.g. in macro calls.
""")
    def do_assign(self, line):
        "Bind the current selection to a symbolic name, or list the bindings."
        repo = self.chosen()
        if not repo:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            if line:
                raise Recoverable(b"No selection")
            # No selection and no argument: list what is assigned.
            for n, v in repo.assignments.items():
                announce("%s = %s" % (n, v))
            return
        name = line.strip()
        if name in repo.assignments:
            raise Recoverable(b"%s has already been set" % name)
        if repo.named(name):
            raise Recoverable(b"%s conflicts with a branch, tag, legacy-ID, or date" % name)
        repo.assignments[name] = self.selection

    def help_unassign(self):
        print(b"""
Unassign a symbolic name.  Throws an error if the name is not assigned.
""")
    def do_unassign(self, line):
        "Remove a symbolic name from the chosen repo's assignments."
        repo = self.chosen()
        if not repo:
            complain(b"no repo has been chosen.")
            return
        if self.selection is not None:
            raise Recoverable(b"cannot take a selection")
        name = line.strip()
        if name not in repo.assignments:
            raise Recoverable(b"%s has not been set" % name)
        del repo.assignments[name]

    def help_names(self):
        print(b"""
List all known symbolic names of branches and tags. Supports > redirection.
""")
    def do_names(self, line):
        "List branch names (sorted), then tag names in event order."
        if not self.chosen():
            complain(b"no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            emit = parse.stdout.write
            for branch in sorted(self.chosen().branchset()):
                emit(b"branch %s\n" % branch)
            for event in self.chosen():
                if not isinstance(event, Tag):
                    continue
                emit(b"tag    %s\n" % event.name)

    def do_script(self, line):
        "Read and execute commands from a named file."
        # line is the script filename followed by optional arguments.  In
        # each script line $0 expands to the filename, $1, $2... to the
        # arguments and $$ to this process's PID.  A line containing <<
        # starts a here-document, which is spooled to a temporary file
        # and handed to the command as a < redirection.
        if not line:
            complain(b"script requires a file argument")
            return
        self.callstack.append(line.split())
        try:
            with open(self.callstack[-1][0], "r") as scriptfp:
                while True:
                    scriptline = scriptfp.readline()
                    if not scriptline:
                        break
                    # Handle multiline commands
                    while scriptline.endswith(b"\\\n"):
                        scriptline = scriptline[:-2] + scriptfp.readline()
                    # Simulate shell here-document processing
                    heredoc = None
                    if b'<<' in scriptline:
                        # Split on the first << only, so a second one on
                        # the line cannot break the unpacking.
                        (scriptline, terminator) = scriptline.split(b"<<", 1)
                        heredoc = tempfile.NamedTemporaryFile(mode=b"wb",
                                                              delete=False)
                        with heredoc:
                            while True:
                                nextline = scriptfp.readline()
                                if nextline == b'' or nextline == terminator:
                                    break
                                heredoc.write(nextline)
                        # Note: the command must accept < redirection!
                        scriptline += b"<" + heredoc.name
                    # End of heredoc simulation
                    try:
                        args = self.callstack[-1]
                        # Highest index first, so that $1 cannot clobber
                        # the front of $10 and above.
                        for i in reversed(range(len(args))):
                            scriptline = scriptline.replace(b'$' + str(i), args[i])
                        scriptline = scriptline.replace(b'$$', str(os.getpid()))
                        self.onecmd(self.precmd(scriptline))
                    finally:
                        # Remove the spooled here-document even if the
                        # command raised.
                        if heredoc is not None:
                            os.remove(heredoc.name)
        except IOError as e:
            complain(b"script failure on '%s': %s" % (line, e))
        finally:
            # Always unwind our frame; it used to be left behind whenever
            # the script could not be opened or a command raised.
            self.callstack.pop()

    def do_history(self, line):
        "Dump your command list from this session so far."
        # The argument is ignored; each entry goes on its own line.
        for entry in self.history:
            print(entry)

    def do_coverage(self, unused):
        "Display the coverage-case set (developer instrumentation)."
        assert unused is not None   # pacify pylint
        repo = self.chosen()
        if not repo:
            complain(b"no repo has been chosen.")
            return
        # Calls fileop_dump() on every commit before reporting the set.
        for commit in repo.commits():
            commit.fileop_dump()
        cases = sorted(repo.case_coverage)
        sys.stdout.write(b"Case coverage: %s\n" % cases)

    def help_index(self):
        print(b"""
Display four columns of info on selected objects: their number, their
type, the associate mark (or '-' if no mark) and a summary field
varying by type.  For a branch or tag it's the reference; for a commit
it's the commit branch; for a blob it's the repository path of the
file in the blob.  Supports > redirection.
""")
    def do_index(self, line):
        "Write a one-line summary for each selected object."
        if not self.chosen():
            complain(b"no repo has been chosen.")
            return
        # We could do all this logic using report_select() and index() methods
        # in the objects, but that would have two disadvantages.  First, we'd
        # get a default-set computation we don't want.  Second, for this
        # function it's helpful to have the method strings close together so
        # we can maintain columnation.
        if self.selection is None:
            # Default selection: everything except blobs.
            self.selection = [n for n, o1 in enumerate(self.chosen()) if not isinstance(o1, Blob)]
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for i, event in self.selected():
                if isinstance(event, Blob):
                    row = b"%6d blob   %6s    %s\n" % (i+1, event.mark,b" ".join(event.paths()))
                elif isinstance(event, Commit):
                    row = b"%6d commit %6s    %s\n" % (i+1, event.mark or b'-', event.branch)
                elif isinstance(event, Tag):
                    row = b"%6d tag    %6s    %4s\n" % (i+1, event.committish, repr(event.name),)
                elif isinstance(event, Reset):
                    row = b"%6d branch %6s    %s\n" % (i+1, event.committish or b'-', event.ref)
                else:
                    row = b"?      -      %s\n" % (event,)
                parse.stdout.write(row)
    def help_profile(self):
        print(b"""
Enable profiling. Profile statistics are dumped to the path given as argument.
Must be one of the initial command-line arguments, and gathers statistics only
on code executed via '-'.
""")
    def do_profile(self, line):
        "Record the path the profile log should be written to."
        assert line is not None # Pacify pylint
        # Only stores the path; the profiler is not started here.
        self.profile_log = line
        announce(b"profiling enabled.")

    def help_timing(self):
        print(b"""
Report phase-timing results from repository analysis.
""")
    def do_timing(self, _line):
        "Report repo-analysis times."
        # Prints the commit count, one line per recorded phase with its
        # duration and share of the total, then the total and a
        # commits-per-second rate.
        if not self.chosen():
            complain(b"no repo has been chosen.")
            return
        timings = self.repo.timings
        if not timings:
            # Nothing recorded; indexing below would raise IndexError.
            complain(b"no timings have been recorded.")
            return
        total = timings[-1][1] - timings[0][1]
        commit_count = sum(1 for _ in self.repo.commits())
        if self.repo.legacy_count is None:
            print(b"        commits: %d" % commit_count)
        else:
            print(b"        commits: %d (from %d)" % (commit_count, self.repo.legacy_count))
        # Each phase's duration is its stamp minus the previous stamp.
        previous = timings[0][1]
        for (phase, stamp) in timings[1:]:
            interval = stamp - previous
            previous = stamp
            # A zero total (e.g. a single entry) must not divide by zero.
            percent = (interval * 100)/total if total else 0.0
            print(b"%15s: %.3f (%2.2f%%)" % (phase,
                                          interval,
                                          percent))
        if total:
            rate = int((self.repo.legacy_count or commit_count))/total
        else:
            rate = 0
        print(b"          total: %.3f (%d/sec)" % (total, rate))

    #
    # Information-gathering
    #
    def help_stats(self):
        print(b"""
Report size statistics and import/export method information of the
currently chosen repository. Supports > redirection.
""")
    def do_stats(self, line):
        "Write size and event-count statistics for the named repositories."
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if not parse.line:
                # No names given: fall back to the chosen repository.
                if not self.chosen():
                    complain(b"no repo has been chosen.")
                    return
                parse.line = self.chosen().name
                if parse.line is None:
                    complain(b"no repo has been chosen.")
                    return
            for name in parse.tokens():
                repo = self.repo_by_name(name)
                if repo is None:
                    raise Recoverable(b"no such repo as %s" % name)
                fields = (repo.name, repo.size() / 1000.0, len(repo))
                # One tally per event type, in the order the format expects.
                fields += tuple(sum(1 for x in repo.events if isinstance(x, otype))
                                for otype in (Blob, Commit, Tag, Reset))
                fields += (rfc3339(repo.readtime),)
                parse.stdout.write(b"%s: %.0fK, %d events, %d blobs, %d commits, %d tags, %d resets, %s.\n" % fields)
                if repo.sourcedir:
                    parse.stdout.write(b"  Loaded from %s\n" % repo.sourcedir)
                #if repo.vcs:
                #    parse.stdout.write(str(repo.vcs) + b"\n")

    def help_count(self):
        print(b"""
Report a count of items in the selection set. Default set is everything
in the currently-selected repo. Supports > redirection.
""")
    def do_count(self, line):
        "Report the number of events in the selection set."
        if not self.chosen():
            complain(b"no repo has been chosen.")
            return
        # Identity test: 'is None' is the correct check for an unset
        # selection and cannot be fooled by a custom __eq__.
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            parse.stdout.write(b"%d\n" % len(self.selection))

    def help_list(self):
        print(b"""
Display commits in a human-friendly format; the first column is raw
event numbers, the second a timestamp in local time. If the repository
has legacy IDs, they will be displayed in the third column. The
leading portion of the comment follows. Supports > redirection.
""")
    def do_list(self, line):
        "Report on the selection through each event's lister method."
        width = screenwidth()
        self.report_select(line, b"lister", (width,))

    def help_tip(self):
        print(b"""
Display the branch tip names associated with commits in the selection
set.  These will not necessarily be the same as their branch fields
(which will often be tag names if the repo contains either annotated
or lightweight tags).

If a commit is at a branch tip, its tip is its branch name.  If it has
only one child, its tip is the child's tip.  If it has multiple children,
then if there is a child with a matching branch name its tip is the
child's tip.  Otherwise this function throws a recoverable error.

Supports > redirection.
""")
    def do_tip(self, line):
        "Report the branch tip name for commits in the selection set."
        # The "tip" report is handed the current screen width as its
        # only extra argument.
        report_args = (screenwidth(),)
        self.report_select(line, b"tip", report_args)

    def help_tags(self):
        print(b"""
Display tags and resets: three fields, an event number and a type and a name.
Branch tip commits associated with tags are also displayed with the type
field 'commit'. Supports > redirection.
""")
    def do_tags(self, line):
        "List tags and resets in a human-readable form."
        # The "tags" report is handed the current screen width as its
        # only extra argument.
        report_args = (screenwidth(),)
        self.report_select(line, "tags", report_args)

    def help_stamp(self):
        print(b"""
Display full action stamps correponding to commits in a select.
The stamp is followed by the first line of the commit message.
Supports > redirection.
""")
    def do_stamp(self,line):
        "Report action stamps for commits in the selection set."
        # The "stamp" report is handed the current screen width as its
        # only extra argument.
        report_args = (screenwidth(),)
        self.report_select(line, b"stamp", report_args)

    def help_sizes(self):
        print(b"""
Print a report on data volume per branch; takes a selection set,
defaulting to all events. The numbers tally the size of uncompressed
blobs, commit and tag comments, and other metadata strings (a blob is
counted each time a commit points at it).  Not an exact measure of
storage size: intended mainly as a way to get information on how to
efficiently partition a repository that has become large enough to be
unwieldy. Supports > redirection.
""")
    def do_sizes(self, line):
        "Report branch relative sizes."
        repo = self.chosen()
        # Identity test: the repository object supports len(), so a
        # truthiness test could misreport an empty repo as "not chosen".
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # Maps branch name -> accumulated byte count.
        sizes = collections.defaultdict(int)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for _, event in self.selected():
                if isinstance(event, Commit):
                    branch = event.branch
                    # Attribution lines and the comment count as metadata.
                    sizes[branch] += len(str(event.committer))
                    for author in event.authors:
                        sizes[branch] += len(str(author))
                    sizes[branch] += len(event.comment)
                    # Each modify op is charged the size of the object it
                    # references, once per commit that points at it.
                    for fileop in event.operations():
                        if fileop.op == "M":
                            sizes[branch] += repo.objfind(fileop.ref).size
                elif isinstance(event, Tag):
                    # A tag is charged to the branch of its target commit.
                    branch = event.target.branch
                    sizes[branch] += len(str(event.tagger))
                    sizes[branch] += len(event.comment)
            total = sum(sizes.values())
            def sz(n, s):
                "Write one report line: count, share of the total, label."
                # With nothing to measure the total is 0; report 0% rather
                # than dividing by zero.
                if total:
                    percent = (n * 100.0) / total
                else:
                    percent = 0.0
                parse.stdout.write(b"%9d\t%2.2f%%\t%s\n" % (n, percent, s))
            for key in sorted(sizes):
                sz(sizes[key], key)
            # Final line is the grand total, with an empty label.
            sz(total, b"")
    def help_lint(self):
        print(b"""
Look for DAG and metadata configurations that may indicate a
problem. Presently checks for: (1) Mid-branch deletes, (2)
disconnected commits, (3) parentless commits, (4) the existance of
multiple roots, (5) committer and author IDs that don't look
well-formed as DVCS IDs, (6) multiple child links with identical
branch labels descending from the same commit, (7) time and
action-stamp collisions.

Supports > redirection.
""")
    def do_lint(self, line):
        "Look for lint in a repo."
        if not self.chosen():
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            unmapped = re.compile(b"[^@]*$|[^@]*@" + str(self.chosen().uuid) + "$")
            shortset = set()
            deletealls = set()
            disconnected = set()
            roots = set()
            emptyaddr = set()
            emptyname = set()
            badaddress = set()
            for _, event in self.selected(Commit):
                if event.operations() and event.operations()[0].op == 'deleteall' and event.has_children():
                    deletealls.add(b"on %s at %s" % (event.branch, event.id_me()))
                if not event.has_parents() and not event.has_children():
                    disconnected.add(event.id_me())
                elif not event.has_parents():
                    roots.add(event.id_me())
                if unmapped:
                    for person in [event.committer] + event.authors:
                        if unmapped.match(person.email):
                            shortset.add(person.email)
                if not event.committer.email:
                    emptyaddr.add(event.id_me())
                elif b"@" not in event.committer.email:
                    badaddress.add(event.id_me())
                for author in event.authors:
                    if not author.email:
                        emptyaddr.add(event.id_me())
                    elif b"@" not in author.email:
                        badaddress.add(event.id_me())
                if not event.committer.name:
                    emptyname.add(event.id_me())
                for author in event.authors:
                    if not author.name:
                        emptyname.add(event.id_me())

            if not parse.options or "--deletealls" in parse.options \
                   or "-d" in parse.options:
                for item in deletealls:
                    parse.stdout.write(b"mid-branch delete: %s\n" % item)
            if not parse.options or "--connected" in parse.options \
                   or "-c" in parse.options:
                for item in disconnected:
                    parse.stdout.write(b"disconnected commit: %s\n" % item)
            if not parse.options or "--roots" in parse.options \
                   or "-r" in parse.options:
                if len(roots) > 1:
                    parse.stdout.write(b"multiple root commits: %s\n" % roots)
            if not parse.options or "--names" in parse.options \
                   or "-n" in parse.options:
                for item in shortset:
                    parse.stdout.write(b"unknown shortname: %s\n" % item)
                for item in emptyaddr:
                    parse.stdout.write(b"empty address: %s\n" % item)
                for item in emptyname:
                    parse.stdout.write(b"empty name: %s\n" % item)
                for item in badaddress:
                    parse.stdout.write(b"email address missing @: %s\n" % item)
            if not parse.options or "--uniqueness" in parse.options \
                   or "-u" in parse.options:
                self.chosen().check_uniqueness(True, announcer=lambda s: parse.stdout.write("reposurgeon: " + s + "\n"))
            if "--options" in parse.options or "-?" in parse.options:
                print("""\
--deletealls    -d     report mid-branch deletealls
--connected     -c     report disconnected commits
--roots         -r     report on multiple roots
--attributions  -a     report on anomalies in usernames and attributions
--uniqueness    -u     report on collisions among action stamps
--options       -?     list available options\
""")
    #
    # Housekeeping
    #
    def help_prefer(self):
        print(b"""
Report or set (with argument) the preferred type of repository. With
no arguments, describe capabilities of all supported systems. With
an argument (which must be the name of a supported system) this has
two effects:

First, if there are multiple repositories in a directory you do a read
on, reposurgeon will read the preferred one (otherwise it will
complain that it can't choose among them).

Secondly, this will change reposurgeon's preferred type for output.
This means that you do a write to a directory, it will build a repo of
the preferred type rather than its original type (if it had one).

If no preferred type has been explicitly selected, reading in a
repository (but not a fast-import stream) will implicitly set reposurgeon's
preference to the type of that repository.
""")
    def do_prefer(self, line):
        "Show the supported repository types, or set the preferred one."
        if line:
            # Selection mode: match the argument against every known type.
            requested = line.lower()
            for candidate in vcstypes + extractors:
                if requested == candidate.name:
                    self.preferred = candidate
                    break
            else:
                complain(b"known types are %s." % " ".join([x.name for x in vcstypes] + [x.name for x in extractors if x.visible]))
        else:
            # Report mode: describe everything we know how to handle.
            for vcs in vcstypes:
                print(vcs)
            for option in file_filters:
                print(b"read and write have a --format=%s option that supports %s files."
                      % (option, option.capitalize()))
            if any(ext.visible and not ext.vcstype for ext in extractors):
                readonly_names = " ".join(ext.name for ext in extractors if ext.visible)
                print(b"Other systems supported for read only: %s\n" \
                      % readonly_names)
        if verbose:
            if self.preferred:
                print(b"%s is the preferred type." % self.preferred.name)
            else:
                print(b"No preferred type has been set.")

    def help_sourcetype(self):
        print(b"""
Report (with no arguments) or select (with one argument) the current
repository's source type.  This type is normally set at
repository-read time, but may remain unset if the source was a stream
file.

The source type affects the interpretation of legacy IDs (for
purposes of the =N visibility set and the 'references' command) by
controlling the regular expressions used to recognize them. If no
preferred output type has been set, it may also change the output
format of stream files made from the repository.

The repository source type is reliably set when reading a Subversion
stream.  
""")
    def do_sourcetype(self, line):
        "Report or select the current repository's source type."
        repo = self.chosen()
        # Identity test: the repository object supports len(), so a
        # truthiness test could misreport an empty repo as "not chosen".
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if not line:
            # Report mode.
            if repo.vcs:
                print("%s: %s" % (repo.name, repo.vcs.name))
            else:
                print("%s: no preferred type." % repo.name)
            return
        # Selection mode: match the argument against every known type.
        requested = line.lower()
        for repotype in vcstypes + extractors:
            if requested == repotype.name:
                repo.vcs = repotype
                break
        else:
            complain(b"known types are %s." % " ".join([x.name for x in vcstypes] + [x.name for x in extractors if x.visible]))

    def help_choose(self):
        print(b"""
Choose a named repo on which to operate.  The name of a repo is
normally the basename of the directory or file it was loaded from, but
repos loaded from standard input are 'unnamed'. The program will add
a disambiguating suffix if there have been multiple reads from the
same source.

With no argument, lists the names of the currently stored repositories
and their load times.  The second column is '*' for the currently selected
repository, '-' for others.
""")
    def do_choose(self, line):
        "Choose a named repo on which to operate."
        if self.selection is not None:
            raise Recoverable(b"choose does not take a selection set")
        # Only the verbose case bails out early here; otherwise an empty
        # list simply falls through to the code below.
        if not self.repolist and verbose > 0:
            complain(b"no repositories are loaded.")
            return
        self.repolist.sort(key=operator.attrgetter(b"name"))
        if not line:
            # Listing mode: one line per loaded repo.
            current = self.chosen()
            for repo in self.repolist:
                # Compare against None explicitly: the repository object
                # supports len(), so an empty chosen repo must still be
                # marked as the current one.
                if current is not None and repo == current:
                    status = b'*'
                else:
                    status = b'-'
                if not quiet:
                    sys.stdout.write(rfc3339(repo.readtime) + " ")
                sys.stdout.write(b"%s %s\n" % (status, repo.name))
        elif line in self.reponames():
            self.choose(self.repo_by_name(line))
            if verbose:
                self.do_stats(line)
        else:
            complain(b"no such repo as %s" % line)

    def help_drop(self):
        print(b"""
Drop a repo named by the argument from reposurgeon's list, freeing the memory
used for its metadata and deleting on-disk blobs. With no argument, drops the
currently chosen repo.
""")
    def do_drop(self, line):
        "Drop a repo from reposurgeon's list."
        # Only the verbose case bails out early here; otherwise an empty
        # list falls through to the "no such repo" handling below.
        if not self.reponames() and verbose:
            complain(b"no repositories are loaded.")
            return
        if self.selection is not None:
            raise Recoverable(b"drop does not take a selection set")
        if not line:
            # No argument: drop the currently chosen repo.
            if self.chosen() is None:
                complain(b"no repo has been chosen.")
                return
            line = self.chosen().name
        if line in self.reponames():
            # A named repo can be dropped while nothing is chosen (for
            # instance after the chosen one was dropped), so guard the
            # attribute access against None.
            current = self.chosen()
            if current is not None and line == current.name:
                self.unchoose()
            holdrepo = self.repo_by_name(line)
            holdrepo.cleanup()
            self.remove_by_name(line)
            del holdrepo
        else:
            complain(b"no such repo as %s" % line)
        if verbose:
            # Emit listing of remaining repos
            self.do_choose(b'')

    def help_rename(self):
        print(b"""
Rename the currently chosen repo; requires an argument.  Won't do it
if there is already one by the new name.
""")
    def do_rename(self, line):
        "Rename a repository."
        if self.selection is not None:
            raise Recoverable(b"rename does not take a selection set")
        if not line.strip():
            # The help text promises an argument is required; enforce it
            # instead of renaming the repo to an empty string.
            complain(b"rename requires a new name as its argument.")
        elif line in self.reponames():
            complain(b"there is already a repo named %s." % line)
        elif self.chosen() is None:
            # Identity test: the repository object supports len(), so a
            # truthiness test could misreport an empty repo as "not chosen".
            complain(b"no repository is currently chosen.")
        else:
            self.chosen().rename(line)

    def help_preserve(self):
        print(b"""
Add (presumably untracked) files or directories to the repo's list of
paths to be restored from the backup directory after a rebuild. Each
argument, if any, is interpreted as a pathname.  The current preserve
list is displayed afterwards.
""")
    def do_preserve(self, line):
        "Add files and subdirectories to the preserve set."
        if self.selection is not None:
            raise Recoverable(b"preserve does not take a selection set")
        repo = self.chosen()
        # Identity test: the repository object supports len(), so a
        # truthiness test could misreport an empty repo as "not chosen".
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        # Each whitespace-separated token is taken as one pathname.
        for filename in line.split():
            repo.preserve(filename)
        # Always show the resulting preserve list, even with no arguments.
        announce(b"preserving %s." % list(repo.preservable()))

    def help_unpreserve(self):
        print(b"""
Remove (presumably untracked) files or directories to the repo's list
of paths to be restored from the backup directory after a
rebuild. Each argument, if any, is interpreted as a pathname.  The
current preserve list is displayed afterwards.
""")
    def do_unpreserve(self, line):
        "Remove files and subdirectories from the preserve set."
        if self.selection is not None:
            raise Recoverable(b"unpreserve does not take a selection set")
        repo = self.chosen()
        # Identity test: the repository object supports len(), so a
        # truthiness test could misreport an empty repo as "not chosen".
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        # Each whitespace-separated token is taken as one pathname.
        for filename in line.split():
            repo.unpreserve(filename)
        # Show what is still being preserved after the removals.
        announce(b"preserving %s." % list(repo.preservable()))

    #
    # Serialization and de-serialization.
    #
    def help_read(self):
        print(b"""
A read command with no arguments is treated as 'read .', operating on the
current directory.

With a directory-name argument, this command attempts to read in the
contents of a repository in any supported version-control system under
that directory.

If input is redirected from a plain file, it will be read in as a
fast-import stream or Subversion dump, whichever it is.

With an argument of '-', this command reads a fast-import stream or
Subversion dump from standard input (this will be useful in filters
constructed with command-line arguments).

The --format option can be used to read in binary repository dump files.
For a list of supported types, invoke the 'prefer' command.
""")
    def do_read(self, line):
        "Load a repository (or a redirected stream) and make it the chosen one."
        if self.selection is not None:
            raise Recoverable(b"read does not take a selection set")
        with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
            if parse.redirected:
                # Stream input.  A --format option swaps the raw input for
                # the output of the matching import filter command.
                repo = Repository()
                for option in parse.options:
                    if not option.startswith(b"--format="):
                        continue
                    vcs = option.split(b"=")[1]
                    try:
                        infilter = file_filters[vcs][0]
                        srcname = parse.stdin.name
                        parse.stdin.close()
                        parse.stdin = os.popen(infilter % srcname, "r")
                    except KeyError:
                        raise Recoverable(b"unrecognized --format")
                    break
                show_progress = (verbose==1 and not quiet)
                repo.fast_import(parse.stdin, parse.options, progress=show_progress)
            else:
                # Directory input.  This is slightly asymmetrical with the
                # write side, which interprets an empty argument list as '-'
                if not parse.line or parse.line == b'.':
                    source = os.getcwd()
                elif os.path.isdir(parse.line):
                    source = parse.line
                else:
                    raise Recoverable(b"read no longer takes a filename argument - use < redirection instead")
                repo = read_repo(source, parse.options, self.preferred)
        self.repolist.append(repo)
        self.choose(repo)
        current = self.chosen()
        if current:
            # A repo that knows its own type sets the preferred type.
            if current.vcs:
                self.preferred = current.vcs
            basename = os.path.basename(current.sourcedir or parse.infile or "unnamed")
            current.rename(self.uniquify(basename))
        if verbose:
            self.do_choose(b'')

    def help_write(self):
        print(b"""
Dump a fast-import stream representing selected events to standard
output (if second argument is empty or '-') or via > redirect to a file.
Alternatively, if there ia no redirect and the argument names a
directory the repository is rebuilt into that directory, with any
selection set argument being ignored; if that target directory is
nonempty its contents are backed up to a save directory.

Property extensions will be omitted if the importer for the
preferred repository type cannot digest them.

The --fossil option can be used to write out binary repository dump files.
For a list of supported types, invoke the 'prefer' command.
""")
    def do_write(self, line):
        "Export the selection as a stream, or rebuild into a directory."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        if line:
            line = os.path.expanduser(line)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if not parse.redirected and parse.line:
                # An unredirected argument must name a directory to
                # rebuild into.
                if not os.path.isdir(parse.line):
                    raise Recoverable(b"write no longer takes a filename argument - use > redirection instead")
                rebuild_repo(self.chosen(), parse.line, parse.options, self.preferred)
            else:
                # Stream output.  This is slightly asymmetrical with the
                # read side, which interprets an empty argument list as '.'
                for option in parse.options:
                    if not option.startswith(b"--format="):
                        continue
                    vcs = option.split(b"=")[1]
                    try:
                        outfilter = file_filters[vcs][1]
                        dstname = parse.stdout.name
                        parse.stdout.close()
                        parse.stdout = os.popen(outfilter % dstname, b"wb")
                    except KeyError:
                        raise Recoverable(b"unrecognized --format")
                    break
                show_progress = (verbose==1 and not quiet)
                self.chosen().fast_export(self.selection, parse.stdout, parse.options, progress=show_progress, target=self.preferred)

    def help_inspect(self):
        print(b"""
Dump a fast-import stream representing selected events to standard
output or via > redirect to a file.  Just like a write, except (1) the
progress meter is disabled, and (2) there is an identifying header
before each event dump.
""")
    def do_inspect(self, line):
        "Dump the selected events, each preceded by an identifying header."
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            if self.selection is None:
                if parse.line.strip():
                    # Whatever is left on the line is a selection expression.
                    parse.line = self.set_selection_set(parse.line)
                else:
                    self.selection = self.chosen().all()
            for ei, event in self.selected():
                # Header is "Event N" padded out with '=' signs; event
                # numbers shown to the user are 1-origin.
                header = b"Event %s" % repr(ei+1)
                header += b" " + ((72 - len(header)) * b"=") + b"\n"
                parse.stdout.write(header)
                if isinstance(event, Commit):
                    body = event.dump()
                else:
                    body = str(event)
                parse.stdout.write(body)

    def help_strip(self):
        print(b"""
Replace the blobs in the selected repository with self-identifying stubs;
and/or strip out topologically uninteresting commits.  The modifiers for
this are 'blobs' and 'reduce' respectively; the default is 'blobs'.

A selection set is effective only with the 'blobs' option, defaulting to all
blobs. The 'reduce' mode always acts on the entire repository.

This is intended for producing reduced test cases from large repositories.
""")
    def do_strip(self, line):
        "Drop content to produce a reduced test case."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # With no modifier the default action is to stub out blobs.
        if not line:
            striptypes = [b"blobs"]
        else:
            striptypes = line.split()
        if "blobs" in striptypes:
            # Replace each selected blob's content with a stub naming its mark.
            for (_, event) in self.selected(Blob):
                event.set_content(b"Blob at %s\n" % event.mark)
        if "reduce" in striptypes:
            # First pass: collect the references worth keeping.
            interesting = set()
            for event in repo.events:
                if isinstance(event, Tag):
                    interesting.add(event.committish)
                elif isinstance(event, Reset):
                    interesting.add(event.ref)
                elif isinstance(event, Commit):
                    # Anything that is not a plain link in a linear chain
                    # (exactly one parent and one child) is kept.
                    if len(event.children()) != 1 or len(event.parents()) != 1:
                        interesting.add(event.mark)
                    else:
                        # A linear commit is kept if any op is not a plain
                        # modify, or touches a path for which the parent's
                        # ancestor count is zero.
                        for op in event.operations():
                            if op.op != b'M' or repo.ancestor_count(event.parents()[0], op.path) == 0:
                                interesting.add(event.mark)
                                break
            # Second pass: also keep the immediate neighbors of every
            # interesting commit.
            neighbors = set()
            for event in repo.events:
                if isinstance(event, Commit) and event.mark in interesting:
                    neighbors |= set(event.parent_marks())
                    neighbors |= set(event.child_marks())
            interesting |= neighbors
            # Test each event at its own index.  The previous code consulted
            # the stale loop variable left over from the pass above, so the
            # verdict on the last event was applied to every index.
            deletia = [i for (i, candidate) in enumerate(repo.events)
                       if isinstance(candidate, Commit)
                       and candidate.mark not in interesting]
            repo.delete(deletia)

    def help_graph(self):
        print(b"""
Dump a graph representing selected events to standard output in DOT markup
for graphviz. Supports > redirection.
""")
    def do_graph(self, line):
        "Write the selected events as a graphviz DOT digraph."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            emit = parse.stdout.write
            emit(b"digraph {\n")
            # First pass: edges.  Parent links are drawn only when the
            # parent is itself in the selection.
            for _, event in self.selected():
                if isinstance(event, Commit):
                    for parent in event.parent_marks():
                        if repo.find(parent) in self.selection:
                            edge = (parent[1:], event.mark[1:])
                            emit(b'\t%s -> %s;\n' % edge)
                if isinstance(event, Tag):
                    pair = (event.name, event.committish[1:], )
                    emit(b'\t"%s" -> "%s" [style=dotted];\n' % pair)
                    emit(b'\t{rank=same; "%s"; "%s"}\n' % pair)
            # Second pass: node labels.
            for _, event in self.selected():
                if isinstance(event, Commit):
                    firstline = event.comment.split(b'\n')[0]
                    summary = cgi.escape(firstline[:42])
                    cid = event.mark
                    if event.legacy_id:
                        cid = event.showlegacy() + " &rarr; " + cid
                    emit(b'\t%s [shape=box,width=5,label=<<table cellspacing="0" border="0" cellborder="0"><tr><td><font color="blue">%s</font></td><td>%s</td></tr></table>>];\n' \
                         % (event.mark[1:], cid, summary))
                    # No child continues this branch, so hang a branch
                    # label node off the commit.
                    if all(event.branch != child.branch for child in event.children()):
                        emit(b'\t"%s" [shape=oval,width=2];\n' % event.branch)
                        emit(b'\t"%s" -> "%s" [style=dotted];\n' % (event.mark[1:], event.branch))
                if isinstance(event, Tag):
                    firstline = event.comment.split(b'\n')[0]
                    summary = cgi.escape(firstline[:32])
                    emit(b'\t"%s" [label=<<table cellspacing="0" border="0" cellborder="0"><tr><td><font color="blue">%s</font></td><td>%s</td></tr></table>>];\n' \
                         % (event.name, event.name, summary))
            emit(b"}\n")

    def help_rebuild(self):
        print(b"""
Rebuild a repository from the state held by reposurgeon.  The argument
specifies the target directory in which to do the rebuild; if the
repository read was from a repo directory (and not a git-import stream), it
defaults to that directory.  If the target directory is nonempty
its contents are backed up to a save directory.
""")
    def do_rebuild(self, line):
        "Write the edited state back out as a repository."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is not None:
            raise Recoverable(b"rebuild does not take a selection set")
        with RepoSurgeon.LineParse(line) as parse:
            # What is left of the line after option parsing is the target.
            target = parse.line
            rebuild_repo(repo, target, parse.options, self.preferred)

    #
    # Editing commands
    #
    def help_mailbox_out(self):
        print(b"""
Emit a mailbox file of messages in RFC822 format representing the
contents of repository metadata. Takes a selection set; members of the set
other than commits, annotated tags, and passthroughs are ignored (that
is, presently, blobs and resets). Supports > redirection.

May have an option --filter, followed by = and a /-enclosed regular expression.
If this is given, only headers with names matching it are emitted.  In this
context the name of the header includes its trailing colon.
""")
    def do_mailbox_out(self, line):
        "Generate a mailbox file representing object metadata."
        filter_regexp = None
        # shlex raises ValueError on unbalanced quotes; report that as an
        # ordinary command error like the other option problems below.
        try:
            opts = shlex.split(line)
        except ValueError as e:
            raise Recoverable("malformed command line in mailbox_out - %s" % e)
        for token in opts:
            if token.startswith("--filter="):
                # Strip the option name, leaving the /-enclosed regexp.
                token = token[9:]
                if len(token) < 2 or token[0] != '/' or token[-1] != '/':
                    raise Recoverable("malformed filter option in mailbox_out")
                try:
                    filter_regexp = re.compile(token[1:-1])
                except sre_constants.error as e:
                    raise Recoverable(b"filter compilation error - %s" % e)
            elif token.startswith(">"):
                # Redirection is handled downstream; just skip it here.
                continue
            else:
                raise Recoverable("unknown option %s in mailbox_out" % token)
        self.report_select(line, "email_out", (filter_regexp,))

    def help_mailbox_in(self):
        print(b"""
Accept on standard input a mailbox file of messages in RFC822 format
representing the contents of the metadata in selected commits and
annotated tags. Takes no selection set. Takes < redirection.

Users should be aware that modifying an Event-Number field will change
which event the update from that message is applied to.  This is
unlikely to have good results.

If the --create modifier is present, new tags and commits will be
appended to the repository.  In this case it is an error for a tag name
to match any exting tag name. Commit objects are created with no fileops.
If Committer-Date or Tagger-Date fields are not present they are filled
in with the time at which this command is executed. If Committer or Tagger
fields are not present, reposurgeon will attempt to deduce the user's
git-style identity and fill it in.

Otherwise, if the Event-Number field is absent, the mailbox_in logic will
attempt to match the commit or tag first by Legacy-ID, then by a unique
committer ID and timestamp pair.

If output is redirected and the modifier '--changed' appears, a minimal
set of modifications actually made is written to the output file (>
redirection is supported).
""")
    def do_mailbox_in(self, line):
        "Accept a mailbox file representing object metadata and update from it."
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        with RepoSurgeon.LineParse(line, capabilities=["stdin","stdout"]) as parse:
            update_list = []
            while True:
                msg = RepoSurgeonEmail.readmsg(parse.stdin)
                if not msg:
                    break
                update_list.append(email.message_from_string(msg))
        # First, a validation pass
        attribution_map = {}
        name_map = {}
        attribution_counts = collections.Counter()
        for commit in self.chosen().commits():
            stamp = commit.action_stamp()
            attribution_map[stamp] = commit
            attribution_counts[stamp] += 1
        for event in self.chosen().events:
            if isinstance(event, Tag):
                if event.name:
                    name_map[event.name] = event
                if event.tagger:
                    stamp = event.tagger.action_stamp()
                    attribution_map[stamp] = event
                    attribution_counts[stamp] += 1
        legacy_map = {}
        for commit in self.chosen().commits():
            if commit.legacy_id:
                legacy_map[commit.legacy_id] = commit
        events = []
        errors = 0
        # Special case - event creation
        if '--create' in parse.options:
            for (i, message) in enumerate(update_list):
                if b"Tag-Name" in message:
                    blank = Tag()
                    blank.tagger = Attribution()
                    blank.email_in(message, fill=True)
                    blank.committish = [c for c in self.chosen().commits()][-1].mark
                else:
                    blank = Commit()
                    blank.committer = Attribution()
                    blank.email_in(message, fill=True)
                    blank.mark = ":" + str(len(self.chosen().events) + 1)
                self.chosen().events.append(blank)
            return
        # Normal case - no --create
        for (i, message) in enumerate(update_list):
            event = None
            if b"Event-Number" in message:
                try:
                    eventnum = int(message[b"Event-Number"]) - 1
                except ValueError:
                    complain(b"event number garbled in update %d" % (i+1,))
                    errors += 1
                if eventnum < 0 or eventnum >= len(self.chosen()):
                    complain(b"event number %d out of range in update %d" \
                                      % (eventnum, i+1))
                    errors += 1
                event = self.chosen()[eventnum]
            elif b"Legacy-ID" in message:
                try:
                    event = legacy_map[message[b"Legacy-ID"]]
                except KeyError:
                    complain(b"no commit matches legacy-ID %s" \
                                      % message[b"Legacy-ID"])
                    errors += 1
            elif b"Event-Mark" in message:
                event = self.chosen().objfind(message[b"Event-Mark"])
                if not event:
                    complain(b"no commit matches mark %s" \
                             % message["Event-Mark"])
                    errors += 1
            elif b"Committer" in message and b"Committer-Date" in message:
                blank = Commit()
                blank.committer = Attribution()
                blank.email_in(message)
                stamp = blank.action_stamp()
                try:
                    event = attribution_map[stamp]
                except KeyError:
                    complain(b"no commit matches stamp %s" % stamp)
                    errors += 1
                if attribution_counts[stamp] > 1:
                    complain(b"multiple events match %s" % stamp)
                    errors += 1
            elif b"Tagger" in message and b"Tagger-Date" in message:
                blank = Tag()
                blank.tagger = Attribution()
                blank.email_in(message)
                stamp = blank.tagger.action_stamp()
                try:
                    event = attribution_map[stamp]
                except KeyError:
                    complain(b"no tag matches stamp %s" % stamp)
                    errors += 1
                if attribution_counts[stamp] > 1:
                    complain(b"multiple events match %s" % stamp)
                    errors += 1
            elif b"Tag-Name" in message:
                blank = Tag()
                blank.tagger = Attribution()
                blank.email_in(message)
                try:
                    event = name_map[blank.name]
                except KeyError:
                    complain(b"no tag matches name %s" % blank.name)
                    errors += 1
            else:
                complain(b"no commit matches update %d:\n%s" % (i+1, message))
                errors += 1
            if event is not None and not hasattr(event, "email_in"):
                try:
                    complain(b"event %d cannot be modified"%(event.index()+1))
                except AttributeError:
                    complain(b"event cannot be modified")
                errors += 1
            # Always append, even None, to stay in sync with update_list
            events.append(event)
        if errors > 0:
            raise Recoverable(b"%d errors in metadata updates" % errors)
        # Now apply the updates
        changers = []
        for (event, update) in zip(events, update_list):
            if event.email_in(update):
                changers.append(update)
        if verbose:
            if not changers:
                announce(b"no events modified by mailbox_in.")
            else:
                announce(b"%d events modified by mailbox_in." % len(changers))
        if parse.stdout != sys.stdout:
            if b"--changed" in parse.options:
                for update in changers:
                    parse.stdout.write(RepoSurgeonEmail.Divider + b"\n" + update.as_string(unixfrom=False))

    def help_edit(self):
        print(b"""
Report the selection set of events to a tempfile as mailbox_out does,
call an editor on it, and update from the result as mailbox_in does.
If you do not specify an editor name as second argument, it will be
taken from the $EDITOR variable in your environment.

Normally this command ignores blobs because mailbox_out does.
However, if you specify a selection set consisting of a single
blob, your editor will be called on the blob file.
""")
    def do_edit(self, line):
        "Edit metadata interactively."
        repo = self.chosen()
        if not repo:
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            # Default to every event that knows how to render itself
            # in mailbox form.
            editable = []
            for (index, candidate) in enumerate(repo):
                if hasattr(candidate, b"email_out"):
                    editable.append(index)
            self.selection = editable
        self.edit(self.selection, line)

    def help_filter(self):
        print(b"""
Run blobs, commit comments and committer/author names, or tag comments
and tag committer names in the selection set through the filter
specified on the command line.

With any mode other than --dedos, attempting to specify a selection
set including both blobs and non-blobs (that is, commits or tags)
throws an error. Inline content in commits is filtered when the
selection set contains (only) blobs and the commit is within the range
bounded by the earliest and latest blob in the specification.

When filtering blobs, if the command line contains the magic cookie
'%PATHS%' it is replaced with a space-separated list of all paths
that reference the blob.

With --shell, the remainder of the line specifies a filter as a
shell command. Each blob or comment is presented to the filter on
standard input; the content is replaced with whatever the filter emits
to standard output.

With --regex, the remainder of the line is expected to be a Python
regular expression substitution written as /from/to/ with 'from' and
'to' being passed as arguments to the standard re.sub() function and
that applied to modify the content. Actually, any non-space character
will work as a delimiter in place of the /; this makes it easier to
use / in patterns. Ordinarily only the first such substitution is
performed; putting 'g' after the slash replaces globally, and a
numeric literal gives the maximum number of substitutions to
perform. Other flags available restrict substiution scope - 'c' for
comment text only, 'C' for committer name only, 'a' for author names
only.

With --replace, the behavior is like --regexp but the expressions are
not interpreted as regular expressions. (This is slighly faster).

With --dedos, DOS/Windows-style \\r\\n line terminators are replaced with \\n.
""")
    def do_filter(self, line):
        """Run selected content through a shell, regex, replace or dedos filter.

        The mode switch (--shell, --regex, --replace or --dedos) must be the
        first thing on the line.  Raises Recoverable when the filter
        specification cannot be parsed.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        if not line:
            complain(b"no filter is specified")
            return
        if self.selection is None:
            complain(b"no selection")
            return
        class FilterCommand:
            def __init__(self, repo, filtercmd):
                "Initialize the filter from the command line."
                self.repo = repo
                self.filtercmd = None
                self.sub = None
                self.regex = None
                self.attributes = set([])
                # Must not use LineParse here as it would try to strip options
                # in shell commands.
                if filtercmd.startswith(b'--shell'):
                    self.filtercmd = filtercmd[7:].lstrip()
                elif filtercmd.startswith(b'--regex') or filtercmd.startswith(b'--replace'):
                    firstspace = filtercmd.find(b' ')
                    if firstspace == -1:
                        raise Recoverable(b"missing filter specification")
                    stripped = filtercmd[firstspace:].lstrip()
                    if not stripped:
                        # Nothing but whitespace after the mode switch, so
                        # there is no delimiter character to split on.
                        raise Recoverable(b"missing filter specification")
                    parts = stripped.split(stripped[0])
                    subflags = parts[-1]
                    if len(parts) != 4:
                        raise Recoverable(b"malformed filter specification")
                    elif parts[0]:
                        raise Recoverable(b"bad prefix '%s' on filter specification" % parts[0])
                    elif "%PATHS%" in filtercmd:
                        raise Recoverable(b"%PATHS% is not yet supported in regex filters")
                    else:
                        # Consume the flags left to right.  A run of digits
                        # is one numeric literal; anything unrecognized
                        # is rejected here.
                        subcount = 1
                        while subflags:
                            flag = subflags[0]
                            if flag == b"g":
                                subcount = 0
                                subflags = subflags[1:]
                            elif flag in {b"c", "a", "C"}:
                                self.attributes.add(flag)
                                subflags = subflags[1:]
                            elif flag.isdigit():
                                digits = re.match(b"[0-9]+", subflags).group(0)
                                subcount = int(digits)
                                subflags = subflags[len(digits):]
                            else:
                                raise Recoverable(b"unknown filter flag")
                        if not self.attributes:
                            self.attributes = {b"c", "a", "C"}
                        # subcount 0 does not reliably work as it should
                        if filtercmd.startswith(b'--regex'):
                            try:
                                self.regex = re.compile(parts[1])
                            except sre_constants.error as e:
                                raise Recoverable(b"filter compilation error - %s" % e)
                            self.sub = lambda s: self.regex.sub(parts[2],
                                                            s,
                                                            len(s) if subcount == 0 else subcount)
                        elif filtercmd.startswith(b'--replace'):
                            self.sub = lambda s: s.replace(parts[1],
                                                           parts[2],
                                                           len(s) if subcount == 0 else subcount)
                elif filtercmd.startswith(b'--dedos'):
                    if not self.attributes:
                        self.attributes = {b"c", "a", "C"}
                    self.sub = lambda s: s.replace(b"\r\n", b"\n")
                else:
                    raise Recoverable(b"--shell or --regex or --replace or --dedos required")
            def do(self, content, pathsubst=b""):
                "Perform the filter on string content or a file."
                if self.filtercmd:
                    if pathsubst:
                        filtercmd = self.filtercmd.replace(b"%PATHS%", pathsubst)
                    else:
                        filtercmd = self.filtercmd
                    # Only an input tempfile is needed; the filter's output
                    # comes back through capture().
                    (indesc, intmp) = tempfile.mkstemp(prefix=self.repo.subdir())
                    try:
                        assert indesc > -1    # pacify pylint
                        with open(intmp, b"wb") as wfp:
                            wfp.write(content)
                        return capture(b"%s <%s" % (filtercmd, intmp))
                    finally:
                        os.remove(intmp)
                        os.close(indesc)
                elif self.sub:
                    return self.sub(content)
                else:
                    raise Recoverable(b"unknown mode in filter command")
        # Mainline of do_filter() continues:
        filterhook = FilterCommand(self.chosen(), line)
        self.data_traverse(prompt=b"Filtering",
                           hook=filterhook.do,
                           attributes=filterhook.attributes,
                           safety=not line.startswith(b'--dedos'))

    def help_transcode(self):
        print(b"""
Transcode blobs, commit comments and committer/author names, or tag
comments and tag committer names in the selection set to UTF-8 from
the character encoding specified on the command line.

Attempting to specify a selection set including both blobs and
non-blobs (that is, commits or tags) throws an error. Inline content
in commits is filtered when the selection set contains (only) blobs
and the commit is within the range bounded by the earliest and latest
blob in the specification.

The encoding argument must name one of the codecs known to the Python
standard codecs library. In particular, 'latin-1' is a valid codec name.

Errors in this command are fatal, because an error may leave
repository objects in a damaged state.
""")
    def do_transcode(self, line):
        """Transcode selected content to UTF-8 from a named encoding.

        The first token of the line names the source codec.  A missing or
        unknown codec name raises Recoverable before any object is touched;
        a UnicodeError during the traversal itself is escalated to Fatal.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        (codec, line) = RepoSurgeon.pop_token(line)
        # Validate the codec up front so that a typo cannot blow up
        # with a LookupError partway through the traversal.
        if not codec:
            raise Recoverable(b"transcode requires a source encoding name")
        try:
            codecs.lookup(codec)
        except LookupError:
            raise Recoverable(b"unknown encoding '%s'" % codec)
        if self.selection is None:
            self.selection = self.chosen().all()
        def transcode(txt, _paths=None):
            return codecs.encode(codecs.decode(txt, codec), b"utf-8")
        try:
            self.data_traverse(prompt=b"Transcoding",
                               hook=transcode,
                               attributes={b"c", b"a", b"C"},
                               safety=True)
        except UnicodeError:
            raise Fatal(b"UnicodeError during transcoding")

    def help_setfield(self):
        print(b"""
In the selected objects (defaulting to none) set every instance of a
named field to a string value.  The string may be quoted to include
whitespace, and use backslash escapes interpreted by the Python
string-escape codec, such as \\n and \\t.

Attempts to set nonexistent attributes are ignored. Valid values for
the attribute are internal Python field names; in particular, for
commits, 'comment' and 'branch' are legal.  Consult the source code
for other interesting values.
""")
    def do_setfield(self, line):
        """Set an object field from a string.

        The line must consist of exactly two shell-style tokens, a field
        name and a value; the value is passed through the string_escape
        codec.  Selected events lacking the named attribute are skipped.
        Raises Recoverable on a missing selection or a malformed line.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            raise Recoverable(b"no selection")
        try:
            fields = shlex.split(line)
        except ValueError as e:
            # shlex signals unbalanced quotes this way
            raise Recoverable(b"malformed setfield line - %s" % e)
        if len(fields) != 2:
            raise Recoverable(b"missing or malformed setfield line")
        field = fields[0]
        try:
            value = fields[1].decode(b"string_escape")
        except ValueError as e:
            # raised on a broken backslash escape in the value
            raise Recoverable(b"bad escape in setfield value - %s" % e)
        for _, event in self.selected():
            if hasattr(event, field):
                setattr(event, field, value)

    def help_append(self):
        print(b"""
Append text to the comments of commits and tags in the specified
selection set. The text is the first token of the command and may
be a quoted string. C-style escape sequences in the string are
interpreted using Python's string_decode codec.

If the option --rstrip is given, the comment is right-stripped before
the new text is appended.
""")
    def do_append(self, line):
        """Append a line to comments in the specified selection set.

        The first shell-style token of the line, after string_escape
        decoding, is appended to the comment of each selected tag and
        commit.  With --rstrip the comment is right-stripped first.
        Raises Recoverable on a missing selection or a malformed line.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            raise Recoverable(b"no selection")
        with RepoSurgeon.LineParse(line) as parse:
            try:
                fields = shlex.split(parse.line)
            except ValueError as e:
                # shlex signals unbalanced quotes this way
                raise Recoverable(b"malformed append line - %s" % e)
            if not fields:
                raise Recoverable(b"missing append line")
            try:
                line = fields[0].decode(b"string_escape")
            except ValueError as e:
                # raised on a broken backslash escape in the text
                raise Recoverable(b"bad escape in append text - %s" % e)
            rstrip = '--rstrip' in parse.options
            for _, event in self.selected((Tag, Commit)):
                if rstrip:
                    event.comment = event.comment.rstrip()
                event.comment += line

    def help_squash(self):
        print(b"""
Combine a selection set of events; this may mean deleting them or
pushing their content forward or back onto a target commit just
outside the selection range, depending on policy flags.

The default selection set for this command is empty.  Blobs cannot be
directly affected by this command; they move or are deleted only when
removal of fileops associated with commits requires this.
""")
    def do_squash(self, line):
        "Squash events in the specified selection set."
        repo = self.chosen()
        if not repo:
            complain(b"no repo is loaded")
            return
        # An absent selection means "squash nothing", not "squash all".
        if self.selection is None:
            self.selection = []
        with RepoSurgeon.LineParse(line) as parse:
            policy = parse.options
            repo.squash(self.selection, policy)
    def help_delete(self):
        print(b"""
Delete a selection set of events.  The default selection set for this
command is empty.  Tags, resets, and passthroughs are deleted with no
side effects.  Blobs cannot be directly deleted with this command; they
are removed only when removal of fileops associated with commits requires this.

When a commit is deleted, what becomes of tags and fileops attached to
it is controlled by policy flags.  A delete is equivalent to a
squash with the --delete flag.
""")
    def do_delete(self, line):
        "Delete events in the specified selection set."
        repo = self.chosen()
        if not repo:
            complain(b"no repo is loaded")
            return
        # An absent selection means "delete nothing", not "delete all".
        if self.selection is None:
            self.selection = []
        with RepoSurgeon.LineParse(line) as parse:
            # A delete is a squash with the --delete policy forced on.
            policy = {b"--delete"} | parse.options
            repo.squash(self.selection, policy)

    def help_coalesce(self):
        print(b"""
Scan the selection set (defaulting to all) for runs of commits with
identical comments close to each other in time (this is a common form
of scar tissues in repository up-conversions from older file-oriented
version-control systems).  Merge these cliques by pushing their
fileops and tags up to the last commit, in order.

The optional argument, if present, is a maximum time separation in
seconds; the default is 90 seconds.

With the --changelog option, any commit with a comment containing the
string 'empty log message' (such as is generated by CVS) and containing
exactly one file operation modifing a path ending in 'ChangeLog' is
treated specially.  Such ChangeLog commits are considered to match any
commit before them by content, and will coalesce with it if the committer
matches and the commit separation is small enough.  This option handles
a convention used by Free Software Foundation projects.

With  the --debug option, show messages about mismatches.
""")
    def do_coalesce(self, line):
        "Coalesce events in the specified selection set."
        repo = self.chosen()
        if not repo:
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            self.selection = repo.all()
        with RepoSurgeon.LineParse(line) as parse:
            changelog = "--changelog" in parse.options
            # Optional argument overrides the default 90-second window.
            try:
                timefuzz = int(parse.line) if parse.line else 90
            except ValueError:
                raise Recoverable(b"time-fuzz value must be an integer")
            def is_clog(commit):
                "Is this a log-less commit whose only fileop modifies a ChangeLog?"
                if b"empty log message" not in commit.comment:
                    return False
                if len(commit.operations()) != 1:
                    return False
                fileop = commit.operations()[0]
                return fileop.op == b"M" and fileop.path.endswith(b"ChangeLog")
            def chatty():
                "Should mismatch diagnostics be shown?"
                return verbose >= DEBUG_DELETE or b'--debug' in parse.options
            def coalesce_match(cthis, cnext):
                "Can cnext be folded into the span that currently ends at cthis?"
                if cthis.committer.email != cnext.committer.email:
                    if chatty():
                        announce(b"committer email mismatch at %s" % cnext.id_me())
                    return False
                if cthis.committer.date.delta(cnext.committer.date) >= timefuzz:
                    if chatty():
                        announce(b"time fuzz exceeded at %s" % cnext.id_me())
                    return False
                if changelog and not is_clog(cthis) and is_clog(cnext):
                    return True
                if cthis.comment != cnext.comment:
                    if chatty():
                        announce(b"comment mismatch at %s" % cnext.id_me())
                    return False
                return True
            # Per branch, the list of marks in the span currently being grown.
            spans = {}
            squashes = []
            for (_i, commit) in self.selected(Commit):
                current = spans.get(commit.branch)
                if current is None:
                    # First commit seen on this branch opens a span.
                    spans[commit.branch] = [commit.mark]
                elif coalesce_match(repo.objfind(current[-1]), commit):
                    # Matches the commit at the end of the span - extend it.
                    current.append(commit.mark)
                else:
                    # Mismatch - close off the old span (keeping it only if
                    # it has something to merge) and open a new one here.
                    if len(current) > 1:
                        squashes.append(current)
                    spans[commit.branch] = [commit.mark]
            # Spans still open at the end of the scan count too.
            squashes.extend(span for span in spans.values() if len(span) > 1)
            for span in squashes:
                # Prevent lossage when last is a ChangeLog commit
                lead_comment = repo.objfind(span[0]).comment
                repo.objfind(span[-1]).comment = lead_comment
                repo.squash([repo.find(mark) for mark in span[:-1]], (b"--coalesce",))
    def help_add(self):
        print(b"""
From a specified commit, add a specified fileop. The syntax is

     add {D path | M perm mark path | R source target | C source target}

For a D operation to be valid there must be an M operation for the path
in the commit's ancestry.  For an M operation to be valid, the 'perm'
part must be a token ending with 755 or 644 and the 'mark' must
refer to a blob that precedes the commit location.  For an R or C
operation to be valid, there must be an M operation for the source
in the commit's ancestry.

""")
    def do_add(self, line):
        """Add a fileop to a specified commit.

        The line is 'D path', 'M perm mark path', 'R source target' or
        'C source target'.  All arguments are validated against every
        selected commit before any commit is changed; Recoverable is
        raised on the first problem found.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            self.selection = []
        repo = self.chosen()
        try:
            fields = shlex.split(line)
        except ValueError as e:
            # shlex signals unbalanced quotes this way
            raise Recoverable(b"malformed add command - %s" % e)
        if len(fields) < 2:
            raise Recoverable(b"add requires an operation type and arguments")
        optype = fields[0]
        if optype == b"D":
            path = fields[1]
            for _, event in self.selected(Commit):
                if path in event.paths():
                    raise Recoverable(b"%s already has an op for %s" \
                                      % (event.mark, path))
                if repo.ancestor_count(event, path) == 0:
                    raise Recoverable(b"no previous M for %s" % path)
        elif optype == b"M":
            if len(fields) != 4:
                raise Recoverable(b"wrong field count in add command")
            if fields[1].endswith(b"644"):
                perms = 0o100644
            elif fields[1].endswith(b"755"):
                perms = 0o100755
            else:
                # Without this, perms would be unbound when the fileop
                # is constructed below.
                raise Recoverable(b"permissions %s in add command must end with 644 or 755" % fields[1])
            mark = fields[2]
            if not mark.startswith(b":"):
                raise Recoverable(b"garbled mark %s in add command" % mark)
            try:
                markval = int(mark[1:])
            except ValueError:
                raise Recoverable(b"non-numeric mark %s in add command" % mark)
            if not isinstance(repo.objfind(mark), Blob):
                raise Recoverable(b"mark %s in add command does not refer to a blob" % mark)
            # NOTE(review): this compares a mark number with an event
            # index - confirm that is the intended ordering test.
            # min() of an empty selection would raise ValueError.
            elif self.selection and markval >= min(self.selection):
                raise Recoverable(b"mark %s in add command is after add location" % mark)
            path = fields[3]
            for _, event in self.selected(Commit):
                if path in event.paths():
                    raise Recoverable(b"%s already has an op for %s" \
                                      % (event.mark, path))
        elif optype in (b"R", b"C"):
            try:
                source = fields[1]
                target = fields[2]
            except IndexError:
                raise Recoverable(b"too few arguments in add %s" % optype)
            for _, event in self.selected(Commit):
                if source in event.paths() or target in event.paths():
                    raise Recoverable(b"%s already has an op for %s or %s" \
                                      % (event.mark, source, target))
                if repo.ancestor_count(event, source) == 0:
                    raise Recoverable(b"no previous M for %s" % source)
        else:
            raise Recoverable(b"unknown operation type %s in add command" % optype)
        # Validation passed for every selected commit; now apply.
        for _, event in self.selected(Commit):
            event.invalidate_pathset_cache()
            fileop = FileOp(self.chosen())
            if optype == b"D":
                fileop.construct(b"D", path)
            elif optype == b"M":
                fileop.construct(b"M", perms, mark, path)
            elif optype in (b"R", b"C"):
                fileop.construct(optype, source, target)
            event.append_operation(fileop)

    def help_blob(self):
        print(b"""
Syntax:

     blob

Create a blob at mark :1 after renumbering other marks starting from
:2.  Data is taken from stdin, which may be a here-doc.  This can be
used with the add command to patch data into a repository.
""")
    def do_blob(self, line):
        """Create a blob at mark :1 from data read on standard input.

        Existing marks are renumbered starting from :2 and the new blob
        is inserted at the front of the event list.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        repo = self.chosen()
        # Read the content before touching the repository, so that a
        # failure while getting the input does not leave the marks
        # renumbered and an empty blob inserted.
        with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
            content = parse.stdin.read()
        repo.renumber(2)
        blob = Blob(repo)
        blob.set_mark(":1")
        repo.addEvent(blob, where=0)
        blob.set_content(content)

    def help_remove(self):
        print(b"""
From a specified commit, remove a specified fileop. The syntax is:

     remove [DMRCN] OP [to COMMIT]

The OP must be one of (a) the keyword 'deletes', (b) a file path, (c)
a file patch preceded by an op type set (some subset of the letters
DMRCN), or (c) a 1-origin numeric index.  The 'deletes' keyword
selects all D fileops in the commit; the others select one each.

If the to clause is present, the removed op is appended to the
commit specified by the following singleton selection set.  This option
cannot be combined with 'deletes'.

Note that this command does not attempt to scavenge blobs even if the
deleted fileop might be the only reference to them. This behavior may
change in a future release.
""")
    def do_remove(self, line):
        """Delete a fileop from a specified commit.

        The line is '[DMRCN] OP [to SELECTION]', where OP is the keyword
        'deletes', a path, or a 1-origin index into the commit's fileops.
        With a 'to' clause the removed fileop is appended to the single
        event picked out by the following selection expression.
        """
        if not self.chosen():
            complain(b"no repo is loaded")
            return
        if self.selection is None:
            self.selection = []
        orig = line
        (opindex, line) = RepoSurgeon.pop_token(line)
        # FIXME: This needs more general parsing
        optypes = "DMRCN"
        if re.match("[DMRCN]+$", opindex):
            optypes = opindex
            (opindex, line) = RepoSurgeon.pop_token(line)
        for _, event in self.selected(Commit):
            event.invalidate_pathset_cache()
            if opindex == b"deletes":
                event.set_operations([e for e in event.operations() if e.op != "D"])
                # Go on to the next selected commit, as the other
                # forms of this command do.
                continue
            # Look for a fileop of an allowed type that mentions the path;
            # the else clause runs only when no fileop matched.
            for (ind, op) in enumerate(event.operations()):
                if hasattr(op, "op") and getattr(op, "op") not in optypes:
                    continue
                if hasattr(op, "path") and getattr(op, "path") == opindex:
                    break
                if hasattr(op, "source") and getattr(op, "source") == opindex:
                    break
                if hasattr(op, "target") and getattr(op, "target") == opindex:
                    break
            else:
                try:
                    ind = int(opindex) - 1
                except (ValueError, IndexError):
                    complain(b"invalid or missing fileop specification '%s' on %s" % (opindex, repr(orig)))
                    return
                if ind < 0:
                    # Indices are 1-origin; without this check a 0 would
                    # wrap around and silently remove the last fileop.
                    complain(b"out-of-range fileop index %s" % ind)
                    return
            target = None
            # NOTE(review): the 'to' clause is parsed inside the loop and
            # replaces self.selection while self.selected() is being
            # iterated - confirm this behaves as intended when more
            # than one commit is selected.
            if line:
                (verb, line)  = RepoSurgeon.pop_token(line)
                if verb == b'to':
                    self.set_selection_set(line)
                    if len(self.selection) != 1:
                        raise Recoverable(b"remove to requires a singleton selection")
                    target = self.selection[0]
            try:
                removed = event.operations().pop(ind)
                # Compare with None: event index 0 is a valid target.
                if target is not None:
                    self.chosen().events[target].append_operation(removed)
                # FIXME: Scavenge blobs left with no references
            except IndexError:
                complain(b"out-of-range fileop index %s" % ind)
                return

    def help_renumber(self):
        print(b"""
Renumber the marks in a repository, from :1 up to <n> where <n> is the
count of the last mark. Just in case an importer ever cares about mark
ordering or gaps in the sequence.
""")
    def do_renumber(self, unused):
        "Renumber the marks in the selected repo."
        assert unused is not None    # pacify pylint
        # Act on the same object the guard tests, as the sibling
        # commands do, rather than reaching for self.repo directly.
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        repo.renumber()

    def help_timeoffset(self):
        print(b"""
Apply a time offset to all time/date stamps in the selected set.  An offset
argument is required; it may be in the form [+-]ss, [+-]mm:ss or [+-]hh:mm:ss.
The leading sign is required to distingush it from a selection expression.

Optionally you may also specify another argument in the form [+-]hhmm, a
timeone literal to apply.  To apply a timezone without an offset, use
an offset literal of +0 or -0.
""")
    def do_timeoffset(self, line):
        """Apply a time offset to all dates in selected events.

        The first argument is a signed offset of the form [+-]ss,
        [+-]mm:ss or [+-]hh:mm:ss; the sign applies to the whole offset.
        An optional second argument is a [+-]hhmm timezone literal to
        set on every date touched.  Nothing is changed if either
        argument is malformed.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        if not line:
            complain(b"a signed time offset argument is required.")
            return
        elif not line[0] in (b'-', b'+'):
            complain(b"time offset argument must begin with + or -.")
            return
        line = str(line)   # pacify pylint by forcing string type
        args = line.split()
        # Peel off the sign so that it scales the entire offset rather
        # than just its leading field.
        sign = -1 if args[0][0] == b'-' else 1
        components = args[0][1:].split(b":")
        if len(components) > 3:
            complain(b"too many colons")
            return
        if not all(component.isdigit() for component in components):
            complain(b"expected numeric literals in date format")
            return
        # Fold the fields in base 60: hh*3600 + mm*60 + ss.
        offset = 0
        for component in components:
            offset = offset * 60 + int(component)
        offset *= sign
        timezone = None
        if len(args) > 1:
            if not re.match(b"[+-][0-9][0-9][0-9][0-9]$", args[1]):
                complain(b"expected timezone literal to be [+-]hhmm")
                return
            timezone = args[1]
        for _, event in self.selected():
            if isinstance(event, Tag):
                if event.tagger:
                    event.tagger.date.timestamp += offset
                    if timezone is not None:
                        event.tagger.date.timezone = timezone
            elif isinstance(event, Commit):
                event.committer.date.timestamp += offset
                if timezone is not None:
                    event.committer.date.timezone = timezone
                for author in event.authors:
                    author.date.timestamp += offset
                    if timezone is not None:
                        author.date.timezone = timezone

    def help_divide(self):
        print(b"""
Attempt to partition a repo by cutting the parent-child link
between two specified commits (they must be adjacent). Does not take a
general selection-set argument.  It is only necessary to specify the
parent commit, unless it has multiple children in which case the child
commit must follow (separate it with a comma).

If the repo was named 'foo', you will normally end up with two repos
named 'foo-early' and 'foo-late'.  But if the commit graph would
remain connected through another path after the cut, the behavior
changes.  In this case, if the parent and child were on the same
branch 'qux', the branch segments are renamed 'qux-early' and
'qux-late'.
""")
    def do_divide(self, _line):
        """Attempt to topologically partition the repo.

        The selection set names the parent commit and, optionally, the
        child commit of the link to cut.  With only the parent given, its
        single child is used; a parent with no children or with several
        is rejected.  self.cut() is tried first; if it reports failure
        the parent link is removed anyway and, when both commits carry
        the same branch, every event on that branch is renamed with an
        '-early' or '-late' suffix according to its position relative to
        the parent.  Errors are reported through complain().
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = []
        if len(self.selection) == 0:
            complain(b"one or possibly two arguments specifying a link are required")
            return
        early = self.chosen()[self.selection[0]]
        if not isinstance(early, Commit):
            complain(b"first element of selection is not a commit")
            return
        possibles = list(early.children())
        if len(self.selection) == 1:
            if len(possibles) > 1:
                complain(b"commit has multiple children, one must be specified")
                return
            elif len(possibles) == 1:
                late = possibles[0]
            else:
                complain(b"parent has no children")
                return
        elif len(self.selection) == 2:
            late = self.chosen()[self.selection[1]]
            if not isinstance(late, Commit):
                complain(b"last element of selection is not a commit")
                return
            if early.mark not in late.parent_marks():
                complain(b"not a parent-child pair")
                return
        else:
            # Must stop here: 'late' was never bound on this path, so
            # falling through would die with a NameError.
            complain(b"too many arguments")
            return
        # Every path reaching this point has bound both early and late.
        # Try the topological cut first
        if not self.cut(early, late):
            # If that failed, cut anyway and rename the branch segments
            late.remove_parent(early)
            if early.branch != late.branch:
                announce(b"no branch renames were required")
            else:
                basename = early.branch
                announce(b"%s has been split into %s-early and %s-late" \
                         % (basename, basename, basename))
                for (i, event) in enumerate(self.chosen().events):
                    if hasattr(event, "branch") and event.branch == basename:
                        # Events up to and including the parent go to
                        # the early segment, the rest to the late one.
                        if i <= self.selection[0]:
                            event.branch += b"-early"
                        else:
                            event.branch += b"-late"
        if verbose:
            self.do_choose(b"")

    def help_expunge(self):
        print(b"""
Expunge files from the selected portion of the repo history; the
default is the entire history.  The arguments to this command may be
paths or Python regular expressions matching paths (regexps must
be marked by being surrounded with //).

All filemodify (M) operations and delete (D) operations involving a
matched file in the selected set of events are disconnected from the
repo and put in a removal set.  Renames are followed as the tool walks
forward in the selection set; each triggers a warning message. If a
selected file is a copy (C) target, the copy will be deleted and a
warning message issued. If a selected file is a copy source, the copy
target will be added to the list of paths to be deleted and a warning
issued.

After file expunges have been performed, any commits with no
remaining file operations will be deleted, and any tags pointing to
them. Commits with deleted fileops pointing both in and outside the
path set are not deleted, but are cloned into the removal set.

The removal set is not discarded. It is assembled into a new
repository named after the old one with the suffix "-expunges" added.
Thus, this command can be used to carve a repository into sections by
file path matches.
""")
    def do_expunge(self, line):
        """Expunge files from the chosen repository.

        The whitespace-separated words of line are handed, together with
        the selection set (all events when none was given), to
        self.expunge().
        """
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        selection = self.selection
        patterns = line.split()
        self.expunge(selection, patterns)

    def help_split(self):
        print(b"""
Split a specified commit in two, the opposite of join.

    split at M
    split by PREFIX

The selection set is required to be a commit location; the modifier is
a preposition which indicates which splitting method to use. If the
preposition is 'at', then the third argument must be an integer
1-origin index of a file operation within the commit. If it is 'in',
then the third argument must be a pathname to be matched.

The commit is copied and inserted into a new position in the
event sequence, immediately following itself; the duplicate becomes
the child of the original, and replaces it as parent of the original's
children. Commit metadata is duplicated; the mark of the new commit is
then changed, with 'bis' added as a suffix.

Finally, some file operations - starting at the one matched or indexed
by the split argument - are moved forward from the original commit
into the new one.  Legal indices are 2-n, where n is the number of
file operations in the original commit.
""")
    def do_split(self, line):
        """Split a commit.

        The selection set must be a single commit.  line must consist of
        exactly two words: the preposition 'at' followed by a 1-origin
        fileop index, or the preposition 'in' followed by a path to be
        matched.  Raises Recoverable on any argument error, or when the
        prefix split reports that nothing matched.
        """
        if self.chosen() is None:
            raise Recoverable(b"no repo has been chosen.")
        if self.selection is None:
            self.selection = []
        if len(self.selection) != 1:
            raise Recoverable(b"selection of a single commit required for this command")
        where = self.selection[0]
        event = self.chosen()[where]
        if not isinstance(event, Commit):
            raise Recoverable(b"fileop argument doesn't point at a commit")
        line = str(line)   # pacify pylint by forcing string type
        fields = line.split()
        # Unpacking anything but two words would raise a bare ValueError
        # and take the interpreter down with a traceback.
        if len(fields) != 2:
            raise Recoverable(b"split requires a preposition ('at' or 'in') and an argument")
        (prep, obj) = fields
        if prep == 'at':
            # Only the integer conversion is guarded, so an unrelated
            # ValueError from the split itself is not mislabelled.
            try:
                splitpoint = int(obj) - 1
            except ValueError:
                raise Recoverable(b"expected integer fileop index (1-origin)")
            # 0-origin split points 1..n-1, i.e. 1-origin indices 2..n.
            if splitpoint not in range(1, len(event.operations())):
                raise Recoverable(b"fileop index out of range")
            self.chosen().split_commit_by_index(where, splitpoint)
        elif prep == 'in':
            split = self.chosen().split_commit_by_prefix(where, obj)
            if not split:
                raise Recoverable(b"couldn't find '%s' in a fileop path." \
                                  % obj)
        else:
            raise Recoverable(b"don't know what to do for preposition %s" % prep)
        if verbose:
            announce("new commits are events %s and %s." % (where+1, where+2))

    def help_unite(self):
        print(b"""
Unite repositories. Name any number of loaded repositories; they will
be united into one union repo and removed from the load list.  The
union repo will be selected.

The root of each repo (other than the oldest repo) will be grafted as
a child to the last commit in the dump with a preceding commit date.
Running last to first, duplicate names will be disambiguated using the
source repository name (thus, recent duplicates will get priority over
older ones). After all grafts, marks will be renumbered.

The name of the new repo will be the names of all parts concatenated,
separated by '+'. It will have no source directory or preferred system
type.

With the option --prune, at each join generate D ops for every
file that doesn't have a modify operation in the root commit of the
branch being grafted on.
""")
    def do_unite(self, line):
        """Unite repos together.

        Each word of the parsed line must name a loaded repository; at
        least two are needed.  The repositories and the parsed options
        are passed to self.unite().  Raises Recoverable for an unknown
        name or for fewer than two names.
        """
        self.unchoose()
        with RepoSurgeon.LineParse(line) as parse:
            factors = []
            for name in parse.line.split():
                found = self.repo_by_name(name)
                if found is None:
                    raise Recoverable(b"no such repo as %s" % name)
                factors.append(found)
            if len(factors) < 2:
                raise Recoverable(b"unite requires repo name arguments")
            self.unite(factors, parse.options)
        if verbose:
            self.do_choose(b'')

    def help_graft(self):
        print(b"""
For when unite doesn't give you enough control. This command may have
either of two forms, selected by the size of the selection set.  The
first argument is always required to be the name of a loaded repo.

If the selection set is of size 1, it must identify a single commit in
the currently chosen repo; in this case the name repo's root will
become a child of the specified commit. If the selection set is
empty, the named repo must contain one or more callouts matching a
commits in the currently chosen repo.

Labels and branches in the named repo are prefixed with its name; then
it is grafted to the selected one. Any other callouts in the named repo are also
resolved in the context of the currently chosen one. Finally, the
named repo is removed from the load list.

With the option --prune, prepend a deleteall operation into the root
of the grafted repository.
""")
    def do_graft(self, line):
        """Graft a named repo onto the selected one.

        The parsed line must be the name of a loaded repository.  A
        singleton selection set supplies the graft point; otherwise the
        named repository must have at least one commit parent whose mark
        is a callout, in which case no graft point is passed.  After the
        graft the named repository is removed from the load list.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        if not self.repolist:
            raise Recoverable(b"no repositories are loaded.")
        with RepoSurgeon.LineParse(line) as parse:
            if parse.line not in self.reponames():
                raise Recoverable(b"no such repo as %s" % parse.line)
            graft_repo = self.repo_by_name(parse.line)
            if self.selection is not None and len(self.selection) == 1:
                graft_point = self.selection[0]
            else:
                # Without an explicit graft point the named repo has to
                # bring its own attachment points in the form of callouts.
                callouts = [parent
                            for commit in graft_repo.commits()
                            for parent in commit.parents()
                            if Commit.is_callout(parent.mark)]
                if not callouts:
                    raise Recoverable(b"a singleton selection set is required.")
                graft_point = None
            # Both repos and the graft point are known; perform the graft.
            self.chosen().graft(graft_repo, graft_point, parse.options)
            self.remove_by_name(graft_repo.name)

    def help_debranch(self):
        print(b"""
Takes one or two arguments which must be the names of source and target
branches; if the second (target) argument is omitted it defaults to 'master'.
The history of the source branch is merged into the history of the target
branch, becoming the history of a subdirectory with the name of the source
branch. Any trailing segment of a branch name is accepted as a synonym for
it; thus 'master' is the same as 'refs/heads/master'.  Any resets of the
source branch are removed.
""")
    def do_debranch(self, line):
        """Turn a branch into a subdirectory.

        line holds the source branch and, optionally, the target branch
        (default 'refs/heads/master').  Each name may be a full key of
        the repository's branch map or a trailing segment of one.  The
        fileop paths of the commits that belong only to the source
        ancestry are moved under a directory named after the basename of
        the source branch; the source and target commits are then chained
        into a single line of history on the target branch.  Errors are
        reported through complain().
        """
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            # Everything below dereferences the repository.
            return
        args = line.split()
        if not args:
            complain(b"debranch command requires at least one argument")
            return
        source = args[0]
        target = b'refs/heads/master'
        if len(args) == 2:
            target = args[1]
        branches = repo.branchmap()
        def resolve(name):
            "Return the branch-map key that name denotes, or None."
            if name in branches:
                return name
            for candidate in branches.iterkeys():
                if candidate.endswith(os.sep + name):
                    return candidate
            return None
        resolved = resolve(source)
        if resolved is None:
            complain(b"no branch matches source %s" % source)
            return
        source = resolved
        resolved = resolve(target)
        if resolved is None:
            complain(b"no branch matches %s" % target)
            return
        target = resolved
        # Now that the arguments are in proper form, implement
        stip = repo.find(branches[source])
        scommits = repo.ancestors(stip) + [stip]
        scommits.sort()
        ttip = repo.find(branches[target])
        tcommits = repo.ancestors(ttip) + [ttip]
        tcommits.sort()
        # Don't touch commits up to the branch join.
        last_parent = []
        while scommits and tcommits and scommits[0] == tcommits[0]:
            last_parent = [repo.events[scommits[0]].mark]
            scommits.pop(0)
            tcommits.pop(0)
        # Push every path touched on the source side into the subdirectory.
        pref = os.path.basename(source)
        for ci in scommits:
            found = False
            for fileop in repo.events[ci].operations():
                if fileop.op in (b"D", b"M"):
                    fileop.path = os.path.join(pref, fileop.path)
                    found = True
                elif fileop.op in (b"R", b"C"):
                    fileop.source = os.path.join(pref, fileop.source)
                    fileop.target = os.path.join(pref, fileop.target)
                    found = True
            if found:
                repo.events[ci].invalidate_pathset_cache()
        # Chain the remaining commits of both branches, in event order,
        # into one sequence.  last_parent is always a list here (empty
        # when the branches share no leading commits), so the first
        # parent of each commit is replaced unconditionally.
        merged = sorted(set(scommits + tcommits))
        for i in merged:
            event = repo.events[i]
            event.set_parent_marks(last_parent + event.parent_marks()[1:])
            event.set_branch(target)
            last_parent = [event.mark]
        # NOTE(review): only the last reset naming the source branch is
        # deleted, although the help text says any such resets are
        # removed - confirm which is intended.
        source_reset = None
        for (i, event) in enumerate(repo.events):
            if isinstance(event, Reset) and event.ref == source:
                source_reset = i
        if source_reset is not None:
            del repo.events[source_reset]
        repo.declare_sequence_mutation(b"debranch operation")

    def help_path(self):
        print(b"""
Rename a path in every fileop of every selected commit.  The
default selection set is all commits. The first argument is interpreted as a
Python regular expression to match against paths; the second may contain
back-reference syntax.

Ordinarily, if the target path already exists in the fileops, or is visible
in the ancestry of the commit, this command throws an error.  With the
--force option, these checks are skipped.
""")
    def do_path(self, line):
        """Rename paths in the history.

        line has the form 'SOURCE rename TARGET': SOURCE is a regular
        expression searched for in the path, source and target
        attributes of each fileop of the selected commits (all commits
        by default), and TARGET is the substitution applied to every
        attribute that matches.  Unless --force is among the parsed
        options, a rename whose result is visible in the commit's
        ancestry or already among the commit's paths raises Recoverable.
        Nothing is modified unless every check has passed.
        """
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        (source_re, line) = RepoSurgeon.pop_token(line)
        (verb, line) = RepoSurgeon.pop_token(line)
        with RepoSurgeon.LineParse(line) as parse:
            if verb != b"rename":
                raise Recoverable(b"unknown verb '%s' in path command." % verb)
            force = b'--force' in parse.options
            (target_re, _) = RepoSurgeon.pop_token(parse.line)
            if not target_re:
                raise Recoverable(b"no target specified in rename")
            # Compile once, and turn a malformed pattern into an ordinary
            # command error instead of an uncaught exception.
            try:
                matcher = re.compile(source_re)
            except re.error:
                raise Recoverable(b"invalid regular expression")
            actions = []
            # Honor the selection set rather than walking every commit.
            for _, commit in self.selected(Commit):
                for fileop in commit.operations():
                    for attr in (b"path", b"source", b"target"):
                        oldpath = getattr(fileop, attr, None)
                        if not oldpath or not matcher.search(oldpath):
                            continue
                        newpath = matcher.sub(target_re, oldpath)
                        if not force and commit.visible(newpath):
                            raise Recoverable(b"rename at %s failed, %s visible in ancestry" % (commit.id_me(), newpath))
                        elif not force and newpath in commit.paths():
                            raise Recoverable(b"rename at %s failed, %s exists there" % (commit.id_me(), newpath))
                        actions.append((commit, fileop, attr, newpath))
            # All checks must pass before any renames
            for (commit, fileop, attr, newpath) in actions:
                setattr(fileop, attr, newpath)
                # The commit's cached path set no longer matches its
                # fileops once one of them has been renamed.
                commit.invalidate_pathset_cache()

    def help_paths(self):
        print(b"""
Without a modifier, list all paths touched by fileops in
the selection set (which defaults to the entire repo). This
variant does > redirection.

With the 'sub' modifier, take a second argument that is a directory
name and prepend it to every path. With the 'sup' modifier, strip the
first directory component from every path.
""" )
    def do_paths(self, line):
        """List the paths in the selection set, or rewrite them.

        Unless line starts with 'sub' or 'sup', the sorted set of paths
        touched by the selected commits is written to the (possibly
        redirected) output.  'sub DIR' joins DIR onto the front of every
        path and 'sup' removes everything up to and including the first
        path separator; both go through the repository's path_walk(),
        print what it returns, and invalidate the manifests afterwards.
        Raises Recoverable when 'sub' is given without a directory.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = self.chosen().all()
        if not line.startswith((b"sub", b"sup")):
            with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                allpaths = set()
                for _, event in self.selected(Commit):
                    allpaths.update(event.paths())
                parse.stdout.write(b"\n".join(sorted(allpaths)) + b"\n")
                return
        fields = line.split()
        if fields[0] == b"sub":
            # Indexing fields[1] blindly would raise IndexError when the
            # directory argument has been left out.
            if len(fields) < 2:
                raise Recoverable(b"sub modifier requires a directory argument.")
            prefix = fields[1]
            modified = self.chosen().path_walk(self.selection,
                                               lambda f: os.path.join(prefix,f))
            print(b"\n".join(modified))
        elif fields[0] == b"sup":
            # NOTE(review): find() yields -1 for a path without a
            # separator, so the lambda hands such a path back unchanged
            # and does not itself raise IndexError - confirm whether
            # path_walk can.
            try:
                modified = self.chosen().path_walk(self.selection,
                                               lambda f: f[f.find(os.sep)+1:])
                print(b"\n".join(modified))
            except IndexError:
                raise Recoverable(b"no / in sup path.")
        self.chosen().invalidate_manifests()

    def help_manifest(self):
        print(b"""
Print commit trees contents. Takes an optional selection set argument
defaulting to all commits, and an optional Python regular expression.
For each commit in the selection set, print the mapping of all paths in
that commit tree to the corresponding blob marks, mirroring what files
would be created in a checkout of the commit. If a regular expression
is given, only print "path -> mark" lines for paths matching it.
This command supports > redirection.
""")
    def do_manifest(self, line):
        """Print all files (matching the regex) in the selected commits trees.

        For each selected commit (all events by default) a banner line,
        the legacy ID and mark when present, the branch, and one
        'path -> mark' line per manifest entry are written to the
        (possibly redirected) output.  A non-empty argument is compiled
        as a regular expression and only paths it finds a match in are
        listed.  Raises Recoverable when no repo is chosen or the
        expression does not compile.
        """
        if self.chosen() is None:
            raise Recoverable(b"no repo has been chosen")
        if self.selection is None:
            self.selection = self.chosen().all()
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            emit = parse.stdout.write
            wanted = None
            pattern = parse.line.strip()
            if pattern:
                try:
                    wanted = re.compile(pattern).search
                except re.error:
                    raise Recoverable(b"invalid regular expression")
            for ei, event in self.selected(Commit):
                # Banner: the 1-origin event number padded out with '='.
                banner = "Event %s" % repr(ei+1)
                banner += " " + ((72 - len(banner)) * "=") + b"\n"
                emit(banner)
                if event.legacy_id:
                    emit(b"# Legacy-ID: %s\n" % event.legacy_id)
                emit(b"commit %s\n" % event.branch)
                if event.mark:
                    emit(b"mark %s\n" % event.mark)
                emit(b"\n")
                listing = [b"%s -> %s" % (path, mark)
                           for path, (_, mark, _)
                           in event.manifest().iteritems()
                           if wanted is None or wanted(path)]
                emit(b"\n".join(listing))
                emit(b"\n")

    def help_tagify(self):
        print(b"""
Search for empty commits and turn them into tags. Takes an optional selection
set argument defaulting to all commits. For each commit in the selection set,
turn it into a tag with the same message and author information if it has no
fileops. By default merge commits are not considered, even if they have no
fileops (thus no tree differences with their first parent). To change that, see
the '--tagify-merges' option.

The name of the generated tag wiill be 'emptycommit-<ident>', where <ident>
is generated from the legacy_id of the deleted commit, or from its
mark, or from its index in the repository, with a disambiguation
suffix if needed.

tagify currently recognizes three options: first is '--canonicalize' which
makes tagify try harder to detect trivial commits by first ensuring that all
fileops of selected commits will have an actual effect when processed by
fast-import.

The second option is '--tipdeletes' which makes tagify also consider branch
tips with only deleteall fileops to be candidates for tagification. The
corresponding tags get names of the form 'tipdelete-<branchname>' rather than
the default 'emptycommit-<ident>'.

The third option is '--tagify-merges' that makes reposurgeon also
tagify merge commits that have no fileops.  When this is done the
merge link is moved to the tagified commit's parent.
""")
    def do_tagify(self, line):
        """Search for empty commits and turn them into tags.

        Accepts only options on the command line; the recognized ones
        are forwarded as booleans to the repository's tagify_empty()
        together with the selection set (all events by default).  The
        drop in the repository's commit count is announced afterwards.
        """
        repo = self.chosen()
        if repo is None:
            raise Recoverable(b"no repo has been chosen")
        if self.selection is None:
            self.selection = repo.all()
        def commit_count():
            "Count the commits currently in the repository."
            return sum(1 for _ in repo.commits())
        with RepoSurgeon.LineParse(line) as parse:
            if parse.line:
                raise Recoverable(b"too many arguments for tagify.")
            options = parse.options
            before = commit_count()
            repo.tagify_empty(
                    commits = self.selection,
                    canonicalize = "--canonicalize" in options,
                    tipdeletes = "--tipdeletes" in options,
                    tagify_merges = "--tagify-merges" in options)
            after = commit_count()
            announce("%d commits tagified." % (before - after))

    def help_merge(self):
        print(b"""
Create a merge link. Takes a selection set argument, ignoring all but
the lowest (source) and highest (target) members.  Creates a merge link
from the highest member (child) to the lowest (parent).
""" )
    def do_merge(self, _line):
        """Create a merge link between the extremes of the selection.

        The selected commits are sorted; the lowest is added as a parent
        of the highest and everything in between is ignored.  Raises
        Recoverable unless at least two commits are selected.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        try:
            ordered = sorted(self.selected(Commit))
            # Discard everything between the first and last entries.
            del ordered[1:-1]
            first, last = ordered
            (_, earlier) = first
            (_, later) = last
        except (TypeError, ValueError):
            raise Recoverable(b"merge requires a selection set "
                              b"with at least two commits.")
        later.add_parent(earlier)

    def help_unmerge(self):
        print(b"""
Linearizes a commit. Takes a selection set argument, which must resolve to a
single commit, and removes all its parents except for the first. It is
equivalent to reparent {first parent},{commit} rebase, where {commit} is the
selection set given to unmerge and {first parent} is a set resolving to that
commit's first parent, but doesn't need you to find the first parent yourself.
""" )
    def do_unmerge(self, _line):
        """Strip every parent but the first from a single selected commit.

        Raises Recoverable unless the selection set is exactly one
        commit.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        try:
            if len(self.selection) != 1:
                raise ValueError()
            # Unpacking fails unless exactly one commit is yielded.
            [(_, commit)] = self.selected(Commit)
        except (TypeError, ValueError):
            raise Recoverable(b"unmerge requires a single commit.")
        first_parent_only = commit.parents()[:1]
        commit.set_parents(first_parent_only)

    def help_reparent(self):
        print(b"""
Changes the parent list of a commit. Takes a selection set argument and an
optional policy argument. The selection set must resolve to exactly two
commits, the latest of which is the commit to modify, and the earliest is the
new first parent. All other parents links are cleared; if you want you can
recreate them with the 'merge' command.

By default, the manifest of the reparented commit is computed before
modifying it; a deleteall and fileops are prepended so that the
manifest stays unchanged even when the first parent has been
changed. Using the keyword 'rebase' as a third argument inhibits this
behavior - no deleteall is issued and the tree contents of all
descendents can be modified as a result.
""")
    def do_reparent(self, line):
        """Give a commit a new, single parent.

        The selection set must hold exactly two commits; after sorting,
        the first becomes the sole parent of the second.  Unless line is
        'rebase', a deleteall followed by one M fileop per entry of the
        child's manifest is placed in front of the child's own fileops
        before the parent is switched.  Any other non-empty line raises
        Recoverable.
        """
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        try:
            if len(self.selection) != 2:
                raise ValueError()
            pair = sorted(self.selected(Commit))
            (_, parent), (_, child) = pair
        except (TypeError, ValueError):
            raise Recoverable(b"reparent requires exactly two selected commits")
        rebase = (line == "rebase")
        if line and not rebase:
            raise Recoverable(b"unknown policy for reparent")
        if not rebase:
            # Recreate the state of the tree
            def make_op(*fields):
                "Build a FileOp for this repo from the given fields."
                op = FileOp(repo)
                op.construct(*fields)
                return op
            preamble = [make_op(b"deleteall")]
            for (path, (mode, mark, inline)) in child.manifest().iteritems():
                restore = make_op(b"M", mode, mark, path)
                if mark == "inline":
                    restore.inline = inline
                preamble.append(restore)
            preamble.extend(child.operations())
            child.set_operations(preamble)
        child.set_parents([parent])

    def help_branch(self):
        print(b"""
Rename or delete a branch (and any associated resets).  First argument
must be an existing branch name; second argument must one of the verbs
'rename' or 'delete'.

For a 'rename', the third argument may be any token that is a syntactically
valid branch name (but not the name of an existing branch). For a 'delete',
no third argument is required.

For either name, if it does not contain a '/' the prefix 'heads/'
is prepended. If it does not begin with 'refs/', 'refs/' is prepended.
""")
    def do_branch(self, line):
        """Rename a branch or delete it.

        line holds an existing branch name, the verb 'rename' or
        'delete', and for 'rename' the new name.  Branch names are
        expanded to full refs first: 'heads/' is prepended to a name
        without a '/', then 'refs/' to a name not already starting with
        it.  Renaming updates the branch of matching commits and the ref
        of matching resets; deleting hands the indices of all matching
        commits and resets to the repository's delete().  Raises
        Recoverable for an unknown branch, an empty or already existing
        new name, or an unknown verb.
        """
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        repo = self.chosen()
        def canonicalize(name):
            "Expand an abbreviated branch name to a full ref."
            if not "/" in name:
                name = 'heads/' + name
            # Also covers abbreviations such as 'heads/foo' or
            # 'tags/foo', as the help text for this command promises.
            if not name.startswith('refs/'):
                name = 'refs/' + name
            return name
        (branchname, line) = RepoSurgeon.pop_token(line)
        branchname = canonicalize(branchname)
        if branchname not in repo.branchset():
            raise Recoverable(b"no such branch as %s" % branchname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable(b"new branch name must be nonempty.")
            newname = canonicalize(newname)
            if newname in repo.branchset():
                raise Recoverable(b"there is already a branch named '%s'." \
                                  % newname)
            for event in repo:
                if isinstance(event, Commit):
                    if event.branch == branchname:
                        event.set_branch(newname)
                elif isinstance(event, Reset):
                    if event.ref == branchname:
                        event.ref = newname
        elif verb == "delete":
            repo.delete([i for (i, event) in enumerate(repo.events) if
                         (isinstance(event, Reset) and event.ref == branchname) \
                         or \
                         (isinstance(event, Commit) and event.branch == branchname)])
        else:
            raise Recoverable(b"unknown verb '%s' in branch command." % verb)

    def help_tag(self):
        print(b"""
Move, rename, or delete a tag.  First argument must be an existing
name referring to a tag object, lightweight tag, or reset; second
argument must be one of the verbs 'move', 'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that is a
syntactically valid tag name (but not the name of an existing
tag). For a 'delete', no third argument is required.

The behavior of this command is complex because features which present
as tags may be any of three things: (1) True tag objects, (2)
lightweight tags, actually sequences of commits with a common
branchname beginning with 'refs/tags' - in this case the tag is
considered to point to the last commit in the sequence, (3) Reset
objects.  These may occur in combination; in fact, stream exporters
from systems with annotation tags commonly express each of these as a
true tag object (1) pointing at the tip commit of a sequence (2) in
which the basename of the common branch field is identical to the tag
name.  An exporter that generates lightweight-tagged commit sequences (2)
may or may not generate resets pointing at their tip commits.

This command tries to handle all combinations in a natural way by
doing up to three operations on any true tag, commit sequence, and
reset matching the source name. In a rename, all are renamed together.
In a delete, any matching tag or reset is deleted; then matching
branch fields are changed to match the branch of the unique descendent
of the tagged commit, if there is one.  When a tag is moved, no branch
fields are changed and a warning is issued.
""")
    def do_tag(self, line):
        """Move a tag to point to a specified commit, or rename it, or delete it.

        The command line is '<tagname> <verb> [argument]', where verb is
        one of 'move', 'rename' or 'delete'.  Raises Recoverable when the
        name matches nothing, the verb is unknown, a move target is not a
        singleton commit, or a rename would collide with an existing tag."""
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        repo = self.chosen()
        # A tag name can refer to one of the following things:
        # (1) A tag object, by name
        # (2) A reset object having a name in the tags/ namespace
        # (3) The tip commit of a branch with branch fields
        # These things often occur in combination. Notably, git-fast-export
        # generates for each tag object corresponding branch labels on
        # some ancestor commits - the rule for where this stops is unclear.
        (tagname, line) = RepoSurgeon.pop_token(line)
        tag = None
        resets = []
        commits = []
        fulltagname = Tag.branchname(tagname)
        # Gather every event that presents as this tag, in event order.
        for event in repo.events:
            if isinstance(event, Tag) and event.name == tagname:
                tag = event
            elif isinstance(event, Reset) and event.ref == fulltagname:
                resets.append(event)
            elif isinstance(event, Commit) and event.branch == fulltagname:
                commits.append(event)
        if not tag and not resets and not commits:
            raise Recoverable(b"no such tag as %s" % tagname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            self.set_selection_set(line)
            try:
                # TypeError covers a selection of None; ValueError covers
                # both the explicit size check and a failed unpack.
                if len(self.selection) != 1: raise ValueError()
                (_, target), = self.selected(Commit)
            except (TypeError, ValueError):
                raise Recoverable(b"tag move requires a singleton commit set.")
            if tag:
                tag.forget()
                tag.remember(repo, target=target)
            if resets:
                if len(resets) == 1:
                    resets[0].committish = target.mark
                else:
                    complain(b"cannot move multiple tags.")
            if commits:
                complain(b"warning - tag move does not modify branch fields")
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable(b"new tag name must be nonempty.")
            if tag:
                # The collision to guard against is another tag object that
                # already carries the *new* name.  (This check used to be
                # made against the tag's current name, so it could never
                # detect a real collision.)
                for event in repo.events:
                    if isinstance(event, Tag) and event != tag and event.name == newname:
                        raise Recoverable(b"tag name collision, not renaming.")
                tag.name = newname
            fullnewname = Tag.branchname(newname)
            for reset in resets:
                reset.ref = fullnewname
            for event in commits:
                event.branch = fullnewname
        elif verb == "delete":
            if tag:
                tag.forget()
                repo.events.remove(tag)
                repo.declare_sequence_mutation(b"tag deletion")
            for reset in resets:
                reset.forget()
                repo.events.remove(reset)
                repo.declare_sequence_mutation(b"reset deletion")
            if commits:
                # The last matching commit is treated as the tagged tip; only
                # children whose first parent is that tip count as successors.
                successors = {child.branch for child in commits[-1].children() if child.parents()[0] == commits[-1]}
                if len(successors) == 1:
                    successor = successors.pop()
                    for event in commits:
                        event.branch = successor
                else:
                    complain(b"couldn't determine a unique successor for %s at %s" % (tagname, commits[-1].id_me()))
        else:
            raise Recoverable(b"unknown verb '%s' in tag command." % verb)

    def help_reset(self):
        print(b"""
Move, rename, or delete a reset.  First argument must match an
existing reset name; second argument must be one of the verbs 'move',
'rename', or 'delete'.

For a 'move', a third argument must be a singleton selection set. For
a 'rename', the third argument may be any token that can be interpreted
as a valid reset name (but not the name of an existing
reset). For a 'delete', no third argument is required.

An argument matches a reset's name if it is either the entire
reference (refs/heads/FOO or refs/tags/FOO for some some value of FOO)
or the basename (e.g. FOO), or a suffix of the form heads/FOO or tags/FOO.
An unqualified basename is assumed to refer to a head.

When a reset is renamed, commit branch fields matching the tag are
renamed with it to match.  When a reset is deleted, matching branch
fields are changed to match the branch of the unique descendent of the
tip commit of the associated branch, if there is one.  When a reset is
moved, no branch fields are changed.
""")
    def do_reset(self, line):
        """Move a reset to point to a specified commit, or rename it, or delete it.

        The command line is '<resetname> <verb> [argument]', where verb is
        one of 'move', 'rename' or 'delete'.  Raises Recoverable when no
        reset matches, the verb is unknown, a move is ambiguous or lacks a
        singleton commit target, or a rename would collide."""
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        repo = self.chosen()
        (resetname, line) = RepoSurgeon.pop_token(line)
        # Expand FOO -> heads/FOO and heads/FOO -> refs/heads/FOO.
        if not "/" in resetname:
            resetname = "heads/" + resetname
        if not resetname.startswith(b"refs/"):
            resetname = "refs/" + resetname
        resets = [e for _,e in repo.iterevents(types=Reset)
                      if e.ref == resetname]
        if not resets:
            raise Recoverable(b"no such reset as %s" % resetname)
        (verb, line) = RepoSurgeon.pop_token(line)
        if verb == "move":
            if len(resets) == 1:
                reset = resets[0]
            else:
                raise Recoverable(b"can't move multiple resets")
            self.set_selection_set(line)
            # Validate the target *before* touching the reset, so a bad
            # selection cannot leave the reset forgotten but not re-attached.
            try:
                if len(self.selection) != 1: raise ValueError()
                # selected() yields (index, event) pairs, as in do_tag.
                (_, target), = self.selected(Commit)
            except (TypeError, ValueError):
                raise Recoverable(b"reset move requires a singleton commit set.")
            reset.forget()
            reset.remember(repo, target=target)
        elif verb == "rename":
            (newname, line) = RepoSurgeon.pop_token(line)
            if not newname:
                raise Recoverable(b"new reset name must be nonempty.")
            if newname.count(b"/") == 0:
                newname = "heads/" + newname
            if not newname.startswith(b"refs/"):
                newname = "refs/" + newname
            if any(r.ref == newname for _,r in repo.iterevents(types=Reset)) \
                    or any(c.branch == newname
                           for _,c in repo.iterevents(types=Commit)):
                raise Recoverable(b"reset reference collision, not renaming.")
            for reset in resets:
                reset.ref = newname
            # iterevents() yields (index, event) pairs; unpack them.
            for _, event in repo.iterevents(types=Commit):
                if event.branch == resetname:
                    event.branch = newname
        elif verb == "delete":
            # Collect the branch's commits in event order; the last one is
            # taken as the tip (the same convention do_tag uses).
            branch_commits = [c for _,c in repo.iterevents(types=Commit)
                              if c.branch == resetname]
            if branch_commits:
                tip = branch_commits[-1]
                if len(tip.children()) == 1:
                    successor = tip.children()[0].branch
                    for event in branch_commits:
                        event.branch = successor
            for reset in resets:
                reset.forget()
                repo.events.remove(reset)
            repo.declare_sequence_mutation(b"reset delete")
        else:
            raise Recoverable(b"unknown verb '%s' in reset command." % verb)

    def help_ignores(self):
        print(b"""Intelligent handling of ignore-pattern files.
This command fails if no repository has been selected or no preferred write
type has been set for the repository.  It does not take a selection set.

If the rename modifier is present, this command attempts to rename all
ignore-pattern files to whatever is appropriate for the preferred type
- e.g. .gitignore for git, .hgignore for hg, etc.  This option does not
cause any translation of the ignore files it renames.

If the translate modifier is present, syntax translation of each ignore
file is attempted. At present, the only transformation the code knows
is to prepend a 'syntax: glob' header if the preferred type is hg.

If the defaults modifier is present, the command attempts to prepend
these default patterns to all ignore files. If no ignore file is
created by the first commit, it will be modified to create one
containing the defaults.  This command will error out on prefer types
that have no default ignore patterns (git and hg, in particular).  It
will also error out when it knows the import tool has already set
default patterns.
""")
    def do_ignores(self, line):
        """Manipulate ignore patterns in the repo.

        Each whitespace-separated word of the line is a verb, processed in
        order: 'defaults', 'rename' or 'translate'.  Raises Recoverable if
        no preferred type or ignore-file name is known, on an unknown
        verb, or when 'defaults' is not applicable."""
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        repo = self.chosen()
        if self.preferred and not self.ignorename:
            self.ignorename = self.preferred.ignorename
        if not self.preferred:
            raise Recoverable(b"preferred repository type has not been set")
        if not self.ignorename:
            raise Recoverable(b"preferred repository type has no declared ignorename")
        def isignore(blob):
            # A blob is an ignore file only if it has paths and every one
            # of them ends with the current ignore-file name.
            return len(blob.pathlist) \
                       and all(x.endswith(self.ignorename) for x in blob.pathlist)
        for verb in line.split():
            if verb == 'defaults':
                if "import-defaults" in self.preferred.styleflags:
                    raise Recoverable(b"importer already set default ignores")
                elif not self.preferred.dfltignores:
                    raise Recoverable(b"no default ignores in %s" % self.preferred.name)
                else:
                    changecount = 0
                    # Modify existing ignore files
                    for (_, blob) in repo.iterevents(indices=None, types=(Blob,)):
                        if isignore(blob):
                            blob.set_content(self.preferred.dfltignores \
                                         + blob.get_content())
                            changecount += 1
                    # Create an early ignore file if required.
                    # Don't move this before the modification pass!
                    earliest = repo.earliest_commit()
                    if not [fileop for fileop in earliest.operations() if fileop.op == "M" and fileop.path.endswith(self.ignorename)]:
                        blob = Blob(repo)
                        blob.pathlist.append(self.ignorename)
                        blob.set_content(self.preferred.dfltignores)
                        # Placeholder mark; repo.renumber() below is called
                        # after the new blob and fileop are in place.
                        blob.mark = ":insert"
                        repo.events.insert(repo.index(earliest), blob)
                        repo.declare_sequence_mutation(b"ignore creation")
                        newop = FileOp(self.chosen())
                        newop.construct(b"M", 0o100644, ":insert", self.ignorename)
                        earliest.append_operation(newop)
                        repo.renumber()
                        announce(b"initial %s created." % self.ignorename)
                announce(b"%d %s blobs modified." % (changecount, self.ignorename))
            elif verb == 'rename':
                changecount = 0
                for (_, event) in repo.iterevents(indices=None, types=(Commit,)):
                    for fileop in event.operations():
                        for attr in (b"path", "source", "target"):
                            if hasattr(fileop, attr):
                                # Read the attribute actually being examined.
                                # (This used to always read "path", so the
                                # source/target of a fileop were rewritten
                                # from the wrong value.)
                                oldpath = getattr(fileop, attr)
                                if oldpath and oldpath.endswith(self.ignorename):
                                    newpath = os.path.join(os.path.dirname(oldpath),
                                                       self.preferred.ignorename)
                                    setattr(fileop, attr, newpath)
                                    changecount += 1
                                    if fileop.op == "M":
                                        blob = repo.objfind(fileop.ref)
                                        if blob.pathlist[0] == oldpath:
                                            blob.pathlist[0] = newpath
                announce(b"%d ignore files renamed (%s -> %s)."
                         % (changecount,
                            self.ignorename,
                            self.preferred.ignorename))
                self.ignorename = self.preferred.ignorename
            elif verb == 'translate':
                changecount = 0
                for (_, blob) in repo.iterevents(indices=None, types=(Blob,)):
                    if isignore(blob):
                        if self.preferred.name == "hg":
                            if not blob.get_content().startswith(b"syntax: glob\n"):
                                blob.set_content(b"syntax: glob\n" + blob.get_content())
                                changecount += 1
                announce(b"%d %s blobs modified." % (changecount, self.ignorename))
            else:
                raise Recoverable("unknown verb %s in ignores line" % verb)
    #
    # Artifact removal
    #
    def help_authors(self):
        print(b"""
Apply or dump author-map information for the specified selection
set, defaulting to all events.

Lifts from CVS and Subversion may have only usernames local to
the repository host in committer and author IDs. DVCSes want email
addresses (net-wide identifiers) and complete names. To supply the map
from one to the other, an authors file is expected to consist of
lines each beginning with a local user ID, followed by a '=' (possibly
surrounded by whitespace) followed by a full name and email address.

When an authors file is applied, email addresses in committer and author
metdata for which the local ID matches between &lt; and @ are replaced
according to the mapping (this handles git-svn lifts). Alternatively,
if the local ID is the entire address, this is also considered a match
(this handles what git-cvsimport and cvs2git do)

With the 'read' modifier, or no modifier, apply author mapping data
(from standard input or a <-redirected input file).  May be useful if
you are editing a repo or dump created by cvs2git or by
cvs-fast-export or git-svn invoked without -A.

With the 'write' modifier, write a mapping file that could be
interpreted by 'authors read', with entries for each unique committer,
author, and tagger (to standard output or a >-redirected file). This
may be helpful as a start on building an authors file, though each
part to the right of an equals sign will need editing.
""")
    def do_authors(self, line):
        "Apply an author map to the selection, or dump one from it."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # Work out the direction and strip the leading keyword, if any.
        writing = line.startswith(b"write")
        if writing:
            remainder = line[5:].strip()
        elif line.startswith(b"read"):
            remainder = line[4:].strip()
        else:
            remainder = line
        if writing:
            with RepoSurgeon.LineParse(remainder, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable(b"authors write no longer takes a filename argument - use > redirection instead")
                repo.write_authormap(self.selection, parse.stdout)
            return
        # 'read' is the default when no modifier is given.
        with RepoSurgeon.LineParse(remainder, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable(b"authors read no longer takes a filename argument - use < redirection instead")
            repo.read_authormap(self.selection, parse.stdin)

    #
    # Reference lifting
    #
    def help_legacy(self):
        print(b"""
Apply or list legacy-reference information. Does not take a
selection set. The 'read' variant reads from standard input or a
<-redirected filename; the 'write' variant writes to standard
output or a >-redirected filename.
""")
    def do_legacy(self, line):
        "Read a legacy-reference map into the repo, or write the repo's map out."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        # Work out the direction and strip the leading keyword, if any.
        writing = line.startswith(b"write")
        if writing:
            remainder = line[5:].strip()
        elif line.startswith(b"read"):
            remainder = line[4:].strip()
        else:
            remainder = line
        if writing:
            with RepoSurgeon.LineParse(remainder, capabilities=["stdout"]) as parse:
                if parse.tokens():
                    raise Recoverable(b"legacy write does not take a filename argument - use > redirection instead")
                repo.write_legacymap(parse.stdout)
            return
        # 'read' is the default when no modifier is given.
        with RepoSurgeon.LineParse(remainder, capabilities=["stdin"]) as parse:
            if parse.tokens():
                raise Recoverable(b"legacy read does not take a filename argument - use < redirection instead")
            repo.read_legacymap(parse.stdin)

    def help_references(self):
        print(b"""
With the 'list' modifier, produces a listing of events that may have
Subversion or CVS commit references in them.  This version
of the command supports >-redirection.  Equivalent to '=N list'.

With the modifier 'edit', edit this set. Equivalent to '=N edit'.

With the modifier 'lift', transform commit-reference cookies from CVS
and Subversion into action stamps.  This command expects cookies
consisting of the leading string '[[', followed by a VCS identifier
(currently SVN or CVS) followed by VCS-dependent information, followed
by ']]'. An action stamp pointing at the corresponding commit is
substituted when possible.  Enables writing of the legacy-reference
map when the repo is written or rebuilt.
""")
    def do_references(self, line):
        """Look for things that might be CVS or Subversion revision references.

        If the line contains 'lift', reference cookies of the form
        [[...]] in the comments of selected commits and tags are replaced
        by action stamps where they can be resolved.  Otherwise the
        selection becomes the set of events for which has_reference() is
        true, and that set is edited ('edit') or listed (the default)."""
        if self.chosen() is None:
            complain(b"no repo has been chosen.")
            return
        repo = self.chosen()
        if self.selection is None:
            self.selection = self.chosen().all()
        if "lift" in line:
            # Count only cookies that were actually replaced.  A one-element
            # list is used because Python 2 closures cannot rebind an outer
            # local.  (The tally used to come from subn(), which also counts
            # cookies that failed to resolve and were left untouched.)
            resolved = [0]
            def substitute(getter, matchobj):
                # Strip the surrounding '[[' and ']]' from the cookie.
                payload = matchobj.group(0)[2:-2]
                commit = getter(payload)
                if commit is None:
                    complain(b"no commit matches " + repr(payload))
                    return matchobj.group(0) # no replacement
                elif commit:
                    resolved[0] += 1
                    return commit.action_stamp()
                else:
                    complain(b"cannot resolve %s" % payload)
                    return matchobj.group(0) # no replacement
            for (regexp, getter) in \
                    ((r"CVS:[^:\]]+:[0-9.]+",
                      lambda p: repo.legacy_map.get(p) or repo.dollar_map.get(p)),
                     (b"SVN:[0-9]+",
                      lambda p: repo.legacy_map.get(p) or repo.dollar_map.get(p)),
                     (b":[0-9]+",
                      lambda p: repo.objfind(p)),
                     ):
                match_re = re.compile(re.escape(b"[[")+regexp+re.escape(b"]]"))
                for _, event in self.selected():
                    if isinstance(event, (Commit, Tag)):
                        # The lambda is consumed immediately, so it sees the
                        # getter of the current loop iteration.
                        event.comment = match_re.sub(
                            lambda m: substitute(getter, m),
                            event.comment)
            announce(b"%d references resolved." % resolved[0])
            repo.write_legacy = True
        else:
            self.selection = [e for e in range(len(repo.events)) if self.has_reference(repo.events[e])]
            if self.selection:
                if line.startswith(b"edit"):
                    self.edit(self.selection, line[4:].strip())
                else:
                    with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
                        for ei in self.selection:
                            event = repo.events[ei]
                            if hasattr(event, "lister"):
                                summary = event.lister(None, ei, screenwidth())
                                if summary:
                                    parse.stdout.write(summary + b"\n")

    #
    # Examining tree states
    #
    def help_checkout(self):
        print(b"""
Check out files for a specified commit into a directory.  The selection
set must resolve to a singleton commit.
""")
    def do_checkout(self, line):
        "Materialize the tree of one selected commit in the directory named by line."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        # Guard clauses, in the order the errors have always been reported.
        if not line:
            raise Recoverable(b"no target directory specified.")
        if len(self.selection) != 1:
            raise Recoverable(b"a singleton selection set is required.")
        commit = repo.events[self.selection[0]]
        if not isinstance(commit, Commit):
            raise Recoverable(b"not a commit.")
        commit.checkout(line)

    def help_diff(self):
        print(b"""
Display the difference between commits. Takes a selection-set argument which
must resolve to exactly two commits. Supports > redirection.
""")
    def do_diff(self,line):
        "Write a unified diff between the file trees of two selected commits."
        repo = self.chosen()
        if repo is None:
            complain(b"no repo has been chosen.")
            return
        if self.selection is None:
            self.selection = repo.all()
        bounds = tuple(repo.events[i] for i in sorted(self.selection))
        # The length test comes first so bounds[1] is never indexed on a
        # selection that is too short.
        is_commit_pair = len(self.selection) == 2 \
                         and isinstance(bounds[0], Commit) \
                         and isinstance(bounds[1], Commit)
        if not is_commit_pair:
            raise Recoverable(b"a pair of commits is required.")
        (older, newer) = bounds
        before = set(older.manifest())
        after = set(newer.manifest())
        allpaths = sorted(before | after)
        with RepoSurgeon.LineParse(line, capabilities=["stdout"]) as parse:
            for path in allpaths:
                in_before = path in before
                in_after = path in after
                if in_before and in_after:
                    # FIXME: Can we detect binary files and do something
                    # more useful here?
                    fromtext = older.blob_by_name(path)
                    totext = newer.blob_by_name(path)
                    # Don't list identical files
                    if fromtext == totext:
                        continue
                    label0 = path + " (" + older.mark + ")"
                    label1 = path + " (" + newer.mark + ")"
                    for diffline in difflib.unified_diff(fromtext.split('\n'),
                                                         totext.split('\n'),
                                                         fromfile=label0,
                                                         tofile=label1,
                                                         lineterm=""):
                        parse.stdout.write(diffline + "\n")
                elif in_before:
                    parse.stdout.write("%s: removed\n" % path)
                elif in_after:
                    parse.stdout.write("%s: added\n" % path)
                else:
                    raise Recoverable("internal error - missing path in diff")

    #
    # Setting paths to branchify
    #
    def help_branchify(self):
        print(b"""
Specify the list of directories to be treated as potential branches (to
become tags if there are no modifications after the creation copies)
when analyzing a Subversion repo. This list is ignored when reading
with the --nobranch option.  It defaults to the 'standard layout'
set of directories, plus any unrecognized directories in the
repository root.

With no arguments, displays the current branchification set.

An asterisk at the end of a path in the set means 'all immediate
subdirectories of this path, unless they are part of another (longer)
path in the branchify set'.

Note that the branchify set is a property of the reposurgeon interpreter, not
of any individual repository, and will persist across Subversion
dumpfile reads. This may lead to unexpected results if you forget
to re-set it.
""")
    def do_branchify(self, line):
        "Set the Subversion branchify path list if paths are given, then display it."
        if self.selection is not None:
            raise Recoverable(b"branchify does not take a selection set")
        paths = line.strip().split()
        if paths:
            global_options['svn_branchify'] = paths
        current = global_options['svn_branchify']
        announce(b"branchify " + " ".join(current))
    #
    # Setting branch name rewriting
    #
    def help_branchify_map(self):
        print(b"""
Specify the list of regular expressions used for mapping the svn branches that
are detected by branchify. If non of the expressions match the default behaviour
applies. Which maps a branch to the name of the last directory, except for trunk
and '*' which are mapped to master and root.

With no arguments the current regex replacement pairs are shown. Passing 'reset'
will clear the reset mapping.

Syntax: branchify_map /regex1/branch1/ /regex2/branch2/ ...

Will match each branch name against regex1 and if it matches rewrite its branch
name to branch1. If not it will try regex2 and so forth until it either found a
matching regex or there are no regexs left. The regular expressions should be in
python's format (see http://docs.python.org/2/library/re.html). The branch name
can use backreferences (see the sub function in the python documentation).

Note that the regular expressions are appended to 'refs/' without either the
needed 'heads/' or 'tags/'. This allows for choosing the right kind of branch
type.

While the syntax template above uses slashes, any first character will
be used as a delimeter (and you will need to use a different one in the
common case that the paths contain slashes).

Note that the branchify_map set is a property of the reposurgeon interpreter,
not of any individual repository, and will persist across Subversion
dumpfile reads. This may lead to unexpected results if you forget
to re-set it.
""")
    def do_branchify_map(self, line):
        """Set, reset or display the Subversion branch-name rewriting rules.

        With 'reset' the mapping is emptied; with other arguments each
        whitespace-separated token is parsed as <sep>regex<sep>branch<sep>
        and the mapping is replaced; in every case the resulting mapping
        is then displayed.  Raises Recoverable if a selection set was
        given or a token is malformed."""
        if self.selection is not None:
            raise Recoverable(b"branchify_map does not take a selection set")
        line = line.strip()
        if line == "reset":
            global_options['svn_branchify_mapping'] = []
        elif line:
            def split_regex(regex):
                # The first character is the delimiter and must also close
                # the token.  Compare by value: 'is not' tests object
                # identity, which for strings only works by interning
                # accident.
                separator = regex[0]
                if separator != regex[-1]:
                    raise Recoverable(b"Regex '%s' did not end with separator character" % regex)
                match, _, replace = regex[1:-1].partition(separator)
                if not replace or not match:
                    raise Recoverable(b"Regex '%s' has an empty search or replace part" % regex)
                return match ,replace
            # Build the whole list first so a malformed token leaves the
            # previous mapping untouched.
            global_options['svn_branchify_mapping'] = \
                    [split_regex(token) for token in line.split()]
        if global_options['svn_branchify_mapping']:
            announce(b"branchify_map, regex -> branch name:")
            for match, replace in global_options['svn_branchify_mapping']:
                announce( "\t" + match + " -> " + replace)
        else:
            announce("branchify_map is empty.")

    #
    # Setting options
    #
    def help_set(self):
        print(b"""
Set a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags and options. The following flags and
options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_set(self, line):
        "Turn on the named option flags, or list every flag if none are named."
        requested = line.split()
        if not requested:
            for (flagname, _expl) in RepoSurgeon.OptionFlags:
                print(b"\t%s = %s" % (flagname, global_options.get(flagname, False)))
            return
        known = dict(RepoSurgeon.OptionFlags)
        for option in requested:
            if option in known:
                global_options[option] = True
            else:
                complain(b"no such option flag as '%s'" % option)
    def help_clear(self):
        print(b"""
Clear a boolean option to control reposurgeon's behavior.   With no arguments,
displays the state of all flags. The following flags and options are defined:
""")
        for (opt, expl) in RepoSurgeon.OptionFlags:
            print(opt + ":\n" + expl)
    def do_clear(self, line):
        "Turn off the named option flags, or list every flag if none are named."
        requested = line.split()
        if not requested:
            # Walk the declaration list itself, as do_set does, so the
            # listing comes out in declared order.  Iterating a dict built
            # from it gives an arbitrary order under Python 2.
            for (opt, _expl) in RepoSurgeon.OptionFlags:
                print(b"\t%s = %s" % (opt, global_options.get(opt, False)))
        else:
            # Build the lookup table once rather than once per option.
            known = dict(RepoSurgeon.OptionFlags)
            for option in requested:
                if option not in known:
                    complain(b"no such option flag as '%s'" % option)
                else:
                    global_options[option] = False

    #
    # Macros and custom extensions
    #
    def help_define(self):
        print(b"""
Define a macro.  The first whitespace-separated token is the name; the
remainder of the line is the body, unless it is '{', which begins a
multi-line macro terminated by a line beginning with '}'.

A later 'do' call can invoke this macro.

'define' by itself without a name or body produces a macro list.
""")
    def do_define(self, line):
        """Define a macro, or list the defined macros.

        The first whitespace-separated token of line is the macro name and
        the rest is its body.  With no body, all macros are written to
        standard output.  A body starting with '{' begins a multi-line
        definition: an empty body list is stored and also assigned to
        self.capture."""
        # Split on whitespace rather than slicing by len(name): slicing
        # mis-cuts the body whenever the line has leading whitespace.
        parts = line.split(None, 1)
        name = parts[0] if parts else ""
        body = parts[1].strip() if len(parts) > 1 else ""
        if not body:
            # Distinct loop names keep the listing from clobbering the
            # method's own name/body/line variables.
            for (macroname, macrobody) in self.definitions.items():
                if len(macrobody) == 1:
                    sys.stdout.write(b"define %s %s\n" % (macroname, macrobody[0]))
                else:
                    sys.stdout.write(b"define %s {\n" % macroname)
                    for bodyline in macrobody:
                        sys.stdout.write(bodyline)
                    sys.stdout.write(b"}\n")
        elif body[0] != '{':
            self.definitions[name] = [body]
        else:
            self.capture = self.definitions[name] = []

    def help_do(self):
        print(b"""
Expand and perform a macro.  The first whitespace-separated token is
the name of the macro to be called; remaining tokens replace {0},
{1}... in the macro definition (the conventions used are those of the
Python format method). Tokens may contain whitespace if they are
string-quoted; string quotes are stripped. Macros can call macros.
If the macro expansion does not itself begin with a selection set,
whatever set was specified before the 'do' keyword is available to
the command generated by the expansion.
""")
    def do_do(self, line):
        """Do a macro.

        The first whitespace-separated token of line names the macro; the
        remaining tokens (shell-style quoting allowed) are substituted for
        {0}, {1}... in each line of its body, and each expanded line is
        run as a command.  Raises Recoverable for an undefined macro, an
        unparseable argument list, or a body whose format fields cannot
        be filled."""
        # Split on whitespace rather than slicing by len(name): slicing
        # mis-cuts the arguments whenever the line has leading whitespace.
        parts = line.split(None, 1)
        if not parts:
            complain(b"no macro name was given.")
            return
        name = parts[0]
        if name not in self.definitions:
            raise Recoverable(b"'%s' is not a defined macro" % name)
        try:
            args = shlex.split(parts[1] if len(parts) > 1 else "")
        except ValueError as e:
            raise Recoverable(b"macro parse failed, %s" % e)
        do_selection = self.selection
        for bodyline in self.definitions[name]:
            try:
                expanded = bodyline.format(*args)
            except (IndexError, KeyError, ValueError):
                # IndexError: too few arguments.  KeyError and ValueError:
                # a named field or unbalanced brace in the macro body.
                # All of these should abort the macro, not the interpreter.
                raise Recoverable(b"macro argument error")
            # If a leading portion of the expansion body is a selection
            # expression, use it.  Otherwise we'll restore whatever
            # selection set came before the do keyword.
            expansion = self.precmd(expanded)
            if self.selection is None:
                self.selection = do_selection
            # Call the base method so RecoverableExceptions
            # won't be caught; we want them to abort macros.
            self.onecmd(expansion)
    def help_undefine(self):
        print(b"""
Undefine the macro named in this command's first argument.
""")
    def do_undefine(self, line):
        """Remove a macro definition.

        The first whitespace-separated token of the line names the macro
        to delete from self.definitions; anything after it is ignored.
        Complains and returns if no name is given; raises Recoverable if
        the name is not a defined macro.
        """
        try:
            name = line.split()[0]
        except IndexError:
            complain(b"no macro name was given.")
            return
        if name not in self.definitions:
            raise Recoverable(b"'%s' is not a defined macro" % name)
        del self.definitions[name]

    def help_exec(self):
        print(b"""
Execute custom code from standard input (normally a file via < redirection).

Use this to set up custom extension functions for later calls. The
code has full access to all internal data structures. Functions
defined are accessible to later 'eval' calls.
""")
    def do_exec(self, line):
        """Execute custom python code.

        The command line is parsed with RepoSurgeon.LineParse, requesting
        the "stdin" capability, and the file named by parse.stdin.name is
        run with execfile.

        Raises Recoverable if the file has a syntax error or if an I/O
        error occurs.
        """
        with RepoSurgeon.LineParse(line, capabilities=["stdin"]) as parse:
            try:
                # The additional args are required to make the function
                # visible to a later eval.
                # Note the argument order: locals() fills execfile's
                # "globals" slot and globals() its "locals" slot, so names
                # bound at the top level of the executed file are stored
                # in this module's global namespace.  The executed code
                # can also see this method's local names (self, line,
                # parse), so do not rename or add locals here casually.
                execfile(parse.stdin.name, locals(), globals())
            except SyntaxError as e:
                # Report the error together with the offending source text
                # carried on the exception.
                raise Recoverable(b"extension function - %s\n%s" % (e, e.text))
            except IOError:
                raise Recoverable(b"I/O error, can't find or open input source")

    def help_eval(self):
        print(b"""
Evaluate a line of code in the current interpreter context.
Typically this will be a call to a function defined by a previous exec.
The variables '_repository' and '_selection' will have the obvious values.
Note that '_selection' will be a list of integers, not objects.
""")
    def do_eval(self, line):
        """Call a function from custom python code.

        Evaluates the line as a Python expression with '_selection' bound
        to the current selection and '_repository' to the result of
        self.chosen(); the value of the expression is discarded.  Prints
        "no selection" and does nothing else when no selection is set.
        Raises Recoverable if evaluation fails with a NameError or
        SyntaxError.
        """
        if self.selection is None:
            print(b"no selection")
            return
        # These two locals look unused, but eval() runs in this scope, so
        # they are what the evaluated expression sees under these names.
        _selection = self.selection
        _repository = self.chosen()
        try:
            eval(line)
        except (NameError, SyntaxError) as err:
            raise Recoverable(str(err))

    #
    # Version binding
    #
    def help_version(self):
        print(b"""
With no argument, display the reposurgeon version and supported VCSes.
With argument, declare the major version (single digit) or full
version (major.minor) under which the enclosing script was developed.
The program will error out if the major version has changed (which
means the surgical language is not backwards compatible).
""")
    def do_version(self, line):
        """Report the program version, or check a script's declared version.

        With an empty line, announce the version string followed by the
        names of everything in vcstypes and extractors.  Otherwise compare
        the major part of the argument (the text before the dot, or the
        whole argument if it has none) with the program's major version:
        a mismatch raises Fatal, a match is announced when verbose is
        positive.  An argument that does not split into exactly two parts
        at the dot draws a complaint.
        """
        if not line:
            supported = " ".join(x.name for x in (vcstypes+extractors))
            announce(b"reposurgeon " + version + " supporting " + supported)
            return
        (vmajor, _) = version.split(b".")
        declared = line.strip()
        if '.' in line:
            try:
                (declared, _) = declared.split(b".")
            except ValueError:
                complain(b"invalid version.")
                return
        if declared != vmajor:
            raise Fatal(b"major version mismatch, aborting.")
        if verbose > 0:
            announce(b"version check passed.")

    #
    # Exiting (in case EOT has been rebound)
    #
    def help_exit(self):
        print(b"""
Exit the program cleanly, emitting a goodbye message.

Typing EOT (usually Ctrl-D) will exit quietly.
""")
    def do_exit(self, _line):
        "Announce the time elapsed since self.start_time, then exit with status 0."
        elapsed = time.time() - self.start_time
        announce(b"exiting, elapsed time %d sec." % elapsed)
        sys.exit(0)

    #
    # Prompt customization
    #
    def help_prompt(self):
        print(b"""
Set the command prompt format to the value of the command line; with
an empty command line, display it. The prompt format is evaluated in Python
after each command with the following dictionary substitutions:

chosen: The name of the selected repository, or None if none currently selected.

Thus, one useful format might be 'rs[%(chosen)s]%% '

More format items may be added in the future.  The default prompt corresponds
to the format 'reposurgeon%% '. The format line is evaluated with shell quotng
of tokens, so that spaces can be included.
""")
    def do_prompt(self, line):
        """Set or display the prompt format.

        A non-empty line is split with shell quoting rules and the tokens
        are rejoined with single spaces to form the new prompt format, so
        quoting is the way to keep leading, trailing or repeated spaces.
        An empty line prints the current format.

        Raises Recoverable if the line cannot be tokenized (for example
        an unterminated quote), matching how 'do' reports the same
        failure, instead of letting the ValueError escape.
        """
        if not line:
            print("prompt = %s" % self.prompt_format)
            return
        try:
            tokens = shlex.split(line)
        except ValueError as e:
            raise Recoverable(b"prompt parse failed, %s" % e)
        self.prompt_format = " ".join(tokens)

    #
    # Running unit tests (undocumented)
    #
    def help_runtests(self):
        print(b"""
Runs the unit tests and reports the results.
""")
    def do_runtests(self, line):
        """Run the built-in unit tests.

        With an argument, run only the test class of that name, which
        must be one of the known classes; otherwise run every known
        class in turn.  Test classes are looked up by name in the module
        globals.  Raises Recoverable as soon as one class reports a
        failure; an unknown class name draws a complaint instead.
        """
        available = ["DateTests"]
        if line and line not in available:
            complain(b"no test class known as '%s'" % line)
            return
        selected = [line] if line else available
        for classname in selected:
            runner = unittest.TextTestRunner()
            suite = unittest.defaultTestLoader.loadTestsFromTestCase(globals()[classname])
            if not runner.run(suite).wasSuccessful():
                raise Recoverable(b"unit tests failed")

def main():
    """Run reposurgeon commands from the command line or interactively.

    Each command-line argument is split on ';' and every piece is run as
    an interpreter command; the piece '-' starts an interactive session
    instead.  With no arguments at all, a single '-' is assumed.  A
    leading '--' on a command is stripped so that GNU-style options like
    --help work.  The interpreter's cleanup() always runs afterwards.

    Exits with status 1 after reporting a Recoverable or Fatal error;
    a keyboard interrupt just prints an empty line.
    """
    # Increase max stack size from 8MB to 512MB
    # Needed to handle really large repositories.
    try:
        sys.setrecursionlimit(10**6)
        import resource
    except ImportError:
        # Don't fail to start if 'resource' isn't available
        resource = None
    if resource is not None:
        try:
            resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
        except (ValueError, resource.error):
            # May not be allowed on some systems.  Whether or not we can
            # do it isn't interesting, it only matters whether the limit
            # is actually blown.  resource.error covers the case where
            # the underlying system call itself fails.
            pass
    try:
        def interactive():
            "Run the interpreter's command loop on raw terminal input."
            global verbose
            interpreter.use_rawinput = True
            if verbose == 0:
                verbose = 1
            interpreter.cmdloop()
            interpreter.use_rawinput = False
        interpreter = RepoSurgeon()
        interpreter.use_rawinput = False
        if not sys.argv[1:]:
            sys.argv.append(b"-")
        try:
            for argument in sys.argv[1:]:
                for command in argument.split(b";"):
                    if command == '-':
                        if interpreter.profile_log is None:
                            interactive()
                        else:
                            # cProfile.run() evaluates its statement in
                            # the __main__ module namespace, where the
                            # nested interactive() is not defined, so hand
                            # the function over explicitly.  A false but
                            # non-None profile_log means "print the stats
                            # rather than writing them to a file".
                            cProfile.runctx(b'interactive()',
                                            globals(),
                                            {"interactive": interactive},
                                            interpreter.profile_log or None)
                    else:
                        # A minor concession to people used to GNU conventions.
                        # Makes "reposurgeon --help" and "reposurgeon --version"
                        # work as expected.
                        if command.startswith(b"--"):
                            command = command[2:]
                        # Call the base method so RecoverableExceptions
                        # won't be caught; we want them to abort scripting.
                        cmd.Cmd.onecmd(interpreter, interpreter.precmd(command))
        finally:
            interpreter.cleanup()
    except (Recoverable, Fatal) as xe:
        complain(xe.msg)
        sys.exit(1)
    except KeyboardInterrupt:
        print(b"")

# Run the command interpreter only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

# The following sets edit modes for GNU EMACS
# Local Variables:
# mode:python
# End:
# end
