diff --git a/cvs2svn_lib/checkout_internal.py b/cvs2svn_lib/checkout_internal.py index 8fdc132c9..6ce437612 100644 --- a/cvs2svn_lib/checkout_internal.py +++ b/cvs2svn_lib/checkout_internal.py @@ -102,7 +102,10 @@ class TextRecord(object): - """Bookkeeping data for the text of a single CVSRevision.""" + """Bookkeeping data for the text of a single CVSRevision. + + The text is stored as the list of logical lines manipulated + by RCSStream.""" __slots__ = ['id', 'refcount'] @@ -134,15 +137,23 @@ def decrement_refcount(self, text_record_db): if self.refcount == 0: text_record_db.discard(self.id) - def checkout(self, text_record_db): + def checkout_as_lines(self, text_record_db): """Workhorse of the checkout process. - Return the text for this revision, decrement our reference count, - and update the databases depending on whether there will be future - checkouts.""" + Return the text for this revision as a list of logical lines, + decrement our reference count, and update the databases depending + on whether there will be future checkouts.""" raise NotImplementedError() + def checkout(self, text_record_db): + """Return the text for this revision. + + Just as checkout_as_lines(), but returns the text as a flat text + string.""" + + return "".join(self.checkout_as_lines(text_record_db)) + def free(self, text_record_db): """This instance will never again be checked out; free it. @@ -158,7 +169,7 @@ class FullTextRecord(TextRecord): These records are used for revisions whose fulltext was determined by the InternalRevisionCollector during FilterSymbolsPass. 
The fulltext for such a revision is is stored in the delta_db as a - single string.""" + list of logical lines manipulated by RCSStream.""" __slots__ = [] @@ -168,10 +179,10 @@ def __getstate__(self): def __setstate__(self, state): (self.id, self.refcount,) = state - def checkout(self, text_record_db): - text = text_record_db.delta_db[self.id] + def checkout_as_lines(self, text_record_db): + lines = text_record_db.delta_db[self.id] self.decrement_refcount(text_record_db) - return text + return lines def free(self, text_record_db): del text_record_db.delta_db[self.id] @@ -205,26 +216,26 @@ def __setstate__(self, state): def increment_dependency_refcounts(self, text_record_db): text_record_db[self.pred_id].refcount += 1 - def checkout(self, text_record_db): - base_text = text_record_db[self.pred_id].checkout(text_record_db) - rcs_stream = RCSStream(base_text) + def checkout_as_lines(self, text_record_db): + base_lines = text_record_db[self.pred_id].checkout_as_lines(text_record_db) + rcs_stream = RCSStream(base_lines) delta_text = text_record_db.delta_db[self.id] rcs_stream.apply_diff(delta_text) - text = rcs_stream.get_text() + lines = rcs_stream.get_lines() del rcs_stream self.refcount -= 1 if self.refcount == 0: - # This text will never be needed again; just delete ourselves + # These lines will never be needed again; just delete ourselves # without ever having stored the fulltext to the checkout # database: del text_record_db[self.id] else: # Store a new CheckedOutTextRecord in place of ourselves: - text_record_db.checkout_db['%x' % self.id] = text + text_record_db.checkout_db['%x' % self.id] = lines new_text_record = CheckedOutTextRecord(self.id) new_text_record.refcount = self.refcount text_record_db.replace(new_text_record) - return text + return lines def free(self, text_record_db): del text_record_db.delta_db[self.id] @@ -241,7 +252,8 @@ class CheckedOutTextRecord(TextRecord): These records are used for revisions whose fulltext has been computed already 
during OutputPass. The fulltext for such a - revision is stored in the text_record_db as a single string.""" + revision is stored in the text_record_db as a list of logical + lines manipulated by RCSStream.""" __slots__ = [] @@ -251,10 +263,10 @@ def __getstate__(self): def __setstate__(self, state): (self.id, self.refcount,) = state - def checkout(self, text_record_db): - text = text_record_db.checkout_db['%x' % self.id] + def checkout_as_lines(self, text_record_db): + lines = text_record_db.checkout_db['%x' % self.id] self.decrement_refcount(text_record_db) - return text + return lines def free(self, text_record_db): del text_record_db.checkout_db['%x' % self.id] @@ -533,7 +545,7 @@ def set_revision_info(self, revision, log, text): # This is revision 1.1. Write its fulltext: text_record = FullTextRecord(cvs_rev_id) self.revision_collector._writeout( - text_record, self._rcs_stream.get_text() + text_record, self._rcs_stream.get_lines() ) # There will be no more trunk revisions delivered, so free the diff --git a/cvs2svn_lib/rcs_stream.py b/cvs2svn_lib/rcs_stream.py index 0963956dc..c4ad79a07 100644 --- a/cvs2svn_lib/rcs_stream.py +++ b/cvs2svn_lib/rcs_stream.py @@ -174,13 +174,21 @@ class RCSStream: def __init__(self, text): """Instantiate and initialize the file content with TEXT.""" - self.set_text(text) + if isinstance(text, bytes): + self.set_text(text) + else: + self.set_lines(text) def get_text(self): """Return the current file content.""" return "".join(self._lines) + def get_lines(self): + """Return the current file content as a list of logical lines.""" + + return self._lines + def set_lines(self, lines): """Set the current contents to the specified LINES. 
diff --git a/run-tests.py b/run-tests.py index 7eaee0858..dd7594d85 100755 --- a/run-tests.py +++ b/run-tests.py @@ -3345,6 +3345,40 @@ def internal_co_keywords(): raise Failure() +@Cvs2SvnTestFunction +def internal_co_broken_rcsfile(): + "check if internal co can handle broken delta" + + # This is a test for GitHub issue 18. + + # Check that internal co can handle a broken delta that inserts an + # unterminated line anywhere other than after the last line. It seems + # unlikely that normal RCS/CVS operation could generate such a "broken" + # RCS file, but RCS/CVS can handle it. + + ### TODO: Once an unterminated line is inserted in the middle of the + ### line buffer, worse situations can arise than the one exercised by + ### irregular.txt,v, so we need to add test data for those cases too. + + rcs_conv = ensure_conversion( + 'gh-issue-18-irregular-rcsfile', + args=['--use-rcs', '--default-eol=native'], + dumpfile='broken-rcsfile-rcs.dump', + ) + conv = ensure_conversion( + 'gh-issue-18-irregular-rcsfile', + args=['--default-eol=native'], + dumpfile='broken-rcsfile-int.dump', + ) + if conv.output_found(r'WARNING\: internal problem\: leftover revisions'): + raise Failure() + rcs_lines = list(open(rcs_conv.dumpfile, 'rb')) + lines = list(open(conv.dumpfile, 'rb')) + # Compare all lines following the repository UUID: + if lines[3:] != rcs_lines[3:]: + raise Failure() + + @Cvs2SvnTestFunction def timestamp_chaos(): "test timestamp adjustments" @@ -4237,10 +4271,11 @@ def vendor_1_1_not_root(): internal_co_exclude, internal_co_trunk_only, internal_co_keywords, + internal_co_broken_rcsfile, leftover_revs, requires_internal_co, - timestamp_chaos, # 140: + timestamp_chaos, symlinks, empty_trunk_path, preferred_parent_cycle, @@ -4250,8 +4285,8 @@ def vendor_1_1_not_root(): add_on_branch, main_git, main_git2, - main_git_merged, # 150: + main_git_merged, main_git2_merged, git_options, main_hg, @@ -4261,8 +4296,8 @@ def vendor_1_1_not_root(): EOLVariants('LF'), EOLVariants('CR'), 
EOLVariants('CRLF'), - EOLVariants('native'), # 160: + EOLVariants('native'), no_revs_file, mirror_keyerror_test, exclude_ntdb_test, @@ -4272,8 +4307,8 @@ def vendor_1_1_not_root(): missing_deltatext, transform_unlabeled_branch_name, ignore_unlabeled_branch, - exclude_unlabeled_branch, # 170: + exclude_unlabeled_branch, unlabeled_branch_name_collision, collision_with_unlabeled_branch_name, many_deletes, @@ -4283,8 +4318,8 @@ def vendor_1_1_not_root(): exclude_symbol_default, add_on_branch2, branch_from_vendor_branch, - strange_default_branch, # 180: + strange_default_branch, move_parent, log_message_eols, missing_vendor_branch, diff --git a/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v new file mode 100644 index 000000000..c6a7b2471 --- /dev/null +++ b/test-data/gh-issue-18-irregular-rcsfile-cvsrepos/irregular.txt,v @@ -0,0 +1,122 @@ +head 1.6; +access; +symbols; +locks; strict; +comment @# @; + + +1.6 +date 2021.12.21.11.21.57; author futatuki; state Exp; +branches; +next 1.5; + +1.5 +date 2021.11.26.04.16.45; author futatuki; state Exp; +branches; +next 1.4; + +1.4 +date 2021.11.26.04.14.23; author futatuki; state Exp; +branches; +next 1.3; + +1.3 +date 2021.11.26.03.46.06; author futatuki; state Exp; +branches; +next 1.2; + +1.2 +date 2021.11.26.03.41.39; author futatuki; state Exp; +branches + 1.2.1.1; +next 1.1; + +1.1 +date 2021.11.26.03.34.28; author futatuki; state Exp; +branches; +next ; + +1.2.1.1 +date 2021.12.21.11.44.31; author futatuki; state Exp; +branches; +next ; + + +desc +@create a new file irregular.txt +@ + + +1.6 +log +@r1.6: +@ +text +@aaa +bbb +bc +cc +hhh@ + + +1.5 +log +@r1.5: +@ +text +@d3 1 +a3 1 +c@ + + +1.4 +log +@r1.4: +@ +text +@d5 1 +a5 1 +ggg@ + + +1.3 +log +@r1.3: s/eee/fff/ +@ +text +@d4 1 +a4 1 +fff@ + + +1.2 +log +@r1.2: s/ddd/eee/ +@ +text +@d4 1 +a4 1 +eee@ + + +1.2.1.1 +log +@rev:1.2.1.1 +@ +text +@d4 1 +a4 2 +ccc +eeeggg +@ + + +1.1 +log 
+@r1.1 +@ +text +@d4 1 +a4 1 +ddd@ +