From 5ceb868bfb690190557c083518053da7dd5a7510 Mon Sep 17 00:00:00 2001 From: Mitchell Blank Jr Date: Mon, 14 Jan 2019 00:59:13 +0000 Subject: [PATCH] Replace std::vector; also fix clang C++11 In my application I need to sort a vector of pointers. I was profiling timsort and was surprised at how much time was being spent in copy_to_tmp(). This method was implemented as: tmp_.clear(); tmp_.reserve(len); GFX_TIMSORT_MOVE_RANGE(begin, begin + len, std::back_inserter(tmp_)); Unfortunately this performs badly on simple types like pointers. The GFX_TIMSORT_MOVE_RANGE() macro itself can reduce to a single memmove(), but using std::back_inserter (which is required for move-only types) breaks this optimization. Instead of a nice SSE-optimized memory copy you end up with an element-by-element loop with a vector capacity check each time. As an experiment I did some metaprogramming so that trivially constructible types would just do: tmp_.assign(begin, begin + len); This basically fixed the problem, but it's still not *quite* perfect, since the non-trivial case is still doing extra capacity checks that aren't required. I did a more aggressive fix that replaced the std::vector use entirely with a custom data structure (since we don't really need a general-purpose vector here, just a managed array), which bought another couple of percent in speed. First I had to make two preparatory changes, though: 1. The C++11 support was broken on libc++ (which is the default STL for most clang installs, including Xcode). The changes @mattreecebentley made to auto-detect C++11 were mostly good but they specifically rejected anything with "_LIBCPP_VERSION" set. Not sure why. Rather than one large binary expression I instead used a larger (but hopefully easier to understand) ifdef decision tree. This can also be overridden on the command-line with -DGFX_TIMSORT_USE_CXX11=[0|1]. As before we default to using C++11 constructs unless we see some evidence that it won't work.
However, we now let modern versions of clang/libc++ pass. 2. The parts of the "TimSort" class that don't vary based on LessFunction are now in their own set of classes such as "TimSortState". This is partially just a cleanup I needed to make some template metaprogramming less gross. However, it's a good idea in any case. It's not unusual for a program to need to sort a type of data multiple ways, which means expanding the "TimSort" template multiple times. With this change, the compiler can reuse the expansion of "TimSortState" between them. If nothing else, this should compile faster. Now with that out of the way, I could get to the meat of the change: replacing the "std::vector<value_t> tmp_;" with a custom "TimSortMergeSpace<>" type. This class allocates itself like a vector, but the only "setter" is a "move_in()" method that replaces its contents via move construction (similar to what the std::back_inserter loop was doing). We don't construct elements before they're needed (even if we allocated them) so it will work even for non-default-constructible types. The move-loop is faster than before since we don't need to re-check for capacity at every insertion. However, on C++11 we do even better: we use template-specialization to provide an alternate implementation of this data type for types that pass std::is_trivially_copyable<>. The big advantage is that we can just use std::memcpy() to refill the merge buffer. The code is also simpler in general since we don't need to worry about construction/destruction of the buffer elements. Since a lot of the overall cost of the timsort algorithm is spent merging, making this data structure as fast as possible is important. This change makes sorting randomized sequences about 10% faster when working with trivially-copyable types. While I was there I also replaced the "std::vector<run> pending_" with my own "TimSortRunStack<>" type.
This doesn't have the same importance for performance, but it's another place where we don't really need the full STL vector support... just a simple resizable stack. Since I was replacing vector I thought it was more consistent to just replace both. This also removes the header dependency on <vector>. RESULTS: "make bench" on Xcode 10.1, Mac Pro: RANDOMIZED SEQUENCE [int] size=100K Before: 0.851 After: 0.745 RANDOMIZED SEQUENCE [std::string] size=100K Before: 5.389 After: 3.735 The improvement with "int" is due to the vector replacement. The bigger improvement with "std::string" is making C++11 work with the libc++ STL so that move optimizations get applied. --- README.md | 2 +- test/test.cpp | 2 +- timsort.hpp | 468 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 398 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 6af46f4..aa28835 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ COMPATIBILITY This library is compatible with C++98, but if you give compile it with C++11 or later, this library uses `std::move()` instead of value copy and thus you can sort move-only types (see [#9](https://github.com/gfx/cpp-TimSort/pull/9) for details). -You can disable use of `std::move()` by passing the macro '-DDISABLE_STD_MOVE'.
+You can disable use of `std::move()` by passing the macro '-DGFX_TIMSORT_USE_CXX11=0' SEE ALSO ================== diff --git a/test/test.cpp b/test/test.cpp index 5efbcd7..6f37116 100644 --- a/test/test.cpp +++ b/test/test.cpp @@ -10,7 +10,7 @@ #include "timsort.hpp" -#if ENABLE_STD_MOVE +#if GFX_TIMSORT_USE_CXX11 #warning std::move() enabled #else #warning std::move() disabled diff --git a/timsort.hpp b/timsort.hpp index 77dd7a2..a9fe3cd 100644 --- a/timsort.hpp +++ b/timsort.hpp @@ -29,7 +29,7 @@ #ifndef GFX_TIMSORT_HPP #define GFX_TIMSORT_HPP -#include +#include // std::allocator #include #include // std::copy #include // std::less @@ -41,10 +41,88 @@ #define GFX_TIMSORT_LOG(expr) ((void)0) #endif -// If compiler supports both type traits and move semantics - will cover most but not all compilers/std libraries: -#if (defined(_MSC_VER) && _MSC_VER >= 1700) || ((defined(__cplusplus) && __cplusplus >= 201103L && !defined(_LIBCPP_VERSION)) && ((!defined(__GNUC__) || __GNUC__ >= 5)) && (!defined(__GLIBCXX__) || __GLIBCXX__ >= 20150422)) +// The "GFX_TIMSORT_USE_CXX11" define can be used to control whether we +// use C++11 extensions like type-traits and move semantics. By default +// it is enabled unless we suspect that the compiler or STL is too old +// to support them: +#ifndef GFX_TIMSORT_USE_CXX11 +# define GFX_TIMSORT_USE_CXX11 1 +# ifdef _MSC_VER +# if _MSC_VER < 1700 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# ifdef __cplusplus +# if __cplusplus < 201103L +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# undef GFX_TIMSORT_USE_CXX11 +# endif +# if defined(__cplusplus) && __cplusplus < 201103L +# undef GFX_TIMSORT_USE_CXX11 +# endif +# ifdef _GLIBCXX_RELEASE + // This setting only got added in gcc 7.1, so its presence always + // indicates a C++11-ready STL +# elif defined(__GLIBCXX__) + // Before 7.1, the only way to test the version of libstdc++ is the + // __GLIBCXX__ date macro. 
However, it's not monotonically increasing since + // releases kept being made from older branches. The best we can do is + // to disallow any version that is definitely before gcc 5.1 (the first + // version that had enough C++11 support for us) and then blacklist + // dates that are known to correspond with non-working versions. + // + // Note this really only is a problem when mixing compilers and STL (i.e. + // compiling using clang but using gcc's libstdc++). Otherwise we'll + // correctly reject the gcc compiler if it's too old later. +# if __GLIBCXX__ < 20150422 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# if __GLIBCXX__ == 20150426 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.8.4+patches shipped with Ubuntu LTS 14.04 +# endif +# if __GLIBCXX__ == 20150623 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.8.5 +# endif +# if __GLIBCXX__ == 20150626 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.9.3 +# endif +# if __GLIBCXX__ == 20160803 +# undef GFX_TIMSORT_USE_CXX11 // gcc 4.9.5 +# endif +# elif defined(__GLIBCPP__) // *really* old version of libstdc++ +# undef GFX_TIMSORT_USE_CXX11 +# endif +# ifdef _LIBCPP_VERSION +# if defined(_LIBCPP_HAS_NO_RVALUE_REFERENCES) || defined(_LIBCPP_CXX03_LANG) +# undef GFX_TIMSORT_USE_CXX11 +# endif +# endif +# ifdef __clang__ +# ifdef __has_feature +# if !(__has_feature(cxx_rvalue_references) && __has_feature(is_trivially_copyable)) +# undef GFX_TIMSORT_USE_CXX11 +# endif +# else +# undef GFX_TIMSORT_USE_CXX11 +# endif +# elif defined(__GNUC__) +# if __GNUC__ < 5 +# undef GFX_TIMSORT_USE_CXX11 +# endif +# endif +# endif +# ifndef GFX_TIMSORT_USE_CXX11 +# define GFX_TIMSORT_USE_CXX11 0 +# endif +#endif + +#if GFX_TIMSORT_USE_CXX11 + #include #include // iterator_traits #include // std::move + #include // std::memcpy #define GFX_TIMSORT_MOVE(x) (std::is_move_constructible::value && std::is_move_assignable::value) ? 
std::move(x) : (x) #define GFX_TIMSORT_MOVE_RANGE(in1, in2, out) \ @@ -126,34 +204,299 @@ template class Compare { func_type less_; }; -template class TimSort { +// Some details shared between the two different implementations of TimSortMergeSpace<> +template struct TimSortMergeSpaceBase : public std::allocator { + value_t *startp_; + value_t *endp_; + value_t const *alloc_limitp_; + TimSortMergeSpaceBase() : startp_(0), endp_(0), alloc_limitp_(0) { + } + LengthType next_capacity() const { + if (startp_ == 0) { + return (sizeof(*startp_) > 32) ? 1 : (64 / sizeof(*startp_)); + } + LengthType const old_cap = alloc_limitp_ - startp_; + assert(old_cap > 0); + return (old_cap * 3) >> 1; + } +#if GFX_TIMSORT_USE_CXX11 + private: + TimSortMergeSpaceBase(const TimSortMergeSpaceBase&) = delete; + TimSortMergeSpaceBase& operator=(const TimSortMergeSpaceBase&) = delete; +#endif +}; + +// This is a generic memory buffer for temporary holding space during TimSort +// merge operations. This implementation will work for any supported "value_t" +// type (one that is at least move-constructable and move-copyable) +// +// This just provides a temporary buffer which can have elements moved +// into it via the move_in() method and then iterated using begin()/size() +template class TimSortMergeSpace { + public: + typedef value_t *iterator; + + iterator begin() { + return base_.startp_; + } + LengthType size() const { + return base_.endp_ - base_.startp_; + } + private: + TimSortMergeSpaceBase base_; + value_t *ctor_limitp_; + + void destruct() { + if (base_.startp_ != 0) { + iterator const e = ctor_limitp_; + iterator p = begin(); + do { + p->~value_t(); + } while (++p < e); + base_.deallocate(base_.startp_, base_.alloc_limitp_ - base_.startp_); + } + } + public: + TimSortMergeSpace() : ctor_limitp_(0) { + } + ~TimSortMergeSpace() { + destruct(); + } + void move_in(RandomAccessIterator const in_begin, LengthType const len) { + assert(len > 0); + value_t *nend = base_.startp_ + len; + 
RandomAccessIterator const in_end = in_begin + len; + if (nend <= ctor_limitp_) { + GFX_TIMSORT_MOVE_RANGE(in_begin, in_end, base_.startp_); + } else { + // We'll have to construct at least one new element; s..in_end + // represents the range of source elements that will need + // that treatment + RandomAccessIterator s; + if (nend <= base_.alloc_limitp_) { + // We don't need to allocate new memory, but we do need + // to call the constructor on some of our elements since + // we haven't been this size before. The others we can + // just use move_range() on: + s = in_begin + (ctor_limitp_ - base_.startp_); + GFX_TIMSORT_MOVE_RANGE(in_begin, s, base_.startp_); + } else { + // Our current allocation is too small so allocate a + // new array entirely + LengthType const new_cap = std::max(base_.next_capacity(), len); + destruct(); + value_t *nv; + try { + nv = base_.allocate(new_cap); + } catch (...) { + base_.startp_ = 0; + base_.endp_ = 0; + base_.alloc_limitp_ = 0; + ctor_limitp_ = 0; + throw; + } + base_.startp_ = nv; + base_.alloc_limitp_ = nv + new_cap; + ctor_limitp_ = nv; + nend = nv + len; + s = in_begin; + } + do { + new(ctor_limitp_) value_t(GFX_TIMSORT_MOVE(*s)); + ++ctor_limitp_; + } while (++s < in_end); + assert(nend <= ctor_limitp_); + assert(ctor_limitp_ <= base_.alloc_limitp_); + } + base_.endp_ = nend; + } +}; + +#if GFX_TIMSORT_USE_CXX11 +// Simpler specialization of TimSortMergeSpace<> for trivially_constructable +// value_t's. Here we don't need to bother calling the constructor/destructors +// on each element so we can use move_range(). This is significantly faster +// because it lets us use the well optimized memcpy() instead of a per-element loop. 
+template class TimSortMergeSpace { + public: + typedef value_t *iterator; + + iterator begin() { + return base_.startp_; + } + LengthType size() const { + return base_.endp_ - base_.startp_; + } + private: + TimSortMergeSpaceBase base_; + + void destruct() { + if (base_.startp_ != 0) { + base_.deallocate(base_.startp_, base_.alloc_limitp_ - base_.startp_); + } + } + public: + TimSortMergeSpace() { + } + ~TimSortMergeSpace() { + destruct(); + } + void move_in(RandomAccessIterator const in_begin, LengthType const len) { + assert(len > 0); + value_t *nend = base_.startp_ + len; + if (nend > base_.alloc_limitp_) { + // Our current allocation is too small so allocate a + // new array entirely + LengthType const new_cap = std::max(base_.next_capacity(), len); + destruct(); + value_t *nv; + try { + nv = base_.allocate(new_cap); + } catch (...) { + base_.startp_ = 0; + base_.endp_ = 0; + base_.alloc_limitp_ = 0; + throw; + } + base_.startp_ = nv; + base_.alloc_limitp_ = nv + new_cap; + nend = nv + len; + assert(nend <= base_.alloc_limitp_); + } + // The most correct way to copy these trivially-constructable elements + // into base_.startp_ would be to do: + // GFX_TIMSORT_MOVE_RANGE(in_begin, in_begin + len, base_.startp_); + // Since the types are default-constructable the STL will be able to + // reduce this to a call to std::memmove() However, we know that we're + // never dealing with overlapping memory here, so it's a tiny bit + // faster to use std::memcpy() instead. + std::memcpy(base_.startp_, &*in_begin, len * sizeof(base_.startp_[0])); + base_.endp_ = nend; + } +}; +#endif + +// Dynamically-allocated stack of pending "runs" that TimSort needs to merge. 
+template class TimSortRunStack { + public: + struct run { + RandomAccessIterator base; + LengthType len; + }; + private: + struct contents : public std::allocator { + contents() : startp_(0), endp_(0), alloc_limitp_(0) { + } + struct run *startp_; + struct run *endp_; + struct run const *alloc_limitp_; + + struct run *push_back_grow() { + assert(endp_ == alloc_limitp_); + LengthType const old_cap = (endp_ - startp_); + LengthType const new_cap = (startp_ == 0) ? 16 : ((old_cap * 3) >> 1); + assert(new_cap > old_cap); + struct run *nv = this->allocate(new_cap); + if (startp_ != 0) { + std::copy(startp_, endp_, nv); + this->deallocate(startp_, endp_ - startp_); + } + startp_ = nv; + // endp_ is set by our caller + alloc_limitp_ = nv + new_cap; + return nv + old_cap + 1; + } + }; + struct contents c_; +#if GFX_TIMSORT_USE_CXX11 + TimSortRunStack(const TimSortRunStack&) = delete; + TimSortRunStack& operator=(const TimSortRunStack&) = delete; +#endif + public: + TimSortRunStack() { + } + ~TimSortRunStack() { + c_.deallocate(c_.startp_, c_.alloc_limitp_ - c_.startp_); + } + void push_back(RandomAccessIterator const runBase, LengthType const runLen) { + struct run *nend = c_.endp_ + 1; + if (nend > c_.alloc_limitp_) { + nend = c_.push_back_grow(); + } + c_.endp_ = nend; + nend[-1].base = runBase; + nend[-1].len = runLen; + } + void pop_back() { + assert(c_.endp_ > c_.startp_); + --c_.endp_; + } + LengthType size() const { + return c_.endp_ - c_.startp_; + } + struct run& operator[](LengthType const i) { + return c_.startp_[i]; + } +}; + +namespace timsort_constants { +static const int MIN_GALLOP = 7; +static const int MIN_MERGE = 32; +} // namespace + +// This holds all of the TimSort state that is invariant with respect to +// LessFunction. In other words, this template expansion can be shared +// by the compiler when sorting the same type in different orderings. 
+template class TimSortState { typedef RandomAccessIterator iter_t; typedef typename std::iterator_traits::value_type value_t; - typedef typename std::iterator_traits::reference ref_t; typedef typename std::iterator_traits::difference_type diff_t; - typedef Compare compare_t; - static const int MIN_MERGE = 32; + TimSortRunStack pending_; - compare_t comp_; - - static const int MIN_GALLOP = 7; +#if GFX_TIMSORT_USE_CXX11 + typedef TimSortMergeSpace::value> merge_space; +#else + typedef TimSortMergeSpace merge_space; +#endif + merge_space tmp_; // temp storage for merges int minGallop_; // default to MIN_GALLOP - std::vector tmp_; // temp storage for merges - typedef typename std::vector::iterator tmp_iter_t; + TimSortState() : minGallop_(timsort_constants::MIN_GALLOP) { + } + ~TimSortState() { + } - struct run { - iter_t base; - diff_t len; + static diff_t minRunLength(diff_t n) { + using namespace timsort_constants; + assert(n >= 0); - run(iter_t const b, diff_t const l) : base(b), len(l) { + diff_t r = 0; + while (n >= MIN_MERGE) { + r |= (n & 1); + n >>= 1; } - }; - std::vector pending_; + return n + r; + } + template friend class TimSort; +}; + +template class TimSort +{ + TimSortState state_; + + typedef RandomAccessIterator iter_t; + typedef typename std::iterator_traits::value_type value_t; + typedef typename std::iterator_traits::difference_type diff_t; + typedef typename std::iterator_traits::reference ref_t; + typedef typename TimSortState::merge_space::iterator tmp_iter_t; + typedef Compare compare_t; + + compare_t comp_; static void sort(iter_t const lo, iter_t const hi, compare_t c) { + using namespace timsort_constants; assert(lo <= hi); diff_t nRemaining = (hi - lo); @@ -169,7 +512,7 @@ template class TimSort { } TimSort ts(c); - diff_t const minRun = minRunLength(nRemaining); + diff_t const minRun = TimSortState::minRunLength(nRemaining); iter_t cur = lo; do { diff_t runLen = countRunAndMakeAscending(cur, hi, c); @@ -180,7 +523,7 @@ template class 
TimSort { runLen = force; } - ts.pushRun(cur, runLen); + ts.state_.pending_.push_back(cur, runLen); ts.mergeCollapse(); cur += runLen; @@ -189,10 +532,10 @@ template class TimSort { assert(cur == hi); ts.mergeForceCollapse(); - assert(ts.pending_.size() == 1); + assert(ts.state_.pending_.size() == 1); GFX_TIMSORT_LOG("size: " << (hi - lo) << " tmp_.size(): " << ts.tmp_.size() - << " pending_.size(): " << ts.pending_.size()); + << " pending_.size(): " << ts.state_.pending_.size()); } // sort() static void binarySort(iter_t const lo, iter_t const hi, iter_t start, compare_t compare) { @@ -234,35 +577,20 @@ template class TimSort { return runHi - lo; } - static diff_t minRunLength(diff_t n) { - assert(n >= 0); - - diff_t r = 0; - while (n >= MIN_MERGE) { - r |= (n & 1); - n >>= 1; - } - return n + r; - } - - TimSort(compare_t c) : comp_(c), minGallop_(MIN_GALLOP) { - } - - void pushRun(iter_t const runBase, diff_t const runLen) { - pending_.push_back(run(runBase, runLen)); + explicit TimSort(compare_t c) : comp_(c) { } void mergeCollapse() { - while (pending_.size() > 1) { - diff_t n = pending_.size() - 2; + while (state_.pending_.size() > 1) { + diff_t n = state_.pending_.size() - 2; - if ((n > 0 && pending_[n - 1].len <= pending_[n].len + pending_[n + 1].len) || - (n > 1 && pending_[n - 2].len <= pending_[n - 1].len + pending_[n].len)) { - if (pending_[n - 1].len < pending_[n + 1].len) { + if ((n > 0 && state_.pending_[n - 1].len <= state_.pending_[n].len + state_.pending_[n + 1].len) || + (n > 1 && state_.pending_[n - 2].len <= state_.pending_[n - 1].len + state_.pending_[n].len)) { + if (state_.pending_[n - 1].len < state_.pending_[n + 1].len) { --n; } mergeAt(n); - } else if (pending_[n].len <= pending_[n + 1].len) { + } else if (state_.pending_[n].len <= state_.pending_[n + 1].len) { mergeAt(n); } else { break; @@ -271,10 +599,10 @@ template class TimSort { } void mergeForceCollapse() { - while (pending_.size() > 1) { - diff_t n = pending_.size() - 2; + while 
(state_.pending_.size() > 1) { + diff_t n = state_.pending_.size() - 2; - if (n > 0 && pending_[n - 1].len < pending_[n + 1].len) { + if (n > 0 && state_.pending_[n - 1].len < state_.pending_[n + 1].len) { --n; } mergeAt(n); @@ -282,26 +610,26 @@ template class TimSort { } void mergeAt(diff_t const i) { - diff_t const stackSize = pending_.size(); + diff_t const stackSize = state_.pending_.size(); assert(stackSize >= 2); assert(i >= 0); assert(i == stackSize - 2 || i == stackSize - 3); - iter_t base1 = pending_[i].base; - diff_t len1 = pending_[i].len; - iter_t base2 = pending_[i + 1].base; - diff_t len2 = pending_[i + 1].len; + iter_t base1 = state_.pending_[i].base; + diff_t len1 = state_.pending_[i].len; + iter_t base2 = state_.pending_[i + 1].base; + diff_t len2 = state_.pending_[i + 1].len; assert(len1 > 0 && len2 > 0); assert(base1 + len1 == base2); - pending_[i].len = len1 + len2; + state_.pending_[i].len = len1 + len2; if (i == stackSize - 3) { - pending_[i + 1] = pending_[i + 2]; + state_.pending_[i + 1] = state_.pending_[i + 2]; } - pending_.pop_back(); + state_.pending_.pop_back(); diff_t const k = gallopRight(*base2, base1, len1, 0); assert(k >= 0); @@ -417,11 +745,12 @@ template class TimSort { } void mergeLo(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) { + using namespace timsort_constants; assert(len1 > 0 && len2 > 0 && base1 + len1 == base2); - copy_to_tmp(base1, len1); + state_.tmp_.move_in(base1, len1); - tmp_iter_t cursor1 = tmp_.begin(); + tmp_iter_t cursor1 = state_.tmp_.begin(); iter_t cursor2 = base2; iter_t dest = base1; @@ -436,7 +765,7 @@ template class TimSort { return; } - int minGallop(minGallop_); + int minGallop(state_.minGallop_); // outer: while (true) { @@ -519,7 +848,7 @@ template class TimSort { minGallop += 2; } // end of "outer" loop - minGallop_ = std::min(minGallop, 1); + state_.minGallop_ = std::min(minGallop, 1); if (len1 == 1) { assert(len2 > 0); @@ -534,17 +863,18 @@ template class TimSort { } void 
mergeHi(iter_t const base1, diff_t len1, iter_t const base2, diff_t len2) { + using namespace timsort_constants; assert(len1 > 0 && len2 > 0 && base1 + len1 == base2); - copy_to_tmp(base2, len2); + state_.tmp_.move_in(base2, len2); iter_t cursor1 = base1 + (len1 - 1); - tmp_iter_t cursor2 = tmp_.begin() + (len2 - 1); + tmp_iter_t cursor2 = state_.tmp_.begin() + (len2 - 1); iter_t dest = base2 + (len2 - 1); *(dest--) = GFX_TIMSORT_MOVE(*(cursor1--)); if (--len1 == 0) { - GFX_TIMSORT_MOVE_RANGE(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + GFX_TIMSORT_MOVE_RANGE(state_.tmp_.begin(), state_.tmp_.begin() + len2, dest - (len2 - 1)); return; } if (len2 == 1) { @@ -555,7 +885,7 @@ template class TimSort { return; } - int minGallop(minGallop_); + int minGallop(state_.minGallop_); // outer: while (true) { @@ -609,7 +939,7 @@ template class TimSort { break; } - count2 = len2 - gallopLeft(*cursor1, tmp_.begin(), len2, len2 - 1); + count2 = len2 - gallopLeft(*cursor1, state_.tmp_.begin(), len2, len2 - 1); if (count2 != 0) { dest -= count2; cursor2 -= count2; @@ -638,7 +968,7 @@ template class TimSort { minGallop += 2; } // end of "outer" loop - minGallop_ = std::min(minGallop, 1); + state_.minGallop_ = std::min(minGallop, 1); if (len2 == 1) { assert(len1 > 0); @@ -649,16 +979,10 @@ template class TimSort { assert(len2 != 0 && "Comparison function violates its general contract"); assert(len1 == 0); assert(len2 > 1); - GFX_TIMSORT_MOVE_RANGE(tmp_.begin(), tmp_.begin() + len2, dest - (len2 - 1)); + GFX_TIMSORT_MOVE_RANGE(state_.tmp_.begin(), state_.tmp_.begin() + len2, dest - (len2 - 1)); } } - void copy_to_tmp(iter_t const begin, diff_t const len) { - tmp_.clear(); - tmp_.reserve(len); - GFX_TIMSORT_MOVE_RANGE(begin, begin + len, std::back_inserter(tmp_)); - } - // the only interface is the friend timsort() function template friend void timsort(IterT first, IterT last, LessT c); };