From c8596aa9a0cae5ced38a73d4eaabc194eea8a0ab Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Fri, 18 Oct 2019 21:22:44 +0200
Subject: [PATCH 1/8] Fix make -j breakage

---
 Makefile | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 7bcc435..1c3f46c 100644
--- a/Makefile
+++ b/Makefile
@@ -35,13 +35,19 @@ build: $(TARGETS) $(SUBST_FILES)
 ifeq ($(PARSERGEN_ENABLED),yes)
 $(PARSER_GEN): | config.mk
 
-lex.cc scannerbase.h: checktestdata.l scanner.h scanner.ih
+scannerbase.h: checktestdata.l scanner.h scanner.ih
 	flexc++ $<
 	$(call INSERT_VERSION,FLEXCPP_VERSION,$(shell flexc++ --version))
+lex.cc: scannerbase.h
+	@# generated at the same time as scannerbase.h, nothing more to do here
+	@# (but we still need this dummy recipe)
 
-parse.cc parserbase.h: checktestdata.y parser.h parser.ih parsetype.hpp
+parserbase.h: checktestdata.y parser.h parser.ih parsetype.hpp
 	bisonc++ $<
 	$(call INSERT_VERSION,BISONCPP_VERSION,$(shell bisonc++ --version))
+parse.cc: parserbase.h
+	@# generated at the same time as parserbase.h, nothing more to do here
+	@# (but we still need this dummy recipe)
 endif
 
 checkcmd = ./checktestdata $$opts $$prog $$data

From 33eafcb028ed3a7f96e6491964e38498f6ef75fb Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Fri, 18 Oct 2019 21:29:45 +0200
Subject: [PATCH 2/8] Fix compilation with Bisonc++ 6

---
 parser.h  | 22 ++++++++++++++++------
 parser.ih | 17 +++++++++++++++--
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/parser.h b/parser.h
index b942323..34ca942 100644
--- a/parser.h
+++ b/parser.h
@@ -18,27 +18,37 @@ class Parser: public ParserBase
     public:
         Parser(): d_scanner() {}
         Parser(std::istream& in, int startState = 0): d_scanner(in)
-	        { d_scanner.parserStart = startState; }
+            { d_scanner.parserStart = startState; }
         int parse();
 
         // The final result of parsing:
         parse_t parseResult;
 
     private:
-        void error(char const *msg);    // called on (syntax) errors
         int lex();                      // returns the next token from the
                                         // lexical scanner.
         void print();                   // use, e.g., d_token, d_loc
 
-    // support functions for parse():
+#if ( BISONCPP_VERSION >= 60000LL )
+        void error();                   // called on (syntax) errors
+        void exceptionHandler(std::exception const &exc);
+#else
+        void error(const char *msg);    // called on (syntax) errors
+#if ( BISONCPP_VERSION >= 40200LL )
+        void exceptionHandler__(std::exception const &exc);
+#endif
+#endif
+
+    // support functions (the __ versions are used from version 6 and above)
         void executeAction(int ruleNr);
         void errorRecovery();
         int lookup(bool recovery);
         void nextToken();
+        void executeAction__(int ruleNr);
+        void errorRecovery__();
+        void nextCycle__();
+        void nextToken__();
         void print__();
-#if ( BISONCPP_VERSION >= 40200LL )
-        void exceptionHandler__(std::exception const &exc);
-#endif
 };
 
 #endif
diff --git a/parser.ih b/parser.ih
index 3af15cf..8424934 100644
--- a/parser.ih
+++ b/parser.ih
@@ -1,4 +1,4 @@
-// Generated by Bisonc++ V4.01.00 on Sat, 17 Aug 2013 12:52:37 +0100
+// Generated by Bisonc++ V6.01.00 on Fri, 18 Oct 2019 20:59:13 +0200
 
     // Include this file in the sources of the class Parser.
 
@@ -6,10 +6,18 @@
 #include "parser.h"
 
 
+#if ( BISONCPP_VERSION >= 60000LL )
+inline void Parser::error() // this variant receives no message, so a fixed "Syntax error" text is printed
+{
+    std::cerr << "Syntax error on line " << d_scanner.lineNr() << std::endl;
+}
+#else
 inline void Parser::error(char const *msg)
 {
     std::cerr << msg << " on line " << d_scanner.lineNr() << std::endl;
 }
+#endif
+
 
 // $insert lex
 inline int Parser::lex()
@@ -25,12 +33,17 @@ inline void Parser::print()
 }
 
 #if ( BISONCPP_VERSION >= 40200LL )
+
+#if ( BISONCPP_VERSION >= 60000LL )
+inline void Parser::exceptionHandler(std::exception const &exc)
+#else
 inline void Parser::exceptionHandler__(std::exception const &exc)
+#endif
 {
     throw;              // re-implement to handle exceptions thrown by actions
 }
-#endif
 
+#endif
 
     // Add here includes that are only required for the compilation
     // of Parser's sources.

From f6095ec44b641519ac5da79f551d98f2d3cd4226 Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 02:58:32 +0200
Subject: [PATCH 3/8] Don't use GMP for small integers

This cuts runtime down from ~4.7s to ~3s.
---
 bigint.hpp          | 180 ++++++++++++++++++++++++++++++++++++++++++++
 libchecktestdata.cc | 112 ++++++++++++++++-----------
 parsetype.cc        |   8 +-
 parsetype.hpp       |  10 ++-
 4 files changed, 261 insertions(+), 49 deletions(-)
 create mode 100644 bigint.hpp

diff --git a/bigint.hpp b/bigint.hpp
new file mode 100644
index 0000000..5f4a088
--- /dev/null
+++ b/bigint.hpp
@@ -0,0 +1,180 @@
+#ifndef BIGINT_HPP
+#define BIGINT_HPP
+
+/**
+ * Wrapper around gmp's mpz_class, with a fast path for values that fit in a long.
+ */
+class bigint { // holds its value in 'small' (a long) or, once switched over, in 'large' (GMP)
+private:
+	void assign_from(const mpz_class& x) const { // switch to the GMP representation
+		large.reset(new mpz_class(x));
+		small = LONG_MIN;
+	}
+
+public:
+	// Invariant: if 'large' is set, 'small' must be set to LONG_MIN and not used.
+	// ('small' is allowed to be LONG_MIN if 'large' is null however.)
+	mutable long small; // mutable: const members (to_mpz, shrink) switch representation
+	mutable std::unique_ptr<mpz_class> large;
+
+	bigint(): small(0) {}
+	bigint(long x): small(x) {}
+	bigint(const mpz_class& x) { assign_from(x); }
+	bigint(bigint&& other) noexcept: small(other.small), large(std::move(other.large)) {} // noexcept so containers move instead of copy
+	bigint(const bigint& other): small(other.small) {
+		if (other.large) {
+			assign_from(*other.large);
+		}
+	}
+	bigint(const std::string& str) { // expects an optional '-' followed by decimal digits
+		bool neg = false;
+		size_t i = 0;
+		if (str[0] == '-') {
+			neg = true;
+			i = 1;
+		}
+		long val = 0;
+		for (; i < str.size(); i++) {
+			int dig = str[i] - '0';
+			if (__builtin_smull_overflow(val, 10, &val) ||
+			    __builtin_saddl_overflow(val, dig, &val)) {
+				assign_from(mpz_class(str)); // magnitude does not fit in a long: let GMP parse it
+				return;
+			}
+		}
+		small = neg ? -val : val;
+	}
+	mpz_class to_mpz() const { // NOTE: side effect, *this is left in the GMP representation
+		if (!large) {
+			assign_from(mpz_class(small));
+		}
+		return *large;
+	}
+	void shrink() const { // return to the long representation when the value fits
+		if (large && large->fits_slong_p()) {
+			small = large->get_si();
+			if (small != LONG_MIN) { // the value LONG_MIN itself keeps its 'large' copy
+				large.reset();
+			}
+		}
+	}
+	bigint& operator=(const bigint& other) {
+		small = other.small;
+		if (other.large) {
+			assign_from(*other.large);
+		} else {
+			large = nullptr;
+		}
+		return *this;
+	}
+	bigint& operator=(bigint&& other) noexcept {
+		small = other.small;
+		large = std::move(other.large);
+		return *this;
+	}
+	std::string get_str() const {
+		return large ? large->get_str() : std::to_string(small);
+	}
+	bool fits_ulong_p() const {
+		return large ? large->fits_ulong_p() : small >= 0; // every non-negative long fits
+	}
+	unsigned long get_ui() const {
+		return large ? large->get_ui() : static_cast<unsigned long>(small);
+	}
+	long get_si() const {
+		return large ? large->get_si() : small;
+	}
+};
+
+inline bigint operator+(const bigint& a, const bigint& b) {
+	// Native add when both operands are plain longs and the sum fits; GMP otherwise.
+	long sum;
+	const bool native = !a.large && !b.large && !__builtin_saddl_overflow(a.small, b.small, &sum);
+	if (native) return {sum};
+	return {a.to_mpz() + b.to_mpz()};
+}
+
+inline bigint operator-(const bigint& a, const bigint& b) {
+	// Native subtract when both operands are plain longs and the result fits; GMP otherwise.
+	long diff;
+	const bool native = !a.large && !b.large && !__builtin_ssubl_overflow(a.small, b.small, &diff);
+	if (native) return {diff};
+	return {a.to_mpz() - b.to_mpz()};
+}
+
+inline bigint operator-(const bigint& a) { // negation; -LONG_MIN does not fit in a long
+	if (!a.large && a.small != LONG_MIN) {
+		return {-a.small};
+	}
+	return {-a.to_mpz()}; // not *a.large: it is null when a plain long holds LONG_MIN
+}
+
+inline bigint operator*(const bigint& a, const bigint& b) {
+	// Native multiply when both operands are plain longs and the product fits; GMP otherwise.
+	long prod;
+	const bool native = !a.large && !b.large && !__builtin_smull_overflow(a.small, b.small, &prod);
+	if (native) return {prod};
+	return {a.to_mpz() * b.to_mpz()};
+}
+
+inline bigint operator/(const bigint& a, const bigint& b) {
+	// Native division only for plain longs; a dividend of LONG_MIN is always handed to GMP.
+	const bool native = !a.large && !b.large && a.small != LONG_MIN;
+	if (!native) return {a.to_mpz() / b.to_mpz()};
+	return {a.small / b.small};
+}
+
+inline bigint operator%(const bigint& a, const bigint& b) {
+	// Native remainder only for plain longs; a dividend of LONG_MIN is always handed to GMP.
+	const bool native = !a.large && !b.large && a.small != LONG_MIN;
+	if (!native) return {a.to_mpz() % b.to_mpz()};
+	return {a.small % b.small};
+}
+
+inline bigint& operator%=(bigint& a, const bigint& b) { return a = a % b; }
+
+#define BINOP(op, opeq) /* 'op' for bigint mixed with int or mpf_class (both orders), plus compound 'opeq' */ \
+	inline bigint operator op(const bigint& a, int b) { return a op bigint(b); } \
+	inline bigint operator op(int a, const bigint& b) { return bigint(a) op b; } \
+	inline mpf_class operator op(const bigint& a, const mpf_class& b) { return a.to_mpz() op b; } \
+	inline mpf_class operator op(const mpf_class& a, const bigint& b) { return a op b.to_mpz(); } \
+	inline bigint& operator opeq(bigint& a, const bigint& b) { a = a op b; return a; }
+
+#define RELOP(op) /* comparison 'op' for bigint against bigint, int and mpf_class; longs are compared directly when no operand is in GMP form */ \
+	inline bool operator op(const bigint& a, const mpf_class& b) { return a.to_mpz() op b; } \
+	inline bool operator op(const mpf_class& a, const bigint& b) { return a op b.to_mpz(); } \
+	inline bool operator op(const bigint& a, const bigint& b) { \
+		return !a.large && !b.large ? a.small op b.small : a.to_mpz() op b.to_mpz(); \
+	} \
+	inline bool operator op(const bigint& a, int b) { \
+		return !a.large ? a.small op b : a.to_mpz() op b; \
+	} \
+	inline bool operator op(int a, const bigint& b) { \
+		return !b.large ? a op b.small : a op b.to_mpz(); \
+	}
+
+
+BINOP(+, +=)
+BINOP(-, -=)
+BINOP(*, *=)
+BINOP(/, /=)
+
+RELOP(<)
+RELOP(>)
+RELOP(<=)
+RELOP(>=)
+RELOP(==)
+RELOP(!=)
+
+#undef BINOP
+#undef RELOP
+
+inline std::ostream& operator<<(std::ostream& os, const bigint& x) {
+	// Print whichever representation currently holds the value.
+	if (!x.large) {
+		return os << x.small;
+	}
+	return os << *x.large;
+}
+
+
+#endif /* BIGINT_HPP */
diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 38a02fd..971f7e0 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -62,8 +62,8 @@ vector<command> program;
 // This stores array-type variables like x[i,j] as string "x" and
 // vector of the indices. Plain variables are stored using an index
 // vector of zero length.
-typedef map<vector<mpz_class>,value_t> indexmap;
-typedef map<value_t,set<vector<mpz_class>>> valuemap;
+typedef map<vector<bigint>,value_t> indexmap;
+typedef map<value_t,set<vector<bigint>>> valuemap;
 map<string,indexmap> variable, preset;
 map<string,valuemap> rev_variable, rev_preset;
 
@@ -197,15 +197,17 @@ long string2int(string s)
 	return res;
 }
 
-value_t eval(const expr&); // forward declaration
+// forward declarations
+value_t eval(const expr&);
+bigint evalAsInt(const expr& e);
 
 value_t getvar(const expr& var, int use_preset = 0)
 {
-	// Construct index array. The cast to mpz_class automatically
-	// verifies that the index value is of type mpz_class.
-	vector<mpz_class> ind;
+	// Construct index array. The cast to bigint automatically
+	// verifies that the index value is of type bigint.
+	vector<bigint> ind;
 	for(size_t i=0; i<var.nargs(); i++) {
-		ind.push_back(mpz_class(eval(var.args[i])));
+		ind.push_back(evalAsInt(var.args[i]));
 	}
 	if ( use_preset ) {
 		if ( preset.count(var.val) && preset[var.val].count(ind) ) {
@@ -226,11 +228,11 @@ value_t getvar(const expr& var, int use_preset = 0)
 
 void setvar(const expr& var, value_t val, int use_preset = 0)
 {
-	// Construct index array. The cast to mpz_class automatically
-	// verifies that the index value is of type mpz_class.
-	vector<mpz_class> ind;
+	// Construct index array. The cast to bigint automatically
+	// verifies that the index value is of type bigint.
+	vector<bigint> ind;
 	for(size_t i=0; i<var.nargs(); i++) {
-		ind.push_back(mpz_class(eval(var.args[i])));
+		ind.push_back(evalAsInt(var.args[i]));
 	}
 
 	map<string,indexmap> *varlist = &variable;
@@ -277,7 +279,12 @@ value_t value(const expr& x)
 
 	mpz_class intval;
 	mpf_class fltval;
-	if ( intval.set_str(x.val,0)==0 ) return x.cache = value_t(intval);
+	if ( intval.set_str(x.val,0)==0 ) {
+		bigint c = bigint(intval);
+		c.shrink();
+		x.cachedLong = c.small;
+		return x.cache = value_t(c);
+	}
 	else if ( fltval.set_str(x.val,0)==0 ) {
 		// Set sufficient precision:
 		if ( fltval.get_prec()<4*x.val.length() ) {
@@ -293,15 +300,15 @@ value_t value(const expr& x)
 template<class A, class B>
 struct arith_result {
 	typedef typename conditional<
-		is_same<A,mpz_class>::value && is_same<B,mpz_class>::value,
-			mpz_class,
+		is_same<A,bigint>::value && is_same<B,bigint>::value,
+			bigint,
 			mpf_class
 			>::type type;
 };
 
 template<class A, class B> struct arith_compatible {
-	constexpr static bool value = (is_same<mpz_class,A>::value || is_same<mpf_class,A>::value) &&
-		(is_same<mpz_class,B>::value || is_same<mpf_class,B>::value);
+	constexpr static bool value = (is_same<bigint,A>::value || is_same<mpf_class,A>::value) &&
+		(is_same<bigint,B>::value || is_same<mpf_class,B>::value);
 };
 
 template<class A, class B> struct is_comparable {
@@ -362,13 +369,13 @@ DECL_VALUE_CMPOP(!=,ne)
 
 value_t operator -(const value_t &x)
 {
-	return value_t(mpz_class(0)) - x;
+	return value_t(bigint(0)) - x;
 }
 
 value_t operator %(const value_t &x, const value_t &y)
 {
-	const mpz_class *xp, *yp;
-	if ( (xp = boost::get<const mpz_class>(&x.val)) && (yp = boost::get<const mpz_class>(&y.val))) {
+	const bigint *xp, *yp;
+	if ( (xp = boost::get<const bigint>(&x.val)) && (yp = boost::get<const bigint>(&y.val))) {
 		auto res = *xp;
 		res %= *yp;
 		return value_t(res);
@@ -379,31 +386,34 @@ value_t operator %(const value_t &x, const value_t &y)
 
 struct pow_visitor : public boost::static_visitor<value_t> {
 	template<class B, class E>
-	value_t operator()(const B& b, const E& e) const {
+	value_t operator()(const B&, const E&) const {
 		cerr << "only integer exponents allowed in " << program[prognr] << endl;
 		exit(exit_failure);
 	}
 	template<class B>
-	value_t operator()(const B& b, const mpz_class& e) const {
+	value_t operator()(const B& b, const bigint& e) const {
 		if(!e.fits_ulong_p()) {
 			cerr << "integer exponent " << e
 				<< " does not fit in unsigned long in " << program[prognr] << endl;
 			exit(exit_failure);
 		}
-		return pow(b, e);
+		unsigned long f = e.get_ui();
+		return pow(b, f);
 	}
-	value_t pow(const mpz_class& b, const mpz_class& e) const {
+	value_t pow(const bigint& b, unsigned long e) const {
 		mpz_class res;
-		mpz_pow_ui(res.get_mpz_t(), b.get_mpz_t(), e.get_ui());
-		return value_t(res);
+		mpz_pow_ui(res.get_mpz_t(), b.to_mpz().get_mpz_t(), e);
+		bigint res2(res);
+		res2.shrink();
+		return value_t(res2);
 	}
-	value_t pow(const mpf_class& b, const mpz_class& e) const {
+	value_t pow(const mpf_class& b, unsigned long e) const {
 		mpf_class res;
-		mpf_pow_ui(res.get_mpf_t(), b.get_mpf_t(), e.get_ui());
+		mpf_pow_ui(res.get_mpf_t(), b.get_mpf_t(), e);
 		return value_t(res);
 	}
 	template<class B>
-	value_t pow(const B&, const mpz_class&) const {
+	value_t pow(const B&, unsigned long) const {
 		cerr << "exponentiation base must be of arithmetic type in "
 			 << program[prognr] << endl;
 		exit(exit_failure);
@@ -420,7 +430,7 @@ value_t evalfun(args_t funargs)
 	string fun = funargs[0].val;
 	if ( fun=="STRLEN" ) {
 		string str = eval(funargs[1]).getstr();
-		return value_t(mpz_class(str.length()));
+		return value_t(bigint(str.length()));
 	}
 
 	cerr << "unknown function '" << fun << "' in "
@@ -480,10 +490,24 @@ value_t eval(const expr& e)
 		     << program[prognr] << endl;
 		exit(exit_failure);
 	}
-	if ( cachable(e) ) e.cache = res;
+	if ( cachable(e) ) {
+		e.cache = res;
+		if ( res.val.which()==value_int ) {
+			bigint x = res;
+			e.cachedLong = x.small;
+		}
+	}
 	return res;
 }
 
+bigint evalAsInt(const expr& e)
+{
+	// LONG_MIN means no small integer is cached for e: do a full eval().
+	if ( e.cachedLong == LONG_MIN ) {
+		return eval(e);
+	}
+	return bigint(e.cachedLong);
+}
+
 bool compare(const expr& cmp)
 {
 	string op = cmp.val;
@@ -531,7 +555,7 @@ bool unique(const args_t& varlist)
 	vector<pair<vector<value_t>,const indexmap::key_type*>> tuples;
 	for(indexmap::iterator it=vars[0]->begin();
 		it!=vars[0]->end(); ++it) {
-		const vector<mpz_class> &index = it->first;
+		const vector<bigint> &index = it->first;
 		vector<value_t> tuple;
 		for(size_t i=0; i<vars.size(); i++) {
 			auto it = vars[i]->find(index);
@@ -814,7 +838,7 @@ void getdecrange(const command& cmd, int *decrange)
 		if ( arg.val.which()!=value_int ) {
 			error((i==0 ? "min":"max")+string("decimal is not an integer"));
 		}
-		mpz_class val = arg;
+		bigint val = arg;
 		if ( val<0 || val>=INT_MAX ) {
 			error(string("the value of ")+(i==0 ? "min":"max")+"decimal is out of range");
 		}
@@ -833,9 +857,9 @@ void gentoken(command cmd, ostream &datastream)
 	else if ( cmd.name()=="NEWLINE" ) datastream << '\n';
 
 	else if ( cmd.name()=="INT" ) {
-		mpz_class lo = eval(cmd.args[0]);
-		mpz_class hi = eval(cmd.args[1]);
-		mpz_class x(lo + gmp_rnd.get_z_range(hi - lo + 1));
+		bigint lo = eval(cmd.args[0]);
+		bigint hi = eval(cmd.args[1]);
+		bigint x(lo.to_mpz() + gmp_rnd.get_z_range((hi - lo + 1).to_mpz()));
 
 		if ( cmd.nargs()>=3 ) {
 			// Check if we have a preset value, then override the
@@ -943,12 +967,16 @@ void checktoken(const command& cmd)
 		// Accepts format (0|-?[1-9][0-9]*), i.e. no leading zero's
 		// and no '-0' accepted.
 		string num;
-		while ( isdigit(data.peek()) || (num.empty() && data.peek()=='-') ) {
+		if ( data.peek()=='-' ) {
+			data.readchar();
+			num += '-';
+		}
+		while ( isdigit(data.peek()) ) {
 			num += data.readchar();
 		}
 
-		mpz_class lo = eval(cmd.args[0]);
-		mpz_class hi = eval(cmd.args[1]);
+		bigint lo = evalAsInt(cmd.args[0]);
+		bigint hi = evalAsInt(cmd.args[1]);
 
 //		debug("%s <= %s <= %s",lo.get_str().c_str(),num.c_str(),hi.get_str().c_str());
 		if ( cmd.nargs()>=3 ) debug("'%s' = '%s'",
@@ -960,7 +988,7 @@ void checktoken(const command& cmd)
 		if ( num.size()>=1 && num[0]=='-' &&
 		     (num.size()==1 || num[1]=='0') ) error("invalid minus sign (-0 not allowed)");
 
-		mpz_class x(num);
+		bigint x(num);
 
 		if ( x<lo || x>hi ) error("value out of range");
 		if ( cmd.nargs()>=3 ) setvar(cmd.args[2],value_t(x));
@@ -1120,11 +1148,11 @@ void checktestdata(ostream &datastream)
 
 			if ( cmd.name()=="REPI" || cmd.name()=="WHILEI" ) {
 				loopvar = 1;
-				setvar(cmd.args[0],value_t(mpz_class(i)));
+				setvar(cmd.args[0],value_t(bigint(i)));
 			}
 
 			if ( cmd.name()=="REP" || cmd.name()=="REPI" ) {
-				mpz_class n = eval(cmd.args[loopvar]);
+				bigint n = eval(cmd.args[loopvar]);
 				if ( !n.fits_ulong_p() ) {
 					cerr << "'" << n << "' does not fit in an unsigned long in "
 						 << program[prognr] << endl;
@@ -1163,7 +1191,7 @@ void checktestdata(ostream &datastream)
 				}
 				checktestdata(datastream);
 				i++;
-				if ( loopvar ) setvar(cmd.args[0],value_t(mpz_class(i)));
+				if ( loopvar ) setvar(cmd.args[0],value_t(bigint(i)));
 			}
 
 			// And skip to end of loop
diff --git a/parsetype.cc b/parsetype.cc
index 67707f3..36c0e13 100644
--- a/parsetype.cc
+++ b/parsetype.cc
@@ -60,15 +60,15 @@ std::ostream& operator<<(std::ostream& os, const none_t&) {
 	return os << "<no value>";
 }
 
-value_t::operator mpz_class() const
+value_t::operator bigint() const
 {
-	return boost::get<mpz_class>(val);
+	return boost::get<bigint>(val);
 }
 
 value_t::operator mpf_class() const
 {
-	if(const mpz_class* p = boost::get<mpz_class>(&val))
-		return *p;
+	if(const bigint* p = boost::get<bigint>(&val))
+		return (*p).to_mpz();
 	return boost::get<mpf_class>(val);
 }
 
diff --git a/parsetype.hpp b/parsetype.hpp
index b84e207..cb85b44 100644
--- a/parsetype.hpp
+++ b/parsetype.hpp
@@ -5,9 +5,12 @@
 #include <vector>
 #include <iostream>
 #include <sstream>
+#include <memory>
 #include <boost/variant.hpp>
 #include <gmpxx.h>
 
+#include "bigint.hpp"
+
 struct parse_t;
 
 typedef std::string val_t;
@@ -24,14 +27,14 @@ struct none_t {};
 std::ostream& operator<<(std::ostream&, const none_t&);
 
 struct value_t {
-	boost::variant<none_t, mpz_class, mpf_class, std::string> val;
+	boost::variant<none_t, bigint, mpf_class, std::string> val;
 
 	value_t(): val(none_t()) {}
-	explicit value_t(mpz_class x): val(x) {}
+	explicit value_t(bigint x): val(x) {}
 	explicit value_t(mpf_class x): val(x) {}
 	explicit value_t(std::string x): val(x) {}
 
-	operator mpz_class() const;
+	operator bigint() const;
 	operator mpf_class() const;
 
 	// This is a member function instead of a casting operator, since
@@ -78,6 +81,7 @@ struct parse_t {
 	  ~      uninitialized object, to detect unset default arguments
 	*/
 
+	mutable long cachedLong = LONG_MIN;
 	mutable checktestdata::value_t cache;
 
 	parse_t(): val(), args(), op('~') {}

From 1ac532999f4156cc710f4ac763ead826d919977e Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 02:59:36 +0200
Subject: [PATCH 4/8] Avoid copying the current command constantly

This cuts ~3s down to ~2s.
---
 libchecktestdata.cc | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 971f7e0..528bec3 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -44,7 +44,7 @@ const int display_before_error = 65;
 const int display_after_error  = 50;
 
 size_t prognr;
-command currcmd;
+const command *currcmd;
 
 gmp_randclass gmp_rnd(gmp_randinit_default);
 
@@ -164,7 +164,7 @@ void readtestdata(istream &in)
 void error(string msg = string())
 {
 	if ( gendata ) {
-		cerr << "ERROR: in command " << currcmd << ": " << msg << endl << endl;
+		cerr << "ERROR: in command " << *currcmd << ": " << msg << endl << endl;
 		throw generate_exception();
 	}
 
@@ -176,7 +176,7 @@ void error(string msg = string())
 		cerr << data.next(display_after_error) << endl << endl;
 
 		cerr << "ERROR: line " << data.line()+1 << " character " << data.lpos()+1;
-		cerr << " of testdata doesn't match " << currcmd;
+		cerr << " of testdata doesn't match " << *currcmd;
 		if ( msg.length()>0 ) cerr << ": " << msg;
 		cerr << endl << endl;
 	}
@@ -849,7 +849,7 @@ void getdecrange(const command& cmd, int *decrange)
 
 void gentoken(command cmd, ostream &datastream)
 {
-	currcmd = cmd;
+	currcmd = &cmd;
 	debug("generating token %s", cmd.name().c_str());
 
 	if ( cmd.name()=="SPACE" ) datastream << ' ';
@@ -951,11 +951,12 @@ void gentoken(command cmd, ostream &datastream)
 		cerr << "unknown command " << program[prognr] << endl;
 		exit(exit_failure);
 	}
+	currcmd = nullptr;
 }
 
 void checktoken(const command& cmd)
 {
-	currcmd = cmd;
+	currcmd = &cmd;
 	debug("checking token %s at %lu,%lu",
 	      cmd.name().c_str(),data.line(),data.lpos());
 
@@ -1116,6 +1117,7 @@ void checktoken(const command& cmd)
 		cerr << "unknown command " << program[prognr] << endl;
 		exit(exit_failure);
 	}
+	currcmd = nullptr;
 }
 
 // This function processes the outer control structure commands both
@@ -1127,7 +1129,7 @@ void checktestdata(ostream &datastream)
 
 	while ( true ) {
 		const command &cmd = program[prognr];
-		currcmd = cmd;
+		currcmd = &cmd;
 
 		if ( cmd.name()=="EOF" ) {
 			if ( gendata ) {
@@ -1246,6 +1248,7 @@ void checktestdata(ostream &datastream)
 			prognr++;
 		}
 	}
+	currcmd = nullptr;
 }
 
 void init_checktestdata(std::istream &progstream, int opt_mask)

From 7dffa4b39330df7e6a733a56f1713671c828c01c Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 14:44:13 +0200
Subject: [PATCH 5/8] Avoid double lookups on the hot path

No effect on my naive benchmark.
---
 libchecktestdata.cc | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 528bec3..92c4161 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -218,8 +218,14 @@ value_t getvar(const expr& var, int use_preset = 0)
 		if ( gendata && preset.count(var.val) && preset[var.val].count(ind) ) {
 			return preset[var.val][ind];
 		}
-		if ( variable.count(var.val) && variable[var.val].count(ind) ) {
-			return variable[var.val][ind];
+		// Avoid double lookups on the hot path
+		auto it = variable.find(var.val);
+		if (it != variable.end()) {
+			auto& map = it->second;
+			auto it2 = map.find(ind);
+			if (it2 != map.end()) {
+				return it2->second;
+			}
 		}
 	}
 	cerr << "variable " << var << " undefined in " << program[prognr] << endl;

From ee6790e6e34ccae0da9767da2c0083ecda47a28d Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 14:56:15 +0200
Subject: [PATCH 6/8] Read input file as binary

CRLF line endings should not validate, or if we think they should, that
handling should happen in NEWLINE. This also makes it possible to do
faster IO, by measuring the size of the input file before reading it.
---
 checktestdata.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/checktestdata.cc b/checktestdata.cc
index 312a2f0..bcf1f8d 100644
--- a/checktestdata.cc
+++ b/checktestdata.cc
@@ -140,7 +140,7 @@ int main(int argc, char **argv)
 	fstream fdata;
 	if ( argc>optind+1 ) {
 		char *datafile = argv[optind+1];
-		ios_base::openmode mode = generate ? ios_base::out|ios_base::trunc : ios_base::in;
+		ios_base::openmode mode = generate ? ios_base::out|ios_base::trunc|ios_base::binary : ios_base::in|ios_base::binary;
 		fdata.open(datafile, mode);
 		if ( fdata.fail() ) {
 			cerr << "Error opening '" << datafile << "'.\n";

From 4e1278e9a7a8840c752a6d3fe6a3e55d0909780f Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 14:58:01 +0200
Subject: [PATCH 7/8] Faster IO

~2s -> ~1.4s
---
 databuffer.hpp      |  5 +++--
 libchecktestdata.cc | 20 ++++++++++++++------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/databuffer.hpp b/databuffer.hpp
index f27f653..1942414 100644
--- a/databuffer.hpp
+++ b/databuffer.hpp
@@ -16,11 +16,12 @@ int isspace_notnewline(char c) { return isspace(c) && c!='\n'; }
 class databuffer {
 private:
 	std::string data;
-	size_t _pos, _line, _lpos;
+	size_t _pos = 0, _line = 0, _lpos = 0;
 
 public:
 	databuffer() {}
-	databuffer(std::string _data): data(_data), _pos(0), _line(0), _lpos(0) {}
+	databuffer(const std::string& _data): data(_data) {}
+	databuffer(std::string&& _data): data(std::move(_data)) {}
 
 	bool eof() const { return _pos >= data.size(); }
 
diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 92c4161..391cff9 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -151,14 +151,35 @@ void readtestdata(istream &in)
 {
 	debug("reading testdata...");
 
+	// Fast path: a seekable stream reports its size, so the whole input can
+	// be read into a pre-sized buffer with a single read().
+	in.seekg(0, std::ios::end);
+	auto size = in.tellg();
+	if (size != -1) {
+		std::string buffer(size, '\0');
+		in.seekg(0);
+		if (size != 0) {
+			in.read(&buffer[0], size);
+		}
+		if (!in.fail()) {
+			data = databuffer(std::move(buffer));
+			return;
+		}
+		cerr << "error reading testdata" << endl;
+		exit(exit_failure);
+	}
+
+	// Slow path: pipes and terminals cannot seek. Clear the error state left
+	// by the failed seek and stream the input until EOF instead.
+	in.clear();
 	stringstream ss;
 	ss << in.rdbuf();
 	if ( in.fail() ) {
 		cerr << "error reading testdata" << endl;
 		exit(exit_failure);
 	}
 
 	data = databuffer(ss.str());
 }
 
 void error(string msg = string())

From 952b07b99a7c3192b428b272fab3f22779a6ac9a Mon Sep 17 00:00:00 2001
From: Simon Lindholm <simon.lindholm10@gmail.com>
Date: Sat, 19 Oct 2019 15:21:49 +0200
Subject: [PATCH 8/8] Make debug into a macro

---
 libchecktestdata.cc | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/libchecktestdata.cc b/libchecktestdata.cc
index 391cff9..b376db3 100644
--- a/libchecktestdata.cc
+++ b/libchecktestdata.cc
@@ -75,28 +75,28 @@ int debugging;
 int quiet;
 int gendata;
 
-void debug(const char *, ...) __attribute__((format (printf, 1, 2)));
+void realdebug(const char *, ...) __attribute__((format (printf, 1, 2)));
 
-void debug(const char *format, ...)
+void realdebug(const char *format, ...)
 {
 	va_list ap;
 	va_start(ap,format);
 
-	if ( debugging ) {
-		fprintf(stderr,"debug: ");
-
-        if ( format!=NULL ) {
-			vfprintf(stderr,format,ap);
-        } else {
-			fprintf(stderr,"<no debug data?>");
-        }
+	fprintf(stderr,"debug: ");
 
-		fprintf(stderr,"\n");
+	if ( format!=NULL ) {
+		vfprintf(stderr,format,ap);
+	} else {
+		fprintf(stderr,"<no debug data?>");
 	}
 
+	fprintf(stderr,"\n");
+
 	va_end(ap);
 }
 
+#define debug(...) do { if ( debugging ) realdebug(__VA_ARGS__); } while (0) /* arguments are only evaluated when 'debugging' is non-zero */
+
 void readprogram(istream &in)
 {
 	debug("parsing script...");