Skip to content

Commit 112bb3b

Browse files
domoritztolleybot
authored and committed
apacheGH-40959: [JS] Store Timestamps in 64 bits (apache#40960)
Merge after apache#40892. This pull request also changes Dates to return timestamps instead of Date instances (similar to Timestamps and for the same reason). * GitHub Issue: apache#40959
1 parent 4e978c4 commit 112bb3b

File tree

10 files changed

+119
-121
lines changed

10 files changed

+119
-121
lines changed

js/src/type.ts

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -333,16 +333,28 @@ export class Decimal extends DataType<Type.Decimal> {
333333
/** @ignore */
334334
export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
335335
/** @ignore */
336-
export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array> }
336+
type DateType = {
337+
[Type.Date]: { TArray: Int32Array | BigInt64Array };
338+
[Type.DateDay]: { TArray: Int32Array };
339+
[Type.DateMillisecond]: { TArray: BigInt64Array };
340+
};
341+
/** @ignore */
342+
export interface Date_<T extends Dates = Dates> extends DataType<T> {
343+
TArray: DateType[T]['TArray'];
344+
TValue: number;
345+
}
337346
/** @ignore */
338347
export class Date_<T extends Dates = Dates> extends DataType<T> {
339348
constructor(public readonly unit: DateUnit) {
340349
super(Type.Date as T);
341350
}
342351
public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; }
352+
353+
public get ArrayType() {
354+
return this.unit === DateUnit.DAY ? Int32Array : BigInt64Array;
355+
}
343356
protected static [Symbol.toStringTag] = ((proto: Date_) => {
344357
(<any>proto).unit = null;
345-
(<any>proto).ArrayType = Int32Array;
346358
return proto[Symbol.toStringTag] = 'Date';
347359
})(Date_.prototype);
348360
}
@@ -417,9 +429,9 @@ export class TimeNanosecond extends Time_<Type.TimeNanosecond> { constructor() {
417429
type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
418430
/** @ignore */
419431
interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
420-
TArray: Int32Array;
432+
TArray: BigInt64Array;
421433
TValue: number;
422-
ArrayType: TypedArrayConstructor<Int32Array>;
434+
ArrayType: BigIntArrayConstructor<BigInt64Array>;
423435
}
424436

425437
/** @ignore */
@@ -432,7 +444,7 @@ class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
432444
protected static [Symbol.toStringTag] = ((proto: Timestamp_) => {
433445
(<any>proto).unit = null;
434446
(<any>proto).timezone = null;
435-
(<any>proto).ArrayType = Int32Array;
447+
(<any>proto).ArrayType = BigInt64Array;
436448
return proto[Symbol.toStringTag] = 'Timestamp';
437449
})(Timestamp_.prototype);
438450
}
@@ -483,7 +495,7 @@ type Durations = Type.Duration | Type.DurationSecond | Type.DurationMillisecond
483495
export interface Duration<T extends Durations = Durations> extends DataType<T> {
484496
TArray: BigInt64Array;
485497
TValue: bigint;
486-
ArrayType: BigInt64Array;
498+
ArrayType: BigIntArrayConstructor<BigInt64Array>;
487499
}
488500

489501
/** @ignore */
@@ -737,8 +749,6 @@ export function strideForType(type: DataType) {
737749
const t: any = type;
738750
switch (type.typeId) {
739751
case Type.Decimal: return (type as Decimal).bitWidth / 32;
740-
case Type.Timestamp: return 2;
741-
case Type.Date: return 1 + (t as Date_).unit;
742752
case Type.Interval: return 1 + (t as Interval_).unit;
743753
// case Type.Int: return 1 + +((t as Int_).bitWidth > 32);
744754
// case Type.Time: return 1 + +((t as Time_).bitWidth > 32);

js/src/util/bigint.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,16 @@ export function bigIntToNumber(number: bigint | number): number {
2424
}
2525
return Number(number);
2626
}
27+
28+
/**
29+
* Divides the bigint number by the divisor and returns the result as a number.
30+
* Dividing bigints always results in bigints so we don't get the remainder.
31+
* This function keeps the fractional part but assumes that the result fits into a number.
32+
*
33+
* @param number The number to divide.
34+
* @param divisor The divisor.
35+
* @returns The result of the division as a number.
36+
*/
37+
export function divideBigInts(number: bigint, divisor: bigint): number {
38+
// Integer quotient plus the remainder expressed as a fraction of the divisor,
// so the fractional part that bigint division discards is kept.
return bigIntToNumber(number / divisor) + bigIntToNumber(number % divisor) / bigIntToNumber(divisor);
39+
}

js/src/visitor/get.ts

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import { Vector } from '../vector.js';
2121
import { Visitor } from '../visitor.js';
2222
import { MapRow } from '../row/map.js';
2323
import { StructRow, StructRowProxy } from '../row/struct.js';
24-
import { bigIntToNumber } from '../util/bigint.js';
24+
import { bigIntToNumber, divideBigInts } from '../util/bigint.js';
2525
import { decodeUtf8 } from '../util/utf8.js';
2626
import { TypeToDataType } from '../interfaces.js';
2727
import { uint16ToFloat64 } from '../util/math.js';
@@ -106,13 +106,6 @@ function wrapGet<T extends DataType>(fn: (data: Data<T>, _1: any) => any) {
106106
}
107107

108108
/** @ignore */
const epochDaysToMs = (data: Int32Array, index: number) => {
    // Scale the stored day count by the number of milliseconds in a day.
    const days = data[index];
    return 86400000 * days;
};
109-
/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0);
110-
/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000);
111-
/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000);
112-
113-
/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs);
114-
/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index));
115-
/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index));
116109

117110
/** @ignore */
118111
const getNull = <T extends Null>(_data: Data<T>, _index: number): T['TValue'] => {
    // Both arguments are ignored; the result is always null.
    return null;
};
@@ -139,9 +132,9 @@ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Floa
139132
type Numeric2X = Int64 | Uint64;
140133

141134
/** @ignore */
142-
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => epochDaysToDate(values, index);
135+
const getDateDay = <T extends DateDay>({ values }: Data<T>, index: number): T['TValue'] => {
    // The day count at `index` is converted to epoch milliseconds (a number, not a Date).
    const epochMs = epochDaysToMs(values, index);
    return epochMs;
};
143136
/** @ignore */
144-
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2);
137+
const getDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number): T['TValue'] => {
    // The slot holds a bigint; hand it back as a plain number.
    const raw = values[index];
    return bigIntToNumber(raw);
};
145138
/** @ignore */
146139
const getNumeric = <T extends Numeric1X>({ stride, values }: Data<T>, index: number): T['TValue'] => {
    // Logical index -> physical offset into the backing array.
    const offset = stride * index;
    return values[offset];
};
147140
/** @ignore */
@@ -178,13 +171,13 @@ const getDate = <T extends Date_>(data: Data<T>, index: number): T['TValue'] =>
178171
);
179172

180173
/** @ignore */
181-
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2);
174+
const getTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number): T['TValue'] => {
    // Stored value is scaled by 1000 after conversion from bigint to number.
    const seconds = bigIntToNumber(values[index]);
    return 1000 * seconds;
};
182175
/** @ignore */
183-
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2);
176+
const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number): T['TValue'] => {
    // No scaling needed: convert the stored bigint straight to a number.
    const raw = values[index];
    return bigIntToNumber(raw);
};
184177
/** @ignore */
185-
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2);
178+
const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    // Divide the stored bigint by 1000, keeping the fractional part.
    const divisor = BigInt(1000);
    return divideBigInts(values[index], divisor);
};
186179
/** @ignore */
187-
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2);
180+
const getTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number): T['TValue'] => {
    // Divide the stored bigint by 1000000, keeping the fractional part.
    const divisor = BigInt(1000000);
    return divideBigInts(values[index], divisor);
};
188181
/* istanbul ignore next */
189182
/** @ignore */
190183
const getTimestamp = <T extends Timestamp>(data: Data<T>, index: number): T['TValue'] => {

js/src/visitor/iterator.ts

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,11 @@ function vectorIterator<T extends DataType>(vector: Vector<T>): IterableIterator
101101

102102
// Fast case, defer to native iterators if possible
103103
if (vector.nullCount === 0 && vector.stride === 1 && (
104-
(type.typeId === Type.Timestamp) ||
105-
(type instanceof Int && (type as Int).bitWidth !== 64) ||
106-
(type instanceof Time && (type as Time).bitWidth !== 64) ||
107-
(type instanceof Float && (type as Float).precision !== Precision.HALF)
104+
// Don't defer to native iterator for timestamps since Numbers are expected
105+
// (DataType.isTimestamp(type)) && type.unit === TimeUnit.MILLISECOND ||
106+
(DataType.isInt(type) && type.bitWidth !== 64) ||
107+
(DataType.isTime(type) && type.bitWidth !== 64) ||
108+
(DataType.isFloat(type) && type.precision !== Precision.HALF)
108109
)) {
109110
return new ChunkedIterator(vector.data.length, (chunkIndex) => {
110111
const data = vector.data[chunkIndex];

js/src/visitor/set.ts

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -109,21 +109,6 @@ function wrapSet<T extends DataType>(fn: (data: Data<T>, _1: any, _2: any) => vo
109109

110110
/** @ignore */
111111
export const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => {
    // Math.floor rounds toward negative infinity, including for negative epochMs.
    const days = Math.floor(epochMs / 86400000);
    data[index] = days;
};
112-
/** @ignore */
113-
export const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => {
114-
data[index] = Math.floor(epochMs % 4294967296);
115-
data[index + 1] = Math.floor(epochMs / 4294967296);
116-
};
117-
/** @ignore */
118-
export const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
119-
data[index] = Math.floor((epochMs * 1000) % 4294967296);
120-
data[index + 1] = Math.floor((epochMs * 1000) / 4294967296);
121-
};
122-
/** @ignore */
123-
export const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
124-
data[index] = Math.floor((epochMs * 1000000) % 4294967296);
125-
data[index + 1] = Math.floor((epochMs * 1000000) / 4294967296);
126-
};
127112

128113
/** @ignore */
129114
export const setVariableWidthBytes = <T extends Int32Array | BigInt64Array>(values: Uint8Array, valueOffsets: T, index: number, value: Uint8Array) => {
@@ -161,7 +146,7 @@ export const setAnyFloat = <T extends Float>(data: Data<T>, index: number, value
161146
/** @ignore */
162147
export const setDateDay = <T extends DateDay>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // valueOf() yields the primitive epoch-millisecond value that is then stored as days.
    const epochMs = value.valueOf();
    setEpochMsToDays(values, index, epochMs);
};
163148
/** @ignore */
164-
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => { setEpochMsToMillisecondsLong(values, index * 2, value.valueOf()); };
149+
export const setDateMillisecond = <T extends DateMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // `value` is epoch milliseconds and is stored as a bigint.
    // BigInt() throws a RangeError for non-integer numbers, so floor first
    // (same rounding direction as setEpochMsToDays) rather than failing on
    // fractional milliseconds.
    values[index] = BigInt(Math.floor(value));
};
165150
/** @ignore */
166151
export const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: Data<T>, index: number, value: T['TValue']): void => {
    // Copy at most `stride` bytes of `value` into the slot starting at stride * index.
    const slice = value.subarray(0, stride);
    const offset = stride * index;
    values.set(slice, offset);
};
167152

@@ -178,13 +163,13 @@ export const setDate = <T extends Date_>(data: Data<T>, index: number, value: T[
178163
};
179164

180165
/** @ignore */
181-
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000);
166+
export const setTimestampSecond = <T extends TimestampSecond>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // `value` is epoch milliseconds; the slot stores it divided by 1000 as a bigint.
    // BigInt() throws a RangeError for non-integer numbers, so floor the quotient
    // (same rounding direction as setEpochMsToDays) instead of failing on any
    // value that is not an exact multiple of 1000.
    values[index] = BigInt(Math.floor(value / 1000));
};
182167
/** @ignore */
183-
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value);
168+
export const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // `value` is epoch milliseconds and is stored as a bigint.
    // The micro/nanosecond getters can return fractional milliseconds, and
    // BigInt() throws a RangeError for non-integer numbers, so floor first.
    values[index] = BigInt(Math.floor(value));
};
184169
/** @ignore */
185-
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value);
170+
export const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // `value` is epoch milliseconds; the slot stores it multiplied by 1000 as a bigint.
    // Floating-point multiplication can miss the integer by one ulp
    // (1.001 * 1000 === 1000.9999999999999) and BigInt() throws a RangeError
    // for non-integer numbers, so round to the nearest integer before converting.
    values[index] = BigInt(Math.round(value * 1000));
};
186171
/** @ignore */
187-
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value);
172+
export const setTimestampNanosecond = <T extends TimestampNanosecond>({ values }: Data<T>, index: number, value: T['TValue']): void => {
    // `value` is epoch milliseconds; the slot stores it multiplied by 1000000 as a bigint.
    // The floating-point product is not guaranteed to be an exact integer and
    // BigInt() throws a RangeError for non-integer numbers, so round to the
    // nearest integer before converting.
    values[index] = BigInt(Math.round(value * 1000000));
};
188173
/* istanbul ignore next */
189174
/** @ignore */
190175
export const setTimestamp = <T extends Timestamp>(data: Data<T>, index: number, value: T['TValue']): void => {

js/test/generate-test-data.ts

Lines changed: 24 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,7 @@ function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, l
402402
const data = type.unit === DateUnit.DAY
403403
? createDate32(length, nullBitmap, values)
404404
: createDate64(length, nullBitmap, values);
405-
return {
406-
values: () => values.map((x) => x == null ? null : new Date(x)),
407-
vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })])
408-
};
405+
return { values: () => values, vector: new Vector([makeData({ type, length, nullCount, nullBitmap, data })]) };
409406
}
410407

411408
function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = Math.trunc(length * 0.2)): GeneratedVector<T> {
@@ -649,6 +646,7 @@ type TypedArrayConstructor =
649646

650647

651648
// Math.random bound to Math so it can be invoked as a bare function.
const rand = Math.random.bind(Math);
649+
const randSign = () => {
    // Yields -1 when the draw exceeds 0.5, otherwise 1.
    if (rand() > 0.5) {
        return -1;
    }
    return 1;
};
652650
const randomBytes = (length: number) => {
    // Delegates to fillRandom with Uint8Array as the array type.
    const bytes = fillRandom(Uint8Array, length);
    return bytes;
};
653651

654652
const memoize = (fn: () => any) => {
    let cached: any;
    // A falsy result is never treated as cached, so fn is called again for it.
    return () => {
        if (!cached) {
            cached = fn();
        }
        return cached;
    };
};
@@ -661,15 +659,15 @@ function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: numbe
661659
const BPE = ArrayType.BYTES_PER_ELEMENT;
662660
const array = new ArrayType(length);
663661
const max = (2 ** (8 * BPE)) - 1;
664-
for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1));
662+
for (let i = -1; ++i < length; array[i] = rand() * max * randSign());
665663
return array as InstanceType<T>;
666664
}
667665

668666
function fillRandomBigInt<T extends (typeof BigInt64Array) | (typeof BigUint64Array)>(ArrayType: T, length: number) {
669667
const BPE = ArrayType.BYTES_PER_ELEMENT;
670668
const array = new ArrayType(length);
671669
const max = (2 ** (8 * BPE)) - 1;
672-
for (let i = -1; ++i < length; array[i] = BigInt(rand() * max * (rand() > 0.5 ? -1 : 1)));
670+
for (let i = -1; ++i < length; array[i] = BigInt(rand() * max * randSign()));
673671
return array as InstanceType<T>;
674672
}
675673

@@ -735,47 +733,44 @@ function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offset
735733
return bytes;
736734
}
737735

736+
/**
737+
* Creates timestamps with the accuracy of days (86400000 milliseconds).
738+
*/
738739
function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
739740
const data = new Int32Array(length).fill(Math.trunc(Date.now() / 86400000));
740741
iterateBitmap(length, nullBitmap, (i, valid) => {
741742
if (!valid) {
742743
data[i] = 0;
743744
values[i] = null;
744745
} else {
745-
data[i] = Math.trunc(data[i] + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)));
746+
data[i] = Math.trunc(data[i] + (rand() * 10000 * randSign()));
746747
values[i] = data[i] * 86400000;
747748
}
748749
});
749750
return data;
750751
}
751752

752753
function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
753-
const data = new Int32Array(length * 2).fill(0);
754754
const data32 = createDate32(length, nullBitmap, values);
755-
iterateBitmap(length, nullBitmap, (i, valid) => {
756-
if (valid) {
757-
const value = data32[i] * 86400000;
758-
const hi = Math.trunc(value / 4294967296);
759-
const lo = Math.trunc(value - 4294967296 * hi);
760-
values[i] = value;
761-
data[i * 2 + 0] = lo;
762-
data[i * 2 + 1] = hi;
763-
}
764-
});
765-
return data;
755+
return BigInt64Array.from(data32, x => BigInt(x * 86400000));
756+
}
757+
758+
function divideBigInts(number: bigint, divisor: bigint): number {
759+
// Integer quotient plus the remainder as a fraction of the divisor, so the
// fractional part dropped by bigint division is kept in the returned number.
return Number(number / divisor) + Number(number % divisor) / Number(divisor);
766760
}
767761

768762
function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
769-
const mult = 86400 * multiple;
770-
const data = new Int32Array(length * 2).fill(0);
771-
const data32 = createDate32(length, nullBitmap, values);
763+
const data = new BigInt64Array(length).fill(0n);
764+
const tenYears = 10 * 365 * 24 * 60 * 60 * multiple;
765+
const now = Math.trunc(Date.now() / 1000 * multiple);
772766
iterateBitmap(length, nullBitmap, (i, valid) => {
773-
if (valid) {
774-
const value = data32[i] * mult;
775-
const hi = Math.trunc(value / 4294967296);
776-
const lo = Math.trunc(value - 4294967296 * hi);
777-
data[i * 2 + 0] = lo;
778-
data[i * 2 + 1] = hi;
767+
if (!valid) {
768+
data[i] = 0n;
769+
values[i] = null;
770+
} else {
771+
const value = BigInt(now + Math.trunc(rand() * randSign() * tenYears));
772+
data[i] = value;
773+
values[i] = divideBigInts(value * 1000n, BigInt(multiple));
779774
}
780775
});
781776
return data;
@@ -788,7 +783,7 @@ function createTime32(length: number, nullBitmap: Uint8Array, multiple: number,
788783
data[i] = 0;
789784
values[i] = null;
790785
} else {
791-
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * (rand() > 0.5 ? -1 : 1);
786+
values[i] = data[i] = ((1000 * rand()) | 0 * multiple) * randSign();
792787
}
793788
});
794789
return data;

0 commit comments

Comments
 (0)