|
20 | 20 | datetime,
|
21 | 21 | timedelta,
|
22 | 22 | )
|
23 |
| -import struct |
| 23 | +import sys |
24 | 24 | from typing import cast
|
25 | 25 |
|
26 | 26 | import numpy as np
|
|
42 | 42 | )
|
43 | 43 |
|
44 | 44 | from pandas.io.common import get_handle
|
45 |
| -from pandas.io.sas._sas import Parser |
| 45 | +from pandas.io.sas._sas import ( |
| 46 | + Parser, |
| 47 | + read_double_with_byteswap, |
| 48 | + read_float_with_byteswap, |
| 49 | + read_uint16_with_byteswap, |
| 50 | + read_uint32_with_byteswap, |
| 51 | + read_uint64_with_byteswap, |
| 52 | +) |
46 | 53 | import pandas.io.sas.sas_constants as const
|
47 | 54 | from pandas.io.sas.sasreader import ReaderBase
|
48 | 55 |
|
@@ -259,8 +266,10 @@ def _get_properties(self) -> None:
|
259 | 266 | buf = self._read_bytes(const.endianness_offset, const.endianness_length)
|
260 | 267 | if buf == b"\x01":
|
261 | 268 | self.byte_order = "<"
|
| 269 | + self.need_byteswap = sys.byteorder == "big" |
262 | 270 | else:
|
263 | 271 | self.byte_order = ">"
|
| 272 | + self.need_byteswap = sys.byteorder == "little" |
264 | 273 |
|
265 | 274 | # Get encoding information
|
266 | 275 | buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
|
@@ -345,22 +354,37 @@ def __next__(self) -> DataFrame:
|
345 | 354 |
|
346 | 355 | # Read a single float of the given width (4 or 8).
|
347 | 356 | def _read_float(self, offset: int, width: int):
|
348 |
| - if width not in (4, 8): |
| 357 | + if width == 4: |
| 358 | + return read_float_with_byteswap( |
| 359 | + self._read_bytes(offset, 4), self.need_byteswap |
| 360 | + ) |
| 361 | + elif width == 8: |
| 362 | + return read_double_with_byteswap( |
| 363 | + self._read_bytes(offset, 8), self.need_byteswap |
| 364 | + ) |
| 365 | + else: |
349 | 366 | self.close()
|
350 | 367 | raise ValueError("invalid float width")
|
351 |
| - buf = self._read_bytes(offset, width) |
352 |
| - fd = "f" if width == 4 else "d" |
353 |
| - return struct.unpack(self.byte_order + fd, buf)[0] |
354 | 368 |
|
355 | 369 | # Read a single unsigned integer of the given width (1, 2, 4 or 8).
|
356 | 370 | def _read_int(self, offset: int, width: int) -> int:
|
357 |
| - if width not in (1, 2, 4, 8): |
| 371 | + if width == 1: |
| 372 | + return self._read_bytes(offset, 1)[0] |
| 373 | + elif width == 2: |
| 374 | + return read_uint16_with_byteswap( |
| 375 | + self._read_bytes(offset, 2), self.need_byteswap |
| 376 | + ) |
| 377 | + elif width == 4: |
| 378 | + return read_uint32_with_byteswap( |
| 379 | + self._read_bytes(offset, 4), self.need_byteswap |
| 380 | + ) |
| 381 | + elif width == 8: |
| 382 | + return read_uint64_with_byteswap( |
| 383 | + self._read_bytes(offset, 8), self.need_byteswap |
| 384 | + ) |
| 385 | + else: |
358 | 386 | self.close()
|
359 | 387 | raise ValueError("invalid int width")
|
360 |
| - buf = self._read_bytes(offset, width) |
361 |
| - it = {1: "b", 2: "h", 4: "l", 8: "q"}[width] |
362 |
| - iv = struct.unpack(self.byte_order + it, buf)[0] |
363 |
| - return iv |
364 | 388 |
|
365 | 389 | def _read_bytes(self, offset: int, length: int):
|
366 | 390 | if self._cached_page is None:
|
|
0 commit comments