diff --git a/python_env.yaml b/python_env.yaml new file mode 100644 index 0000000..ce7bfa4 --- /dev/null +++ b/python_env.yaml @@ -0,0 +1,166 @@ +{ + "python": "3.13", + "dependencies": [ + "alembic==1.14.1", + "annotated-types==0.7.0", + "antlr4-python3-runtime==4.9.3", + "appnope==0.1.4", + "argcomplete==3.5.3", + "asttokens==3.0.0", + "attrs==25.1.0", + "bandit==1.8.3", + "blinker==1.9.0", + "cachetools==5.5.2", + "certifi==2025.1.31", + "cffi==1.17.1", + "cfgv==3.4.0", + "charset-normalizer==3.4.1", + "click==8.1.8", + "cloudpickle==3.1.1", + "colorama==0.4.6", + "comm==0.2.2", + "commitizen==4.4.1", + "contourpy==1.3.1", + "coverage==7.6.12", + "cycler==0.12.1", + "databricks-sdk==0.44.1", + "debugpy==1.8.12", + "decli==0.6.2", + "decorator==5.2.1", + "deprecated==1.2.18", + "distlib==0.3.9", + "docker==7.1.0", + "execnet==2.1.1", + "executing==2.2.0", + "fastjsonschema==2.21.1", + "filelock==3.17.0", + "flask==3.1.0", + "fonttools==4.56.0", + "gitdb==4.0.12", + "gitpython==3.1.44", + "google-auth==2.38.0", + "graphene==3.4.3", + "graphql-core==3.2.6", + "graphql-relay==3.2.0", + "greenlet==3.1.1", + "gunicorn==23.0.0", + "hatchling==1.27.0", + "identify==2.6.8", + "idna==3.10", + "importlib-metadata==8.6.1", + "iniconfig==2.0.0", + "ipykernel==6.29.5", + "ipython==9.0.0", + "ipython-pygments-lexers==1.1.1", + "itsdangerous==2.2.0", + "jedi==0.19.2", + "jinja2==3.1.5", + "joblib==1.4.2", + "jsonschema==4.23.0", + "jsonschema-specifications==2024.10.1", + "jupyter-client==8.6.3", + "jupyter-core==5.7.2", + "kiwisolver==1.4.8", + "llvmlite==0.44.0", + "loguru==0.7.3", + "mako==1.3.9", + "markdown==3.7", + "markdown-it-py==3.0.0", + "markupsafe==3.0.2", + "matplotlib==3.10.1", + "matplotlib-inline==0.1.7", + "mdurl==0.1.2", + "mlflow==2.20.3", + "mlflow-skinny==2.20.3", + "mypy==1.15.0", + "mypy-extensions==1.0.0", + "narwhals==1.28.0", + "nbformat==5.10.4", + "nest-asyncio==1.6.0", + "nodeenv==1.9.1", + "numba==0.61.0", + "numpy==2.1.3", + "nvidia-ml-py==12.570.86", + "omegaconf==2.3.0", + "opentelemetry-api==1.16.0", + "opentelemetry-sdk==1.16.0", + "opentelemetry-semantic-conventions==0.37b0", + "packaging==24.2", + "pandas==2.2.3", + "pandas-stubs==2.2.3.241126", + "pandera==0.23.0", + "parso==0.8.4", + "pathspec==0.12.1", + "pbr==6.1.1", + "pdoc==15.0.1", + "pexpect==4.9.0", + "pillow==11.1.0", + "platformdirs==4.3.6", + "plotly==6.0.0", + "pluggy==1.5.0", + "plyer==2.1.0", + "pre-commit==4.1.0", + "prompt-toolkit==3.0.50", + "protobuf==5.29.3", + "psutil==7.0.0", + "ptyprocess==0.7.0", + "pure-eval==0.2.3", + "pyarrow==19.0.1", + "pyasn1==0.6.1", + "pyasn1-modules==0.4.1", + "pycparser==2.22", + "pydantic==2.10.6", + "pydantic-core==2.27.2", + "pydantic-settings==2.8.1", + "pygments==2.19.1", + "pynvml==12.0.0", + "pyparsing==3.2.1", + "pytest==8.3.5", + "pytest-cov==6.0.0", + "pytest-mock==3.14.0", + "pytest-xdist==3.6.1", + "python-dateutil==2.9.0.post0", + "python-dotenv==1.0.1", + "pytz==2025.1", + "pyyaml==6.0.2", + "pyzmq==26.2.1", + "questionary==2.1.0", + "referencing==0.36.2", + "requests==2.32.3", + "rich==13.9.4", + "rpds-py==0.23.1", + "rsa==4.9", + "ruff==0.9.9", + "scikit-learn==1.6.1", + "scipy==1.15.2", + "setuptools==75.8.2", + "shap==0.46.0", + "six==1.17.0", + "slicer==0.0.8", + "smmap==5.0.2", + "sqlalchemy==2.0.38", + "sqlparse==0.5.3", + "stack-data==0.6.3", + "stevedore==5.4.1", + "termcolor==2.5.0", + "threadpoolctl==3.5.0", + "tomlkit==0.13.2", + "tornado==6.4.2", + "tqdm==4.67.1", + "traitlets==5.14.3", + "trove-classifiers==2025.3.3.18", + "typeguard==4.4.2", + "types-pytz==2025.1.0.20250204", + "typing-extensions==4.12.2", + "typing-inspect==0.9.0", + "tzdata==2025.1", + "urllib3==2.3.0", + "virtualenv==20.29.2", + "waitress==3.0.2", + "wcwidth==0.2.13", + "werkzeug==3.1.3", + "win32-setctime==1.2.0", + "wrapt==1.17.2", + "zipp==3.21.0" + ] +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7150955 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,164 @@ +# This file was autogenerated by uv via the following command: +# uv export --format=requirements-txt --no-dev --no-hashes --no-editable --no-emit-project --output-file=requirements.txt +alembic==1.14.1 +annotated-types==0.7.0 +antlr4-python3-runtime==4.9.3 +appnope==0.1.4 ; platform_system == 'Darwin' +argcomplete==3.5.3 +asttokens==3.0.0 +attrs==25.1.0 +bandit==1.8.3 +blinker==1.9.0 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 ; implementation_name == 'pypy' +cfgv==3.4.0 +charset-normalizer==3.4.1 +click==8.1.8 +cloudpickle==3.1.1 +colorama==0.4.6 +comm==0.2.2 +commitizen==4.4.1 +contourpy==1.3.1 +coverage==7.6.12 +cycler==0.12.1 +databricks-sdk==0.44.1 +debugpy==1.8.12 +decli==0.6.2 +decorator==5.2.1 +deprecated==1.2.18 +distlib==0.3.9 +docker==7.1.0 +execnet==2.1.1 +executing==2.2.0 +fastjsonschema==2.21.1 +filelock==3.17.0 +flask==3.1.0 +fonttools==4.56.0 +gitdb==4.0.12 +gitpython==3.1.44 +google-auth==2.38.0 +graphene==3.4.3 +graphql-core==3.2.6 +graphql-relay==3.2.0 +greenlet==3.1.1 ; (python_full_version == '3.13.*' and platform_machine == 'AMD64') or (python_full_version == '3.13.*' and platform_machine == 'WIN32') or (python_full_version == '3.13.*' and platform_machine == 'aarch64') or (python_full_version == '3.13.*' and platform_machine == 'amd64') or (python_full_version == '3.13.*' and platform_machine == 'ppc64le') or (python_full_version == '3.13.*' and platform_machine == 'win32') or (python_full_version == '3.13.*' and platform_machine == 'x86_64') +gunicorn==23.0.0 ; platform_system != 'Windows' +hatchling==1.27.0 +identify==2.6.8 +idna==3.10 +importlib-metadata==8.6.1 +iniconfig==2.0.0 +ipykernel==6.29.5 +ipython==9.0.0 +ipython-pygments-lexers==1.1.1 +itsdangerous==2.2.0 +jedi==0.19.2 +jinja2==3.1.5 +joblib==1.4.2 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter-client==8.6.3 +jupyter-core==5.7.2 +kiwisolver==1.4.8 +llvmlite==0.44.0 +loguru==0.7.3 +mako==1.3.9 +markdown==3.7 +markdown-it-py==3.0.0 +markupsafe==3.0.2 +matplotlib==3.10.1 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +mlflow==2.20.3 +mlflow-skinny==2.20.3 +mypy==1.15.0 +mypy-extensions==1.0.0 +narwhals==1.28.0 +nbformat==5.10.4 +nest-asyncio==1.6.0 +nodeenv==1.9.1 +numba==0.61.0 +numpy==2.1.3 +nvidia-ml-py==12.570.86 +omegaconf==2.3.0 +opentelemetry-api==1.16.0 +opentelemetry-sdk==1.16.0 +opentelemetry-semantic-conventions==0.37b0 +packaging==24.2 +pandas==2.2.3 +pandas-stubs==2.2.3.241126 +pandera==0.23.0 +parso==0.8.4 +pathspec==0.12.1 +pbr==6.1.1 +pdoc==15.0.1 +pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' +pillow==11.1.0 +platformdirs==4.3.6 +plotly==6.0.0 +pluggy==1.5.0 +plyer==2.1.0 +pre-commit==4.1.0 +prompt-toolkit==3.0.50 +protobuf==5.29.3 +psutil==7.0.0 +ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' +pure-eval==0.2.3 +pyarrow==19.0.1 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 +pycparser==2.22 ; implementation_name == 'pypy' +pydantic==2.10.6 +pydantic-core==2.27.2 +pydantic-settings==2.8.1 +pygments==2.19.1 +pynvml==12.0.0 +pyparsing==3.2.1 +pytest==8.3.5 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +pytest-xdist==3.6.1 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2025.1 +pywin32==308 ; sys_platform == 'win32' +pyyaml==6.0.2 +pyzmq==26.2.1 +questionary==2.1.0 +referencing==0.36.2 +requests==2.32.3 +rich==13.9.4 +rpds-py==0.23.1 +rsa==4.9 +ruff==0.9.9 +scikit-learn==1.6.1 +scipy==1.15.2 +setuptools==75.8.2 +shap==0.46.0 +six==1.17.0 +slicer==0.0.8 +smmap==5.0.2 +sqlalchemy==2.0.38 +sqlparse==0.5.3 +stack-data==0.6.3 +stevedore==5.4.1 +termcolor==2.5.0 +threadpoolctl==3.5.0 +tomlkit==0.13.2 +tornado==6.4.2 +tqdm==4.67.1 +traitlets==5.14.3 +trove-classifiers==2025.3.3.18 +typeguard==4.4.2 +types-pytz==2025.1.0.20250204 +typing-extensions==4.12.2 +typing-inspect==0.9.0 +tzdata==2025.1 +urllib3==2.3.0 +virtualenv==20.29.2 +waitress==3.0.2 ; platform_system == 'Windows' +wcwidth==0.2.13 +werkzeug==3.1.3 +win32-setctime==1.2.0 ; sys_platform == 'win32' +wrapt==1.17.2 +zipp==3.21.0 diff --git a/src/bikes/io/datasets.py b/src/bikes/io/datasets.py index 677c654..6741576 100644 --- a/src/bikes/io/datasets.py +++ b/src/bikes/io/datasets.py @@ -74,7 +74,7 @@ class ParquetReader(Reader): @T.override def read(self) -> pd.DataFrame: # can't limit rows at read time - data = pd.read_parquet(self.path, dtype_backend="pyarrow") + data = pd.read_parquet(self.path, dtype_backend=self.backend) if self.limit is not None: data = data.head(self.limit) return data