From bf5137a41f646e1f2ec54d07686965b9fb5dc243 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 10:44:46 -0800
Subject: [PATCH 1/5] ensuring boolean columns get cast to float for
 statsmodels

---
 ISLP/models/model_spec.py            | 12 ++++++++----
 tests/models/test_boolean_columns.py | 23 +++++++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 tests/models/test_boolean_columns.py

diff --git a/ISLP/models/model_spec.py b/ISLP/models/model_spec.py
index 07a8b88..983a85d 100644
--- a/ISLP/models/model_spec.py
+++ b/ISLP/models/model_spec.py
@@ -107,7 +107,6 @@ def fit(self, X, y=None):
         cats = self.encoder_.categories_[0]
         column_names = [str(n) for n in cats]
 
-
         if isinstance(X, pd.DataFrame): # expecting a column, we take .iloc[:,0]
             X = X.iloc[:,0]
 
@@ -635,18 +634,23 @@ def build_model(column_info,
         if isinstance(X, (pd.Series, pd.DataFrame)):
             df = pd.concat(dfs, axis=1)
             df.index = X.index
-            return df
         else:
-            return np.column_stack(dfs)
+            return np.column_stack(dfs).astype(float)
     else:  # return a 0 design
         zero = np.zeros(X.shape[0])
         if isinstance(X, (pd.Series, pd.DataFrame)):
             df = pd.DataFrame({'zero': zero})
             df.index = X.index
-            return df
         else:
             return zero
 
+    # if we reach here, we will be returning a DataFrame
+
+    for col in df.columns:
+        if df[col].dtype == bool:
+            df[col] = df[col].astype(float)
+    return df
+
 def derived_feature(variables, encoder=None, name=None, use_transform=True):
     """
     Create a Feature, optionally
diff --git a/tests/models/test_boolean_columns.py b/tests/models/test_boolean_columns.py
new file mode 100644
index 0000000..7b5a429
--- /dev/null
+++ b/tests/models/test_boolean_columns.py
@@ -0,0 +1,23 @@
+import pandas as pd
+import statsmodels.api as sm
+import numpy as np
+from itertools import combinations
+
+from ISLP.models import ModelSpec as MS
+
+rng = np.random.default_rng(0)
+
+df = pd.DataFrame({'A':rng.standard_normal(10),
+                  'B':np.array([1,2,3,2,1,1,1,3,2,1], int),
+                  'C':np.array([True,False,False,True,True]*2, bool),
+                  'D':rng.standard_normal(10)})
+Y = rng.standard_normal(10)
+
+def test_all():
+
+    for i in range(1, 5):
+        for comb in combinations(['A','B','C','D'], i):
+
+            X = MS(comb).fit_transform(df)
+            sm.OLS(Y, X).fit() 
+

From 88316a6a1a0a3660ff0be308ca46d643e5e8da9f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 10:46:02 -0800
Subject: [PATCH 2/5] editing setup files, cleanup

---
 ISLP/info.py   | 132 ++++++++++++++++++++++---------------------------
 pyproject.toml |   6 ++-
 setup.cfg      |   3 ++
 setup.py       |  36 ++++++--------
 4 files changed, 81 insertions(+), 96 deletions(-)

diff --git a/ISLP/info.py b/ISLP/info.py
index 870c1e9..df1505c 100644
--- a/ISLP/info.py
+++ b/ISLP/info.py
@@ -3,81 +3,67 @@
 In setup.py in particular, we exec this file, so it cannot import regreg
 """
 
-# regreg version information.  An empty _version_extra corresponds to a
-# full release.  '.dev' as a _version_extra string means this is a development
-# version
-_version_major = 0
-_version_minor = 2
-_version_micro = 0
-_version_extra = ''
+# CLASSIFIERS = ["Development Status :: 3 - Alpha",
+#                "Environment :: Console",
+#                "Intended Audience :: Science/Research",
+#                "License :: OSI Approved :: BSD License",
+#                "Operating System :: OS Independent",
+#                "Programming Language :: Python",
+#                "Topic :: Scientific/Engineering"]
 
-# Format expected by setup.py and doc/source/conf.py: string of form "X.Y.Z"
-__version__ = "%s.%s.%s%s" % (_version_major,
-                              _version_minor,
-                              _version_micro,
-                              _version_extra)
+# description  = 'Library for ISLP labs'
 
-CLASSIFIERS = ["Development Status :: 3 - Alpha",
-               "Environment :: Console",
-               "Intended Audience :: Science/Research",
-               "License :: OSI Approved :: BSD License",
-               "Operating System :: OS Independent",
-               "Programming Language :: Python",
-               "Topic :: Scientific/Engineering"]
+# # # Note: this long_description is actually a copy/paste from the top-level
+# # # README.txt, so that it shows up nicely on PyPI.  So please remember to edit
+# # # it only in one place and sync it correctly.
+# # long_description = \
+# # """
+# # ============
+# # Fixed lambda
+# # ============
 
-description  = 'Library for ISLP labs'
+# # This mini-package contains a module to perform
+# # a fixed lambda test for the LASSO.
+# # """
 
-# Note: this long_description is actually a copy/paste from the top-level
-# README.txt, so that it shows up nicely on PyPI.  So please remember to edit
-# it only in one place and sync it correctly.
-long_description = \
-"""
-============
-Fixed lambda
-============
-
-This mini-package contains a module to perform
-a fixed lambda test for the LASSO.
-"""
-
-# versions
-NUMPY_MIN_VERSION='1.7.1'
-SCIPY_MIN_VERSION = '0.9'
-PANDAS_MIN_VERSION = "0.20"
-PANDAS_MAX_VERSION = "1.9"
-SKLEARN_MIN_VERSION = '1.2'
-STATSMODELS_MIN_VERSION = '0.13'
-MATPLOTLIB_MIN_VERSION = '3.3.3'
+# # # versions
+# # NUMPY_MIN_VERSION='1.7.1'
+# # SCIPY_MIN_VERSION = '0.9'
+# # PANDAS_MIN_VERSION = "0.20"
+# # PANDAS_MAX_VERSION = "1.9"
+# # SKLEARN_MIN_VERSION = '1.2'
+# # STATSMODELS_MIN_VERSION = '0.13'
+# # MATPLOTLIB_MIN_VERSION = '3.3.3'
 
-NAME                = 'ISLP'
-MAINTAINER          = "Jonathan Taylor"
-MAINTAINER_EMAIL    = ""
-DESCRIPTION         = description
-LONG_DESCRIPTION    = long_description
-URL                 = "http://github.org/intro-stat-learning/ISLP"
-DOWNLOAD_URL        = ""
-LICENSE             = "BSD license"
-CLASSIFIERS         = CLASSIFIERS
-AUTHOR              = "ISLP authors"
-AUTHOR_EMAIL        = ""
-PLATFORMS           = "OS Independent"
-MAJOR               = _version_major
-MINOR               = _version_minor
-MICRO               = _version_micro
-ISRELEASE           = _version_extra == ''
-VERSION             = __version__
-STATUS              = 'alpha'
-PROVIDES            = []
-REQUIRES            = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
-                       "scipy (>=%s)" % SCIPY_MIN_VERSION,
-                       "statsmodels (>=%s)" % STATSMODELS_MIN_VERSION,
-                       "pandas (>=%s)" % PANDAS_MIN_VERSION,
-                       "pandas (<=%s)" % PANDAS_MAX_VERSION,
-                       "sklearn (>=%s)" % SKLEARN_MIN_VERSION,
-                       "lifelines",
-                       "joblib",
-                       "pygam",
-                       "torch",
-                       "torchmetrics",
-                       "pytorch_lightning"
-                       ]
+# NAME                = 'ISLP'
+# MAINTAINER          = "Jonathan Taylor"
+# MAINTAINER_EMAIL    = "jonathan.taylor@stanford.edu"
+# DESCRIPTION         = description
+# LONG_DESCRIPTION    = long_description
+# URL                 = "http://github.org/intro-stat-learning/ISLP"
+# DOWNLOAD_URL        = "https://pypi.org/project/ISLP/"
+# LICENSE             = "BSD license"
+# CLASSIFIERS         = CLASSIFIERS
+# AUTHOR              = "ISLP authors"
+# AUTHOR_EMAIL        = ""
+# PLATFORMS           = "OS Independent"
+# MAJOR               = _version_major
+# MINOR               = _version_minor
+# MICRO               = _version_micro
+# ISRELEASE           = _version_extra == ''
+# VERSION             = __version__
+# STATUS              = 'alpha'
+# PROVIDES            = []
+# REQUIRES            = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
+#                        "scipy (>=%s)" % SCIPY_MIN_VERSION,
+#                        "statsmodels (>=%s)" % STATSMODELS_MIN_VERSION,
+#                        "pandas (>=%s)" % PANDAS_MIN_VERSION,
+#                        "pandas (<=%s)" % PANDAS_MAX_VERSION,
+#                        "sklearn (>=%s)" % SKLEARN_MIN_VERSION,
+#                        "lifelines",
+#                        "joblib",
+#                        "pygam",
+#                        "torch",
+#                        "torchmetrics",
+#                        "pytorch_lightning"
+#                        ]
diff --git a/pyproject.toml b/pyproject.toml
index b94fdf7..e0359ad 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ISLP"
-version = "0.3.21"
+version = "0.3.22"
 dependencies = ["numpy>=1.7.1,<1.25", # max version for numba
                "scipy>=0.9",
                "pandas>=0.20,<=1.9",
@@ -37,6 +37,7 @@ classifiers = ["Development Status :: 3 - Alpha",
                "Programming Language :: Python",
                "Topic :: Scientific/Engineering"
 	       ]
+
 [project.urls]  # Optional
 "Homepage" = "https://github.com/intro-stat-learning/ISLP"
 "Bug Reports" = "https://github.com/intro-stat-learning/ISLP/issues"
@@ -44,6 +45,9 @@ classifiers = ["Development Status :: 3 - Alpha",
 "Say Thanks!" = "http://saythanks.io/to/example"
 "Source" = "https://github.com/pypa/sampleproject/"
   
+[project.optional-dependencies]
+doc = ['Sphinx>=3.0']
+
 [build-system]
 requires = ["setuptools>=42",
             "wheel",
diff --git a/setup.cfg b/setup.cfg
index 14d7ccd..4a6f58e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,3 +4,6 @@ style = pep440
 versionfile_source = ISLP/_version.py
 tag_prefix =
 parentdir_prefix = ISLP-
+
+[metadata]
+license_files = LICENSE.txt
\ No newline at end of file
diff --git a/setup.py b/setup.py
index c30a6d7..19a91a3 100755
--- a/setup.py
+++ b/setup.py
@@ -55,14 +55,6 @@ def read_vars_from(ver_file):
 # is missing.  Otherwise the monkeypatched Extension will change .pyx
 # filenames to .c filenames, and we probably don't have the .c files.
 sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex'))
-# Set setuptools extra arguments
-extra_setuptools_args = dict(
-    tests_require=['nose'],
-    test_suite='nose.collector',
-    zip_safe=False,
-    extras_require = dict(
-        doc=['Sphinx>=1.0'],
-        test=['nose>=0.10.1']))
 
 # Define extensions
 EXTS = []
@@ -74,20 +66,20 @@ def read_vars_from(ver_file):
 long_description = open('README.md', 'rt', encoding='utf-8').read()
 
 def main(**extra_args):
-    setup(name=info.NAME,
-          maintainer=info.MAINTAINER,
-          maintainer_email=info.MAINTAINER_EMAIL,
-          description=info.DESCRIPTION,
-          url=info.URL,
-          download_url=info.DOWNLOAD_URL,
-          license=info.LICENSE,
-          classifiers=info.CLASSIFIERS,
-          author=info.AUTHOR,
-          author_email=info.AUTHOR_EMAIL,
-          platforms=info.PLATFORMS,
+    setup(#name=info.NAME,
+          #maintainer=info.MAINTAINER,
+          #maintainer_email=info.MAINTAINER_EMAIL,
+          #description=info.DESCRIPTION,
+          #url=info.URL,
+          #download_url=info.DOWNLOAD_URL,
+          #license=info.LICENSE,
+          #classifiers=info.CLASSIFIERS,
+          #author=info.AUTHOR,
+          #author_email=info.AUTHOR_EMAIL,
+          #platforms=info.PLATFORMS,
           version=versioneer.get_version(),
-          requires=info.REQUIRES,
-          provides=info.PROVIDES,
+          #requires=info.REQUIRES,
+          #provides=info.PROVIDES,
           packages     = ['ISLP',
                           'ISLP.models',
                           'ISLP.models',
@@ -108,4 +100,4 @@ def main(**extra_args):
 #simple way to test what setup will do
 #python setup.py install --prefix=/tmp
 if __name__ == "__main__":
-    main(**extra_setuptools_args)
+    main()

From 4366af94681a84db7e26d1670462ed202d8ddd3a Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 11:06:20 -0800
Subject: [PATCH 3/5] commenting out info

---
 setup.py | 85 ++++++++++++++++++++++++--------------------------------
 1 file changed, 36 insertions(+), 49 deletions(-)

diff --git a/setup.py b/setup.py
index 19a91a3..8fba84c 100755
--- a/setup.py
+++ b/setup.py
@@ -20,41 +20,41 @@
 from distutils.core import setup
 from distutils.extension import Extension
 
-# Get various parameters for this version, stored in ISLP/info.py
-
-class Bunch(object):
-    def __init__(self, vars):
-        for key, name in vars.items():
-            if key.startswith('__'):
-                continue
-            self.__dict__[key] = name
-
-def read_vars_from(ver_file):
-    """ Read variables from Python text file
-
-    Parameters
-    ----------
-    ver_file : str
-        Filename of file to read
-
-    Returns
-    -------
-    info_vars : Bunch instance
-        Bunch object where variables read from `ver_file` appear as
-        attributes
-    """
-    # Use exec for compabibility with Python 3
-    ns = {}
-    with open(ver_file, 'rt') as fobj:
-        exec(fobj.read(), ns)
-    return Bunch(ns)
-
-info = read_vars_from(pjoin('ISLP', 'info.py'))
-
-# Try to preempt setuptools monkeypatching of Extension handling when Pyrex
-# is missing.  Otherwise the monkeypatched Extension will change .pyx
-# filenames to .c filenames, and we probably don't have the .c files.
-sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex'))
+# # Get various parameters for this version, stored in ISLP/info.py
+
+# class Bunch(object):
+#     def __init__(self, vars):
+#         for key, name in vars.items():
+#             if key.startswith('__'):
+#                 continue
+#             self.__dict__[key] = name
+
+# def read_vars_from(ver_file):
+#     """ Read variables from Python text file
+
+#     Parameters
+#     ----------
+#     ver_file : str
+#         Filename of file to read
+
+#     Returns
+#     -------
+#     info_vars : Bunch instance
+#         Bunch object where variables read from `ver_file` appear as
+#         attributes
+#     """
+#     # Use exec for compabibility with Python 3
+#     ns = {}
+#     with open(ver_file, 'rt') as fobj:
+#         exec(fobj.read(), ns)
+#     return Bunch(ns)
+
+# info = read_vars_from(pjoin('ISLP', 'info.py'))
+
+# # Try to preempt setuptools monkeypatching of Extension handling when Pyrex
+# # is missing.  Otherwise the monkeypatched Extension will change .pyx
+# # filenames to .c filenames, and we probably don't have the .c files.
+# sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex'))
 
 # Define extensions
 EXTS = []
@@ -66,20 +66,7 @@ def read_vars_from(ver_file):
 long_description = open('README.md', 'rt', encoding='utf-8').read()
 
 def main(**extra_args):
-    setup(#name=info.NAME,
-          #maintainer=info.MAINTAINER,
-          #maintainer_email=info.MAINTAINER_EMAIL,
-          #description=info.DESCRIPTION,
-          #url=info.URL,
-          #download_url=info.DOWNLOAD_URL,
-          #license=info.LICENSE,
-          #classifiers=info.CLASSIFIERS,
-          #author=info.AUTHOR,
-          #author_email=info.AUTHOR_EMAIL,
-          #platforms=info.PLATFORMS,
-          version=versioneer.get_version(),
-          #requires=info.REQUIRES,
-          #provides=info.PROVIDES,
+    setup(version=versioneer.get_version(),
           packages     = ['ISLP',
                           'ISLP.models',
                           'ISLP.models',

From 4d5eb34ce18f7b811a2e092d13ce51af2a399164 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 11:07:49 -0800
Subject: [PATCH 4/5] cleanup of setup.py

---
 ISLP/info.py | 69 ----------------------------------------------------
 setup.py     | 37 +---------------------------
 2 files changed, 1 insertion(+), 105 deletions(-)
 delete mode 100644 ISLP/info.py

diff --git a/ISLP/info.py b/ISLP/info.py
deleted file mode 100644
index df1505c..0000000
--- a/ISLP/info.py
+++ /dev/null
@@ -1,69 +0,0 @@
-""" This file contains defines parameters for regreg that we use to fill
-settings in setup.py, the regreg top-level docstring, and for building the docs.
-In setup.py in particular, we exec this file, so it cannot import regreg
-"""
-
-# CLASSIFIERS = ["Development Status :: 3 - Alpha",
-#                "Environment :: Console",
-#                "Intended Audience :: Science/Research",
-#                "License :: OSI Approved :: BSD License",
-#                "Operating System :: OS Independent",
-#                "Programming Language :: Python",
-#                "Topic :: Scientific/Engineering"]
-
-# description  = 'Library for ISLP labs'
-
-# # # Note: this long_description is actually a copy/paste from the top-level
-# # # README.txt, so that it shows up nicely on PyPI.  So please remember to edit
-# # # it only in one place and sync it correctly.
-# # long_description = \
-# # """
-# # ============
-# # Fixed lambda
-# # ============
-
-# # This mini-package contains a module to perform
-# # a fixed lambda test for the LASSO.
-# # """
-
-# # # versions
-# # NUMPY_MIN_VERSION='1.7.1'
-# # SCIPY_MIN_VERSION = '0.9'
-# # PANDAS_MIN_VERSION = "0.20"
-# # PANDAS_MAX_VERSION = "1.9"
-# # SKLEARN_MIN_VERSION = '1.2'
-# # STATSMODELS_MIN_VERSION = '0.13'
-# # MATPLOTLIB_MIN_VERSION = '3.3.3'
-
-# NAME                = 'ISLP'
-# MAINTAINER          = "Jonathan Taylor"
-# MAINTAINER_EMAIL    = "jonathan.taylor@stanford.edu"
-# DESCRIPTION         = description
-# LONG_DESCRIPTION    = long_description
-# URL                 = "http://github.org/intro-stat-learning/ISLP"
-# DOWNLOAD_URL        = "https://pypi.org/project/ISLP/"
-# LICENSE             = "BSD license"
-# CLASSIFIERS         = CLASSIFIERS
-# AUTHOR              = "ISLP authors"
-# AUTHOR_EMAIL        = ""
-# PLATFORMS           = "OS Independent"
-# MAJOR               = _version_major
-# MINOR               = _version_minor
-# MICRO               = _version_micro
-# ISRELEASE           = _version_extra == ''
-# VERSION             = __version__
-# STATUS              = 'alpha'
-# PROVIDES            = []
-# REQUIRES            = ["numpy (>=%s)" % NUMPY_MIN_VERSION,
-#                        "scipy (>=%s)" % SCIPY_MIN_VERSION,
-#                        "statsmodels (>=%s)" % STATSMODELS_MIN_VERSION,
-#                        "pandas (>=%s)" % PANDAS_MIN_VERSION,
-#                        "pandas (<=%s)" % PANDAS_MAX_VERSION,
-#                        "sklearn (>=%s)" % SKLEARN_MIN_VERSION,
-#                        "lifelines",
-#                        "joblib",
-#                        "pygam",
-#                        "torch",
-#                        "torchmetrics",
-#                        "pytorch_lightning"
-#                        ]
diff --git a/setup.py b/setup.py
index 8fba84c..4834240 100755
--- a/setup.py
+++ b/setup.py
@@ -20,46 +20,11 @@
 from distutils.core import setup
 from distutils.extension import Extension
 
-# # Get various parameters for this version, stored in ISLP/info.py
-
-# class Bunch(object):
-#     def __init__(self, vars):
-#         for key, name in vars.items():
-#             if key.startswith('__'):
-#                 continue
-#             self.__dict__[key] = name
-
-# def read_vars_from(ver_file):
-#     """ Read variables from Python text file
-
-#     Parameters
-#     ----------
-#     ver_file : str
-#         Filename of file to read
-
-#     Returns
-#     -------
-#     info_vars : Bunch instance
-#         Bunch object where variables read from `ver_file` appear as
-#         attributes
-#     """
-#     # Use exec for compabibility with Python 3
-#     ns = {}
-#     with open(ver_file, 'rt') as fobj:
-#         exec(fobj.read(), ns)
-#     return Bunch(ns)
-
-# info = read_vars_from(pjoin('ISLP', 'info.py'))
-
-# # Try to preempt setuptools monkeypatching of Extension handling when Pyrex
-# # is missing.  Otherwise the monkeypatched Extension will change .pyx
-# # filenames to .c filenames, and we probably don't have the .c files.
-# sys.path.insert(0, pjoin(dirname(__file__), 'fake_pyrex'))
 
 # Define extensions
 EXTS = []
 
-cmdclass=versioneer.get_cmdclass()
+cmdclass = versioneer.get_cmdclass()
 
 # get long_description
 

From 6f718c89883c89357b60e1032fa8cc00817fafbe Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Thu, 18 Jan 2024 11:42:06 -0800
Subject: [PATCH 5/5] use iloc for float conversion in case there are duplicate
 columns

---
 ISLP/models/model_spec.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/ISLP/models/model_spec.py b/ISLP/models/model_spec.py
index 983a85d..ce09e01 100644
--- a/ISLP/models/model_spec.py
+++ b/ISLP/models/model_spec.py
@@ -632,7 +632,7 @@ def build_model(column_info,
 
     if len(dfs):
         if isinstance(X, (pd.Series, pd.DataFrame)):
-            df = pd.concat(dfs, axis=1)
+            df = pd.concat(dfs, axis='columns')
             df.index = X.index
         else:
             return np.column_stack(dfs).astype(float)
@@ -645,10 +645,11 @@ def build_model(column_info,
             return zero
 
     # if we reach here, we will be returning a DataFrame
-
-    for col in df.columns:
-        if df[col].dtype == bool:
-            df[col] = df[col].astype(float)
+    # cast boolean columns to float (columns of other dtypes are left as-is)
+    
+    for i, _ in enumerate(df.columns):
+        if df.iloc[:,i].dtype == bool:
+            df.iloc[:,i] = df.iloc[:,i].astype(float)
     return df
 
 def derived_feature(variables, encoder=None, name=None, use_transform=True):