From 53db9cd0c2c39bb9bc48fb3af986042d20c2ef67 Mon Sep 17 00:00:00 2001 From: Andrea Catelli Date: Sat, 11 Jan 2025 20:35:06 +0100 Subject: [PATCH 1/3] Updating README.md as first commit --- README.md | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/README.md b/README.md index 4e72dbe..960f73c 100644 --- a/README.md +++ b/README.md @@ -1 +1,33 @@ # PorQua + +**PorQua** is an advanced Python library designed for portfolio optimization and index replication, part of the [GeomScale project](https://github.com/GeomScale). The library includes tools and models tailored for financial data analysis, providing efficient solutions for portfolio management and asset selection. + +## Features + +- **Portfolio Optimization**: Implement algorithms for creating efficient and optimal portfolios. +- **Index Replication**: Build portfolios to closely track specific indices. +- **Machine Learning Models**: Use data-driven models for financial analysis, asset selection, and universe selection. + +## Installation + +Clone the repository: + +```sh +git clone https://github.com/GeomScale/PorQua.git +cd PorQua +``` + + +## Example Notebooks + +- [example/backtest.ipynb](example/backtest.ipynb): Example of running a backtest. +- [example/compare_solver.ipynb](example/compare_solver.ipynb): Example of comparing solvers. +- [example/index_replication.ipynb](example/index_replication.ipynb): Example of index replication. +- [example/lstm.ipynb](example/lstm.ipynb): Example of using LSTM for universe selection. +- [example/ml.ipynb](example/ml.ipynb): Example of machine learning models. +- [example/ordinal_regression.ipynb](example/ordinal_regression.ipynb): Example of ordinal regression. + + +## License + +This project is licensed under the GNU LGPL.3 License - see the [LICENSE](LICENSE) file for details. \ No newline at end of file From d9c3f41dac190f4c5282a0af9b8c626837cc2d4e Mon Sep 17 00:00:00 2001 From: Andrea Catelli Date: Fri, 14 Feb 2025 14:08:29 +0100 Subject: [PATCH 2/3] Added docstrings and improved documentation. Related to #6 --- src/backtest.py | 229 +++++++++++++++------- src/builders.py | 360 +++++++++++++++++++++++----------- src/constraints.py | 189 +++++++++++++++++- src/covariance.py | 141 ++++++++++++-- src/data_loader.py | 91 +++++++-- src/helper_functions.py | 162 +++++++++++++--- src/mean_estimation.py | 67 +++++-- src/optimization.py | 405 +++++++++++++++++++++++++++++++++------ src/optimization_data.py | 69 ++++++- src/portfolio.py | 340 ++++++++++++++++++++++++++++---- src/qp_problems.py | 262 +++++++++++++++++-------- src/selection.py | 146 ++++++++++---- 12 files changed, 1988 insertions(+), 473 deletions(-) diff --git a/src/backtest.py b/src/backtest.py index 8855473..cc44761 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -8,14 +8,10 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ ### CLASSES BacktestData, BacktestService, Backtest ############################################################################ - - - import os from typing import Optional import pickle @@ -30,16 +26,38 @@ from builders import SelectionItemBuilder, OptimizationItemBuilder +class BacktestData: + """ + Represents the data required for backtesting. + This class acts as a container for any data-related requirements for backtesting. 
- -class BacktestData(): + Attributes + ---------- + None + """ def __init__(self): pass -class BacktestService(): +class BacktestService: + """ + Manages backtesting services, including selection, optimization, and settings. + + Attributes + ---------- + data : BacktestData + The backtest data. + selection_item_builders : dict[str, SelectionItemBuilder] + Builders for selection items. + optimization_item_builders : dict[str, OptimizationItemBuilder] + Builders for optimization items. + optimization : Optional[Optimization] + The optimization instance. Defaults to `EmptyOptimization`. + settings : Optional[dict] + Additional settings for the backtest. + """ def __init__(self, data: BacktestData, @@ -48,16 +66,34 @@ def __init__(self, optimization: Optional[Optimization] = EmptyOptimization(), settings: Optional[dict] = None, **kwargs) -> None: + """ + Initializes the BacktestService class. + + Parameters + ---------- + data : BacktestData + The backtest data. + selection_item_builders : dict + Dictionary of selection item builders. + optimization_item_builders : dict + Dictionary of optimization item builders. + optimization : Optional[Optimization], optional + Optimization instance, by default EmptyOptimization(). + settings : Optional[dict], optional + Additional settings, by default None. + **kwargs : + Additional settings. + """ self.data = data self.optimization = optimization self.selection_item_builders = selection_item_builders self.optimization_item_builders = optimization_item_builders self.settings = settings if settings is not None else {} self.settings.update(kwargs) - # Initialize the selection and optimization data self.selection = Selection() self.optimization_data = OptimizationData([]) + @property def data(self): return self._data @@ -126,33 +162,66 @@ def settings(self, value): raise TypeError("Expected a dictionary for 'settings'") self._settings = value + + def build_selection(self, rebdate: str) -> None: - # Loop over the selection_item_builders items + """ + Builds the selection process for a given rebalancing date. + + Parameters + ---------- + rebdate : str + The rebalancing date. + """ for key, item_builder in self.selection_item_builders.items(): item_builder.arguments['item_name'] = key item_builder(self, rebdate) return None def build_optimization(self, rebdate: str) -> None: - - # Initialize the optimization constraints - self.optimization.constraints = Constraints(selection = self.selection.selected) - - # Loop over the optimization_item_builders items + """ + Builds the optimization problem for a given rebalancing date. + + Parameters + ---------- + rebdate : str + The rebalancing date. + """ + self.optimization.constraints = Constraints(selection=self.selection.selected) for item_builder in self.optimization_item_builders.values(): item_builder(self, rebdate) return None def prepare_rebalancing(self, rebalancing_date: str) -> None: - self.build_selection(rebdate = rebalancing_date) - self.build_optimization(rebdate = rebalancing_date) + """ + Prepares the selection and optimization for a rebalancing date. + + Parameters + ---------- + rebalancing_date : str + The rebalancing date. + """ + self.build_selection(rebdate=rebalancing_date) + self.build_optimization(rebdate=rebalancing_date) return None - class Backtest: + """ + Performs portfolio backtesting, including strategy building and output storage. + + Attributes + ---------- + strategy : Strategy + The backtesting strategy. + output : dict + The backtesting output. 
+ """ def __init__(self) -> None: + """ + Initializes the Backtest class. + """ self._strategy = Strategy([]) self._output = {} @@ -163,81 +232,93 @@ def strategy(self): @property def output(self): return self._output - - def append_output(self, - date_key = None, - output_key = None, - value = None): + + + def append_output(self, date_key=None, output_key=None, value=None): + """ + Appends output data for a specific date and output key. + + Parameters + ---------- + date_key : str, optional + The date key for the output. + output_key : str, optional + The output key. + value : any, optional + The value to append. + """ if value is None: return True - if date_key in self.output.keys(): if output_key in self.output[date_key].keys(): - raise Warning(f"Output key '{output_key}' for date key '{date_key}' \ - already exists and will be overwritten.") + raise Warning(f"Output key '{output_key}' for date key '{date_key}' already exists and will be overwritten.") self.output[date_key][output_key] = value else: self.output[date_key] = {} self.output[date_key].update({output_key: value}) - return True - def rebalance(self, - bs: BacktestService, - rebalancing_date: str) -> None: - - # Prepare the rebalancing, i.e., the optimization problem - bs.prepare_rebalancing(rebalancing_date = rebalancing_date) - - # Solve the optimization problem + def rebalance(self, bs: BacktestService, rebalancing_date: str) -> None: + """ + Performs portfolio rebalancing for a given date. + + Parameters + ---------- + bs : BacktestService + The backtesting service instance. + rebalancing_date : str + The rebalancing date. + """ + bs.prepare_rebalancing(rebalancing_date=rebalancing_date) try: - bs.optimization.set_objective(optimization_data = bs.optimization_data) + bs.optimization.set_objective(optimization_data=bs.optimization_data) bs.optimization.solve() except Exception as error: raise RuntimeError(error) - return None def run(self, bs: BacktestService) -> None: - + """ + Executes the backtest for all rebalancing dates. + + Parameters + ---------- + bs : BacktestService + The backtesting service instance. + """ for rebalancing_date in bs.settings['rebdates']: - if not bs.settings.get('quiet'): print(f'Rebalancing date: {rebalancing_date}') - - self.rebalance(bs = bs, - rebalancing_date = rebalancing_date) - - # Append portfolio to strategy + self.rebalance(bs=bs, rebalancing_date=rebalancing_date) weights = bs.optimization.results['weights'] - portfolio = Portfolio(rebalancing_date = rebalancing_date, weights = weights) + portfolio = Portfolio(rebalancing_date=rebalancing_date, weights=weights) self.strategy.portfolios.append(portfolio) - - # Append stuff to output if a custom append function is provided append_fun = bs.settings.get('append_fun') if append_fun is not None: - append_fun(backtest = self, - bs = bs, - rebalancing_date = rebalancing_date, - what = bs.settings.get('append_fun_args')) - + append_fun(backtest=self, bs=bs, rebalancing_date=rebalancing_date, what=bs.settings.get('append_fun_args')) return None - def save(self, - filename: str, - path: Optional[str] = None) -> None: + def save(self, filename: str, path: Optional[str] = None) -> None: + """ + Saves the backtest object to a file. + + Parameters + ---------- + filename : str + The filename for the output file. + path : Optional[str], optional + The path where the file should be saved. 
+ """ try: if path is not None and filename is not None: - filename = os.path.join(path, filename) #// alternatively, use pathlib package + filename = os.path.join(path, filename) with open(filename, "wb") as f: pickle.dump(self, f, protocol=pickle.HIGHEST_PROTOCOL) except Exception as ex: print("Error during pickling object:", ex) - return None - # -------------------------------------------------------------------------- # Helper functions # -------------------------------------------------------------------------- @@ -246,25 +327,39 @@ def append_custom(backtest: Backtest, bs: BacktestService, rebalancing_date: Optional[str] = None, what: Optional[list] = None) -> None: - + """ + Appends custom data to the backtest output. + + Parameters + ---------- + backtest : Backtest + The backtest instance. + bs : BacktestService + The backtesting service instance. + rebalancing_date : Optional[str], optional + The rebalancing date. + what : Optional[list], optional + List of output keys to append. + """ if what is None: what = ['w_dict', 'objective'] - for key in what: if key == 'w_dict': w_dict = bs.optimization.results['w_dict'] for key in w_dict.keys(): - weights = w_dict[key] + weights = w_dict[key] if hasattr(weights, 'to_dict'): weights = weights.to_dict() - portfolio = Portfolio(rebalancing_date = rebalancing_date, weights = weights) - backtest.append_output(date_key = rebalancing_date, - output_key = f'weights_{key}', - value = pd.Series(portfolio.weights)) + portfolio = Portfolio(rebalancing_date=rebalancing_date, weights=weights) + backtest.append_output(date_key=rebalancing_date, + output_key=f'weights_{key}', + value=pd.Series(portfolio.weights)) else: if not key in bs.optimization.results.keys(): continue - backtest.append_output(date_key = rebalancing_date, - output_key = key, - value = bs.optimization.results[key]) + backtest.append_output(date_key=rebalancing_date, + output_key=key, + value=bs.optimization.results[key]) return None + + diff --git a/src/builders.py b/src/builders.py index e68bda7..604e19b 100644 --- a/src/builders.py +++ b/src/builders.py @@ -7,141 +7,240 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ -### CLASS BacktestItemBuilde AND BACKTEST ITEM BUILDER FUNCTIONS +### CLASS BacktestItemBuilder AND BACKTEST ITEM BUILDER FUNCTIONS ############################################################################ - -# Notice: -# The logic underlying the approach to build backtest items favours flexibility over safety ! - - - from typing import Any - import pandas as pd import numpy as np from abc import ABC, abstractmethod +class BacktestItemBuilder(ABC): + """ + Abstract base class for building backtest items. + This class provides a flexible framework for defining how items are built + during backtesting, favoring flexibility over safety. - -# -------------------------------------------------------------------------- -# Classes -# -------------------------------------------------------------------------- - -class BacktestItemBuilder(ABC): + Attributes + ---------- + arguments : dict[str, Any] + A dictionary of arguments used for item construction. + """ def __init__(self, **kwargs): + """ + Initializes the BacktestItemBuilder with provided arguments. + + Parameters + ---------- + **kwargs : + Key-value pairs of arguments for item building. + """ self._arguments = {} self._arguments.update(kwargs) @property def arguments(self) -> dict[str, Any]: + """ + Returns the arguments dictionary. 
+ + Returns + ------- + dict[str, Any] + A dictionary of arguments. + """ return self._arguments @arguments.setter def arguments(self, value: dict[str, Any]) -> None: + """ + Sets the arguments dictionary. + + Parameters + ---------- + value : dict[str, Any] + A new dictionary of arguments. + """ self._arguments = value @abstractmethod def __call__(self, service, rebdate: str) -> None: + """ + Abstract method to build the backtest item. + + Parameters + ---------- + service : Any + The backtest service instance. + rebdate : str + The rebalancing date. + """ raise NotImplementedError("Method '__call__' must be implemented in derived class.") - class SelectionItemBuilder(BacktestItemBuilder): + """ + Builds selection items for backtesting based on a custom function. - def __call__(self, bs, rebdate: str) -> None: - - ''' - Build selection item from a custom function. - ''' + Methods + ------- + __call__(bs, rebdate) + Constructs and adds a selection item to the backtest service. + """ + def __call__(self, bs, rebdate: str) -> None: + """ + Constructs and adds a selection item to the backtest service. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + + Raises + ------ + ValueError + If the custom function 'bibfn' is not defined or callable. + """ selection_item_builder_fn = self.arguments.get('bibfn') if selection_item_builder_fn is None or not callable(selection_item_builder_fn): raise ValueError('bibfn is not defined or not callable.') - item_value = selection_item_builder_fn(bs = bs, rebdate = rebdate, **self.arguments) + item_value = selection_item_builder_fn(bs=bs, rebdate=rebdate, **self.arguments) item_name = self.arguments.get('item_name') # Add selection item - bs.selection.add_filtered(filter_name = item_name, value = item_value) + bs.selection.add_filtered(filter_name=item_name, value=item_value) return None - class OptimizationItemBuilder(BacktestItemBuilder): + """ + Builds optimization items for backtesting based on a custom function. - def __call__(self, bs, rebdate: str) -> None: - - ''' - Build optimization item from a custom function. - ''' + Methods + ------- + __call__(bs, rebdate) + Constructs optimization data or constraints for the backtest service. + """ + def __call__(self, bs, rebdate: str) -> None: + """ + Constructs optimization data or constraints for the backtest service. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + + Raises + ------ + ValueError + If the custom function 'bibfn' is not defined or callable. + """ optimization_item_builder_fn = self.arguments.get('bibfn') if optimization_item_builder_fn is None or not callable(optimization_item_builder_fn): raise ValueError('bibfn is not defined or not callable.') - # Call the backtest item builder function. Notice that the function returns None, - # it modifies the backtest service in place. - optimization_item_builder_fn(bs = bs, rebdate = rebdate, **self.arguments) + # Call the custom function to modify the backtest service in place + optimization_item_builder_fn(bs=bs, rebdate=rebdate, **self.arguments) return None - - # -------------------------------------------------------------------------- # Backtest item builder functions (bibfn) - Selection # -------------------------------------------------------------------------- def bibfn_selection_min_volume(bs, rebdate: str, **kwargs) -> pd.Series: - - # Arguments + """ + Filters assets based on minimum trading volume. 
+ + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters including: + - 'width': The rolling window width (default: 365). + - 'agg_fn': Aggregation function applied to volume data (default: np.median). + - 'min_volume': Minimum volume threshold (default: 500,000). + + Returns + ------- + pd.Series + A series of selected assets meeting the volume threshold. + """ width = kwargs.get('width', 365) agg_fn = kwargs.get('agg_fn', np.median) min_volume = kwargs.get('min_volume', 500_000) - # Volume data X_vol = ( - bs.data.get_volume_series(end_date = rebdate, width = width) - .fillna(0).apply(agg_fn, axis = 0) + bs.data.get_volume_series(end_date=rebdate, width=width) + .fillna(0).apply(agg_fn, axis=0) ) - # Filtering ids = [col for col in X_vol.columns if agg_fn(X_vol[col]) >= min_volume] - # Output - series = pd.Series(np.ones(len(ids)), index = ids, name = 'minimum_volume') - bs.rebalancing.selection.add_filtered(filter_name = series.name, - value = series) + series = pd.Series(np.ones(len(ids)), index=ids, name='minimum_volume') + bs.rebalancing.selection.add_filtered(filter_name=series.name, value=series) return None def bibfn_selection_data(bs, rebdate: str, **kwargs) -> pd.Series: - - ''' - Backtest item builder function for defining the selection - based on all available return series. - ''' - + """ + Selects all available assets from the return series. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters. + + Returns + ------- + pd.Series + A binary series selecting all available assets. + """ data = bs.data.get('return_series') if data is None: raise ValueError('Return series data is missing.') - return pd.Series(np.ones(data.shape[1], dtype = int), index = data.columns, name = 'binary') - + return pd.Series(np.ones(data.shape[1], dtype=int), index=data.columns, name='binary') -def bibfn_selection_ltr(bs, rebdate: str, **kwargs) -> pd.DataFrame: - ''' - Backtest item builder function for defining the selection - based on a Learn-to-Rank model. - ''' +def bibfn_selection_ltr(bs, rebdate: str, **kwargs) -> pd.DataFrame: + """ + Defines the selection based on a Learn-to-Rank model. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters, including 'params_xgb' for XGBoost training. + + Returns + ------- + pd.DataFrame + DataFrame with scores and binary selections. + """ + # Arguments params_xgb = kwargs.get('params_xgb') @@ -180,108 +279,147 @@ def bibfn_selection_ltr(bs, rebdate: str, **kwargs) -> pd.DataFrame: }, index = scores.index) - # -------------------------------------------------------------------------- # Backtest item builder functions (bibfn) - Optimization data # -------------------------------------------------------------------------- def bibfn_return_series(bs, rebdate: str, **kwargs) -> None: - - ''' - Backtest item builder function for return series. - Prepares an element of bs.optimization_data with - single stock return series that are used for optimization. - ''' - - # Arguments + """ + Prepares single stock return series for optimization. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters, including: + - 'width': int + The rolling window size. 
+ + Raises + ------ + ValueError + If the return series data is missing. + + Returns + ------- + None + """ width = kwargs.get('width') - # Selection ids = bs.selection.selected - - # Data data = bs.data.get('return_series') if data is None: raise ValueError('Return series data is missing.') - # Subset return series return_series = data[data.index <= rebdate].tail(width)[ids] - - # Remove weekends return_series = return_series[return_series.index.dayofweek < 5] - # Output bs.optimization_data['return_series'] = return_series return None - def bibfn_bm_series(bs, rebdate: str, **kwargs) -> None: - - ''' - Backtest item builder function for benchmark series. - Prepares an element of bs.optimization_data with - the benchmark series that is be used for optimization. - ''' - - # Arguments + """ + Prepares benchmark series for optimization. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters, including: + - 'width': int + The rolling window width. + - 'align': bool + Whether to align the benchmark series with return series. + + Raises + ------ + ValueError + If the benchmark return series data is missing. + + Returns + ------- + None + """ width = kwargs.get('width') align = kwargs.get('align') - # Data data = bs.data.get('bm_series') if data is None: raise ValueError('Benchmark return series data is missing.') - # Subset the benchmark series bm_series = data[data.index <= rebdate].tail(width) - - # Remove weekends bm_series = bm_series[bm_series.index.dayofweek < 5] - # Append the benchmark series to the optimization data bs.optimization_data['bm_series'] = bm_series - # Align the benchmark series to the return series if align: bs.optimization_data.align_dates( - variable_names = ['bm_series', 'return_series'], - dropna = True + variable_names=['bm_series', 'return_series'], + dropna=True ) return None + + # -------------------------------------------------------------------------- # Backtest item builder functions - Optimization constraints # -------------------------------------------------------------------------- def bibfn_budget_constraint(bs, rebdate: str, **kwargs) -> None: - - ''' - Backtest item builder function for setting the budget constraint. - ''' - - # Arguments + """ + Sets the budget constraint for the optimization. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters, including 'budget' (default: 1). + + Returns + ------- + None + """ budget = kwargs.get('budget', 1) - - # Add constraint - bs.optimization.constraints.add_budget(rhs = budget, sense = '=') + bs.optimization.constraints.add_budget(rhs=budget, sense='=') return None - def bibfn_box_constraints(bs, rebdate: str, **kwargs) -> None: - - ''' - Backtest item builder function for setting the box constraints. - ''' - - # Arguments + """ + Sets the box constraints for the optimization. + + Parameters + ---------- + bs : Any + The backtest service instance. + rebdate : str + The rebalancing date. + **kwargs : + Additional parameters, including: + - 'lower': Lower bound (default: 0). + - 'upper': Upper bound (default: 1). + - 'box_type': Type of box constraint (default: 'LongOnly'). 
+ + Returns + ------- + None + """ lower = kwargs.get('lower', 0) upper = kwargs.get('upper', 1) box_type = kwargs.get('box_type', 'LongOnly') - - # Constraints - bs.optimization.constraints.add_box(box_type = box_type, - lower = lower, - upper = upper) + bs.optimization.constraints.add_box(box_type=box_type, lower=lower, upper=upper) return None + + + + diff --git a/src/constraints.py b/src/constraints.py index dc2b21c..d7272d3 100644 --- a/src/constraints.py +++ b/src/constraints.py @@ -18,11 +18,38 @@ - - class Constraints: + """ + Handles constraints for portfolio optimization. + + Attributes + ---------- + selection : list[str] + List of asset names or identifiers to which constraints are applied. + budget : dict + Budget constraint configuration, including matrix, sense, and RHS values. + box : dict + Box constraint configuration, including type, lower, and upper bounds. + linear : dict + Linear constraint configuration, including matrix, sense, and RHS values. + l1 : dict + L1 constraint configuration, including custom parameters. + """ def __init__(self, selection="NA") -> None: + """ + Initializes the Constraints object. + + Parameters + ---------- + selection : list[str] + A list of asset identifiers. Defaults to "NA". + + Raises + ------ + ValueError + If the selection is not a list of strings. + """ if not all(isinstance(item, str) for item in selection): raise ValueError("argument 'selection' has to be a character vector.") @@ -34,9 +61,33 @@ def __init__(self, selection="NA") -> None: return None def __str__(self) -> str: + """ + Returns a string representation of the Constraints object. + + Returns + ------- + str + String representation of the Constraints object. + """ return ' '.join(f'\n{key}:\n\n{vars(self)[key]}\n' for key in vars(self).keys()) def add_budget(self, rhs=1, sense='=') -> None: + """ + Adds a budget constraint to the optimization. + + Parameters + ---------- + rhs : float, optional + Right-hand side of the constraint, by default 1. + sense : str, optional + Constraint sense, by default '='. + + Raises + ------ + Warning + If an existing budget constraint is overwritten. + """ + if self.budget.get('rhs') is not None: warnings.warn("Existing budget constraint is overwritten\n") @@ -50,6 +101,24 @@ def add_box(self, box_type="LongOnly", lower=None, upper=None) -> None: + """ + Adds a box constraint to the optimization. + + Parameters + ---------- + box_type : str, optional + Type of box constraint, by default "LongOnly". + lower : float or pd.Series, optional + Lower bound for the constraints, by default None. + upper : float or pd.Series, optional + Upper bound for the constraints, by default None. + + Raises + ------ + ValueError + If any lower bound exceeds its corresponding upper bound. + """ + boxcon = box_constraint(box_type, lower, upper) if np.isscalar(boxcon['lower']): @@ -69,6 +138,27 @@ def add_linear(self, sense: str = '=', rhs=None, name: str = None) -> None: + """ + Adds a linear constraint to the optimization. + + Parameters + ---------- + Amat : pd.DataFrame, optional + Coefficient matrix for the linear constraints, by default None. + a_values : pd.Series, optional + Coefficient values for a single linear constraint, by default None. + sense : str, optional + Constraint sense, by default '='. + rhs : float or pd.Series, optional + Right-hand side of the constraints, by default None. + name : str, optional + Name for the constraint, by default None. + + Raises + ------ + ValueError + If both Amat and a_values are not provided. 
+ """ if Amat is None: if a_values is None: raise ValueError("Either 'Amat' or 'a_values' must be provided.") @@ -99,6 +189,28 @@ def add_l1(self, rhs=None, x0=None, *args, **kwargs) -> None: + """ + Adds an L1 constraint to the optimization. + + Parameters + ---------- + name : str + Name of the L1 constraint. + rhs : float + Right-hand side of the constraint. + x0 : dict, optional + Initial values for the constraint, by default None. + *args : + Additional positional arguments. + **kwargs : + Additional keyword arguments. + + Raises + ------ + TypeError + If rhs is not provided. + """ + if rhs is None: raise TypeError("argument 'rhs' is required.") con = {'rhs': rhs} @@ -112,6 +224,21 @@ def add_l1(self, return None def to_GhAb(self, lbub_to_G: bool = False) -> Dict[str, pd.DataFrame]: + + """ + Converts constraints to G, h, A, and b matrices for optimization solvers. + + Parameters + ---------- + lbub_to_G : bool, optional + Whether to include lower and upper bounds in G, by default False. + + Returns + ------- + dict + A dictionary containing G, h, A, and b matrices. + """ + A = None b = None G = None @@ -173,11 +300,48 @@ def to_GhAb(self, lbub_to_G: bool = False) -> Dict[str, pd.DataFrame]: # -------------------------------------------------------------------------- def match_arg(x, lst): + """ + Finds and returns the first match of an element in a list. + + Parameters + ---------- + x : str + Element to search for. + lst : list[str] + List to search within. + + Returns + ------- + str + The first matching element in the list. + """ return [el for el in lst if x in el][0] def box_constraint(box_type="LongOnly", lower=None, upper=None) -> dict: + """ + Creates a box constraint configuration. + + Parameters + ---------- + box_type : str, optional + Type of box constraint (e.g., "LongOnly", "LongShort", "Unbounded"), by default "LongOnly". + lower : float, optional + Lower bound, by default None. + upper : float, optional + Upper bound, by default None. + + Returns + ------- + dict + A dictionary containing box constraint settings. + + Raises + ------ + ValueError + If bounds are inconsistent with the box type. + """ box_type = match_arg(box_type, ["LongOnly", "LongShort", "Unbounded"]) if box_type == "Unbounded": @@ -208,6 +372,27 @@ def linear_constraint(Amat=None, rhs=float("inf"), index_or_name=None, a_values=None) -> dict: + """ + Creates a linear constraint configuration. + + Parameters + ---------- + Amat : pd.DataFrame, optional + Coefficient matrix for the linear constraints, by default None. + sense : str, optional + Constraint sense (e.g., "=", "<=", ">="), by default "=". + rhs : float, optional + Right-hand side of the constraint, by default infinity. + index_or_name : str, optional + Index or name of the constraint, by default None. + a_values : pd.Series, optional + Coefficient values for the constraint, by default None. + + Returns + ------- + dict + A dictionary containing the linear constraint configuration. + """ ans = {'Amat': Amat, 'sense': sense, 'rhs': rhs} diff --git a/src/covariance.py b/src/covariance.py index a17d9a0..fe5830e 100644 --- a/src/covariance.py +++ b/src/covariance.py @@ -8,37 +8,106 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - - - import pandas as pd import numpy as np from helper_functions import isPD, nearestPD - class CovarianceSpecification(dict): + """ + Configuration class for specifying covariance estimation parameters. + + Attributes + ---------- + method : str + The method to estimate the covariance matrix. Default is 'pearson'. 
+ check_positive_definite : bool + Whether to ensure the covariance matrix is positive definite. Default is True. + """ def __init__(self, *args, **kwargs): + """ + Initializes a CovarianceSpecification instance with default values if not provided. + + Parameters + ---------- + *args : + Positional arguments passed to the dictionary initialization. + **kwargs : + Keyword arguments for covariance specification. + """ super(CovarianceSpecification, self).__init__(*args, **kwargs) self.__dict__ = self # Add default values - if self.get('method') is None: self['method'] = 'pearson' - if self.get('check_positive_definite') is None: self['check_positive_definite'] = True + if self.get('method') is None: + self['method'] = 'pearson' + if self.get('check_positive_definite') is None: + self['check_positive_definite'] = True + class Covariance: + """ + Class to estimate covariance matrices based on a specified configuration. + + Attributes + ---------- + spec : CovarianceSpecification + Configuration specifying the estimation parameters and settings. + """ def __init__(self, spec: CovarianceSpecification = None, *args, **kwargs): + """ + Initializes a Covariance instance with a given specification or default values. + + Parameters + ---------- + spec : CovarianceSpecification, optional + A pre-defined specification for covariance estimation. If None, uses default settings. + *args : + Additional positional arguments to define the specification. + **kwargs : + Additional keyword arguments to define the specification. + """ self.spec = CovarianceSpecification(*args, **kwargs) if spec is None else spec + + def set_ctrl(self, *args, **kwargs) -> None: + """ + Updates the covariance estimation specification. + + Parameters + ---------- + *args : + Positional arguments for updating the specification. + **kwargs : + Keyword arguments for updating the specification. + """ self.spec = CovarianceSpecification(*args, **kwargs) - return None - def estimate(self, X: pd.DataFrame) -> pd.DataFrame: + + def estimate(self, X: pd.DataFrame) -> pd.DataFrame: + """ + Estimates the covariance matrix based on the provided data and specified method. + + Parameters + ---------- + X : pd.DataFrame + The input data for which the covariance matrix is to be estimated. + + Returns + ------- + pd.DataFrame + The estimated covariance matrix. + + Raises + ------ + NotImplementedError + If the specified method is not implemented. + """ estimation_method = self.spec['method'] if estimation_method == 'pearson': covmat = cov_pearson(X) @@ -56,20 +125,60 @@ def estimate(self, X: pd.DataFrame) -> pd.DataFrame: return covmat - - # -------------------------------------------------------------------------- # Functions # -------------------------------------------------------------------------- -def cov_pearson(X): +def cov_pearson(X: pd.DataFrame) -> pd.DataFrame: + """ + Computes the Pearson covariance matrix. + + Parameters + ---------- + X : pd.DataFrame + Input data. + + Returns + ------- + pd.DataFrame + Pearson covariance matrix. + """ return X.cov() -def cov_duv(X): + +def cov_duv(X: pd.DataFrame) -> np.ndarray: + """ + Returns a diagonal unit variance covariance matrix. + + Parameters + ---------- + X : pd.DataFrame + Input data. + + Returns + ------- + np.ndarray + Diagonal covariance matrix with ones on the diagonal. 
+ """ return np.identity(X.shape[1]) -def cov_linear_shrinkage(X, lambda_covmat_regularization = None): - # Applies a linear shrinkage (in the form of L2 penalty in the objective function) to a given covariance matrix. + +def cov_linear_shrinkage(X: pd.DataFrame, lambda_covmat_regularization: float = None) -> pd.DataFrame: + """ + Applies linear shrinkage to the covariance matrix. + + Parameters + ---------- + X : pd.DataFrame + Input data. + lambda_covmat_regularization : float, optional + Regularization parameter. Default is None, which assumes no shrinkage. + + Returns + ------- + pd.DataFrame + Regularized covariance matrix. + """ if lambda_covmat_regularization is None or np.isnan(lambda_covmat_regularization) or lambda_covmat_regularization < 0: lambda_covmat_regularization = 0 sigmat = X.cov() @@ -80,5 +189,7 @@ def cov_linear_shrinkage(X, lambda_covmat_regularization = None): corrs = [] for k in range(1, d): corrs.extend(np.diag(corrMat, k)) - sigmat = pd.DataFrame(sigmat.to_numpy() + lambda_covmat_regularization * np.mean(sig**2) * np.eye(d), columns=sigmat.columns, index=sigmat.index) + sigmat = pd.DataFrame(sigmat.to_numpy() + lambda_covmat_regularization * np.mean(sig**2) * np.eye(d), + columns=sigmat.columns, index=sigmat.index) return sigmat + diff --git a/src/data_loader.py b/src/data_loader.py index 100aa05..de96939 100644 --- a/src/data_loader.py +++ b/src/data_loader.py @@ -1,3 +1,5 @@ + + ''' PorQua : a python library for portfolio optimization and backtesting PorQua is part of GeomScale project @@ -8,15 +10,37 @@ ''' - import os from typing import Optional, Union, Any import pandas as pd import pickle -def load_pickle(filename: str, - path: Optional[str] = None) -> Union[Any, None]: +def load_pickle(filename: str, path: Optional[str] = None) -> Union[Any, None]: + + """ + Loads a Python object from a pickle file. + + Parameters + ---------- + filename : str + The name of the pickle file to load. + path : Optional[str], optional + The directory path to the file. If not provided, only the filename is used. + + Returns + ------- + Any + The object loaded from the pickle file, or None if an error occurs. + + Raises + ------ + EOFError + If the file is empty or corrupted. + Exception + For other errors encountered during the unpickling process. + """ + if path is not None: filename = os.path.join(path, filename) try: @@ -30,29 +54,60 @@ def load_pickle(filename: str, return None + def load_data_msci(path: str = None, n: int = 24) -> dict[str, pd.DataFrame]: - '''Loads MSCI daily returns data from 1999-01-01 to 2023-04-18''' + """ + Loads MSCI daily returns data and benchmark series. + + Parameters + ---------- + path : str, optional + The directory containing the data files. If None, defaults to the `data` folder + in the current working directory. + n : int, optional + The number of MSCI country indices to load. Default is 24. + + Returns + ------- + dict[str, pd.DataFrame] + A dictionary with the following keys: + - 'return_series': A DataFrame containing the MSCI country index return series. + - 'bm_series': A DataFrame containing the MSCI World index return series. + + Raises + ------ + FileNotFoundError + If the required data files are not found in the specified path. + ValueError + If the data files have unexpected formatting or contents. 
+ """ path = os.path.join(os.getcwd(), f'data{os.sep}') if path is None else path - # Load msci country index return series - df = pd.read_csv(os.path.join(path, 'msci_country_indices.csv'), + # Load MSCI country index return series + try: + df = pd.read_csv(os.path.join(path, 'msci_country_indices.csv'), + sep=';', + index_col=0, + header=0, + parse_dates=True) + df.index = pd.to_datetime(df.index, format='%d/%m/%Y') + series_id = df.columns[0:n] + X = df[series_id] + except Exception as e: + raise FileNotFoundError(f"Error loading MSCI country indices data: {e}") + + # Load MSCI World index return series + try: + y = pd.read_csv(f'{path}NDDLWI.csv', sep=';', index_col=0, header=0, parse_dates=True) - df.index = pd.to_datetime(df.index, format='%d/%m/%Y') - series_id = df.columns[0:n] - X = df[series_id] - - # Load msci world index return series - y = pd.read_csv(f'{path}NDDLWI.csv', - sep=';', - index_col=0, - header=0, - parse_dates=True) - - y.index = pd.to_datetime(y.index, format='%d/%m/%Y') + y.index = pd.to_datetime(y.index, format='%d/%m/%Y') + except Exception as e: + raise FileNotFoundError(f"Error loading MSCI World index data: {e}") return {'return_series': X, 'bm_series': y} + diff --git a/src/helper_functions.py b/src/helper_functions.py index 868deab..ad698d9 100644 --- a/src/helper_functions.py +++ b/src/helper_functions.py @@ -8,13 +8,10 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ ### HELPER FUNCTIONS ############################################################################ - - from typing import Optional import pandas as pd import numpy as np @@ -23,19 +20,29 @@ from portfolio import Portfolio, Strategy +def nearestPD(A: np.ndarray) -> np.ndarray: + """ + Finds the nearest positive-definite matrix to the input matrix. + Parameters + ---------- + A : np.ndarray + The input square matrix. -def nearestPD(A): - """Find the nearest positive-definite matrix to input + Returns + ------- + np.ndarray + The nearest positive-definite matrix. A Python/Numpy port of John D'Errico's `nearestSPD` MATLAB code [1], which credits [2]. The code below is written by Cyril. + References + ---------- [1] https://www.mathworks.com/matlabcentral/fileexchange/42885-nearestspd - - [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite - matrix" (1988): https://doi.org/10.1016/0024-3795(88)90223-6 + [2] N.J. Higham, "Computing a nearest symmetric positive semidefinite matrix" (1988): + https://doi.org/10.1016/0024-3795(88)90223-6 """ B = (A + A.T) / 2 @@ -57,33 +64,88 @@ def nearestPD(A): return A3 +def isPD(B: np.ndarray) -> bool: + + """ + Checks if a matrix is positive-definite using Cholesky decomposition. + + Parameters + ---------- + B : np.ndarray + The input square matrix. + + Returns + ------- + bool + True if the matrix is positive-definite, False otherwise. + """ -def isPD(B): - """Returns true when input is positive-definite, via Cholesky""" try: _ = np.linalg.cholesky(B) return True except np.linalg.LinAlgError: return False -def serialize_solution(name_suffix, solution, runtime): +def serialize_solution(name_suffix: str, solution, runtime: float) -> None: + + """ + Serializes a solution to a pickle file. + + Parameters + ---------- + name_suffix : str + The suffix for the filename. + solution : object + The optimization solution object containing attributes like x and obj. + runtime : float + The runtime of the solution process. 
+ """ + result = { - 'solution' : solution.x, - 'objective' : solution.obj, - 'primal_residual' :solution.primal_residual(), - 'dual_residual' : solution.dual_residual(), - 'duality_gap' : solution.duality_gap(), - 'runtime' : runtime + 'solution': solution.x, + 'objective': solution.obj, + 'primal_residual': solution.primal_residual(), + 'dual_residual': solution.dual_residual(), + 'duality_gap': solution.duality_gap(), + 'runtime': runtime } with open(f'{name_suffix}.pickle', 'wb') as handle: pickle.dump(result, handle, protocol=pickle.HIGHEST_PROTOCOL) def to_numpy(data): - return None if data is None else data.to_numpy() if hasattr(data, 'to_numpy') else data + """ + Converts input data to a NumPy array if possible. + + Parameters + ---------- + data : Any + The input data, which could be a DataFrame or array-like. + + Returns + ------- + np.ndarray or None + The converted NumPy array or None if the input is None. + """ + + return None if data is None else data.to_numpy() if hasattr(data, 'to_numpy') else data def output_to_strategies(output: dict) -> dict[int, Strategy]: + + """ + Converts output data into a dictionary of Strategy objects. + + Parameters + ---------- + output : dict + Dictionary containing portfolio weights for different rebalancing dates. + + Returns + ------- + dict[int, Strategy] + A dictionary of Strategy objects indexed by quintiles. + """ N = len(output[list(output.keys())[0]]) strategy_dict = {} @@ -92,38 +154,80 @@ def output_to_strategies(output: dict) -> dict[int, Strategy]: for rebdate in output.keys(): weights = output[rebdate][f'weights_{i+1}'] if hasattr(weights, 'to_dict'): - weights = weights.to_dict() + weights = weights.to_dict() portfolio = Portfolio(rebdate, weights) strategy_dict[f'q{i+1}'].portfolios.append(portfolio) return strategy_dict - -#------------------- Machine learning helpers ------------------- +# ------------------- Machine learning helpers ------------------- def calculate_rmse(y_true, y_pred): + """ - Calculate the Root Mean Squared Error (RMSE) + Calculates the Root Mean Squared Error (RMSE). + + Parameters + ---------- + y_true : array-like + The true target values. + y_pred : array-like + The predicted values. + + Returns + ------- + float + The RMSE value. """ + rmse = np.sqrt(np.mean((np.array(y_true) - np.array(y_pred.values)) ** 2)) return rmse - def calculate_mape(y_true, y_pred): + """ - Calculate the Mean Absolute Percentage Error (MAPE) % + Calculates the Mean Absolute Percentage Error (MAPE). + + Parameters + ---------- + y_true : array-like + The true target values. + y_pred : array-like + The predicted values. + + Returns + ------- + float + The MAPE value as a percentage. """ + y_pred, y_true = np.array(y_pred), np.array(y_true) mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100 return mape -def show_result(predictions, y_test, y_actual, method = None): - print(f'RMSE of linear regression: {calculate_rmse(y_test, predictions)}') - print(f'MAPE of linear regression: {calculate_mape(y_test, predictions)}') +def show_result(predictions, y_test, y_actual, method=None): + + """ + Displays results including RMSE, MAPE, and a plot of predictions vs actual values. + + Parameters + ---------- + predictions : array-like + The predicted values. + y_test : array-like + The test target values. + y_actual : array-like + The actual target values. + method : str, optional + The name of the method used for prediction, by default None. 
+ """ + + print(f'RMSE of {method}: {calculate_rmse(y_test, predictions)}') + print(f'MAPE of {method}: {calculate_mape(y_test, predictions)}') - plt.plot(y_actual, color = 'cyan') - plt.plot(predictions, color = 'green') + plt.plot(y_actual, color='cyan') + plt.plot(predictions, color='green') plt.legend(["True values", "Prediction"]) plt.title(method) plt.show() diff --git a/src/mean_estimation.py b/src/mean_estimation.py index d29ab48..2af1170 100644 --- a/src/mean_estimation.py +++ b/src/mean_estimation.py @@ -1,3 +1,4 @@ + ''' PorQua : a python library for portfolio optimization and backtesting PorQua is part of GeomScale project @@ -8,21 +9,37 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ ### MEAN ESTIMATOR ############################################################################ - import pandas as pd import numpy as np +class MeanEstimator: + """ + Estimates the expected return of financial assets using various methods. - - -class MeanEstimator(): + Attributes + ---------- + spec : dict + Specification for the estimation method, including: + - 'method': The method of estimation (default: 'geometric'). + - 'scalefactor': Scaling factor for the estimated return. + - 'n_mom': Number of moments used for estimation. + - 'n_rev': Number of moments to reverse. + """ def __init__(self, **kwargs) -> None: + """ + Initializes the MeanEstimator with default or user-provided specifications. + + Parameters + ---------- + **kwargs : + Keyword arguments to customize the specification, such as 'method', 'scalefactor', + 'n_mom', and 'n_rev'. + """ self.spec = { 'method': 'geometric', 'scalefactor': 1, @@ -32,17 +49,45 @@ def __init__(self, **kwargs) -> None: self.spec.update(kwargs) def estimate(self, X: pd.DataFrame) -> pd.DataFrame or pd.Series: + """ + Estimates the mean return using the specified method. + + Parameters + ---------- + X : pd.DataFrame + Input data containing historical returns. + + Returns + ------- + pd.DataFrame or pd.Series + The estimated mean return. + + Raises + ------ + AttributeError + If the specified method is not implemented. + """ fun = getattr(self, f'estimate_{self.spec["method"]}') - mu = fun(X = X) + mu = fun(X=X) return mu - def estimate_geometric(self, X: pd.DataFrame): + def estimate_geometric(self, X: pd.DataFrame) -> pd.Series: + """ + Estimates the mean return using the geometric mean method. + + Parameters + ---------- + X : pd.DataFrame + Input data containing historical returns. + + Returns + ------- + pd.Series + The estimated mean return. 
+ """ n_mom = X.shape[0] if self.spec.get('n_mom') is None else self.spec.get('n_mom') n_rev = 0 if self.spec.get('n_rev') is None else self.spec.get('n_rev') scalefactor = 1 if self.spec.get('scalefactor') is None else self.spec.get('scalefactor') - X = X.tail(n_mom).head(n_mom-n_rev) + X = X.tail(n_mom).head(n_mom - n_rev) mu = np.exp(np.log(1 + X).mean(axis=0) * scalefactor) - 1 - # Alternatively: - # from scipy.stats import gmean - # mu = (gmean(1 + X) - 1).tolist() return mu diff --git a/src/optimization.py b/src/optimization.py index e87ba19..dfb3126 100644 --- a/src/optimization.py +++ b/src/optimization.py @@ -8,20 +8,16 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ ### OPTIMIZATION ############################################################################ - - from abc import ABC, abstractmethod from typing import Optional import numpy as np import pandas as pd - from helper_functions import to_numpy from covariance import Covariance from mean_estimation import MeanEstimator @@ -32,33 +28,92 @@ # https://github.com/qpsolvers/qpsolvers - - - - - class OptimizationParameter(dict): + """ + A class to handle parameters for optimization. + + Attributes + ---------- + solver_name : str + The solver to use for the optimization. Default is 'cvxopt'. + verbose : bool + Whether to enable verbose output. Default is True. + allow_suboptimal : bool + Whether to allow suboptimal solutions. Default is False. + """ def __init__(self, **kwargs): + """ + Initializes the OptimizationParameter instance with default values. + + Parameters + ---------- + **kwargs : + Additional parameters to override defaults. + """ super(OptimizationParameter, self).__init__(**kwargs) self.__dict__ = self - if not self.get('solver_name'): self['solver_name'] = 'cvxopt' - if not self.get('verbose'): self['verbose'] = True - if not self.get('allow_suboptimal'): self['allow_suboptimal'] = False + if not self.get('solver_name'): + self['solver_name'] = 'cvxopt' + if not self.get('verbose'): + self['verbose'] = True + if not self.get('allow_suboptimal'): + self['allow_suboptimal'] = False class Objective(dict): + """ + A class to define optimization objectives. + """ def __init__(self, *args, **kwargs): + """ + Initializes the Objective instance. + + Parameters + ---------- + *args : + Positional arguments for the dictionary. + **kwargs : + Keyword arguments for the dictionary. + """ super(Objective, self).__init__(*args, **kwargs) class Optimization(ABC): + """ + Abstract base class for optimization problems. + + Attributes + ---------- + params : OptimizationParameter + Parameters for the optimization process. + objective : Objective + The optimization objective. + constraints : Constraints + Constraints applied to the optimization. + model : Any + The optimization model. + results : dict + Results of the optimization. + """ def __init__(self, params: OptimizationParameter = None, constraints: Constraints = None, **kwargs): + """ + Initializes the Optimization instance. + + Parameters + ---------- + params : OptimizationParameter, optional + Parameters for the optimization. Defaults to None. + constraints : Constraints, optional + Constraints for the optimization. Defaults to None. + **kwargs : + Additional parameters to override defaults. 
+ """ self.params = OptimizationParameter(**kwargs) if params is None else params self.objective = Objective() self.constraints = Constraints() if constraints is None else constraints @@ -67,14 +122,37 @@ def __init__(self, @abstractmethod def set_objective(self, optimization_data: OptimizationData) -> None: + """ + Abstract method to set the optimization objective. + + Parameters + ---------- + optimization_data : OptimizationData + The data used to define the objective. + """ raise NotImplementedError("Method 'set_objective' must be implemented in derived class.") @abstractmethod def solve(self) -> bool: + """ + Abstract method to solve the optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ self.solve_qpsolvers() return self.results['status'] def solve_qpsolvers(self) -> None: + """ + Solves the optimization problem using qpsolvers. + + Returns + ------- + None + """ self.model_qpsolvers() self.model.solve() universe = self.constraints.selection @@ -86,9 +164,14 @@ def solve_qpsolvers(self) -> None: self.results = {'weights': weights.to_dict(), 'status': self.model['solution'].found} - return None - def model_qpsolvers(self) -> None: + """ + Sets up the quadratic programming problem for qpsolvers. + + Returns + ------- + None + """ # Ensure that P and q are numpy arrays if 'P' in self.objective.keys(): P = to_numpy(self.objective['P']) @@ -104,7 +187,7 @@ def model_qpsolvers(self) -> None: universe = self.constraints.selection - # constraints + # Constraints constraints = self.constraints GhAb = constraints.to_GhAb() @@ -121,30 +204,10 @@ def model_qpsolvers(self) -> None: lb=lb, ub=ub, params=self.params) - - # Choose which reference position to be used - tocon = self.constraints.l1.get('turnover') - x0 = tocon['x0'] if tocon is not None and tocon.get('x0') is not None else self.params.get('x0') - x_init = {asset: x0.get(asset, 0) for asset in universe} if x0 is not None else None - - # Transaction cost in the objective - transaction_cost = self.params.get('transaction_cost') - if transaction_cost is not None and x_init is not None: - self.model.linearize_turnover_objective(pd.Series(x_init), transaction_cost) - - # Turnover constraint - if tocon and not transaction_cost and x_init is not None: - self.model.linearize_turnover_constraint(pd.Series(x_init), tocon['rhs']) - - # Leverage constraint - levcon = self.constraints.l1.get('leverage') - if levcon is not None: - self.model.linearize_leverage_constraint(N=len(universe), leverage_budget=levcon['rhs']) return None - class EmptyOptimization(Optimization): def set_objective(self) -> None: @@ -155,35 +218,105 @@ def solve(self) -> bool: class MeanVariance(Optimization): + """ + Mean-variance optimization problem. + + Attributes + ---------- + covariance : Covariance + Covariance estimator used in the optimization. + mean_estimator : MeanEstimator + Mean return estimator used in the optimization. + """ def __init__(self, covariance: Optional[Covariance] = None, mean_estimator: Optional[MeanEstimator] = None, **kwargs): + """ + Initializes the MeanVariance instance. + + Parameters + ---------- + covariance : Covariance, optional + Covariance estimator. Defaults to None. + mean_estimator : MeanEstimator, optional + Mean return estimator. Defaults to None. + **kwargs : + Additional parameters for the optimization. 
+ """ super().__init__(**kwargs) self.covariance = Covariance() if covariance is None else covariance - self.mean_estimator = MeanEstimator() if mean_estimator is None else MeanEstimator + self.mean_estimator = MeanEstimator() if mean_estimator is None else mean_estimator self.params.setdefault('risk_aversion', 1) def set_objective(self, optimization_data: OptimizationData) -> None: - covmat = self.covariance.estimate(X = optimization_data['return_series']) + """ + Sets the mean-variance optimization objective. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to compute the objective. + + Returns + ------- + None + """ + covmat = self.covariance.estimate(X=optimization_data['return_series']) covmat = covmat * self.params['risk_aversion'] * 2 - mu = self.mean_estimator.estimate(X = optimization_data['return_series']) * (-1) - self.objective = Objective(q = mu, - P = covmat) + mu = self.mean_estimator.estimate(X=optimization_data['return_series']) * (-1) + self.objective = Objective(q=mu, P=covmat) return None - def solve(self) -> bool: + def solve(self) -> bool: + """ + Solves the mean-variance optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ return super().solve() + class QEQW(Optimization): + """ + Quasi-Equal Weighted (QEQW) optimization problem. + + Attributes + ---------- + covariance : Covariance + Covariance estimator used for QEQW optimization. + """ def __init__(self, **kwargs): + """ + Initializes the QEQW optimization instance. + + Parameters + ---------- + **kwargs : + Additional parameters for the optimization. + """ super().__init__(**kwargs) self.covariance = Covariance(method='duv') def set_objective(self, optimization_data: OptimizationData) -> None: + """ + Sets the optimization objective for QEQW. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to define the objective. + + Returns + ------- + None + """ X = optimization_data['return_series'] covmat = self.covariance.estimate(X=X) * 2 mu = np.zeros(X.shape[1]) @@ -191,27 +324,62 @@ def set_objective(self, optimization_data: OptimizationData) -> None: return None def solve(self) -> bool: + """ + Solves the QEQW optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ return super().solve() - class LeastSquares(Optimization): + """ + Least Squares optimization problem. + + Attributes + ---------- + covariance : Covariance, optional + Covariance estimator used in the optimization. + """ def __init__(self, covariance: Optional[Covariance] = None, **kwargs): + """ + Initializes the Least Squares optimization instance. + + Parameters + ---------- + covariance : Covariance, optional + Covariance estimator. Defaults to None. + **kwargs : + Additional parameters for the optimization. + """ super().__init__(**kwargs) self.covariance = covariance def set_objective(self, optimization_data: OptimizationData) -> None: - + """ + Sets the Least Squares optimization objective. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to compute the objective. 
+ + Returns + ------- + None + """ X = optimization_data['return_series'] y = optimization_data['bm_series'] if self.params.get('log_transform'): X = np.log(1 + X) y = np.log(1 + y) - # 0.5 * w * P * w' - q * w' + constant P = 2 * (X.T @ X) q = to_numpy(-2 * X.T @ y).reshape((-1,)) constant = to_numpy(y.T @ y).item() @@ -220,19 +388,39 @@ def set_objective(self, optimization_data: OptimizationData) -> None: if l2_penalty is not None and l2_penalty != 0: P += 2 * l2_penalty * np.eye(X.shape[1]) - self.objective = Objective(P=P, - q=q, - constant=constant) + self.objective = Objective(P=P, q=q, constant=constant) return None def solve(self) -> bool: + """ + Solves the Least Squares optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ return super().solve() class WeightedLeastSquares(Optimization): + """ + Weighted Least Squares optimization problem. + """ def set_objective(self, optimization_data: OptimizationData) -> None: - + """ + Sets the Weighted Least Squares optimization objective. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to compute the objective. + + Returns + ------- + None + """ X = optimization_data['return_series'] y = optimization_data['bm_series'] if self.params.get('log_transform'): @@ -256,19 +444,60 @@ def set_objective(self, optimization_data: OptimizationData) -> None: return None def solve(self) -> bool: + """ + Solves the Weighted Least Squares optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ return super().solve() class LAD(Optimization): - # Least Absolute Deviation (same as mean absolute deviation, MAD) + """ + Least Absolute Deviation (LAD) optimization problem, also known as mean absolute deviation (MAD). + + Attributes + ---------- + params : dict + Parameters for the optimization problem. + objective : Objective + Objective function for the optimization problem. + model : QuadraticProgram + Optimization model instance. + results : dict + Optimization results including weights. + """ def __init__(self, **kwargs): + """ + Initializes the LAD optimization instance. + + Parameters + ---------- + **kwargs : + Additional parameters for the optimization. + """ super().__init__(**kwargs) self.params['use_level'] = self.params.get('use_level', True) self.params['use_log'] = self.params.get('use_log', True) def set_objective(self, optimization_data: OptimizationData) -> None: + """ + Sets the LAD optimization objective. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to compute the objective. + + Returns + ------- + None + """ X = optimization_data['return_series'] y = optimization_data['bm_series'] if self.params.get('use_level'): @@ -279,11 +508,17 @@ def set_objective(self, optimization_data: OptimizationData) -> None: y = np.log(y) self.objective = Objective(X=X, y=y) - return None def solve(self) -> bool: - # Note: Should use an interior point linear solver instead of qpsolvers + """ + Solves the LAD optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ self.model_qpsolvers() self.model.solve() weights = pd.Series(self.model['solution'].x[0:len(self.constraints.selection)], @@ -292,6 +527,13 @@ def solve(self) -> bool: return True def model_qpsolvers(self) -> None: + """ + Constructs the optimization model using QP solvers. 
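A standalone check of the least-squares algebra used above: with `P = 2 X'X`, `q = -2 X'y` and `constant = y'y`, the value `0.5 * w @ P @ w + q @ w + constant` equals the squared tracking error `||X @ w - y||**2`. The data below are made up.

```python
# Illustrative check: the quadratic form above reproduces ||X w - y||^2.
import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 5))      # asset returns (made up)
y = rng.normal(size=100)           # benchmark returns (made up)
w = rng.dirichlet(np.ones(5))      # an arbitrary weight vector summing to 1

P = 2 * X.T @ X
q = -2 * X.T @ y
constant = y @ y

qp_value = 0.5 * w @ P @ w + q @ w + constant
tracking_error = np.sum((X @ w - y) ** 2)
assert np.isclose(qp_value, tracking_error)
```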
+ + Returns + ------- + None + """ # Data and constraints X = to_numpy(self.objective['X']) y = to_numpy(self.objective['y']) @@ -354,12 +596,40 @@ def model_qpsolvers(self) -> None: class PercentilePortfolios(Optimization): + """ + Percentile-based portfolio optimization. + + Attributes + ---------- + estimator : MeanEstimator, optional + Estimator for computing mean returns. + params : dict + Parameters for the optimization problem. + objective : Objective + Objective function for the optimization problem. + results : dict + Optimization results including weights and portfolio allocations. + """ def __init__(self, field: Optional[str] = None, estimator: Optional[MeanEstimator] = None, - n_percentiles = 5, # creates quintile portfolios by default. + n_percentiles: int = 5, **kwargs): + """ + Initializes the Percentile Portfolios optimization instance. + + Parameters + ---------- + field : str, optional + Field for scoring data. + estimator : MeanEstimator, optional + Estimator for mean return computation. + n_percentiles : int, optional + Number of percentiles. Defaults to 5 (quintile portfolios). + **kwargs : + Additional parameters for the optimization. + """ super().__init__(**kwargs) self.estimator = estimator self.params = {'solver_name': 'percentile', @@ -367,36 +637,51 @@ def __init__(self, 'field': field} def set_objective(self, optimization_data: OptimizationData) -> None: - + """ + Sets the objective for Percentile Portfolios optimization. + + Parameters + ---------- + optimization_data : OptimizationData + Data used to compute the objective. + + Returns + ------- + None + """ field = self.params.get('field') if self.estimator is not None: if field is not None: raise ValueError('Either specify a "field" or pass an "estimator", but not both.') else: - scores = self.estimator.estimate(X = optimization_data['return_series']) + scores = self.estimator.estimate(X=optimization_data['return_series']) else: if field is not None: scores = optimization_data['scores'][field] else: score_weights = self.params.get('score_weights') if score_weights is not None: - # Compute weighted average scores = ( optimization_data['scores'][score_weights.keys()] .multiply(score_weights.values()) .sum(axis=1) ) else: - scores = optimization_data['scores'].mean(axis = 1).squeeze() + scores = optimization_data['scores'].mean(axis=1).squeeze() - # Add tiny noise to zeros since otherwise there might be two threshold values == 0 scores[scores == 0] = np.random.normal(0, 1e-10, scores[scores == 0].shape) - self.objective = Objective(scores = -scores) - + self.objective = Objective(scores=-scores) return None def solve(self) -> bool: - + """ + Solves the Percentile Portfolios optimization problem. + + Returns + ------- + bool + Whether the optimization was successful. + """ scores = self.objective['scores'] N = self.params['n_percentiles'] q_vec = np.linspace(0, 100, N + 1) diff --git a/src/optimization_data.py b/src/optimization_data.py index f7a6f8a..b1d893a 100644 --- a/src/optimization_data.py +++ b/src/optimization_data.py @@ -1,3 +1,4 @@ + ''' PorQua : a python library for portfolio optimization and backtesting PorQua is part of GeomScale project @@ -8,8 +9,6 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - - import numpy as np import pandas as pd from helper_functions import to_numpy @@ -18,7 +17,38 @@ class OptimizationData(dict): + """ + A container for managing optimization-related data. 
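A standalone sketch of the score-to-percentile bucketing described for `PercentilePortfolios`. Equal-weighting the top quintile is an illustrative choice rather than the library's exact allocation rule, and the scores are made up.

```python
# Illustrative sketch: bucket assets into quintiles by score and equal-weight
# the top bucket (the allocation rule here is an assumption, not the library's).
import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
scores = pd.Series(rng.normal(size=20), index=[f'asset_{i}' for i in range(20)])

n_percentiles = 5
thresholds = np.percentile(scores, np.linspace(0, 100, n_percentiles + 1))
buckets = pd.cut(scores, bins=thresholds, labels=False, include_lowest=True)

top = scores[buckets == n_percentiles - 1].index   # highest-score quintile
weights = pd.Series(1 / len(top), index=top)       # equal weights, summing to 1
print(weights)
```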
+ + This class extends the Python dictionary to support specific operations + like aligning dates and handling lagged data for optimization tasks. + + Attributes + ---------- + align : bool + Whether to align dates across variables on initialization. + lags : dict + A dictionary specifying lag values for variables. + """ + def __init__(self, align=True, lags={}, *args, **kwargs): + + """ + Initializes the OptimizationData instance. + + Parameters + ---------- + align : bool, optional + Whether to align dates across variables. Default is True. + lags : dict, optional + Dictionary specifying lags for variables. Keys are variable names + and values are the lag amounts. Default is an empty dictionary. + *args : + Additional positional arguments for the dictionary. + **kwargs : + Additional keyword arguments for the dictionary. + """ + super(OptimizationData, self).__init__(*args, **kwargs) self.__dict__ = self if len(lags) > 0: @@ -28,16 +58,46 @@ def __init__(self, align=True, lags={}, *args, **kwargs): self.align_dates() def align_dates(self, variable_names: Optional[list[str]] = None) -> None: + + """ + Aligns dates across the specified variables. + + Parameters + ---------- + variable_names : list[str], optional + List of variable names to align. If None, all variables are aligned. + + Returns + ------- + None + """ + if variable_names is None: variable_names = self.keys() index = self.intersecting_dates(variable_names=list(variable_names)) for key in variable_names: self[key] = self[key].loc[index] - return None def intersecting_dates(self, variable_names: Optional[list[str]] = None, dropna: bool = True) -> pd.DatetimeIndex: + + """ + Finds the intersection of dates across the specified variables. + + Parameters + ---------- + variable_names : list[str], optional + List of variable names to find intersecting dates for. If None, all variables are used. + dropna : bool, optional + Whether to drop rows with NaN values in the variables. Default is True. + + Returns + ------- + pd.DatetimeIndex + The intersection of dates across the specified variables. + """ + if variable_names is None: variable_names = list(self.keys()) if dropna: @@ -47,4 +107,5 @@ def intersecting_dates(self, for variable_name in variable_names: index = index.intersection(self.get(variable_name).index) return index - + + \ No newline at end of file diff --git a/src/portfolio.py b/src/portfolio.py index dbef928..31dbb57 100644 --- a/src/portfolio.py +++ b/src/portfolio.py @@ -8,22 +8,44 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - - import pandas as pd import numpy as np - - - - class Portfolio: + """ + A class representing a financial portfolio with rebalancing capabilities. + + Attributes + ---------- + rebalancing_date : str + The date when the portfolio is rebalanced. + weights : dict + A dictionary representing asset weights in the portfolio. + name : str + The name of the portfolio. + init_weights : dict + Initial weights of the portfolio before rebalancing. + """ def __init__(self, rebalancing_date: str = None, weights: dict = {}, name: str = None, init_weights: dict = {}): + """ + Initializes a Portfolio instance. + + Parameters + ---------- + rebalancing_date : str, optional + The date of rebalancing, by default None. + weights : dict, optional + Asset weights in the portfolio, by default an empty dictionary. + name : str, optional + The name of the portfolio, by default None. + init_weights : dict, optional + Initial asset weights before rebalancing, by default an empty dictionary. 
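A standalone sketch of the date alignment that `OptimizationData` performs on construction. The toy data frames and the `'return_series'`/`'bm_series'` keys mirror how the optimizations above consume the container; running it assumes the repository's `src/` directory is on the Python path.

```python
# Illustrative sketch: OptimizationData aligns the date index of its entries.
import numpy as np
import pandas as pd
from optimization_data import OptimizationData   # assumes src/ on the Python path

dates = pd.date_range('2024-01-01', periods=10, freq='B')
returns = pd.DataFrame(np.random.default_rng(3).normal(size=(10, 3)),
                       index=dates, columns=['A', 'B', 'C'])
benchmark = pd.DataFrame({'bm': np.random.default_rng(4).normal(size=8)},
                         index=dates[2:])         # shorter history on purpose

data = OptimizationData(align=True, return_series=returns, bm_series=benchmark)
print(data['return_series'].index.equals(data['bm_series'].index))   # True
```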
+ """ self.rebalancing_date = rebalancing_date self.weights = weights self.name = name @@ -42,6 +64,7 @@ def get_weights_series(self) -> pd.Series: @weights.setter def weights(self, new_weights: dict): + if not isinstance(new_weights, dict): if hasattr(new_weights, 'to_dict'): new_weights = new_weights.to_dict() @@ -70,12 +93,37 @@ def name(self, new_name: str): self._name = new_name def __repr__(self): + """ + Returns a string representation of the Portfolio object. + + Returns + ------- + str + String representation of the portfolio. + """ return f'Portfolio(rebalancing_date={self.rebalancing_date}, weights={self.weights})' def float_weights(self, return_series: pd.DataFrame, end_date: str, - rescale: bool = False): + rescale: bool = False) -> pd.DataFrame: + """ + Computes the floating weights of the portfolio over time. + + Parameters + ---------- + return_series : pd.DataFrame + A DataFrame containing asset return data indexed by date. + end_date : str + The ending date for computing floating weights. + rescale : bool, optional + Whether to rescale the weights such that their sum remains 1, by default False. + + Returns + ------- + pd.DataFrame or None + DataFrame of floating weights over time, or None if weights are not set. + """ if self.weights is not None: return floating_weights(X=return_series, w=self.weights, @@ -90,7 +138,31 @@ def initial_weights(self, return_series: pd.DataFrame, end_date: str, rescale: bool = True) -> dict[str, float]: - + """ + Computes the initial weights of the portfolio at the rebalancing date. + + Parameters + ---------- + selection : list[str] + List of asset names to include in the initial weights. + return_series : pd.DataFrame + A DataFrame containing asset return data indexed by date. + end_date : str + The ending date for computing the weights. + rescale : bool, optional + Whether to rescale the weights to sum to 1, by default True. + + Returns + ------- + dict[str, float] + Dictionary containing the initial asset weights. + + Notes + ----- + - If `self.rebalancing_date` and `self.weights` are set, the function calculates + the weights by floating them to the end date. + - If these attributes are not set, it returns None. + """ if not hasattr(self, '_initial_weights'): if self.rebalancing_date is not None and self.weights is not None: w_init = dict.fromkeys(selection, 0) @@ -106,7 +178,30 @@ def initial_weights(self, return self._initial_weights - def turnover(self, portfolio: "Portfolio", return_series: pd.DataFrame, rescale=True): + def turnover(self, portfolio: "Portfolio", return_series: pd.DataFrame, rescale: bool = True) -> float: + """ + Computes the portfolio turnover by comparing the previous and current portfolio weights. + + Parameters + ---------- + portfolio : Portfolio + The previous portfolio to compare against. + return_series : pd.DataFrame + A DataFrame containing asset return data indexed by date. + rescale : bool, optional + Whether to rescale the weights to sum to 1, by default True. + + Returns + ------- + float + The total absolute turnover of the portfolio. + + Notes + ----- + - Turnover measures the total change in portfolio weights between two consecutive rebalancing dates. + - If `portfolio.rebalancing_date` is before `self.rebalancing_date`, it uses `portfolio.initial_weights()`. + - Otherwise, it computes initial weights from `self.initial_weights()`. 
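A standalone illustration of the turnover measure used here: the sum of absolute weight changes between two allocations, computed with the same pandas pattern that appears below. The toy weights are made up.

```python
# Illustrative sketch: turnover as the sum of absolute weight changes
# between two allocations (toy weights).
import pandas as pd

w_old = {'A': 0.50, 'B': 0.30, 'C': 0.20}
w_new = {'A': 0.40, 'B': 0.35, 'C': 0.15, 'D': 0.10}

turnover = pd.Series(w_new).sub(pd.Series(w_old), fill_value=0).abs().sum()
print(turnover)   # 0.10 + 0.05 + 0.05 + 0.10 = 0.30
```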
+ """ if portfolio.rebalancing_date is not None and portfolio.rebalancing_date < self.rebalancing_date: w_init = portfolio.initial_weights(selection=self.weights.keys(), return_series=return_series, @@ -121,9 +216,26 @@ def turnover(self, portfolio: "Portfolio", return_series: pd.DataFrame, rescale= return pd.Series(w_init).sub(pd.Series(portfolio.weights), fill_value=0).abs().sum() + class Strategy: + """ + A class representing a financial trading strategy consisting of multiple portfolios. + + Attributes + ---------- + portfolios : list[Portfolio] + A list of Portfolio objects in the strategy. + """ def __init__(self, portfolios: list[Portfolio]): + """ + Initializes a Strategy instance. + + Parameters + ---------- + portfolios : list[Portfolio] + A list of Portfolio objects. + """ self.portfolios = portfolios @property @@ -137,27 +249,64 @@ def portfolios(self, new_portfolios: list[Portfolio]): if not all(isinstance(portfolio, Portfolio) for portfolio in new_portfolios): raise TypeError('all elements in portfolios must be of type Portfolio') self._portfolios = new_portfolios + def clear(self) -> None: + """ + Clears the portfolio list. + + Returns + ------- + None + """ self.portfolios.clear() return None def get_rebalancing_dates(self): + """ + Retrieves all portfolio rebalancing dates. + + Returns + ------- + list[str] + List of rebalancing dates. + """ return [portfolio.rebalancing_date for portfolio in self.portfolios] - def get_weights(self, rebalancing_date: str) -> dict[str, float]: - for portfolio in self.portfolios: - if portfolio.rebalancing_date == rebalancing_date: - return portfolio.weights - return None - def get_weights_df(self) -> pd.DataFrame: + """ + Returns portfolio weights as a DataFrame. + + Returns + ------- + pd.DataFrame + DataFrame containing portfolio weights. + """ weights_dict = {} for portfolio in self.portfolios: weights_dict[portfolio.rebalancing_date] = portfolio.weights return pd.DataFrame(weights_dict).T + def get_portfolio(self, rebalancing_date: str) -> Portfolio: + """ + Retrieves a portfolio for a specific rebalancing date. + + Parameters + ---------- + rebalancing_date : str + The date for which the portfolio is requested. + + Returns + ------- + Portfolio + The corresponding portfolio. + + Raises + ------ + ValueError + If no portfolio is found for the specified date. + """ if rebalancing_date in self.get_rebalancing_dates(): idx = self.get_rebalancing_dates().index(rebalancing_date) return self.portfolios[idx] @@ -165,33 +314,104 @@ def get_portfolio(self, rebalancing_date: str) -> Portfolio: raise ValueError(f'No portfolio found for rebalancing date {rebalancing_date}') def has_previous_portfolio(self, rebalancing_date: str) -> bool: + """ + Checks whether a previous portfolio exists before a given rebalancing date. + + Parameters + ---------- + rebalancing_date : str + The reference date. + + Returns + ------- + bool + True if there is a previous portfolio, otherwise False. + """ dates = self.get_rebalancing_dates() - ans = False - if len(dates) > 0: - ans = dates[0] < rebalancing_date - return ans + return len(dates) > 0 and dates[0] < rebalancing_date def get_previous_portfolio(self, rebalancing_date: str) -> Portfolio: + """ + Retrieves the most recent portfolio before a given rebalancing date. + + Parameters + ---------- + rebalancing_date : str + The reference date. + + Returns + ------- + Portfolio + The previous portfolio, or an empty portfolio if none exist. 
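A standalone sketch of collecting rebalanced `Portfolio` objects in a `Strategy` and inspecting the weight history. The dates and weights are made up, and the flat import assumes the repository's `src/` directory is on the Python path.

```python
# Illustrative sketch: a Strategy as a list of rebalanced Portfolios (toy data).
from portfolio import Portfolio, Strategy   # assumes src/ on the Python path

p1 = Portfolio(rebalancing_date='2024-01-31', weights={'A': 0.6, 'B': 0.4})
p2 = Portfolio(rebalancing_date='2024-02-29', weights={'A': 0.5, 'B': 0.3, 'C': 0.2})

strategy = Strategy([p1, p2])
print(strategy.get_rebalancing_dates())   # ['2024-01-31', '2024-02-29']
print(strategy.get_weights_df())          # one row of weights per rebalancing date
```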
+ """ if not self.has_previous_portfolio(rebalancing_date): return Portfolio.empty() else: - yesterday = [x for x in self.get_rebalancing_dates() if x < rebalancing_date][-1] - return self.get_portfolio(yesterday) + previous_date = [x for x in self.get_rebalancing_dates() if x < rebalancing_date][-1] + return self.get_portfolio(previous_date) def get_initial_portfolio(self, rebalancing_date: str) -> Portfolio: + """ + Retrieves the initial portfolio before the specified rebalancing date. + + Parameters + ---------- + rebalancing_date : str + The reference date. + + Returns + ------- + Portfolio + The initial portfolio before the given date, or an empty portfolio if none exist. + """ if self.has_previous_portfolio(rebalancing_date=rebalancing_date): - initial_portfolio = self.get_previous_portfolio(rebalancing_date) + return self.get_previous_portfolio(rebalancing_date) else: - initial_portfolio = Portfolio(rebalancing_date=None, weights={}) - return initial_portfolio + return Portfolio(rebalancing_date=None, weights={}) - def __repr__(self): + def __repr__(self) -> str: + """ + Returns a string representation of the Strategy object. + + Returns + ------- + str + String representation of the strategy. + """ return f'Strategy(portfolios={self.portfolios})' def number_of_assets(self, th: float = 0.0001) -> pd.Series: + """ + Computes the number of assets in each portfolio above a given threshold. + + Parameters + ---------- + th : float, optional + The minimum absolute weight to consider an asset as included, by default 0.0001. + + Returns + ------- + pd.Series + Series containing the number of assets per rebalancing date. + """ return self.get_weights_df().apply(lambda x: sum(np.abs(x) > th), axis=1) - def turnover(self, return_series, rescale=True) -> pd.Series: + def turnover(self, return_series: pd.DataFrame, rescale: bool = True) -> pd.Series: + """ + Computes the turnover for each rebalancing period. + + Parameters + ---------- + return_series : pd.DataFrame + A DataFrame containing asset return data indexed by date. + rescale : bool, optional + Whether to rescale the weights, by default True. + + Returns + ------- + pd.Series + Series of turnover values indexed by rebalancing dates. + """ dates = self.get_rebalancing_dates() turnover = {} for rebalancing_date in dates: @@ -203,11 +423,29 @@ def turnover(self, return_series, rescale=True) -> pd.Series: return pd.Series(turnover) def simulate(self, - return_series=None, + return_series: pd.DataFrame = None, fc: float = 0, vc: float = 0, n_days_per_year: int = 252) -> pd.Series: - + """ + Simulates portfolio performance over time, incorporating fixed and variable transaction costs. + + Parameters + ---------- + return_series : pd.DataFrame + DataFrame containing asset return data indexed by date. + fc : float, optional + Fixed transaction cost per rebalancing, by default 0. + vc : float, optional + Variable transaction cost proportional to turnover, by default 0. + n_days_per_year : int, optional + Number of trading days per year, by default 252. + + Returns + ------- + pd.Series + A series of portfolio returns over time. + """ rebdates = self.get_rebalancing_dates() ret_list = [] for rebdate in rebdates: @@ -216,7 +454,7 @@ def simulate(self, portfolio = self.get_portfolio(rebdate) w_float = portfolio.float_weights(return_series=return_series, end_date=next_rebdate, - rescale=False) # Note that rescale is hardcoded to False. + rescale=False) # Rescale is hardcoded to False. 
short_positions = list(filter(lambda x: x < 0, portfolio.weights.values())) long_positions = list(filter(lambda x: x >= 0, portfolio.weights.values())) margin = abs(sum(short_positions)) @@ -226,32 +464,57 @@ def simulate(self, w_float.insert(0, 'cash', cash) w_float.insert(0, 'loan', loan) level = w_float.sum(axis=1) - ret_tmp = level.pct_change(1) # 1 for one day lookback + ret_tmp = level.pct_change(1) # 1-day lookback ret_list.append(ret_tmp) portf_ret = pd.concat(ret_list).dropna() if vc != 0: to = self.turnover(return_series=return_series, - rescale=False) # Note that rescale is hardcoded to False. + rescale=False) # Rescale is hardcoded to False. varcost = to * vc - portf_ret[0] -= varcost[0] - portf_ret[varcost[1:].index] -= varcost[1:].values + portf_ret.iloc[0] -= varcost.iloc[0] + portf_ret.iloc[1:] -= varcost.iloc[1:].values + if fc != 0: n_days = (portf_ret.index[1:] - portf_ret.index[:-1]).to_numpy().astype('timedelta64[D]').astype(int) fixcost = (1 + fc) ** (n_days / n_days_per_year) - 1 - portf_ret[1:] -= fixcost + portf_ret.iloc[1:] -= fixcost return portf_ret - - # -------------------------------------------------------------------------- # Helper functions # -------------------------------------------------------------------------- def floating_weights(X, w, start_date, end_date, rescale=True): + """ + Computes floating weights over a time period given initial weights. + + Parameters + ---------- + X : pd.DataFrame + DataFrame containing asset returns. + w : dict + Dictionary of initial asset weights. + start_date : str + Start date for weight computation. + end_date : str + End date for weight computation. + rescale : bool, optional + Whether to rescale weights to sum to 1, by default True. + + Returns + ------- + pd.DataFrame + DataFrame of floating weights. + + Raises + ------ + ValueError + If start_date or end_date are not contained in the dataset. + """ start_date = pd.to_datetime(start_date) end_date = pd.to_datetime(end_date) if start_date < X.index[0]: @@ -271,12 +534,7 @@ def floating_weights(X, w, start_date, end_date, rescale=True): raise ValueError('Not all assets in w are contained in X.') X_tmp = X.loc[start_date:end_date, wnames].copy().fillna(0) - # TODO : To extend to short positions cases when the weights can be negative - # short_positions = wnames[w.iloc[0,:] < 0 ] - # if len(short_positions) > 0: - # X_tmp[short_positions] = X_tmp[short_positions] * (-1) xmat = 1 + X_tmp - # xmat.iloc[0] = w.dropna(how='all').fillna(0).abs() xmat.iloc[0] = w.dropna(how='all').fillna(0) w_float = xmat.cumprod() diff --git a/src/qp_problems.py b/src/qp_problems.py index 44bbf7d..c17af0f 100644 --- a/src/qp_problems.py +++ b/src/qp_problems.py @@ -8,41 +8,89 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - - import numpy as np import qpsolvers import scipy import pickle from helper_functions import isPD, nearestPD -IGNORED_SOLVERS = {'gurobi', # Restricted license - for non-production use only - expires 2025-11-24 - 'mosek', # Commercial solver - 'ecos', # LinAlgError: 0-dimensional array given. 
Array must be at least two-dimensional - 'scs', # ValueError: Failed to parse cone field bu - 'piqp', - 'proxqp', - 'clarabel' - } - +IGNORED_SOLVERS = {'gurobi', 'mosek', 'ecos', 'scs', 'piqp', 'proxqp', 'clarabel'} SPARSE_SOLVERS = {'clarabel', 'ecos', 'gurobi', 'mosek', 'highs', 'qpalm', 'osqp', 'qpswift', 'scs'} ALL_SOLVERS = {'clarabel', 'cvxopt', 'daqp', 'ecos', 'gurobi', 'highs', 'mosek', 'osqp', 'piqp', 'proxqp', 'qpalm', 'quadprog', 'scs'} USABLE_SOLVERS = ALL_SOLVERS - IGNORED_SOLVERS - -# This class converts a financial optimization problem to a standard quadratic optimization. class QuadraticProgram(dict): + """ + A class for representing and solving quadratic optimization problems. + + This class provides methods for transforming financial optimization problems into + a standard quadratic programming format. + + Attributes + ---------- + solver : str + The name of the solver used for optimization. + + Methods + ------- + linearize_turnover_constraint(x_init, to_budget) + Adds a turnover constraint by introducing auxiliary variables. + linearize_leverage_constraint(N, leverage_budget) + Adds a leverage constraint by introducing auxiliary variables. + linearize_turnover_objective(x_init, transaction_cost) + Modifies the objective function to account for transaction costs. + is_feasible() + Checks if the quadratic program is feasible. + solve() + Solves the quadratic optimization problem. + objective_value(x, with_const=True) + Computes the objective function value for a given solution. + serialize(path, **kwargs) + Saves the quadratic program to a file. + load(path, **kwargs) + Loads a quadratic program from a file. + """ def __init__(self, *args, **kwargs): + """ + Initializes the QuadraticProgram instance. + + Parameters + ---------- + *args : + Positional arguments passed to the dictionary constructor. + **kwargs : + Keyword arguments passed to the dictionary constructor. + """ super(QuadraticProgram, self).__init__(*args, **kwargs) self.solver = self['params']['solver_name'] - def linearize_turnover_constraint(self, x_init: np.ndarray, to_budget=float('inf')) -> None: + + def linearize_turnover_constraint(self, x_init: np.ndarray, to_budget: float = float('inf')) -> None: + """ + Adds turnover constraints to the quadratic program by introducing auxiliary variables. + + Parameters + ---------- + x_init : np.ndarray + Initial portfolio weights before rebalancing. + to_budget : float, optional + Maximum allowed turnover budget, by default infinity. + + Returns + ------- + None + + Notes + ----- + - This method modifies the objective function and constraints to account for turnover. + - It extends the quadratic program with additional variables to track turnover. 
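A standalone sketch of the linearization idea documented above: auxiliary variables `t` bound the absolute trades via `w - t <= w0` and `-w - t <= -w0`, and `sum(t) <= budget`. The block layout mirrors the `G`/`h` construction that follows; the sizes and numbers are made up.

```python
# Illustrative sketch: turnover linearization with auxiliary variables t,
# so that |w - w0| <= t element-wise and sum(t) <= budget (toy numbers).
import numpy as np

n = 3
x_init = np.array([0.5, 0.3, 0.2])       # current weights w0
to_budget = 0.4                          # maximum allowed turnover

I = np.eye(n)
G = np.block([
    [I, -I],                             #  w - t <= w0
    [-I, -I],                            # -w - t <= -w0
    [np.zeros((1, n)), np.ones((1, n))]  # sum(t) <= budget
])
h = np.concatenate([x_init, -x_init, [to_budget]])

w = np.array([0.45, 0.35, 0.20])         # a candidate rebalanced portfolio
t = np.abs(w - x_init)                   # implied absolute trades
z = np.concatenate([w, t])
print(bool(np.all(G @ z <= h + 1e-12)))  # True: turnover 0.10 <= 0.4
```

A similar device, one set of auxiliary variables per asset, underlies the leverage and transaction-cost variants further down.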
+ """ # Dimensions n = len(self.get('q')) m = 0 if self.get('G') is None else self.get('G').shape[0] - # Objective + # Extend matrices for turnover constraints P = np.pad(self['P'], (0, n)) if self.get('P') is not None else None q = np.pad(self['q'], (0, n)) if self.get('q') is not None else None @@ -51,38 +99,51 @@ def linearize_turnover_constraint(self, x_init: np.ndarray, to_budget=float('inf if self.get('G') is not None: G[0:m, 0:n] = self.get('G') G[m:(m + n), 0:n] = np.eye(n) - G[m:(m + n), n:(2 * n)] = np.eye(n) * (-1) - G[(m + n):(m + 2 * n), 0:n] = np.eye(n) * (-1) - G[(m + n):(m + 2 * n), n:(2 * n)] = np.eye(n) * (-1) + G[m:(m + n), n:(2 * n)] = -np.eye(n) + G[(m + n):(m + 2 * n), 0:n] = -np.eye(n) + G[(m + n):(m + 2 * n), n:(2 * n)] = -np.eye(n) G[(m + 2 * n),] = np.append(np.zeros(n), np.ones(n)) h = self.get('h') if self.get('h') is not None else np.empty(shape=(0,)) h = np.append(h, np.append(np.append(x_init, -x_init), to_budget)) # Equality constraints - #A = concat_constant_columns(self.get('A'), n) A = np.pad(self['A'], [(0, 0), (0, n)]) if self.get('A') is not None else None + # Adjust bounds lb = np.pad(self['lb'], (0, n)) if self.get('lb') is not None else None ub = np.pad(self['ub'], (0, n), constant_values=float('inf')) if self.get('ub') is not None else None - # Override the original matrices - self.update({'P': P, - 'q': q, - 'G': G, - 'h': h, - 'A': A, - 'lb': lb, - 'ub': ub}) + # Update problem + self.update({'P': P, 'q': q, 'G': G, 'h': h, 'A': A, 'lb': lb, 'ub': ub}) return None - def linearize_leverage_constraint(self, N=None, leverage_budget=2) -> None: + def linearize_leverage_constraint(self, N: int = None, leverage_budget: float = 2) -> None: + """ + Adds leverage constraints to the quadratic program by introducing auxiliary variables. + + Parameters + ---------- + N : int + Number of assets. + leverage_budget : float, optional + Maximum leverage allowed, by default 2. + + Returns + ------- + None + + Notes + ----- + - This method extends the quadratic program to account for leverage constraints. + - It introduces auxiliary variables to ensure that leverage does not exceed the defined budget. + """ # Dimensions n = len(self.get('q')) mG = 0 if self.get('G') is None else self.get('G').shape[0] mA = 1 if self.get('A').ndim == 1 else self.get('A').shape[0] - # Objective + # Extend matrices for leverage constraints P = np.pad(self['P'], (0, 2 * N)) if self.get('P') is not None else None q = np.pad(self['q'], (0, 2 * N)) if self.get('q') is not None else None @@ -102,29 +163,41 @@ def linearize_leverage_constraint(self, N=None, leverage_budget=2) -> None: A[mA:(mA + N), (n + N):(n + 2 * N)] = -np.eye(N) b = np.pad(self.get('b'), (0, N)) + # Adjust bounds lb = np.pad(self['lb'], (0, 2 * N)) if self.get('lb') is not None else None ub = np.pad(self['ub'], (0, 2 * N), constant_values=float('inf')) if self.get('ub') is not None else None - # Override the original matrices - self.update({'P': P, - 'q': q, - 'G': G, - 'h': h, - 'A': A, - 'b': b, - 'lb': lb, - 'ub': ub}) + # Update problem + self.update({'P': P, 'q': q, 'G': G, 'h': h, 'A': A, 'b': b, 'lb': lb, 'ub': ub}) return None - def linearize_turnover_objective(self, - x_init: np.ndarray, - transaction_cost=0.002) -> None: + def linearize_turnover_objective(self, x_init: np.ndarray, transaction_cost: float = 0.002) -> None: + """ + Modifies the objective function to include transaction costs in the turnover. + + Parameters + ---------- + x_init : np.ndarray + Initial portfolio weights before rebalancing. 
+ transaction_cost : float, optional + Cost per unit turnover, by default 0.002. + + Returns + ------- + None + + Notes + ----- + - This method introduces additional variables to the optimization problem + to model turnover-related transaction costs. + - The objective function is modified to penalize excessive turnover. + """ # Dimensions n = len(self.get('q')) m = 0 if self.get('G') is None else self.get('G').shape[0] - # Objective + # Extend matrices for turnover objective P = np.pad(self['P'], (0, n)) if self.get('P') is not None else None q = np.pad(self['q'], (0, n), constant_values=transaction_cost) if self.get('q') is not None else None @@ -142,21 +215,25 @@ def linearize_turnover_objective(self, # Equality constraints A = np.pad(self['A'], [(0, 0), (0, n)]) if self.get('A') is not None else None + # Adjust bounds lb = np.pad(self['lb'], (0, n)) if self.get('lb') is not None else None ub = np.pad(self['ub'], (0, n), constant_values=float('inf')) if self.get('ub') is not None else None - # Override the original matrices - self.update({'P': P, - 'q': q, - 'G': G, - 'h': h, - 'A': A, - 'lb': lb, - 'ub': ub}) + # Update problem + self.update({'P': P, 'q': q, 'G': G, 'h': h, 'A': A, 'lb': lb, 'ub': ub}) return None + def is_feasible(self) -> bool: + """ + Checks whether the quadratic program is feasible. + + Returns + ------- + bool + True if the program is feasible, otherwise False. + """ problem = qpsolvers.Problem(P=np.zeros(self.get('P').shape), q=np.zeros(self.get('P').shape[0]), G=self.get('G'), @@ -166,22 +243,22 @@ def is_feasible(self) -> bool: lb=self.get('lb'), ub=self.get('ub')) - # Convert to sparse matrices for best performance - if self.solver in SPARSE_SOLVERS: - if self['params'].get('sparse'): - if problem.P is not None: - problem.P = scipy.sparse.csc_matrix(problem.P) - if problem.A is not None: - problem.A = scipy.sparse.csc_matrix(problem.A) - if problem.G is not None: - problem.G = scipy.sparse.csc_matrix(problem.G) - solution = qpsolvers.solve_problem(problem=problem, - solver=self.solver, - initvals=self.get('x0'), - verbose=False) + if self.solver in SPARSE_SOLVERS and self['params'].get('sparse'): + problem.P = scipy.sparse.csc_matrix(problem.P) if problem.P is not None else None + problem.A = scipy.sparse.csc_matrix(problem.A) if problem.A is not None else None + problem.G = scipy.sparse.csc_matrix(problem.G) if problem.G is not None else None + + solution = qpsolvers.solve_problem(problem=problem, solver=self.solver, initvals=self.get('x0'), verbose=False) return solution.found def solve(self) -> None: + """ + Solves the quadratic optimization problem. 
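A standalone sketch of driving the `QuadraticProgram` container directly. The keys mirror the fields read by `solve()`; the toy problem, the `cvxopt` backend (any installed solver from `USABLE_SOLVERS` should work) and having the repository's `src/` directory on the Python path are assumptions.

```python
# Illustrative sketch: a tiny problem solved through the QuadraticProgram
# container ('cvxopt' backend and src/ on the path are assumptions).
import numpy as np
from qp_problems import QuadraticProgram

n = 3
qp = QuadraticProgram(P=np.eye(n),           # minimize 0.5 * ||w||^2 ...
                      q=np.zeros(n),
                      A=np.ones((1, n)),     # ... subject to sum(w) = 1
                      b=np.array([1.0]),
                      lb=np.zeros(n),
                      ub=np.ones(n),
                      params={'solver_name': 'cvxopt'})
qp.solve()
w = qp['solution'].x
print(np.round(w, 4), qp.objective_value(w))   # roughly equal weights of 1/3
```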
+ + Returns + ------- + None + """ if self.solver in ['ecos', 'scs', 'clarabel']: if self.get('b').size == 1: self['b'] = np.array(self.get('b')).reshape(-1) @@ -189,6 +266,7 @@ def solve(self) -> None: P = self.get('P') if P is not None and not isPD(P): self['P'] = nearestPD(P) + problem = qpsolvers.Problem(P=self.get('P'), q=self.get('q'), G=self.get('G'), @@ -198,33 +276,53 @@ def solve(self) -> None: lb=self.get('lb'), ub=self.get('ub')) - # Convert to sparse matrices for best performance - if self.solver in SPARSE_SOLVERS: - if self['params'].get('sparse'): - if problem.P is not None: - problem.P = scipy.sparse.csc_matrix(problem.P) - if problem.A is not None: - problem.A = scipy.sparse.csc_matrix(problem.A) - if problem.G is not None: - problem.G = scipy.sparse.csc_matrix(problem.G) - - solution = qpsolvers.solve_problem(problem=problem, - solver=self.solver, - initvals=self.get('x0'), - verbose=False) + if self.solver in SPARSE_SOLVERS and self['params'].get('sparse'): + problem.P = scipy.sparse.csc_matrix(problem.P) if problem.P is not None else None + problem.A = scipy.sparse.csc_matrix(problem.A) if problem.A is not None else None + problem.G = scipy.sparse.csc_matrix(problem.G) if problem.G is not None else None + + solution = qpsolvers.solve_problem(problem=problem, solver=self.solver, initvals=self.get('x0'), verbose=False) self['solution'] = solution return None - # 0.5 * x' * P * x + q' * x + const def objective_value(self, x: np.ndarray, with_const: bool = True) -> float: + """ + Computes the objective function value for a given solution. + + Parameters + ---------- + x : np.ndarray + Decision variable values. + with_const : bool, optional + Whether to include the constant term, by default True. + + Returns + ------- + float + The computed objective function value. + """ const = 0 if self.get('constant') is None or not with_const else self['constant'] return (0.5 * (x @ self.get('P') @ x) + self.get('q') @ x).item() + const - def serialize(self, path, **kwargs): + def serialize(self, path: str, **kwargs) -> None: + """ + Saves the QuadraticProgram instance to a file. + + Parameters + ---------- + path : str + Path to save the serialized file. + **kwargs : + Additional arguments for pickle. + + Returns + ------- + None + """ with open(path, 'wb') as f: pickle.dump(self, f, kwargs) @staticmethod - def load(path, **kwargs): - with open(path, 'rb'): - return pickle.load(path, kwargs) + def load(path: str, **kwargs) -> 'QuadraticProgram': + with open(path, 'rb') as f: + return pickle.load(f, **kwargs) diff --git a/src/selection.py b/src/selection.py index 562f8db..c8c0c6c 100644 --- a/src/selection.py +++ b/src/selection.py @@ -8,21 +8,47 @@ Licensed under GNU LGPL.3, see LICENCE file ''' - ############################################################################ ### CLASS Selection ############################################################################ - - from typing import Union, Optional import pandas as pd - - class Selection: + """ + A class to manage asset selection using filters and binary selection criteria. + + Attributes + ---------- + selected : pd.Index + The selected assets based on filtering criteria. + _filtered : dict[str, Union[pd.Series, pd.DataFrame]] + Dictionary storing filtering criteria as pandas Series or DataFrame. + + Methods + ------- + get_selected(filter_names=None) + Retrieves the selected asset indices based on the applied filters. + clear() + Resets the selection and clears all applied filters. 
+ add_filtered(filter_name, value) + Adds a new filtering criterion to the selection. + df(filter_names=None) + Returns a DataFrame containing all applied filters. + df_binary(filter_names=None) + Returns a binary DataFrame where 1 indicates asset selection. + """ def __init__(self, ids: pd.Index = pd.Index([])): + """ + Initializes the Selection object. + + Parameters + ---------- + ids : pd.Index, optional + The initial selection of asset indices, default is an empty index. + """ self._filtered: dict[str, Union[pd.Series, pd.DataFrame]] = {} self.selected = ids @@ -31,7 +57,7 @@ def selected(self) -> pd.Index: return self._selected @selected.setter - def selected(self, value): + def selected(self, value: pd.Index): if not isinstance(value, pd.Index): raise ValueError( "Inconsistent input type for selected.setter. Needs to be a pd.Index." @@ -39,47 +65,77 @@ def selected(self, value): self._selected = value @property - def filtered(self): + def filtered(self) -> dict[str, Union[pd.Series, pd.DataFrame]]: return self._filtered def get_selected(self, filter_names: Optional[list[str]] = None) -> pd.Index: - if filter_names is not None: - df = self.df_binary(filter_names) - else: - df = self.df_binary() + """ + Retrieves the selected asset indices based on applied filters. + + Parameters + ---------- + filter_names : list[str], optional + List of filter names to consider. If None, all filters are used. + + Returns + ------- + pd.Index + The asset indices that satisfy all applied filters. + """ + df = self.df_binary(filter_names) if filter_names is not None else self.df_binary() return df[df.eq(1).all(axis=1)].index def clear(self) -> None: + """ + Clears all filters and resets the selection. + + Returns + ------- + None + """ self.selected = pd.Index([]) self._filtered = {} - def add_filtered(self, - filter_name: str, - value: Union[pd.Series, pd.DataFrame]) -> None: - - # Check input types + def add_filtered(self, filter_name: str, value: Union[pd.Series, pd.DataFrame]) -> None: + """ + Adds a new filtering criterion to the selection. + + Parameters + ---------- + filter_name : str + The name of the filter to be added. + value : Union[pd.Series, pd.DataFrame] + The filtering data as a pandas Series or DataFrame. + + Returns + ------- + None + + Raises + ------ + ValueError + If filter_name is not a non-empty string. + If value is not a pandas Series or DataFrame. + If the 'binary' column contains values other than 0 or 1. + """ if not isinstance(filter_name, str) or not filter_name.strip(): raise ValueError("Argument 'filter_name' must be a nonempty string.") - if not isinstance(value, pd.Series) and not isinstance(value, pd.DataFrame): + if not isinstance(value, (pd.Series, pd.DataFrame)): raise ValueError( 'Inconsistent input type. Needs to be a pd.Series or a pd.DataFrame.' 
) # Ensure that column 'binary' is of type int if it exists if isinstance(value, pd.Series): - if value.name == 'binary': - if not value.isin([0, 1]).all(): - raise ValueError("Column 'binary' must contain only 0s and 1s.") - else: - value = value.astype(int) - - if isinstance(value, pd.DataFrame): - if 'binary' in value.columns: - if not value['binary'].isin([0, 1]).all(): - raise ValueError("Column 'binary' must contain only 0s and 1s.") - else: - value['binary'] = value['binary'].astype(int) + if value.name == 'binary' and not value.isin([0, 1]).all(): + raise ValueError("Column 'binary' must contain only 0s and 1s.") + value = value.astype(int) if value.name == 'binary' else value + + if isinstance(value, pd.DataFrame) and 'binary' in value.columns: + if not value['binary'].isin([0, 1]).all(): + raise ValueError("Column 'binary' must contain only 0s and 1s.") + value['binary'] = value['binary'].astype(int) # Add to filtered self._filtered[filter_name] = value @@ -89,7 +145,19 @@ def add_filtered(self, return None def df(self, filter_names: Optional[list[str]] = None) -> pd.DataFrame: - + """ + Returns a DataFrame containing all applied filters. + + Parameters + ---------- + filter_names : list[str], optional + List of filter names to include. If None, all filters are used. + + Returns + ------- + pd.DataFrame + A DataFrame where each column represents a filter applied to the selection. + """ if filter_names is None: filter_names = self.filtered.keys() return pd.concat( @@ -101,13 +169,25 @@ def df(self, filter_names: Optional[list[str]] = None) -> pd.DataFrame: ) for key in filter_names }, - axis = 1, + axis=1, ) def df_binary(self, filter_names: Optional[list[str]] = None) -> pd.DataFrame: - + """ + Returns a binary DataFrame where 1 indicates an asset is selected. + + Parameters + ---------- + filter_names : list[str], optional + List of filter names to include. If None, all filters are used. + + Returns + ------- + pd.DataFrame + A DataFrame with 1s indicating selected assets and 0s otherwise. + """ if filter_names is None: filter_names = self.filtered.keys() - df = self.df(filter_names = filter_names).filter(like = 'binary').dropna() + df = self.df(filter_names=filter_names).filter(like='binary').dropna() df.columns = df.columns.droplevel(1) return df From 8a50aa2a54347fc5b315120ca8186bab83dc40fc Mon Sep 17 00:00:00 2001 From: Andrea Catelli Date: Fri, 14 Feb 2025 14:14:06 +0100 Subject: [PATCH 3/3] Improved version of README.md file --- README.md | 57 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 960f73c..fbcccf1 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,19 @@ + + # PorQua -**PorQua** is an advanced Python library designed for portfolio optimization and index replication, part of the [GeomScale project](https://github.com/GeomScale). The library includes tools and models tailored for financial data analysis, providing efficient solutions for portfolio management and asset selection. -## Features +**PorQua** is an advanced **Python** library for **portfolio optimization** and **index replication**, designed as part of the [GeomScale project](https://github.com/GeomScale). It provides **efficient tools for financial data analysis, portfolio management, and asset selection**. -- **Portfolio Optimization**: Implement algorithms for creating efficient and optimal portfolios. -- **Index Replication**: Build portfolios to closely track specific indices. 
-- **Machine Learning Models**: Use data-driven models for financial analysis, asset selection, and universe selection.
 
+![License: LGPL-3.0](https://img.shields.io/badge/license-LGPL--3.0-blue.svg)
+![Docs](https://img.shields.io/badge/docs-passing-brightgreen)
+![Python](https://img.shields.io/badge/python-3.10%2B-blue.svg)
+![Chat on Gitter](https://badges.gitter.im/GeomScale/PorQua.svg)
+
 
-## Installation
+---
+
+## 📦 Installation
 
 Clone the repository:
 
@@ -17,17 +22,39 @@ git clone https://github.com/GeomScale/PorQua.git
 cd PorQua
 ```
 
+### 🧪 Run a Quick Test
+
+```sh
+python -m unittest test/tests_quadratic_program.py
+```
+
+---
+
+
+
+---
+
+## 📊 Examples
+
+💡 Here are some notebooks showing potential use cases of the PorQua library.
+
+| Feature | Example |
+|---------|---------|
+| **Backtesting a portfolio strategy using historical data** | [🔗 Backtesting](example/backtest.ipynb) |
+| **Evaluating different quadratic programming (QP) solvers** | [🔗 Compare solver](example/compare_solver.ipynb) |
+| **Index Replication (Using LSTM model)** | [🔗 Index replication](example/index_replication.ipynb) |
+| **Time series forecasting using LSTM model** | [🔗 LSTM for prediction](example/lstm.ipynb) |
+| **Time series forecasting using linear regression and XGBoost** | [🔗 ML forecasting](example/ml.ipynb) |
+| **Probit and Logit ordinal regression models** | [🔗 Ordinal regression](example/ordinal_regression.ipynb) |
 
+---
 
-## Example Notebooks
 
-- [example/backtest.ipynb](example/backtest.ipynb): Example of running a backtest.
-- [example/compare_solver.ipynb](example/compare_solver.ipynb): Example of comparing solvers.
-- [example/index_replication.ipynb](example/index_replication.ipynb): Example of index replication.
-- [example/lstm.ipynb](example/lstm.ipynb): Example of using LSTM for universe selection.
-- [example/ml.ipynb](example/ml.ipynb): Example of machine learning models.
-- [example/ordinal_regression.ipynb](example/ordinal_regression.ipynb): Example of ordinal regression.
+---
 
-## License
+## 📜 License
 
-This project is licensed under the GNU LGPL.3 License - see the [LICENSE](LICENSE) file for details.
\ No newline at end of file
+You may redistribute or modify the software under the [GNU Lesser General Public License](LICENSE) as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. It is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY.
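To complement the notebooks above, here is a minimal illustrative sketch of the `Selection` filtering workflow from `src/selection.py`. The asset identifiers and filter values are made up, and running it assumes the repository's `src/` directory is on the Python path.

```python
# Illustrative sketch: a binary liquidity filter applied through Selection.
import pandas as pd
from selection import Selection   # assumes src/ on the Python path

ids = pd.Index(['AAA', 'BBB', 'CCC', 'DDD'])
selection = Selection(ids)

liquidity = pd.Series([1, 1, 0, 1], index=ids, name='binary')   # 1 keeps, 0 drops
selection.add_filtered('liquidity', liquidity)

print(selection.get_selected())   # Index(['AAA', 'BBB', 'DDD'], dtype='object')
```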