Open
Description
The pmt() function of numpy_financial is too slow and can become the main bottleneck whenever it must be run thousands, if not millions, of times - e.g. when evaluating multiple scenarios on many loans or mortgages based on a floating rate.
I propose below a few alternative ways to write new, more optimised functions, which can be 7 to 60 times faster, depending on the circumstances:
- if you need to run this function millions of times and your code allows it, you'll be better off using one function for when the inputs, and the output, are scalar, and a separate one for when they are arrays
- unless you have reasons not to, using numba will speed things up even more
- if you cannot use numba and never run this function on scalars, you won't see much benefit
My findings are that, on my machine at least:
- For scalars: npf is slower than all the other implementations: about 30 times slower than my scalar non-numba function, about 60 times slower than my scalar numba one, and also significantly slower than my array function
- For arrays: my numba function is about 7 times faster than npf's; without numba, they are about the same
I copied below the exact code I used to test all of this:
import numpy as np
import pandas as pd
import numpy_financial as npf
import timeit
import numba
# `time` is used here but is not among the file's imports, so the script
# would stop with a NameError before running any benchmark; import it here.
import time

# Wall-clock timestamp (seconds since the epoch) taken when the script starts.
start = time.time()
def my_pmt(rate, nper, pv, fv=0, when=0):
    """Compute the fixed periodic payment of a loan for scalars or arrays.

    Parameters
    ----------
    rate : scalar or array-like
        Interest rate per period.
    nper : scalar or array-like
        Number of periods.
    pv : scalar or array-like
        Present value.
    fv : scalar or array-like, optional
        Future value (default 0).
    when : scalar or array-like, optional
        0 = payments at the end of each period, 1 = at the beginning
        (default 0).

    Returns
    -------
    NumPy scalar when every input is a scalar, otherwise an ndarray:
    NaN where ``nper == 0``, ``-(fv + pv) / nper`` where ``rate == 0``,
    and the annuity formula everywhere else.
    """
    rate = np.asarray(rate, dtype=float)
    nper = np.asarray(nper, dtype=float)
    pv = np.asarray(pv, dtype=float)
    fv = np.asarray(fv, dtype=float)
    when = np.asarray(when, dtype=float)

    no_periods = nper == 0
    zero_rate = rate == 0

    # np.where evaluates both of its value arguments eagerly, so the
    # branches that are going to be discarded must still be computable.
    # Without these placeholders a zero rate or zero nper divides by zero
    # (ZeroDivisionError for plain Python scalars, warnings for arrays).
    safe_nper = np.where(no_periods, 1.0, nper)
    safe_rate = np.where(zero_rate, 1.0, rate)

    c = (1 + safe_rate) ** safe_nper
    zero_rate_payment = -(fv + pv) / safe_nper
    annuity_payment = (-pv * c - fv) * safe_rate / ((c - 1) * (1 + safe_rate * when))

    # Multiplying by 1 converts a 0-d array into a NumPy scalar.
    return 1 * np.where(no_periods, np.nan,
                        np.where(zero_rate, zero_rate_payment, annuity_payment))
@numba.jit
def pmt_numba_array(rate, nper, pv, fv=0, when=0):
    """Numba-compiled payment calculation built on nested ``np.where``.

    Yields NaN where ``nper == 0``, ``-(fv + pv) / nper`` where
    ``rate == 0`` and the annuity formula otherwise.  All three candidate
    values are computed up front; ``np.where`` only selects between them.
    ``when``: 0 = end of period, 1 = beginning.
    """
    growth = (1 + rate) ** nper
    zero_rate_payment = -(fv + pv) / nper
    annuity_payment = (-pv * growth - fv) * rate / ((growth - 1) * (1 + rate * when))
    rate_based = np.where(rate == 0, zero_rate_payment, annuity_payment)
    return np.where(nper == 0, np.nan, rate_based)
def my_pmt_optimised(rate, nper, pv, fv=0, when=0):
    """Route the call to the scalar or the array numba implementation.

    The scalar kernel is used only when ``rate``, ``nper``, ``pv`` and
    ``fv`` are all scalars according to ``np.isscalar``; ``when`` is not
    inspected.  Any other combination goes to the array kernel.
    """
    all_scalar = all(np.isscalar(arg) for arg in (rate, nper, pv, fv))
    kernel = pmt_numba_scalar if all_scalar else pmt_numba_array
    return kernel(rate, nper, pv, fv, when)
@numba.jit
def pmt_numba_scalar(rate, nper, pv, fv=0, when=0):
    """Numba-compiled payment calculation using plain branching.

    Returns NaN when ``nper == 0``, ``-(fv + pv) / nper`` when
    ``rate == 0`` and the annuity formula otherwise.
    ``when``: 0 = end of period, 1 = beginning.
    """
    if nper == 0:
        return np.nan
    if rate == 0:
        return -(fv + pv) / nper

    growth = (1 + rate) ** nper
    numerator = (-pv * growth - fv) * rate
    denominator = (growth - 1) * (1 + rate * when)
    return numerator / denominator
def pmt_no_numba_scalar(rate, nper, pv, fv=0, when=0):
    """Pure-Python payment calculation using plain branching (no numba).

    Returns NaN when ``nper == 0``, ``-(fv + pv) / nper`` when
    ``rate == 0`` and the annuity formula otherwise.
    ``when`` scales the denominator by ``1 + rate * when``.
    """
    # Guard clauses: the two degenerate cases are handled before the formula.
    if nper == 0:
        return np.nan
    if rate == 0:
        return -(fv + pv) / nper

    growth = (1 + rate) ** nper
    numerator = (-pv * growth - fv) * rate
    denominator = (growth - 1) * (1 + rate * when)
    return numerator / denominator
def pmt_npf(rate, nper, pv, fv=0, when=0):
    """Payment calculation that masks the zero-rate case with ``np.where``.

    Presumably mirrors the formula used by ``numpy_financial.pmt`` (to be
    confirmed against that library).  The arguments are used exactly as
    passed; no conversion to arrays is performed here.

    Returns ``-(fv + pv * (1 + rate) ** nper) / factor`` where ``factor``
    is ``nper`` for a zero rate and
    ``(1 + rate * when) * ((1 + rate) ** nper - 1) / rate`` otherwise.
    """
    growth = (1 + rate) ** nper
    is_zero_rate = rate == 0
    # A stand-in rate of 1 keeps the division below defined where rate is 0;
    # those entries are replaced by nper in the next step anyway.
    safe_rate = np.where(is_zero_rate, 1, rate)
    annuity_factor = (1 + safe_rate * when) * (growth - 1) / safe_rate
    factor = np.where(is_zero_rate, nper, annuity_factor)
    return -(fv + pv * growth) / factor
# ---- Scalar benchmark ----
# timeit settings: `r` independent repeats, each running the statement `n` times.
r = 4
n = int(1e4)

# Scalar inputs; the timed statements resolve these names through globals().
rate = 5e-2
nper = 120
pv = 1e6
fv = -100e3

t_my_numba = timeit.repeat(
    "pmt_numba_scalar(rate, nper, pv, fv ) ",
    repeat=r, number=n, globals=globals())
t_my_no_numba = timeit.repeat(
    "pmt_no_numba_scalar(rate, nper, pv, fv ) ",
    repeat=r, number=n, globals=globals())
t_npf = timeit.repeat(
    "npf.pmt(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())
t_my_pmt = timeit.repeat(
    "my_pmt(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())
t_my_pmt_optimised = timeit.repeat(
    "my_pmt_optimised(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())

# One row per implementation.  The minimum over the repeats is reported
# rather than the average, as the timeit documentation recommends.
resdf_scalar = pd.DataFrame(
    {'min time': [
        min(t_my_numba),
        min(t_my_no_numba),
        min(t_npf),
        min(t_my_pmt),
        min(t_my_pmt_optimised),
    ]},
    index=[
        'my scalar func, numba',
        'my scalar func, no numba',
        'npf',
        'my array function, no numba',
        'my scalar/array function, numba',
    ],
)
# Slowdown factor of each implementation relative to the quickest one.
resdf_scalar['diff vs fastest'] = (
    resdf_scalar['min time'] / resdf_scalar['min time'].min())
# ---- Array benchmark ----
# Ten-element array inputs (fv stays scalar); the timed statements resolve
# these names, as well as the existing `r` and `n`, through globals().
rate = np.arange(2, 12) * 1e-2
nper = np.arange(200, 210)
pv = np.arange(1e6, 1e6 + 10)
fv = -100e3

t_npf_array = timeit.repeat(
    "npf.pmt(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())
t_my_pmt_array = timeit.repeat(
    "my_pmt(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())
t_my_pmt_optimised_array = timeit.repeat(
    "my_pmt_optimised(rate, nper, pv, fv )",
    repeat=r, number=n, globals=globals())

# One row per implementation.  The minimum over the repeats is reported
# rather than the average, as the timeit documentation recommends.
resdf_array = pd.DataFrame(
    {'min time': [
        min(t_npf_array),
        min(t_my_pmt_array),
        min(t_my_pmt_optimised_array),
    ]},
    index=[
        'npf',
        'my array function, no numba',
        'my scalar/array function, numba',
    ],
)
# Slowdown factor of each implementation relative to the quickest one.
resdf_array['diff vs fastest'] = (
    resdf_array['min time'] / resdf_array['min time'].min())