Skip to content

pmt() function too slow - here are some ways to make it faster #36

Open
@garfieldthecat

Description

@garfieldthecat

The pmt() function of numpy_financial is too slow and can become the main bottleneck whenever it must be run thousands, if not millions, of times - e.g. when running multiple scenarios on many loans or mortgages based on a floating rate.

I propose below a few alternative ways to write new, more optimised functions, which can be 7 to 60 times faster, depending on the circumstances:

  • if you need to run this function millions of times and your code allows it, you'll be better off using one function for when the inputs, and the output, are scalar, and a separate one for when they are arrays
  • unless you have reasons not to, using numba will speed things up even more
  • if you cannot use numba and never run this function on scalars, you won't see much benefit

My findings are that, on my machine at least:

  • For scalars: npf is slower than every other implementation - about 30 times slower than my scalar, non-numba function, about 60 times slower than my scalar numba function, and also significantly slower than my array function
  • For arrays: my numba function is about 7 times faster than npf's; without numba, they are about the same

I copied below the exact code I used to test all of this:

import numpy as np
import pandas as pd
import numpy_financial as npf
import timeit
import numba

# Reference timestamp taken when the script starts. The `time` module is not
# imported in this file, so use the timer exposed by the already-imported
# `timeit` module instead of `time.time()`, which raised NameError.
start = timeit.default_timer()


def my_pmt(rate, nper, pv, fv=0, when=0):
    """Compute the periodic payment for scalar or array inputs.

    Parameters
    ----------
    rate : scalar or array-like
        Interest rate per period.
    nper : scalar or array-like
        Number of periods.
    pv : scalar or array-like
        Present value.
    fv : scalar or array-like, optional
        Future value (default 0).
    when : scalar or array-like, optional
        0 = payment at end of period, 1 = payment at beginning (default 0).

    Returns
    -------
    NumPy scalar or ndarray
        The payment; ``nan`` wherever ``nper == 0``, and ``-(fv + pv) / nper``
        wherever ``rate == 0``.
    """
    # np.where evaluates every branch eagerly. With plain Python numbers a
    # zero rate or zero nper would raise ZeroDivisionError in a branch that is
    # later discarded, so promote both to float ndarrays: NumPy division by
    # zero yields inf/nan instead of raising.
    rate = np.asarray(rate, dtype=float)
    nper = np.asarray(nper, dtype=float)

    # The discarded branches may legitimately divide by zero; silence the
    # resulting floating-point warnings only for this computation.
    with np.errstate(divide="ignore", invalid="ignore"):
        growth = (1 + rate) ** nper
        zero_rate_payment = -(fv + pv) / nper
        general_payment = (
            (-pv * growth - fv) * rate / ((growth - 1) * (1 + rate * when))
        )
        payment = np.where(
            nper == 0,
            np.nan,
            np.where(rate == 0, zero_rate_payment, general_payment),
        )

    # Multiplying by 1 turns a 0-d array into a NumPy scalar.
    return 1 * payment

@numba.jit
def pmt_numba_array(rate, nper, pv, fv=0, when=0):
    """Numba-compiled payment calculation built on ``np.where``.

    Returns ``nan`` where ``nper == 0``, ``-(fv + pv) / nper`` where
    ``rate == 0``, and the general annuity payment everywhere else.
    ``when`` is 0 for end-of-period payments and 1 for beginning-of-period.
    """
    growth = (1 + rate) ** nper
    zero_rate_payment = -(fv + pv) / nper
    general_payment = (-pv * growth - fv) * rate / ((growth - 1) * (1 + rate * when))
    payment = np.where(rate == 0, zero_rate_payment, general_payment)
    return np.where(nper == 0, np.nan, payment)

def my_pmt_optimised(rate, nper, pv, fv=0, when=0):
    """Dispatch to the scalar or the array numba implementation.

    The scalar implementation is chosen only when ``rate``, ``nper``, ``pv``
    and ``fv`` are all scalars according to ``np.isscalar``; ``when`` is not
    inspected. Otherwise the array implementation is used.
    """
    all_scalar = all(np.isscalar(arg) for arg in (rate, nper, pv, fv))
    implementation = pmt_numba_scalar if all_scalar else pmt_numba_array
    return implementation(rate, nper, pv, fv, when)
    
    
    


@numba.jit
def pmt_numba_scalar(rate, nper, pv, fv=0, when=0):
    """Numba-compiled payment calculation using explicit branches.

    ``when``: 0 = end of period, 1 = beginning of period.
    Returns ``nan`` when ``nper`` is 0 and ``-(fv + pv) / nper`` when
    ``rate`` is 0; otherwise the general annuity payment.
    """
    if nper == 0:
        return np.nan
    if rate == 0:
        return -(fv + pv) / nper

    growth = (1 + rate) ** nper
    numerator = (-pv * growth - fv) * rate
    denominator = (growth - 1) * (1 + rate * when)
    return numerator / denominator
    
    
def pmt_no_numba_scalar(rate, nper, pv, fv=0, when=0):
    """Pure-Python payment calculation using explicit branches.

    Returns ``nan`` when ``nper`` is 0 and ``-(fv + pv) / nper`` when
    ``rate`` is 0; otherwise the general annuity payment, where ``when``
    scales the denominator by ``1 + rate * when``.
    """
    # Guard clauses handle the two degenerate cases before the general formula.
    if nper == 0:
        return np.nan
    if rate == 0:
        return -(fv + pv) / nper

    compound = (1 + rate) ** nper
    timing_factor = 1 + rate * when
    return (-pv * compound - fv) * rate / ((compound - 1) * timing_factor)


def pmt_npf(rate, nper, pv, fv=0, when=0):
    """Payment calculation that masks zero rates before dividing.

    Inputs are used as given (no conversion to arrays). Where ``rate == 0``
    the divisor is ``nper``; elsewhere it is
    ``(1 + rate * when) * ((1 + rate) ** nper - 1) / rate``.
    NOTE(review): presumably mirrors numpy_financial's pmt logic - confirm
    against the library source.
    """
    growth = (1 + rate) ** nper
    zero_rate = rate == 0
    # Substitute 1 for zero rates so the division below never divides by zero.
    safe_rate = np.where(zero_rate, 1, rate)
    annuity_factor = (1 + safe_rate * when) * (growth - 1) / safe_rate
    divisor = np.where(zero_rate, nper, annuity_factor)
    return -(fv + pv * growth) / divisor

# --- Scalar benchmark -------------------------------------------------------
# r: number of timing repeats; n: number of calls per repeat.
r = 4
n = int(1e4)

# Scalar inputs shared by every timed statement below. The timeit statement
# strings look these names up through globals(), so the names must not change.
rate = 5e-2
nper = 120
pv = 1e6
fv = -100e3



# Each Timer runs its statement n times per repeat and returns r total timings.
t_my_numba = timeit.Timer("pmt_numba_scalar(rate, nper, pv, fv ) " ,  globals = globals() ).repeat(repeat = r, number = n)
t_my_no_numba = timeit.Timer("pmt_no_numba_scalar(rate, nper, pv, fv ) " ,  globals = globals() ).repeat(repeat = r, number = n)
t_npf = timeit.Timer("npf.pmt(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)
t_my_pmt = timeit.Timer("my_pmt(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)
t_my_pmt_optimised = timeit.Timer("my_pmt_optimised(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)


# One column per implementation, holding the best (minimum) of the r timings.
resdf_scalar = pd.DataFrame(index = ['min time'])
resdf_scalar['my scalar func, numba'] = [min(t_my_numba)]
resdf_scalar['my scalar func, no numba'] = [min(t_my_no_numba)]
resdf_scalar['npf'] = [min(t_npf)]
resdf_scalar['my array function, no numba'] = [min(t_my_pmt)]
resdf_scalar['my scalar/array function, numba'] = [min(t_my_pmt_optimised)]

# The timeit docs explain why the min, not the average, of the repeats is used.
# Transpose so that each implementation becomes a row.
resdf_scalar = resdf_scalar.transpose()
# Despite the column name, this is a ratio: each time divided by the fastest.
resdf_scalar['diff vs fastest'] = (resdf_scalar / resdf_scalar.min() )

# --- Array benchmark --------------------------------------------------------
# Rebind the module-level inputs to 10-element arrays (fv stays a scalar); the
# timeit statement strings below pick them up through globals().
rate =np.arange(2,12)*1e-2
nper = np.arange(200,210)
pv = np.arange(1e6,1e6+10)
fv = -100e3

# Each Timer runs its statement n times per repeat and returns r total timings.
t_npf_array = timeit.Timer("npf.pmt(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)
t_my_pmt_array = timeit.Timer("my_pmt(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)
t_my_pmt_optimised_array = timeit.Timer("my_pmt_optimised(rate, nper, pv, fv )" ,  globals = globals() ).repeat(repeat = r, number = n)


# One column per implementation, holding the best (minimum) of the r timings.
resdf_array = pd.DataFrame(index = ['min time'])
resdf_array['npf'] = [min(t_npf_array)]
resdf_array['my array function, no numba'] = [min(t_my_pmt_array)]
resdf_array['my scalar/array function, numba'] = [min(t_my_pmt_optimised_array)]

# The timeit docs explain why the min, not the average, of the repeats is used.
# Transpose so that each implementation becomes a row.
resdf_array = resdf_array.transpose()
# Despite the column name, this is a ratio: each time divided by the fastest.
resdf_array['diff vs fastest'] = (resdf_array / resdf_array.min() )

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancementNew feature or request

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions