pandas-dev · TomAugspurger · Dec 28, 2017 · Dec 20, 2017 · jreback · Dec 21, 2017
diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in
@@ -36,7 +36,8 @@ def get_dispatch(dtypes):
 def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels):
+                       ndarray[int64_t] labels,
+                       Py_ssize_t min_count=1):
     """
     Only aggregates on axis=0
     """
@@ -88,7 +89,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
         for i in range(ncounts):
             for j in range(K):
-                if nobs[i, j] == 0:
+                if nobs[i, j] < min_count:
                     out[i, j] = NAN
                 else:
                     out[i, j] = sumx[i, j]
@@ -99,7 +100,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[{{c_type}}, ndim=2] values,
-                        ndarray[int64_t] labels):
+                        ndarray[int64_t] labels,
+                        Py_ssize_t min_count=1):
     """
     Only aggregates on axis=0
     """
@@ -147,7 +149,7 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 
         for i in range(ncounts):
             for j in range(K):
-                if nobs[i, j] == 0:
+                if nobs[i, j] < min_count:
                     out[i, j] = NAN
                 else:
                     out[i, j] = prodx[i, j]
@@ -159,12 +161,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{dest_type2}}, ndim=2] values,
-                       ndarray[int64_t] labels):
+                       ndarray[int64_t] labels,
+                       Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{dest_type2}} val, ct, oldmean
         ndarray[{{dest_type2}}, ndim=2] nobs, mean
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -208,12 +213,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[{{dest_type2}}, ndim=2] values,
-                        ndarray[int64_t] labels):
+                        ndarray[int64_t] labels,
+                        Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         {{dest_type2}} val, count
         ndarray[{{dest_type2}}, ndim=2] sumx, nobs
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -263,7 +271,8 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                   ndarray[int64_t] counts,
                   ndarray[{{dest_type2}}, ndim=2] values,
-                  ndarray[int64_t] labels):
+                  ndarray[int64_t] labels,
+                  Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -272,6 +281,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
         {{dest_type2}} val, count
         Py_ssize_t ngroups = len(counts)
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if len(labels) == 0:
         return
 
@@ -332,7 +343,8 @@ def get_dispatch(dtypes):
 def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[{{c_type}}, ndim=2] values,
-                        ndarray[int64_t] labels):
+                        ndarray[int64_t] labels,
+                        Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -342,6 +354,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
         ndarray[{{dest_type2}}, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -382,7 +396,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{c_type}}, ndim=2] values,
-                       ndarray[int64_t] labels, int64_t rank):
+                       ndarray[int64_t] labels, int64_t rank,
+                       Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -392,6 +407,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
         ndarray[{{dest_type2}}, ndim=2] resx
         ndarray[int64_t, ndim=2] nobs
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -455,7 +472,8 @@ def get_dispatch(dtypes):
 def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{dest_type2}}, ndim=2] values,
-                       ndarray[int64_t] labels):
+                       ndarray[int64_t] labels,
+                       Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -464,6 +482,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
         {{dest_type2}} val, count
         ndarray[{{dest_type2}}, ndim=2] maxx, nobs
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -526,7 +546,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
                        ndarray[int64_t] counts,
                        ndarray[{{dest_type2}}, ndim=2] values,
-                       ndarray[int64_t] labels):
+                       ndarray[int64_t] labels,
+                       Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -535,6 +556,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
         {{dest_type2}} val, count
         ndarray[{{dest_type2}}, ndim=2] minx, nobs
 
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     if not len(values) == len(labels):
         raise AssertionError("len(index) != len(labels)")
 
@@ -686,7 +709,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
 def group_median_float64(ndarray[float64_t, ndim=2] out,
                          ndarray[int64_t] counts,
                          ndarray[float64_t, ndim=2] values,
-                         ndarray[int64_t] labels):
+                         ndarray[int64_t] labels,
+                         Py_ssize_t min_count=-1):
     """
     Only aggregates on axis=0
     """
@@ -695,6 +719,9 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
         ndarray[int64_t] _counts
         ndarray data
         float64_t* ptr
+
+    assert min_count == -1, "'min_count' only used in add and prod"
+
     ngroups = len(counts)
     N, K = (<object> values).shape
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7322,7 +7322,8 @@ def _add_numeric_operations(cls):
         @Substitution(outname='mad',
                       desc="Return the mean absolute deviation of the values "
                            "for the requested axis",
-                      name1=name, name2=name2, axis_descr=axis_descr)
+                      name1=name, name2=name2, axis_descr=axis_descr,
+                      min_count='', examples='')
         @Appender(_num_doc)
         def mad(self, axis=None, skipna=None, level=None):
             if skipna is None:
@@ -7363,7 +7364,8 @@ def mad(self, axis=None, skipna=None, level=None):
         @Substitution(outname='compounded',
                       desc="Return the compound percentage of the values for "
                       "the requested axis", name1=name, name2=name2,
-                      axis_descr=axis_descr)
+                      axis_descr=axis_descr,
+                      min_count='', examples='')
         @Appender(_num_doc)
         def compound(self, axis=None, skipna=None, level=None):
             if skipna is None:
@@ -7387,10 +7389,10 @@ def compound(self, axis=None, skipna=None, level=None):
             lambda y, axis: np.maximum.accumulate(y, axis), "max",
             -np.inf, np.nan)
 
-        cls.sum = _make_stat_function(
+        cls.sum = _make_min_count_stat_function(
             cls, 'sum', name, name2, axis_descr,
             'Return the sum of the values for the requested axis',
-            nanops.nansum)
+            nanops.nansum, _sum_examples)
         cls.mean = _make_stat_function(
             cls, 'mean', name, name2, axis_descr,
             'Return the mean of the values for the requested axis',
@@ -7406,10 +7408,10 @@ def compound(self, axis=None, skipna=None, level=None):
             "by N-1\n",
             nanops.nankurt)
         cls.kurtosis = cls.kurt
-        cls.prod = _make_stat_function(
+        cls.prod = _make_min_count_stat_function(
             cls, 'prod', name, name2, axis_descr,
             'Return the product of the values for the requested axis',
-            nanops.nanprod)
+            nanops.nanprod, _prod_examples)
         cls.product = cls.prod
         cls.median = _make_stat_function(
             cls, 'median', name, name2, axis_descr,
@@ -7540,10 +7542,13 @@ def _doc_parms(cls):
 numeric_only : boolean, default None
     Include only float, int, boolean columns. If None, will attempt to use
     everything, then use only numeric data. Not implemented for Series.
+%(min_count)s\
 
 Returns
 -------
-%(outname)s : %(name1)s or %(name2)s (if level specified)\n"""
+%(outname)s : %(name1)s or %(name2)s (if level specified)
+
+%(examples)s"""
 
 _num_ddof_doc = """
 
@@ -7611,9 +7616,92 @@ def _doc_parms(cls):
 """
 
 
+_sum_examples = """\
+Examples
+--------
+By default, the sum of an empty series is ``NaN``.
+
+>>> pd.Series([]).sum()  # min_count=1 is the default
+nan
+
+This can be controlled with the ``min_count`` parameter. For example, if
+you'd like the sum of an empty series to be 0, pass ``min_count=0``.
+
+>>> pd.Series([]).sum(min_count=0)
+0.0
+
+Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
+empty series identically.
+
+>>> pd.Series([np.nan]).sum()
+nan
+
+>>> pd.Series([np.nan]).sum(min_count=0)
+0.0
+"""
+
+_prod_examples = """\
+Examples
+--------
+By default, the product of an empty series is ``NaN``
+
+>>> pd.Series([]).prod()
+nan
+
+This can be controlled with the ``min_count`` parameter
+
+>>> pd.Series([]).prod(min_count=0)
+1.0
+
+Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
+empty series identically.
+
+>>> pd.Series([np.nan]).prod()
+nan
+
+>>> pd.Series([np.nan]).sum(min_count=0)
+1.0
+"""
+
+
+_min_count_stub = """\
+min_count : int, default 1
+    The required number of valid values to perform the operation. If fewer than
+    ``min_count`` non-NA values are present the result will be NA.
+
+    .. versionadded :: 0.21.2
+
+       Added with the default being 1. This means the sum or product
+       of an all-NA or empty series is ``NaN``.
+"""
+
+
+def _make_min_count_stat_function(cls, name, name1, name2, axis_descr, desc,
+                                  f, examples):
+    @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
+                  axis_descr=axis_descr, min_count=_min_count_stub,
+                  examples=examples)
+    @Appender(_num_doc)
+    def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
+                  min_count=1,
+                  **kwargs):
+        nv.validate_stat_func(tuple(), kwargs, fname=name)
+        if skipna is None:
+            skipna = True
+        if axis is None:
+            axis = self._stat_axis_number
+        if level is not None:
+            return self._agg_by_level(name, axis=axis, level=level,
+                                      skipna=skipna, min_count=min_count)
+        return self._reduce(f, name, axis=axis, skipna=skipna,
+                            numeric_only=numeric_only, min_count=min_count)
+
+    return set_function_name(stat_func, name, cls)
+
+
 def _make_stat_function(cls, name, name1, name2, axis_descr, desc, f):
     @Substitution(outname=name, desc=desc, name1=name1, name2=name2,
-                  axis_descr=axis_descr)
+                  axis_descr=axis_descr, min_count='', examples='')
     @Appender(_num_doc)
     def stat_func(self, axis=None, skipna=None, level=None, numeric_only=None,
                   **kwargs):

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -986,15 +986,17 @@ def _cython_transform(self, how, numeric_only=True):
 
         return self._wrap_transformed_output(output, names)
 
-    def _cython_agg_general(self, how, alt=None, numeric_only=True):
+    def _cython_agg_general(self, how, alt=None, numeric_only=True,
+                            min_count=-1):
         output = {}
         for name, obj in self._iterate_slices():
             is_numeric = is_numeric_dtype(obj.dtype)
             if numeric_only and not is_numeric:
                 continue
 
             try:
-                result, names = self.grouper.aggregate(obj.values, how)
+                result, names = self.grouper.aggregate(obj.values, how,
+                                                       min_count=min_count)
             except AssertionError as e:
                 raise GroupByError(str(e))
             output[name] = self._try_cast(result, obj)
@@ -1301,7 +1303,8 @@ def _add_numeric_operations(cls):
         """ add numeric operations to the GroupBy generically """
 
         def groupby_function(name, alias, npfunc,
-                             numeric_only=True, _convert=False):
+                             numeric_only=True, _convert=False,
+                             min_count=-1):
 
             _local_template = "Compute %(f)s of group values"
 
@@ -1311,6 +1314,8 @@ def groupby_function(name, alias, npfunc,
             def f(self, **kwargs):
                 if 'numeric_only' not in kwargs:
                     kwargs['numeric_only'] = numeric_only
+                if 'min_count' not in kwargs:
+                    kwargs['min_count'] = min_count
                 self._set_group_selection()
                 try:
                     return self._cython_agg_general(
@@ -1358,8 +1363,8 @@ def last(x):
             else:
                 return last(x)
 
-        cls.sum = groupby_function('sum', 'add', np.sum)
-        cls.prod = groupby_function('prod', 'prod', np.prod)
+        cls.sum = groupby_function('sum', 'add', np.sum, min_count=1)
+        cls.prod = groupby_function('prod', 'prod', np.prod, min_count=1)
         cls.min = groupby_function('min', 'min', np.min, numeric_only=False)
         cls.max = groupby_function('max', 'max', np.max, numeric_only=False)
         cls.first = groupby_function('first', 'first', first_compat,
@@ -2139,7 +2144,7 @@ def get_group_levels(self):
             'var': 'group_var',
             'first': {
                 'name': 'group_nth',
-                'f': lambda func, a, b, c, d: func(a, b, c, d, 1)
+                'f': lambda func, a, b, c, d, e: func(a, b, c, d, 1, -1)
             },
             'last': 'group_last',
             'ohlc': 'group_ohlc',
@@ -2209,7 +2214,7 @@ def wrapper(*args, **kwargs):
                                       (how, dtype_str))
         return func, dtype_str
 
-    def _cython_operation(self, kind, values, how, axis):
+    def _cython_operation(self, kind, values, how, axis, min_count=-1):
         assert kind in ['transform', 'aggregate']
 
         # can we do this operation with our cython functions
@@ -2294,11 +2299,12 @@ def _cython_operation(self, kind, values, how, axis):
             counts = np.zeros(self.ngroups, dtype=np.int64)
             result = self._aggregate(
                 result, counts, values, labels, func, is_numeric,
-                is_datetimelike)
+                is_datetimelike, min_count)
         elif kind == 'transform':
             result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
                                  fill_value=np.nan)
 
+            # TODO: min_count
             result = self._transform(
                 result, values, labels, func, is_numeric, is_datetimelike)
 
@@ -2335,14 +2341,15 @@ def _cython_operation(self, kind, values, how, axis):
 
         return result, names
 
-    def aggregate(self, values, how, axis=0):
-        return self._cython_operation('aggregate', values, how, axis)
+    def aggregate(self, values, how, axis=0, min_count=-1):
+        return self._cython_operation('aggregate', values, how, axis,
+                                      min_count=min_count)
 
     def transform(self, values, how, axis=0):
         return self._cython_operation('transform', values, how, axis)
 
     def _aggregate(self, result, counts, values, comp_ids, agg_func,
-                   is_numeric, is_datetimelike):
+                   is_numeric, is_datetimelike, min_count=-1):
         if values.ndim > 3:
             # punting for now
             raise NotImplementedError("number of dimensions is currently "
@@ -2351,9 +2358,10 @@ def _aggregate(self, result, counts, values, comp_ids, agg_func,
             for i, chunk in enumerate(values.transpose(2, 0, 1)):
 
                 chunk = chunk.squeeze()
-                agg_func(result[:, :, i], counts, chunk, comp_ids)
+                agg_func(result[:, :, i], counts, chunk, comp_ids,
+                         min_count)
         else:
-            agg_func(result, counts, values, comp_ids)
+            agg_func(result, counts, values, comp_ids, min_count)
 
         return result
 
@@ -3643,9 +3651,10 @@ def _iterate_slices(self):
                 continue
             yield val, slicer(val)
 
-    def _cython_agg_general(self, how, alt=None, numeric_only=True):
+    def _cython_agg_general(self, how, alt=None, numeric_only=True,
+                            min_count=-1):
         new_items, new_blocks = self._cython_agg_blocks(
-            how, alt=alt, numeric_only=numeric_only)
+            how, alt=alt, numeric_only=numeric_only, min_count=min_count)
         return self._wrap_agged_blocks(new_items, new_blocks)
 
     def _wrap_agged_blocks(self, items, blocks):
@@ -3671,7 +3680,8 @@ def _wrap_agged_blocks(self, items, blocks):
 
     _block_agg_axis = 0
 
-    def _cython_agg_blocks(self, how, alt=None, numeric_only=True):
+    def _cython_agg_blocks(self, how, alt=None, numeric_only=True,
+                           min_count=-1):
         # TODO: the actual managing of mgr_locs is a PITA
         # here, it should happen via BlockManager.combine
 
@@ -3688,7 +3698,7 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True):
             locs = block.mgr_locs.as_array
             try:
                 result, _ = self.grouper.aggregate(
-                    block.values, how, axis=agg_axis)
+                    block.values, how, axis=agg_axis, min_count=min_count)
             except NotImplementedError:
                 # generally if we have numeric_only=False
                 # and non-applicable functions

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -107,21 +107,9 @@ def f(values, axis=None, skipna=True, **kwds):
                     if k not in kwds:
                         kwds[k] = v
             try:
-                if values.size == 0:
-
-                    # we either return np.nan or pd.NaT
-                    if is_numeric_dtype(values):
-                        values = values.astype('float64')
-                    fill_value = na_value_for_dtype(values.dtype)
-
-                    if values.ndim == 1:
-                        return fill_value
-                    else:
-                        result_shape = (values.shape[:axis] +
-                                        values.shape[axis + 1:])
-                        result = np.empty(result_shape, dtype=values.dtype)
-                        result.fill(fill_value)
-                        return result
+                if values.size == 0 and kwds.get('min_count') is None:
+                    # We are empty, returning NA for our type
+                    return _na_for_min_count(values, axis)
 
                 if (_USE_BOTTLENECK and skipna and
                         _bn_ok_dtype(values.dtype, bn_name)):
@@ -292,6 +280,22 @@ def _wrap_results(result, dtype):
     return result
 
 
+def _na_for_min_count(values, axis):
+    # we either return np.nan or pd.NaT
+    if is_numeric_dtype(values):
+        values = values.astype('float64')
+    fill_value = na_value_for_dtype(values.dtype)
+
+    if values.ndim == 1:
+        return fill_value
+    else:
+        result_shape = (values.shape[:axis] +
+                        values.shape[axis + 1:])
+        result = np.empty(result_shape, dtype=values.dtype)
+        result.fill(fill_value)
+        return result
+
+
 def nanany(values, axis=None, skipna=True):
     values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna)
     return values.any(axis)
@@ -304,15 +308,15 @@ def nanall(values, axis=None, skipna=True):
 
 @disallow('M8')
 @bottleneck_switch()
-def nansum(values, axis=None, skipna=True):
+def nansum(values, axis=None, skipna=True, min_count=1):
     values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
     dtype_sum = dtype_max
     if is_float_dtype(dtype):
         dtype_sum = dtype
     elif is_timedelta64_dtype(dtype):
         dtype_sum = np.float64
     the_sum = values.sum(axis, dtype=dtype_sum)
-    the_sum = _maybe_null_out(the_sum, axis, mask)
+    the_sum = _maybe_null_out(the_sum, axis, mask, min_count=min_count)
 
     return _wrap_results(the_sum, dtype)
 
@@ -641,13 +645,13 @@ def nankurt(values, axis=None, skipna=True):
 
 
 @disallow('M8', 'm8')
-def nanprod(values, axis=None, skipna=True):
+def nanprod(values, axis=None, skipna=True, min_count=1):
     mask = isna(values)
     if skipna and not is_any_int_dtype(values):
         values = values.copy()
         values[mask] = 1
     result = values.prod(axis)
-    return _maybe_null_out(result, axis, mask)
+    return _maybe_null_out(result, axis, mask, min_count=min_count)
 
 
 def _maybe_arg_null_out(result, axis, mask, skipna):
@@ -683,9 +687,9 @@ def _get_counts(mask, axis, dtype=float):
         return np.array(count, dtype=dtype)
 
 
-def _maybe_null_out(result, axis, mask):
+def _maybe_null_out(result, axis, mask, min_count=1):
     if axis is not None and getattr(result, 'ndim', False):
-        null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
+        null_mask = (mask.shape[axis] - mask.sum(axis) - min_count) < 0
         if np.any(null_mask):
             if is_numeric_dtype(result):
                 if np.iscomplexobj(result):
@@ -698,7 +702,7 @@ def _maybe_null_out(result, axis, mask):
                 result[null_mask] = None
     elif result is not tslib.NaT:
         null_mask = mask.size - mask.sum()
-        if null_mask == 0:
+        if null_mask < min_count:
             result = np.nan
 
     return result

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -625,9 +625,20 @@ def size(self):
 
 Resampler._deprecated_valids += dir(Resampler)
 
+
+# downsample methods
+for method in ['sum', 'prod']:
+
+    def f(self, _method=method, min_count=1, *args, **kwargs):
+        nv.validate_resampler_func(_method, args, kwargs)
+        return self._downsample(_method, min_count=min_count)
+    f.__doc__ = getattr(GroupBy, method).__doc__
+    setattr(Resampler, method, f)
+
+
 # downsample methods
-for method in ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
-               'median', 'prod', 'ohlc']:
+for method in ['min', 'max', 'first', 'last', 'mean', 'sem',
+               'median', 'ohlc']:
 
     def f(self, _method=method, *args, **kwargs):
         nv.validate_resampler_func(_method, args, kwargs)

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -973,6 +973,37 @@ def test_sum_corner(self):
         assert len(axis0) == 0
         assert len(axis1) == 0
 
+    @pytest.mark.parametrize('method, unit', [
+        ('sum', 0),
+        ('prod', 1),
+    ])
+    def test_sum_prod_nanops(self, method, unit):
+        idx = ['a', 'b', 'c']
+        df = pd.DataFrame({"a": [unit, unit],
+                           "b": [unit, np.nan],
+                           "c": [np.nan, np.nan]})
+
+        result = getattr(df, method)(min_count=1)
+        expected = pd.Series([unit, unit, np.nan], index=idx)
+        tm.assert_series_equal(result, expected)
+
+        result = getattr(df, method)(min_count=0)
+        expected = pd.Series([unit, unit, unit], index=idx, dtype='float64')
+        tm.assert_series_equal(result, expected)
+
+        result = getattr(df.iloc[1:], method)(min_count=1)
+        expected = pd.Series([unit, np.nan, np.nan], index=idx)
+        tm.assert_series_equal(result, expected)
+
+        df = pd.DataFrame({"A": [unit] * 10, "B": [unit] * 5 + [np.nan] * 5})
+        result = getattr(df, method)(min_count=5)
+        expected = pd.Series(result, index=['A', 'B'])
+        tm.assert_series_equal(result, expected)
+
+        result = getattr(df, method)(min_count=6)
+        expected = pd.Series(result, index=['A', 'B'])
+        tm.assert_series_equal(result, expected)
+
     def test_sum_object(self):
         values = self.frame.values.astype(int)
         frame = DataFrame(values, index=self.frame.index,

diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py
@@ -809,26 +809,33 @@ def test__cython_agg_general(self):
                 exc.args += ('operation: %s' % op, )
                 raise
 
-    def test_cython_agg_empty_buckets(self):
-        ops = [('mean', np.mean),
-               ('median', lambda x: np.median(x) if len(x) > 0 else np.nan),
-               ('var', lambda x: np.var(x, ddof=1)),
-               ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan),
-               ('prod', np.prod),
-               ('min', np.min),
-               ('max', np.max), ]
-
+    @pytest.mark.parametrize('op, targop', [
+        ('mean', np.mean),
+        ('median', lambda x: np.median(x) if len(x) > 0 else np.nan),
+        ('var', lambda x: np.var(x, ddof=1)),
+        ('add', lambda x: np.sum(x) if len(x) > 0 else np.nan),
+        ('prod', np.prod),
+        ('min', np.min),
+        ('max', np.max), ]
+    )
+    def test_cython_agg_empty_buckets(self, op, targop):
         df = pd.DataFrame([11, 12, 13])
         grps = range(0, 55, 5)
 
-        for op, targop in ops:
-            result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(op)
-            expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x))
-            try:
-                tm.assert_frame_equal(result, expected)
-            except BaseException as exc:
-                exc.args += ('operation: %s' % op,)
-                raise
+        # calling _cython_agg_general directly, instead of via the user API
+        # which sets different values for min_count, so do that here.
+        if op in ('add', 'prod'):
+            min_count = 1
+        else:
+            min_count = -1
+        result = df.groupby(pd.cut(df[0], grps))._cython_agg_general(
+            op, min_count=min_count)
+        expected = df.groupby(pd.cut(df[0], grps)).agg(lambda x: targop(x))
+        try:
+            tm.assert_frame_equal(result, expected)
+        except BaseException as exc:
+            exc.args += ('operation: %s' % op,)
+            raise
 
     def test_agg_over_numpy_arrays(self):
         # GH 3788

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -662,3 +662,48 @@ def test_groupby_categorical_two_columns(self):
                          "C3": [nan, nan, nan, nan, 10, 100,
                                 nan, nan, nan, nan, 200, 34]}, index=idx)
         tm.assert_frame_equal(res, exp)
+
+    def test_empty_sum(self):
+        # https://github.com/pandas-dev/pandas/issues/18678
+        df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
+                                               categories=['a', 'b', 'c']),
+                           'B': [1, 2, 1]})
+        expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+
+        # NA by default
+        result = df.groupby("A").B.sum()
+        expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
+
+        # min_count=0
+        result = df.groupby("A").B.sum(min_count=0)
+        expected = pd.Series([3, 1, 0], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
+
+        # min_count=1
+        result = df.groupby("A").B.sum(min_count=1)
+        expected = pd.Series([3, 1, np.nan], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
+
+    def test_empty_prod(self):
+        # https://github.com/pandas-dev/pandas/issues/18678
+        df = pd.DataFrame({"A": pd.Categorical(['a', 'a', 'b'],
+                                               categories=['a', 'b', 'c']),
+                           'B': [1, 2, 1]})
+
+        expected_idx = pd.CategoricalIndex(['a', 'b', 'c'], name='A')
+
+        # NA by default
+        result = df.groupby("A").B.prod()
+        expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
+
+        # min_count=0
+        result = df.groupby("A").B.prod(min_count=0)
+        expected = pd.Series([2, 1, 1], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
+
+        # min_count=1
+        result = df.groupby("A").B.prod(min_count=1)
+        expected = pd.Series([2, 1, np.nan], expected_idx, name='B')
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -30,38 +30,122 @@
 class TestSeriesAnalytics(TestData):
 
     @pytest.mark.parametrize("use_bottleneck", [True, False])
-    @pytest.mark.parametrize("method", ["sum", "prod"])
-    def test_empty(self, method, use_bottleneck):
-
+    @pytest.mark.parametrize("method, unit", [
+        ("sum", 0.0),
+        ("prod", 1.0)
+    ])
+    def test_empty(self, method, unit, use_bottleneck):
         with pd.option_context("use_bottleneck", use_bottleneck):
             # GH 9422
-            # treat all missing as NaN
+            # Entirely empty
             s = Series([])
+            # NA by default
             result = getattr(s, method)()
             assert isna(result)
 
+            # Explict
+            result = getattr(s, method)(min_count=0)
+            assert result == unit
+
+            result = getattr(s, method)(min_count=1)
+            assert isna(result)
+
+            # Skipna, default
             result = getattr(s, method)(skipna=True)
             assert isna(result)
 
+            # Skipna, explicit
+            result = getattr(s, method)(skipna=True, min_count=0)
+            assert result == unit
+
+            result = getattr(s, method)(skipna=True, min_count=1)
+            assert isna(result)
+
+            # All-NA
             s = Series([np.nan])
+            # NA by default
             result = getattr(s, method)()
             assert isna(result)
 
+            # Explicit
+            result = getattr(s, method)(min_count=0)
+            assert result == unit
+
+            result = getattr(s, method)(min_count=1)
+            assert isna(result)
+
+            # Skipna, default
             result = getattr(s, method)(skipna=True)
             assert isna(result)
 
+            # skipna, explicit
+            result = getattr(s, method)(skipna=True, min_count=0)
+            assert result == unit
+
+            result = getattr(s, method)(skipna=True, min_count=1)
+            assert isna(result)
+
+            # Mix of valid, empty
             s = Series([np.nan, 1])
+            # Default
             result = getattr(s, method)()
             assert result == 1.0
 
-            s = Series([np.nan, 1])
+            # Explicit
+            result = getattr(s, method)(min_count=0)
+            assert result == 1.0
+
+            result = getattr(s, method)(min_count=1)
+            assert result == 1.0
+
+            # Skipna
             result = getattr(s, method)(skipna=True)
             assert result == 1.0
 
+            result = getattr(s, method)(skipna=True, min_count=0)
+            assert result == 1.0
+
+            result = getattr(s, method)(skipna=True, min_count=1)
+            assert result == 1.0
+
             # GH #844 (changed in 9422)
             df = DataFrame(np.empty((10, 0)))
             assert (df.sum(1).isnull()).all()
 
+            s = pd.Series([1])
+            result = getattr(s, method)(min_count=2)
+            assert isna(result)
+
+            s = pd.Series([np.nan])
+            result = getattr(s, method)(min_count=2)
+            assert isna(result)
+
+            s = pd.Series([np.nan, 1])
+            result = getattr(s, method)(min_count=2)
+            assert isna(result)
+
+    @pytest.mark.parametrize('method, unit', [
+        ('sum', 0.0),
+        ('prod', 1.0),
+    ])
+    def test_empty_multi(self, method, unit):
+        s = pd.Series([1, np.nan, np.nan, np.nan],
+                      index=pd.MultiIndex.from_product([('a', 'b'), (0, 1)]))
+        # NaN by default
+        result = getattr(s, method)(level=0)
+        expected = pd.Series([1, np.nan], index=['a', 'b'])
+        tm.assert_series_equal(result, expected)
+
+        # min_count=0
+        result = getattr(s, method)(level=0, min_count=0)
+        expected = pd.Series([1, unit], index=['a', 'b'])
+        tm.assert_series_equal(result, expected)
+
+        # min_count=1
+        result = getattr(s, method)(level=0, min_count=1)
+        expected = pd.Series([1, np.nan], index=['a', 'b'])
+        tm.assert_series_equal(result, expected)
+
     @pytest.mark.parametrize(
         "method", ['sum', 'mean', 'median', 'std', 'var'])
     def test_ops_consistency_on_empty(self, method):

diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
@@ -4,6 +4,7 @@
 from datetime import datetime, timedelta
 from functools import partial
 from textwrap import dedent
+from operator import methodcaller
 
 import pytz
 import pytest
@@ -3382,6 +3383,34 @@ def test_aggregate_normal(self):
             assert_frame_equal(expected, dt_result)
         """
 
+    @pytest.mark.parametrize('method, unit', [
+        ('sum', 0),
+        ('prod', 1),
+    ])
+    def test_resample_entirly_nat_window(self, method, unit):
+        s = pd.Series([0] * 2 + [np.nan] * 2,
+                      index=pd.date_range('2017', periods=4))
+        # nan by default
+        result = methodcaller(method)(s.resample("2d"))
+        expected = pd.Series([0.0, np.nan],
+                             index=pd.to_datetime(['2017-01-01',
+                                                   '2017-01-03']))
+        tm.assert_series_equal(result, expected)
+
+        # min_count=0
+        result = methodcaller(method, min_count=0)(s.resample("2d"))
+        expected = pd.Series([0.0, unit],
+                             index=pd.to_datetime(['2017-01-01',
+                                                   '2017-01-03']))
+        tm.assert_series_equal(result, expected)
+
+        # min_count=1
+        result = methodcaller(method, min_count=1)(s.resample("2d"))
+        expected = pd.Series([0.0, np.nan],
+                             index=pd.to_datetime(['2017-01-01',
+                                                   '2017-01-03']))
+        tm.assert_series_equal(result, expected)
+
     def test_aggregate_with_nat(self):
         # check TimeGrouper's aggregation is identical as normal groupby
 
@@ -3441,3 +3470,29 @@ def test_repr(self):
                     "closed='left', label='left', how='mean', "
                     "convention='e', base=0)")
         assert result == expected
+
+    @pytest.mark.parametrize('method, unit', [
+        ('sum', 0),
+        ('prod', 1),
+    ])
+    def test_upsample_sum(self, method, unit):
+        s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
+        resampled = s.resample("30T")
+        index = pd.to_datetime(['2017-01-01T00:00:00',
+                                '2017-01-01T00:30:00',
+                                '2017-01-01T01:00:00'])
+
+        # NaN by default
+        result = methodcaller(method)(resampled)
+        expected = pd.Series([1, np.nan, 1], index=index)
+        tm.assert_series_equal(result, expected)
+
+        # min_count=0
+        result = methodcaller(method, min_count=0)(resampled)
+        expected = pd.Series([1, unit, 1], index=index)
+        tm.assert_series_equal(result, expected)
+
+        # min_count=1
+        result = methodcaller(method, min_count=1)(resampled)
+        expected = pd.Series([1, np.nan, 1], index=index)
+        tm.assert_series_equal(result, expected)