diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index d0785f7db5..b731fcff0b 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -16,6 +16,7 @@
 ### Deprecations
 
 - DIC and BPIC calculations have been removed
+- `njobs` and `nchains` kwargs are deprecated in favor of `cores` and `chains` for `sample`
 
 ## PyMC 3.3 (January 9, 2018)
 
diff --git a/benchmarks/benchmarks/benchmarks.py b/benchmarks/benchmarks/benchmarks.py
index a9401be3fe..ee056a4d98 100644
--- a/benchmarks/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks/benchmarks.py
@@ -115,11 +115,11 @@ def time_drug_evaluation(self):
             pm.Deterministic('difference of stds', group1_std - group2_std)
             pm.Deterministic(
                 'effect size', diff_of_means / np.sqrt((group1_std**2 + group2_std**2) / 2))
-            pm.sample(20000, njobs=4, chains=4)
+            pm.sample(20000, cores=4, chains=4)
 
     def time_glm_hierarchical(self):
         with glm_hierarchical_model():
-            pm.sample(draws=20000, njobs=4, chains=4)
+            pm.sample(draws=20000, cores=4, chains=4)
 
 
 class NUTSInitSuite(object):
@@ -141,7 +141,7 @@ def track_glm_hierarchical_ess(self, init):
         with glm_hierarchical_model():
             start, step = pm.init_nuts(init=init, chains=self.chains, progressbar=False, random_seed=123)
             t0 = time.time()
-            trace = pm.sample(draws=self.draws, step=step, njobs=4, chains=self.chains,
+            trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains,
                               start=start, random_seed=100)
             tot = time.time() - t0
         ess = pm.effective_n(trace, ('mu_a',))['mu_a']
@@ -154,7 +154,7 @@ def track_marginal_mixture_model_ess(self, init):
                                    progressbar=False, random_seed=123)
             start = [{k: v for k, v in start.items()} for _ in range(self.chains)]
             t0 = time.time()
-            trace = pm.sample(draws=self.draws, step=step, njobs=4, chains=self.chains,
+            trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains,
                               start=start, random_seed=100)
             tot = time.time() - t0
         ess = pm.effective_n(trace, ('mu',))['mu'].min()  # worst case
@@ -178,7 +178,7 @@ def track_glm_hierarchical_ess(self, step):
             if step is not None:
                 step = step()
             t0 = time.time()
-            trace = pm.sample(draws=self.draws, step=step, njobs=4, chains=4,
+            trace = pm.sample(draws=self.draws, step=step, cores=4, chains=4,
                               random_seed=100)
             tot = time.time() - t0
         ess = pm.effective_n(trace, ('mu_a',))['mu_a']
diff --git a/docs/source/notebooks/AR.ipynb b/docs/source/notebooks/AR.ipynb
index 83999431a4..6643878e5d 100644
--- a/docs/source/notebooks/AR.ipynb
+++ b/docs/source/notebooks/AR.ipynb
@@ -152,7 +152,7 @@
     "with pm.Model() as ar1:\n",
     "    beta = pm.Normal('beta', mu=0, sd=tau)\n",
     "    data = pm.AR('y', beta, sd=1.0, observed=y)\n",
-    "    trace = pm.sample(1000, njobs=4)\n",
+    "    trace = pm.sample(1000, cores=4)\n",
     "    \n",
     "pm.traceplot(trace);"
    ]
@@ -278,7 +278,7 @@
     "with pm.Model() as ar2:\n",
     "    beta = pm.Normal('beta', mu=0, sd=tau, shape=2)\n",
     "    data = pm.AR('y', beta, sd=1.0, observed=y)\n",
-    "    trace = pm.sample(1000, njobs=4)\n",
+    "    trace = pm.sample(1000, cores=4)\n",
     "    \n",
     "pm.traceplot(trace);"
    ]
@@ -340,7 +340,7 @@
     "    beta = pm.Normal('beta', mu=0, sd=tau)\n",
     "    beta2 = pm.Uniform('beta2')\n",
     "    data = pm.AR('y', [beta, beta2], sd=1.0, observed=y)\n",
-    "    trace = pm.sample(1000, njobs=4)\n",
+    "    trace = pm.sample(1000, cores=4)\n",
     "    \n",
     "pm.traceplot(trace);"
    ]
diff --git a/docs/source/notebooks/BEST.ipynb b/docs/source/notebooks/BEST.ipynb
index e356ca551f..35de247974 100644
--- a/docs/source/notebooks/BEST.ipynb
+++ b/docs/source/notebooks/BEST.ipynb
@@ -260,7 +260,7 @@
    ],
    "source": [
     "with model:\n",
-    "    trace = pm.sample(2000, njobs=2)"
+    "    trace = pm.sample(2000, cores=2)"
    ]
   },
   {
diff --git a/docs/source/notebooks/GLM-linear.ipynb b/docs/source/notebooks/GLM-linear.ipynb
index 40a077d998..25e9f0f93d 100644
--- a/docs/source/notebooks/GLM-linear.ipynb
+++ b/docs/source/notebooks/GLM-linear.ipynb
@@ -198,7 +198,7 @@
     "                        sd=sigma, observed=y)\n",
     "    \n",
     "    # Inference!\n",
-    "    trace = sample(3000, njobs=2) # draw 3000 posterior samples using NUTS sampling"
+    "    trace = sample(3000, cores=2) # draw 3000 posterior samples using NUTS sampling"
    ]
   },
   {
@@ -234,7 +234,7 @@
     "    # specify glm and pass in data. The resulting linear model, its likelihood and \n",
     "    # and all its parameters are automatically added to our model.\n",
     "    glm.GLM.from_formula('y ~ x', data)\n",
-    "    trace = sample(3000, njobs=2) # draw 3000 posterior samples using NUTS sampling"
+    "    trace = sample(3000, cores=2) # draw 3000 posterior samples using NUTS sampling"
    ]
   },
   {
diff --git a/docs/source/notebooks/GLM-negative-binomial-regression.ipynb b/docs/source/notebooks/GLM-negative-binomial-regression.ipynb
index 6796a99df1..a9b878faee 100644
--- a/docs/source/notebooks/GLM-negative-binomial-regression.ipynb
+++ b/docs/source/notebooks/GLM-negative-binomial-regression.ipynb
@@ -452,7 +452,7 @@
     "    # C = pm.approx_hessian(start)\n",
     "    # trace = pm.sample(4000, step=pm.NUTS(scaling=C))\n",
     "    \n",
-    "    trace = pm.sample(2000, njobs=2)"
+    "    trace = pm.sample(2000, cores=2)"
    ]
   },
   {
diff --git a/docs/source/notebooks/GLM-poisson-regression.ipynb b/docs/source/notebooks/GLM-poisson-regression.ipynb
index 9c7e6c32bb..30f10d235b 100644
--- a/docs/source/notebooks/GLM-poisson-regression.ipynb
+++ b/docs/source/notebooks/GLM-poisson-regression.ipynb
@@ -640,7 +640,7 @@
    ],
    "source": [
     "with mdl_fish:\n",
-    "    trc_fish = pm.sample(2000, tune=1000, njobs=4)[1000:]"
+    "    trc_fish = pm.sample(2000, tune=1000, cores=4)[1000:]"
    ]
   },
   {
diff --git a/docs/source/notebooks/GLM-robust.ipynb b/docs/source/notebooks/GLM-robust.ipynb
index d6b282da80..18fbec26fa 100644
--- a/docs/source/notebooks/GLM-robust.ipynb
+++ b/docs/source/notebooks/GLM-robust.ipynb
@@ -142,7 +142,7 @@
    "source": [
     "with pm.Model() as model:\n",
     "    pm.glm.GLM.from_formula('y ~ x', data)\n",
-    "    trace = pm.sample(2000, njobs=2)"
+    "    trace = pm.sample(2000, cores=2)"
    ]
   },
   {
@@ -262,7 +262,7 @@
     "with pm.Model() as model_robust:\n",
     "    family = pm.glm.families.StudentT()\n",
     "    pm.glm.GLM.from_formula('y ~ x', data, family=family)\n",
-    "    trace_robust = pm.sample(2000, njobs=2)\n",
+    "    trace_robust = pm.sample(2000, cores=2)\n",
     "\n",
     "plt.figure(figsize=(7, 5))\n",
     "plt.plot(x_out, y_out, 'x')\n",
diff --git a/docs/source/notebooks/GLM-rolling-regression.ipynb b/docs/source/notebooks/GLM-rolling-regression.ipynb
index f8401d5de9..3edf4de19e 100644
--- a/docs/source/notebooks/GLM-rolling-regression.ipynb
+++ b/docs/source/notebooks/GLM-rolling-regression.ipynb
@@ -327,7 +327,7 @@
    ],
    "source": [
     "with model_randomwalk:\n",
-    "    trace_rw = pm.sample(tune=2000, njobs=4, samples=200, \n",
+    "    trace_rw = pm.sample(tune=2000, cores=4, samples=200, \n",
     "                         nuts_kwargs=dict(target_accept=.9))"
    ]
   },
diff --git a/docs/source/notebooks/GLM.ipynb b/docs/source/notebooks/GLM.ipynb
index 543b8aa214..f42311c62c 100644
--- a/docs/source/notebooks/GLM.ipynb
+++ b/docs/source/notebooks/GLM.ipynb
@@ -88,7 +88,7 @@
     "    lm = glm.LinearComponent.from_formula('y ~ x', data)\n",
     "    sigma = Uniform('sigma', 0, 20)\n",
     "    y_obs = Normal('y_obs', mu=lm.y_est, sd=sigma, observed=y)\n",
-    "    trace = sample(2000, njobs=2)\n",
+    "    trace = sample(2000, cores=2)\n",
     "\n",
     "plt.figure(figsize=(5, 5))\n",
     "plt.plot(x, y, 'x')\n",
@@ -135,7 +135,7 @@
    "source": [
     "with Model() as model:\n",
     "    GLM.from_formula('y ~ x', data)\n",
-    "    trace = sample(2000, njobs=2)\n",
+    "    trace = sample(2000, cores=2)\n",
     "\n",
     "plt.figure(figsize=(5, 5))\n",
     "plt.plot(x, y, 'x')\n",
@@ -194,7 +194,7 @@
    "source": [
     "with Model() as model:\n",
     "    GLM.from_formula('y ~ x', data_outlier)\n",
-    "    trace = sample(2000, njobs=2)\n",
+    "    trace = sample(2000, cores=2)\n",
     "\n",
     "plt.figure(figsize=(5, 5))\n",
     "plt.plot(x_out, y_out, 'x')\n",
@@ -244,7 +244,7 @@
     "                                   priors={'nu': 1.5,\n",
     "                                           'lam': Uniform.dist(0, 20)})\n",
     "    GLM.from_formula('y ~ x', data_outlier, family=family)    \n",
-    "    trace = sample(2000, njobs=2)\n",
+    "    trace = sample(2000, cores=2)\n",
     "\n",
     "plt.figure(figsize=(5, 5))\n",
     "plt.plot(x_out, y_out, 'x')\n",
@@ -301,7 +301,7 @@
     "          'prcnt_take': Normal.dist(mu=grp_mean, sd=grp_sd)\n",
     "    }\n",
     "    GLM.from_formula('sat_t ~ spend + stu_tea_rat + salary + prcnt_take', sat_data, priors=priors)\n",
-    "    trace_sat = sample(2000, njobs=2)"
+    "    trace_sat = sample(2000, cores=2)"
    ]
   },
   {
@@ -358,7 +358,7 @@
     "    intercept = Normal.dist(mu=sat_data.sat_t.mean(), sd=sat_data.sat_t.std())\n",
     "    GLM.from_formula('sat_t ~ spend + stu_tea_rat + salary + prcnt_take', sat_data,\n",
     "        priors={'Intercept': intercept, 'Regressor': slope})\n",
-    "    trace_sat = sample(2000, njobs=2)"
+    "    trace_sat = sample(2000, cores=2)"
    ]
   },
   {
@@ -417,7 +417,7 @@
     "    GLM.from_formula('sat_t ~ spend + stu_tea_rat + salary + prcnt_take', sat_data,\n",
     "                priors={'Intercept': intercept, 'Regressor': slope})\n",
     "\n",
-    "    trace_sat = sample(2000, njobs=2)"
+    "    trace_sat = sample(2000, cores=2)"
    ]
   },
   {
@@ -599,7 +599,7 @@
    "source": [
     "with Model() as model_htwt:\n",
     "    GLM.from_formula('male ~ height + weight', htwt_data, family=glm.families.Binomial())\n",
-    "    trace_htwt = sample(2000, njobs=2)"
+    "    trace_htwt = sample(2000, cores=2)"
    ]
   },
   {
@@ -747,7 +747,7 @@
     "    GLM.from_formula('male ~ height + weight', htwt_data, family=glm.families.Binomial(),\n",
     "                    priors=priors)\n",
     "    \n",
-    "    trace_lasso = sample(500, njobs=2)\n",
+    "    trace_lasso = sample(500, cores=2)\n",
     "    \n",
     "trace_df = trace_to_dataframe(trace_lasso)\n",
     "scatter_matrix(trace_df, figsize=(8, 8));\n",
diff --git a/docs/source/notebooks/LKJ.ipynb b/docs/source/notebooks/LKJ.ipynb
index 2e5f9f6a70..60f0626ffc 100644
--- a/docs/source/notebooks/LKJ.ipynb
+++ b/docs/source/notebooks/LKJ.ipynb
@@ -288,7 +288,7 @@
    ],
    "source": [
     "with model:\n",
-    "    trace = pm.sample(random_seed=SEED, njobs=4)"
+    "    trace = pm.sample(random_seed=SEED, cores=4)"
    ]
   },
   {
diff --git a/docs/source/notebooks/MvGaussianRandomWalk_demo.ipynb b/docs/source/notebooks/MvGaussianRandomWalk_demo.ipynb
index 5861158847..e7072fb000 100644
--- a/docs/source/notebooks/MvGaussianRandomWalk_demo.ipynb
+++ b/docs/source/notebooks/MvGaussianRandomWalk_demo.ipynb
@@ -139,7 +139,7 @@
     "\n",
     "        sd = pm.Uniform('sd', 0, 1)\n",
     "        likelihood = pm.Normal('y', mu=regression, sd=sd, observed=y_t)\n",
-    "        trace = pm.sample(n_samples, njobs=4)\n",
+    "        trace = pm.sample(n_samples, cores=4)\n",
     "\n",
     "    return trace, y_scaler, t_scaler, t_section"
    ]
diff --git a/docs/source/notebooks/PyMC3_tips_and_heuristic.ipynb b/docs/source/notebooks/PyMC3_tips_and_heuristic.ipynb
index d75aa33b1d..8878dc4bd3 100644
--- a/docs/source/notebooks/PyMC3_tips_and_heuristic.ipynb
+++ b/docs/source/notebooks/PyMC3_tips_and_heuristic.ipynb
@@ -484,7 +484,7 @@
     "    # Proportion sptial variance\n",
     "    alpha = pm.Deterministic('alpha', sd_c/(sd_h+sd_c))\n",
     "\n",
-    "    trace1 = pm.sample(3e3, njobs=2, tune=1000, nuts_kwargs={'max_treedepth': 15})"
+    "    trace1 = pm.sample(3e3, cores=2, tune=1000, nuts_kwargs={'max_treedepth': 15})"
    ]
   },
   {
@@ -702,7 +702,7 @@
     "    # Proportion sptial variance\n",
     "    alpha = pm.Deterministic('alpha', sd_c/(sd_h+sd_c))\n",
     "\n",
-    "    trace2 = pm.sample(3e3, njobs=2, tune=1000, nuts_kwargs={'max_treedepth': 15})"
+    "    trace2 = pm.sample(3e3, cores=2, tune=1000, nuts_kwargs={'max_treedepth': 15})"
    ]
   },
   {
@@ -856,7 +856,7 @@
     "    # Likelihood\n",
     "    Yi = pm.Poisson('Yi', mu=mu.ravel(), observed=O)\n",
     "\n",
-    "    trace3 = pm.sample(3e3, njobs=2, tune=1000)"
+    "    trace3 = pm.sample(3e3, cores=2, tune=1000)"
    ]
   },
   {
@@ -1104,7 +1104,7 @@
     "    # Likelihood\n",
     "    Yi = pm.Poisson('Yi', mu=mu.ravel(), observed=O)\n",
     "\n",
-    "    trace4 = pm.sample(3e3, njobs=2, tune=1000)"
+    "    trace4 = pm.sample(3e3, cores=2, tune=1000)"
    ]
   },
   {
diff --git a/docs/source/notebooks/api_quickstart.ipynb b/docs/source/notebooks/api_quickstart.ipynb
index 419e3534a4..17ce63374a 100644
--- a/docs/source/notebooks/api_quickstart.ipynb
+++ b/docs/source/notebooks/api_quickstart.ipynb
@@ -814,7 +814,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "You can also run multiple chains in parallel using the `njobs` kwarg:"
+    "You can also run multiple chains in parallel using the `cores` kwarg:"
    ]
   },
   {
@@ -837,7 +837,7 @@
     "    mu = pm.Normal('mu', mu=0, sd=1)\n",
     "    obs = pm.Normal('obs', mu=mu, sd=1, observed=np.random.randn(100))\n",
     "    \n",
-    "    trace = pm.sample(njobs=4)"
+    "    trace = pm.sample(cores=4)"
    ]
   },
   {
@@ -1009,7 +1009,7 @@
     "    \n",
     "    step1 = pm.Metropolis(vars=[mu])\n",
     "    step2 = pm.Slice(vars=[sd])\n",
-    "    trace = pm.sample(10000, step=[step1, step2], njobs=4)"
+    "    trace = pm.sample(10000, step=[step1, step2], cores=4)"
    ]
   },
   {
@@ -1159,7 +1159,7 @@
    "source": [
     "with pm.Model() as model:\n",
     "    x = pm.Normal('x', mu=0, sd=1, shape=100)    \n",
-    "    trace = pm.sample(njobs=4)\n",
+    "    trace = pm.sample(cores=4)\n",
     "    \n",
     "pm.energyplot(trace);"
    ]
diff --git a/docs/source/notebooks/hierarchical_partial_pooling.ipynb b/docs/source/notebooks/hierarchical_partial_pooling.ipynb
index aecdcf5986..c08b63c0a8 100644
--- a/docs/source/notebooks/hierarchical_partial_pooling.ipynb
+++ b/docs/source/notebooks/hierarchical_partial_pooling.ipynb
@@ -170,7 +170,7 @@
    ],
    "source": [
     "with baseball_model:\n",
-    "    trace = pm.sample(2000, tune=1000, nchains=2,\n",
+    "    trace = pm.sample(2000, tune=1000, chains=2,\n",
     "                      nuts_kwargs={'target_accept': 0.95})"
    ]
   },
diff --git a/docs/source/notebooks/normalizing_flows_overview.ipynb b/docs/source/notebooks/normalizing_flows_overview.ipynb
index c46f94b41f..65aafa1bf4 100644
--- a/docs/source/notebooks/normalizing_flows_overview.ipynb
+++ b/docs/source/notebooks/normalizing_flows_overview.ipynb
@@ -438,7 +438,7 @@
     "pm.set_tt_rng(42)\n",
     "np.random.seed(42)\n",
     "with pot1m:\n",
-    "    trace = pm.sample(1000, init='auto', njobs=2, start=[dict(pot1=np.array([-2, 0])),\n",
+    "    trace = pm.sample(1000, init='auto', cores=2, start=[dict(pot1=np.array([-2, 0])),\n",
     "                                                         dict(pot1=np.array([2, 0]))])"
    ]
   },
@@ -975,7 +975,7 @@
    ],
    "source": [
     "with pot_m:\n",
-    "    traceNUTS = pm.sample(3000, tune=1000, target_accept=0.9, njobs=2)"
+    "    traceNUTS = pm.sample(3000, tune=1000, target_accept=0.9, cores=2)"
    ]
   },
   {
diff --git a/docs/source/notebooks/probabilistic_matrix_factorization.ipynb b/docs/source/notebooks/probabilistic_matrix_factorization.ipynb
index 271e7ca6a1..90bc726511 100644
--- a/docs/source/notebooks/probabilistic_matrix_factorization.ipynb
+++ b/docs/source/notebooks/probabilistic_matrix_factorization.ipynb
@@ -810,17 +810,17 @@
     "    basename = 'pmf-mcmc-d%d' % self.dim\n",
     "    return os.path.join(DATA_DIR, basename)\n",
     "\n",
-    "def _draw_samples(self, nsamples=1000, njobs=2):\n",
+    "def _draw_samples(self, nsamples=1000, cores=2):\n",
     "    # First make sure the trace_dir does not already exist.\n",
     "    if os.path.isdir(self.trace_dir):\n",
     "        shutil.rmtree(self.trace_dir)\n",
     "\n",
     "    with self.model:\n",
-    "        logging.info('drawing %d samples using %d jobs' % (nsamples, njobs))\n",
+    "        logging.info('drawing %d samples using %d jobs' % (nsamples, cores))\n",
     "        backend = pm.backends.Text(self.trace_dir)\n",
     "        logging.info('backing up trace to directory: %s' % self.trace_dir)\n",
     "        self.trace = pm.sample(draws=nsamples, init='advi',\n",
-    "                               n_init=150000, njobs=njobs, trace=backend)\n",
+    "                               n_init=150000, cores=cores, trace=backend)\n",
     "        \n",
     "def _load_trace(self):\n",
     "    with self.model:\n",
@@ -837,7 +837,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We could define some kind of default trace property like we did for the MAP, but that would mean using possibly nonsensical values for `nsamples` and `njobs`. Better to leave it as a non-optional call to `draw_samples`. Finally, we'll need a function to make predictions using our inferred values for $U$ and $V$. For user $i$ and joke $j$, a prediction is generated by drawing from $\\mathcal{N}(U_i V_j^T, \\alpha)$. To generate predictions from the sampler, we generate an $R$ matrix for each $U$ and $V$ sampled, then we combine these by averaging over the $K$ samples.\n",
+    "We could define some kind of default trace property like we did for the MAP, but that would mean using possibly nonsensical values for `nsamples` and `cores`. Better to leave it as a non-optional call to `draw_samples`. Finally, we'll need a function to make predictions using our inferred values for $U$ and $V$. For user $i$ and joke $j$, a prediction is generated by drawing from $\\mathcal{N}(U_i V_j^T, \\alpha)$. To generate predictions from the sampler, we generate an $R$ matrix for each $U$ and $V$ sampled, then we combine these by averaging over the $K$ samples.\n",
     "\n",
     "\\begin{equation}\n",
     "P(R_{ij}^* \\given R, \\alpha, \\alpha_U, \\alpha_V) \\approx\n",
diff --git a/docs/source/notebooks/rugby_analytics.ipynb b/docs/source/notebooks/rugby_analytics.ipynb
index 724282b5a5..4ceb82e19a 100644
--- a/docs/source/notebooks/rugby_analytics.ipynb
+++ b/docs/source/notebooks/rugby_analytics.ipynb
@@ -815,7 +815,7 @@
    ],
    "source": [
     "with model:\n",
-    "    trace = pm.sample(1000, tune=1000, njobs=3)\n",
+    "    trace = pm.sample(1000, tune=1000, cores=3)\n",
     "    pm.traceplot(trace)"
    ]
   },
diff --git a/docs/source/notebooks/sampler-stats.ipynb b/docs/source/notebooks/sampler-stats.ipynb
index 5937490e09..65ee3e7fdf 100644
--- a/docs/source/notebooks/sampler-stats.ipynb
+++ b/docs/source/notebooks/sampler-stats.ipynb
@@ -79,7 +79,7 @@
    "source": [
     "with model:\n",
     "    step = pm.NUTS()\n",
-    "    trace = pm.sample(2000, tune=1000, init=None, step=step, njobs=2)"
+    "    trace = pm.sample(2000, tune=1000, init=None, step=step, cores=2)"
    ]
   },
   {
@@ -439,7 +439,7 @@
     "with model:\n",
     "    step1 = pm.BinaryMetropolis([mu1])\n",
     "    step2 = pm.Metropolis([mu2])\n",
-    "    trace = pm.sample(10000, init=None, step=[step1, step2], njobs=2, tune=1000)"
+    "    trace = pm.sample(10000, init=None, step=[step1, step2], cores=2, tune=1000)"
    ]
   },
   {
diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py
index 95d519525d..95256bbf99 100644
--- a/pymc3/backends/__init__.py
+++ b/pymc3/backends/__init__.py
@@ -34,7 +34,7 @@
 
 The call will return the sampling values of `x`, with the values for
 all chains concatenated. (For a single call to `sample`, the number of
-chains will correspond to the `njobs` argument.)
+chains is set by the `chains` argument, which defaults to `max(2, cores)`.)
 
 To discard the first N values of each chain, slicing syntax can be
 used.
diff --git a/pymc3/examples/custom_dists.py b/pymc3/examples/custom_dists.py
index 37f5ad80ea..a535fd4fff 100644
--- a/pymc3/examples/custom_dists.py
+++ b/pymc3/examples/custom_dists.py
@@ -24,7 +24,7 @@
 ydata = np.random.normal(ydata, 10)
 data = {'x': xdata, 'y': ydata}
 
-# define loglikelihood outside of the model context, otherwise njobs wont work:
+# define loglikelihood outside of the model context, otherwise sampling with cores > 1 won't work:
 # Lambdas defined in local namespace are not picklable (see issue #1995)
 def loglike1(value):
     return -1.5 * tt.log(1 + value**2)
@@ -40,7 +40,7 @@ def loglike2(value):
     like = pm.Normal('y_est', mu=alpha + beta *
                         xdata, sd=sigma, observed=ydata)
 
-    trace = pm.sample(2000, njobs=2)
+    trace = pm.sample(2000, cores=2)
 
 
 #################################################
diff --git a/pymc3/examples/disaster_model_theano_op.py b/pymc3/examples/disaster_model_theano_op.py
index 3eda694154..de11b57079 100644
--- a/pymc3/examples/disaster_model_theano_op.py
+++ b/pymc3/examples/disaster_model_theano_op.py
@@ -56,5 +56,5 @@ def rate_(switchpoint, early_mean, late_mean):
     # Initial values for stochastic nodes
     start = {'early_mean': 2., 'late_mean': 3.}
 
-    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)
+    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], cores=2)
     pm.traceplot(tr)
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 6fa81d595a..0fc8e874a0 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -184,7 +184,7 @@ def _cpu_count():
 
 
 def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
-           trace=None, chain_idx=0, chains=None, njobs=None, tune=500,
+           trace=None, chain_idx=0, chains=None, cores=None, tune=500,
            nuts_kwargs=None, step_kwargs=None, progressbar=True, model=None,
            random_seed=None, live_plot=False, discard_tuned_samples=True,
            live_plot_kwargs=None, compute_convergence_checks=True, **kwargs):
@@ -249,8 +249,8 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
         The number of chains to sample. Running independent chains
         is important for some convergence statistics and can also
         reveal multiple modes in the posterior. If `None`, then set to
-        either `njobs` or 2, whichever is larger.
-    njobs : int
+        either `cores` or 2, whichever is larger.
+    cores : int
         The number of chains to run in parallel. If `None`, set to the
         number of CPUs in the system, but at most 4. Keep in mind that
         some chains might themselves be multithreaded via openmp or
@@ -287,7 +287,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
         completion ("expected time of arrival"; ETA).
     model : Model (optional if in `with` context)
     random_seed : int or list of ints
-        A list is accepted if `njobs` is greater than one.
+        A list is accepted if `cores` is greater than one.
     live_plot : bool
         Flag for live plotting the trace while sampling
     live_plot_kwargs : dict
@@ -318,17 +318,22 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
         >>> with pm.Model() as model: # context management
         ...     p = pm.Beta('p', alpha=alpha, beta=beta)
         ...     y = pm.Binomial('y', n=n, p=p, observed=h)
-        ...     trace = pm.sample(2000, tune=1000, njobs=4)
+        ...     trace = pm.sample(2000, tune=1000, cores=4)
         >>> pm.summary(trace)
                mean        sd  mc_error   hpd_2.5  hpd_97.5
         p  0.604625  0.047086   0.00078  0.510498  0.694774
     """
     model = modelcontext(model)
 
-    if njobs is None:
-        njobs = min(4, _cpu_count())
+    if cores is None:
+        cores = min(4, _cpu_count())
+    if 'njobs' in kwargs:
+        cores = kwargs['njobs']
+        warnings.warn(
+            "The njobs argument has been deprecated. Use cores instead.",
+            DeprecationWarning)
     if chains is None:
-        chains = max(2, njobs)
+        chains = max(2, cores)
     if isinstance(start, dict):
         start = [start] * chains
     if random_seed == -1:
@@ -342,7 +347,11 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
     if not isinstance(random_seed, Iterable):
         raise TypeError(
             'Invalid value for `random_seed`. Must be tuple, list or int')
-
+    if 'nchains' in kwargs:
+        chains = kwargs['nchains']
+        warnings.warn(
+            "The nchains argument has been deprecated. Use chains instead.",
+            DeprecationWarning)
     if 'chain' in kwargs:
         chain_idx = kwargs['chain']
         warnings.warn(
@@ -403,7 +412,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
         'random_seed': random_seed,
         'live_plot': live_plot,
         'live_plot_kwargs': live_plot_kwargs,
-        'njobs': njobs,
+        'cores': cores,
     }
 
     sample_args.update(kwargs)
@@ -412,9 +421,9 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None,
         isinstance(m, arraystep.PopulationArrayStepShared)
         for m in (step.methods if isinstance(step, CompoundStep) else [step])
     ])
-    parallel = njobs > 1 and chains > 1 and not has_population_samplers
+    parallel = cores > 1 and chains > 1 and not has_population_samplers
     if parallel:
-        _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, njobs))
+        _log.info('Multiprocess sampling ({} chains in {} jobs)'.format(chains, cores))
         _print_step_hierarchy(step)
         try:
             trace = _mp_sample(**sample_args)
@@ -564,13 +573,13 @@ def iter_sample(draws, step, start=None, trace=None, chain=0, tune=None,
         is given, it must contain samples for the chain number `chain`.
         If None or a list of variables, the NDArray backend is used.
     chain : int
-        Chain number used to store sample in backend. If `njobs` is
+        Chain number used to store sample in backend. If `cores` is
         greater than one, chain numbers will start here.
     tune : int
         Number of iterations to tune, if applicable (defaults to None)
     model : Model (optional if in `with` context)
     random_seed : int or list of ints
-        A list is accepted if more if `njobs` is greater than one.
+        A list is accepted if `cores` is greater than one.
 
     Examples
     --------
@@ -937,7 +946,7 @@ def _choose_backend(trace, chain, shortcuts=None, **kwds):
 
 
 def _mp_sample(**kwargs):
-    njobs = kwargs.pop('njobs')
+    cores = kwargs.pop('cores')
     chain = kwargs.pop('chain')
     rseed = kwargs.pop('random_seed')
     start = kwargs.pop('start')
@@ -947,7 +956,7 @@ def _mp_sample(**kwargs):
     pbars = [kwargs.pop('progressbar')] + [False] * (chains - 1)
     jobs = (delayed(_sample)(*args, **kwargs)
             for args in zip(chain_nums, pbars, rseed, start))
-    traces = Parallel(n_jobs=njobs)(jobs)
+    traces = Parallel(n_jobs=cores)(jobs)
     return MultiTrace(traces)
 
 
diff --git a/pymc3/step_methods/smc.py b/pymc3/step_methods/smc.py
index 75c803dd99..3f4d37a966 100644
--- a/pymc3/step_methods/smc.py
+++ b/pymc3/step_methods/smc.py
@@ -433,51 +433,51 @@ def resample(self):
         return outindx
 
 
-def sample_smc(samples=1000, n_chains=100, step=None, start=None, homepath=None, stage=0, n_jobs=1,
-               tune_interval=10, progressbar=False, model=None, random_seed=-1, rm_flag=True):
+def sample_smc(samples=1000, chains=100, step=None, start=None, homepath=None, stage=0, cores=1,
+               tune_interval=10, progressbar=False, model=None, random_seed=-1, rm_flag=True, **kwargs):
     """Sequential Monte Carlo sampling
 
-    Samples the solution space with n_chains of Metropolis chains, where each chain has n_steps
+    Samples the solution space with `chains` Metropolis chains, where each chain has `n_steps`=`samples`/`chains`
     iterations. Once finished, the sampled traces are evaluated:
 
     (1) Based on the likelihoods of the final samples, chains are weighted
     (2) the weighted covariance of the ensemble is calculated and set as new proposal distribution
-    (3) the variation in the ensemble is calculated and also the next tempering parameter (beta)
-    (4) New n_chains Markov chains are seeded on the traces with high weight for n_steps iterations
-    (5) Repeat until beta > 1.
+    (3) the variation in the ensemble is calculated and also the next tempering parameter (`beta`)
+    (4) New `chains` Markov chains are seeded on the traces with high weight for n_steps iterations
+    (5) Repeat until `beta` > 1.
 
     Parameters
     ----------
     samples : int
         The number of samples to draw from the last stage, i.e. the posterior. Defaults to 1000.
-        The number of samples should be a multiple of `n_chains`, otherwise the returned number of
-        draws will be the lowest closest multiple of `n_chains`.
-    n_chains : int
+        The number of samples should be a multiple of `chains`, otherwise the returned number of
+        draws will be the lowest closest multiple of `chains`.
+    chains : int
         Number of chains used to store samples in backend.
     step : :class:`SMC`
         SMC initialization object
     start : List of dictionaries
-        with length of (n_chains). Starting points in parameter space (or partial point)
+        with length of (`chains`). Starting points in parameter space (or partial point)
         Defaults to random draws from variables (defaults to empty dict)
     homepath : string
         Result_folder for storing stages, will be created if not existing.
     stage : int
         Stage where to start or continue the calculation. It is possible to continue after completed
-        stages (stage should be the number of the completed stage + 1). If None the start will be at
-        stage = 0.
-    n_jobs : int
+        stages (`stage` should be the number of the completed stage + 1). If None the start will be at
+        `stage=0`.
+    cores : int
         The number of cores to be used in parallel. Be aware that Theano has internal
         parallelization. Sometimes this is more efficient especially for simple models.
-        step.n_chains / n_jobs has to be an integer number!
+        `step.n_chains / cores` has to be an integer number!
     tune_interval : int
         Number of steps to tune for. Defaults to 10.
     progressbar : bool
         Flag for displaying a progress bar
     model : :class:`pymc3.Model`
         (optional if in `with` context) has to contain deterministic variable name defined under
-        step.likelihood_name' that contains the model likelihood
+        `step.likelihood_name` that contains the model likelihood
     random_seed : int or list of ints
-        A list is accepted, more if `n_jobs` is greater than one.
+        A list is accepted if `cores` is greater than one.
     rm_flag : bool
         If True existing stage result folders are being deleted prior to sampling.
 
@@ -490,9 +490,15 @@ def sample_smc(samples=1000, n_chains=100, step=None, start=None, homepath=None,
     """
     warnings.warn(EXPERIMENTAL_WARNING)
 
+    n_chains = chains
+    if 'n_chains' in kwargs:
+        n_chains = kwargs['n_chains']
+        warnings.warn(
+            "The n_chains argument has been deprecated. Use chains instead.",
+            DeprecationWarning)
     remainder = samples % n_chains
     if remainder != 0:
-        warnings.warn("'samples' {} is not a multiple of 'n_chains' {}. Hence, you will get {} "
+        warnings.warn("'samples' {} is not a multiple of 'chains' {}. Hence, you will get {} "
                       "draws from the posterior".format(samples, n_chains, samples - remainder))
 
     model = modelcontext(model)
@@ -508,9 +514,14 @@ def sample_smc(samples=1000, n_chains=100, step=None, start=None, homepath=None,
     if homepath is None:
         raise TypeError('Argument `homepath` should be path to result_directory.')
 
-    if n_jobs > 1:
-        if not (step.n_chains / float(n_jobs)).is_integer():
-            raise TypeError('n_chains / n_jobs has to be a whole number!')
+    if 'n_jobs' in kwargs:
+        cores = kwargs['n_jobs']
+        warnings.warn(
+            "The n_jobs argument has been deprecated. Use cores instead.",
+            DeprecationWarning)
+    if cores > 1:
+        if not (step.n_chains / float(cores)).is_integer():
+            raise TypeError('chains / cores has to be a whole number!')
 
     if start is not None:
         if len(start) != step.n_chains:
@@ -525,7 +536,7 @@ def sample_smc(samples=1000, n_chains=100, step=None, start=None, homepath=None,
 
     stage_handler = atext.TextStage(homepath)
 
-    if progressbar and n_jobs > 1:
+    if progressbar and cores > 1:
         progressbar = False
 
     if stage == 0:
@@ -558,7 +569,7 @@ def sample_smc(samples=1000, n_chains=100, step=None, start=None, homepath=None,
                            'stage_path': stage_handler.stage_path(step.stage),
                            'progressbar': progressbar,
                            'model': model,
-                           'n_jobs': n_jobs,
+                           'n_jobs': cores,
                            'chains': chains}
 
             _iter_parallel_chains(**sample_args)
diff --git a/pymc3/tests/sampler_fixtures.py b/pymc3/tests/sampler_fixtures.py
index 284daface4..2687764d34 100644
--- a/pymc3/tests/sampler_fixtures.py
+++ b/pymc3/tests/sampler_fixtures.py
@@ -128,7 +128,7 @@ def setup_class(cls):
         cls.model = cls.make_model()
         with cls.model:
             cls.step = cls.make_step()
-            cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, njobs=cls.chains)
+            cls.trace = pm.sample(cls.n_samples, tune=cls.tune, step=cls.step, cores=cls.chains)
         cls.samples = {}
         for var in cls.model.unobserved_RVs:
             cls.samples[str(var)] = cls.trace.get_values(var, burn=cls.burn)
diff --git a/pymc3/tests/test_diagnostics.py b/pymc3/tests/test_diagnostics.py
index 382012744d..35c8854adc 100644
--- a/pymc3/tests/test_diagnostics.py
+++ b/pymc3/tests/test_diagnostics.py
@@ -24,7 +24,7 @@ def get_ptrace(self, n_samples):
             step1 = Slice([model.early_mean_log__, model.late_mean_log__])
             step2 = Metropolis([model.switchpoint])
             start = {'early_mean': 7., 'late_mean': 5., 'switchpoint': 10}
-            ptrace = sample(n_samples, tune=0, step=[step1, step2], start=start, njobs=2,
+            ptrace = sample(n_samples, tune=0, step=[step1, step2], start=start, cores=2,
                             progressbar=False, random_seed=[20090425, 19700903])
         return ptrace
 
@@ -161,7 +161,7 @@ def test_effective_n(self):
             start = find_MAP()
             step = NUTS(scaling=start)
             ptrace = sample(0, tune=n_samples, step=step, start=start,
-                            njobs=n_jobs, discard_tuned_samples=False,
+                            cores=n_jobs, discard_tuned_samples=False,
                             random_seed=42)
 
         n_effective = effective_n(ptrace)['x']
@@ -183,7 +183,7 @@ def test_effective_n_right_shape_python_float(self,
             start = find_MAP()
             step = NUTS(scaling=start)
             ptrace = sample(0, tune=n_samples, step=step, start=start,
-                            njobs=n_jobs, discard_tuned_samples=False,
+                            cores=n_jobs, discard_tuned_samples=False,
                             random_seed=42)
 
         n_effective = effective_n(ptrace)['x']
diff --git a/pymc3/tests/test_plots.py b/pymc3/tests/test_plots.py
index b55935ccfe..a16ba9b3e2 100644
--- a/pymc3/tests/test_plots.py
+++ b/pymc3/tests/test_plots.py
@@ -34,7 +34,7 @@ def test_plots():
 
 def test_energyplot():
     with asmod.build_model():
-        trace = sample(njobs=1)
+        trace = sample(cores=1)
 
     energyplot(trace)
     energyplot(trace, shade=0.5, alpha=0)
@@ -66,7 +66,7 @@ def test_plots_multidimensional():
     forestplot(trace)
     densityplot(trace)
 
-@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to njobs=2")
+@pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on GPU due to cores=2")
 def test_multichain_plots():
     model = build_disaster_model()
     with model:
@@ -74,7 +74,7 @@ def test_multichain_plots():
         step1 = Slice([model.early_mean_log__, model.late_mean_log__])
         step2 = Metropolis([model.switchpoint])
         start = {'early_mean': 2., 'late_mean': 3., 'switchpoint': 50}
-        ptrace = sample(1000, tune=0, step=[step1, step2], start=start, njobs=2)
+        ptrace = sample(1000, tune=0, step=[step1, step2], start=start, cores=2)
 
     forestplot(ptrace, varnames=['early_mean', 'late_mean'])
     autocorrplot(ptrace, varnames=['switchpoint'])
diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 2cb7c15839..30362019c0 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -33,15 +33,15 @@ def test_sample_does_not_set_seed(self):
         assert random_numbers[0] == random_numbers[1]
 
     def test_parallel_sample_does_not_reuse_seed(self):
-        njobs = 4
+        cores = 4
         random_numbers = []
         draws = []
         for _ in range(2):
             np.random.seed(1)  # seeds in other processes don't effect main process
             with self.model:
-                trace = pm.sample(100, tune=0, njobs=njobs)
+                trace = pm.sample(100, tune=0, cores=cores)
             # numpy thread mentioned race condition.  might as well check none are equal
-            for first, second in combinations(range(njobs), 2):
+            for first, second in combinations(range(cores), 2):
                 first_chain = trace.get_values('x', chains=first)
                 second_chain = trace.get_values('x', chains=second)
                 assert not (first_chain == second_chain).all()
@@ -53,11 +53,11 @@ def test_parallel_sample_does_not_reuse_seed(self):
         assert (draws[0] == draws[1]).all()
 
     def test_sample(self):
-        test_njobs = [1]
+        test_cores = [1]
         with self.model:
-            for njobs in test_njobs:
+            for cores in test_cores:
                 for steps in [1, 10, 300]:
-                    pm.sample(steps, tune=0, step=self.step, njobs=njobs,
+                    pm.sample(steps, tune=0, step=self.step, cores=cores,
                               random_seed=self.random_seed)
 
     def test_sample_init(self):
@@ -93,7 +93,7 @@ def test_iter_sample(self):
 
     def test_parallel_start(self):
         with self.model:
-            tr = pm.sample(0, tune=5, njobs=2,
+            tr = pm.sample(0, tune=5, cores=2,
                            discard_tuned_samples=False,
                            start=[{'x': [10, 10]}, {'x': [-10, -10]}],
                            random_seed=self.random_seed)
@@ -102,12 +102,12 @@ def test_parallel_start(self):
 
     def test_sample_tune_len(self):
         with self.model:
-            trace = pm.sample(draws=100, tune=50, njobs=1)
+            trace = pm.sample(draws=100, tune=50, cores=1)
             assert len(trace) == 100
-            trace = pm.sample(draws=100, tune=50, njobs=1,
+            trace = pm.sample(draws=100, tune=50, cores=1,
                               discard_tuned_samples=False)
             assert len(trace) == 150
-            trace = pm.sample(draws=100, tune=50, njobs=4)
+            trace = pm.sample(draws=100, tune=50, cores=4)
             assert len(trace) == 100
 
     @pytest.mark.parametrize(
diff --git a/pymc3/tests/test_sgfs.py b/pymc3/tests/test_sgfs.py
index 589eccf508..06d8749443 100644
--- a/pymc3/tests/test_sgfs.py
+++ b/pymc3/tests/test_sgfs.py
@@ -31,6 +31,6 @@ def f(x, a, b, c):
         pm.Normal('y', mu=y, observed=y_obs)
 
         step_method = pm.SGFS(batch_size=batch_size, step_size=1., total_size=total_size)
-        trace = pm.sample(draws=draws, step=step_method, init=None, njobs=2)
+        trace = pm.sample(draws=draws, step=step_method, init=None, cores=2)
 
     np.testing.assert_allclose(np.mean(trace['abc'], axis=0), np.asarray([a, b, c]), rtol=0.1)
diff --git a/pymc3/tests/test_smc.py b/pymc3/tests/test_smc.py
index b54897f9ff..f106ba0a18 100644
--- a/pymc3/tests/test_smc.py
+++ b/pymc3/tests/test_smc.py
@@ -51,9 +51,9 @@ def two_gaussians(x):
     def test_sample_n_core(self, n_jobs, stage):
 
         mtrace = smc.sample_smc(samples=self.samples,
-                                n_chains=self.n_chains,
+                                chains=self.n_chains,
                                 stage=stage,
-                                n_jobs=n_jobs,
+                                cores=n_jobs,
                                 progressbar=True,
                                 homepath=self.test_folder,
                                 model=self.ATMIP_test,
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index df83a58f1d..817fafa121 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -198,10 +198,10 @@ def check_trace(self, step_method):
             x = Normal('x', mu=0, sd=1)
             if step_method.__name__ == 'SMC':
                 trace = smc.sample_smc(samples=200,
-                                       n_chains=2,
+                                       chains=2,
                                        start=[{'x':1.}, {'x':-1.}],
                                        random_seed=1,
-                                       n_jobs=1, progressbar=False,
+                                       cores=1, progressbar=False,
                                        homepath=self.temp_dir)
             elif step_method.__name__ == 'NUTS':
                 step = step_method(scaling=model.test_point)
diff --git a/pymc3/tests/test_text_backend.py b/pymc3/tests/test_text_backend.py
index 457c8e78bf..f524d16605 100644
--- a/pymc3/tests/test_text_backend.py
+++ b/pymc3/tests/test_text_backend.py
@@ -12,7 +12,7 @@ def test_supports_sampler_stats(self):
         with pm.Model():
             pm.Normal("mu", mu=0, sd=1, shape=2)
             db = text.Text(self.name)
-            pm.sample(20, tune=10, init=None, trace=db, njobs=2)
+            pm.sample(20, tune=10, init=None, trace=db, cores=2)
 
     def teardown_method(self):
         bf.remove_file_or_directory(self.name)