Commit 25add57

Merge pull request #1654 from mathbunnyru/asalikhov/pre_commit_notebooks
pre-commit hooks added/run for jupyter notebooks: nbstripout, nbqa-pyupgrade, nbqa-black, nbqa-flake8
2 parents 442e703 + 9346d39 commit 25add57

7 files changed: +189, -313 lines


.pre-commit-config.yaml (19 additions, 0 deletions)

@@ -99,6 +99,25 @@ repos:
       - id: markdownlint
         args: ["--fix"]
 
+  # Run tools on Jupyter notebooks
+
+  # strip output from Jupyter notebooks
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.5.0
+    hooks:
+      - id: nbstripout
+
+  # nbQA provides tools from the Python ecosystem like
+  # pyupgrade, black, and flake8, adjusted for notebooks.
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.3.1
+    hooks:
+      - id: nbqa-pyupgrade
+        args: [--py39-plus]
+      - id: nbqa-black
+        args: [--target-version=py39]
+      - id: nbqa-flake8
+
 # Docker hooks do not work in pre-commit.ci
 # See: <https://github.com/pre-commit-ci/issues/issues/11>
 ci:
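
For context, the nbQA hooks run the standard Python tools against the code cells of each notebook, which is what produces the quote and formatting changes in the notebook diffs below. A minimal sketch of the kind of rewrite nbqa-pyupgrade (--py39-plus) and nbqa-black perform, using hypothetical cell content that is not taken from this repository:

# Hypothetical cell content before the hooks run:
# typing.List generics and str.format with single quotes.
from typing import List


def positive_ids(rows: List[int]) -> List[int]:
    return [r for r in rows if r > 0]


print('found {} ids'.format(len(positive_ids([1, 2, 3]))))


# After nbqa-pyupgrade --py39-plus and nbqa-black:
# built-in generics, an f-string, and double-quoted, black-formatted code.
def positive_ids_upgraded(rows: list[int]) -> list[int]:
    return [r for r in rows if r > 0]


print(f"found {len(positive_ids_upgraded([1, 2, 3]))} ids")

Contributors would typically trigger these hooks locally with pre-commit run --all-files (or let pre-commit.ci run them), assuming pre-commit is installed.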

binder/README.ipynb (7 additions, 4 deletions)

@@ -18,7 +18,10 @@
    "outputs": [],
    "source": [
     "import os\n",
-    "print(f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image')"
+    "\n",
+    "print(\n",
+    "    f'This container is using tag {os.environ[\"TAG\"]} of the jupyter/base-notebook image'\n",
+    ")"
    ]
   },
   {
@@ -111,7 +114,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -125,9 +128,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.9.10"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }

tests/all-spark-notebook/data/issue_1168.ipynb (9 additions, 20 deletions)

@@ -9,48 +9,37 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "from pyspark.sql import SparkSession\n",
-    "from pyspark.sql.functions import pandas_udf\n",
     "\n",
     "# Spark session & context\n",
-    "spark = SparkSession.builder.master('local').getOrCreate()"
+    "spark = SparkSession.builder.master(\"local\").getOrCreate()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "+---+---+\n",
-      "| id|age|\n",
-      "+---+---+\n",
-      "|  1| 21|\n",
-      "+---+---+\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "df = spark.createDataFrame([(1, 21), (2, 30)], (\"id\", \"age\"))\n",
+    "\n",
+    "\n",
     "def filter_func(iterator):\n",
     "    for pdf in iterator:\n",
     "        yield pdf[pdf.id == 1]\n",
     "\n",
+    "\n",
     "df.mapInPandas(filter_func, df.schema).show()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -64,7 +53,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.8.6"
+  "version": "3.9.10"
   }
  },
  "nbformat": 4,

tests/all-spark-notebook/data/local_pyspark.ipynb (5 additions, 22 deletions)

@@ -2,31 +2,14 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "output_type": "error",
-     "ename": "Error",
-     "evalue": "Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
-     "traceback": [
-      "Error: Jupyter cannot be started. Error attempting to locate jupyter: Data Science libraries jupyter and notebook are not installed in interpreter Python 3.7.7 64-bit ('jupyter': conda).",
-      "at b.startServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:270430)",
-      "at async b.createServer (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:269873)",
-      "at async connect (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:92:397876)",
-      "at async w.ensureConnectionAndNotebookImpl (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556625)",
-      "at async w.ensureConnectionAndNotebook (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:556303)",
-      "at async w.clearResult (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:552346)",
-      "at async w.reexecuteCell (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:540374)",
-      "at async w.reexecuteCells (/Users/romain/.vscode/extensions/ms-python.python-2020.5.80290/out/client/extension.js:16:537541)"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from pyspark.sql import SparkSession\n",
     "\n",
     "# Spark session & context\n",
-    "spark = SparkSession.builder.master('local').getOrCreate()\n",
+    "spark = SparkSession.builder.master(\"local\").getOrCreate()\n",
     "sc = spark.sparkContext\n",
     "\n",
     "# Sum of the first 100 whole numbers\n",
@@ -38,7 +21,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -52,7 +35,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-  "version": "3.7.6"
+  "version": "3.9.10"
   }
  },
  "nbformat": 4,

tests/all-spark-notebook/data/local_spylon.ipynb (3 additions, 15 deletions)

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,21 +14,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "rdd: org.apache.spark.rdd.RDD[Int] = ParallelCollectionRDD[8] at parallelize at <console>:28\n",
-       "res4: Double = 5050.0\n"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "// Sum of the first 100 whole numbers\n",
     "val rdd = sc.parallelize(0 to 100)\n",
