Skip to content

Commit afdf0a3

Browse files
CI/WEB: Fix github quota errors by using website as cache (#50811)
1 parent 9bf60aa commit afdf0a3

File tree

4 files changed

+64
-68
lines changed

4 files changed

+64
-68
lines changed

.github/workflows/docbuild-and-upload.yml

-6
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,6 @@ jobs:
4646
- name: Build Pandas
4747
uses: ./.github/actions/build_pandas
4848

49-
- name: Set up maintainers cache
50-
uses: actions/cache@v3
51-
with:
52-
path: maintainers.json
53-
key: maintainers
54-
5549
- name: Build website
5650
run: python web/pandas_web.py web/pandas --target-path=web/build
5751

web/pandas/about/team.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ If you want to support pandas development, you can find information in the [dona
99
## Active maintainers
1010

1111
<div class="card-group maintainers">
12-
{% for person in maintainers.active_with_github_info %}
12+
{% for username in maintainers.active %}
13+
{% set person = maintainers.github_info.get(username) %}
1314
<div class="card">
1415
<img class="card-img-top" alt="" src="{{ person.avatar_url }}"/>
1516
<div class="card-body">
@@ -67,7 +68,8 @@ The project governance is available in the [project governance page](governance.
6768
## Inactive maintainers
6869

6970
<ul>
70-
{% for person in maintainers.inactive_with_github_info %}
71+
{% for username in maintainers.inactive %}
72+
{% set person = maintainers.github_info.get(username) %}
7173
<li>
7274
<a href="{{ person.blog or person.html_url }}">
7375
{{ person.name or person.login }}

web/pandas/config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
main:
22
templates_path: _templates
33
base_template: "layout.html"
4+
production_url: "https://pandas.pydata.org/"
45
ignore:
56
- _templates/layout.html
67
- config.yml
7-
- try.md # the binder page will be added later
88
github_repo_url: pandas-dev/pandas
99
context_preprocessors:
1010
- pandas_web.Preprocessors.current_year

web/pandas_web.py

+59-59
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,6 @@
4343
import requests
4444
import yaml
4545

46-
api_token = os.environ.get("GITHUB_TOKEN")
47-
if api_token is not None:
48-
GITHUB_API_HEADERS = {"Authorization": f"Bearer {api_token}"}
49-
else:
50-
GITHUB_API_HEADERS = {}
51-
5246

5347
class Preprocessors:
5448
"""
@@ -164,37 +158,39 @@ def maintainers_add_info(context):
164158
Given the active maintainers defined in the yaml file, it fetches
165159
the GitHub user information for them.
166160
"""
167-
timestamp = time.time()
168-
169-
cache_file = pathlib.Path("maintainers.json")
170-
if cache_file.is_file():
171-
with open(cache_file) as f:
172-
context["maintainers"] = json.load(f)
173-
# refresh cache after 1 hour
174-
if (timestamp - context["maintainers"]["timestamp"]) < 3_600:
175-
return context
176-
177-
context["maintainers"]["timestamp"] = timestamp
178-
179161
repeated = set(context["maintainers"]["active"]) & set(
180162
context["maintainers"]["inactive"]
181163
)
182164
if repeated:
183165
raise ValueError(f"Maintainers {repeated} are both active and inactive")
184166

185-
for kind in ("active", "inactive"):
186-
context["maintainers"][f"{kind}_with_github_info"] = []
187-
for user in context["maintainers"][kind]:
188-
resp = requests.get(
189-
f"https://github.com/api/users/{user}", headers=GITHUB_API_HEADERS
167+
maintainers_info = {}
168+
for user in (
169+
context["maintainers"]["active"] + context["maintainers"]["inactive"]
170+
):
171+
resp = requests.get(f"https://github.com/api/users/{user}")
172+
if resp.status_code == 403:
173+
sys.stderr.write(
174+
"WARN: GitHub API quota exceeded when fetching maintainers\n"
175+
)
176+
# if we exceed github api quota, we use the github info
177+
# of maintainers saved with the website
178+
resp_bkp = requests.get(
179+
context["main"]["production_url"] + "maintainers.json"
190180
)
191-
if context["ignore_io_errors"] and resp.status_code == 403:
192-
return context
193-
resp.raise_for_status()
194-
context["maintainers"][f"{kind}_with_github_info"].append(resp.json())
181+
resp_bkp.raise_for_status()
182+
maintainers_info = resp_bkp.json()
183+
break
195184

196-
with open(cache_file, "w") as f:
197-
json.dump(context["maintainers"], f)
185+
resp.raise_for_status()
186+
maintainers_info[user] = resp.json()
187+
188+
context["maintainers"]["github_info"] = maintainers_info
189+
190+
# save the data fetched from github to use it in case we exceed
191+
# the github api quota in the future
192+
with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f:
193+
json.dump(maintainers_info, f)
198194

199195
return context
200196

@@ -203,15 +199,20 @@ def home_add_releases(context):
203199
context["releases"] = []
204200

205201
github_repo_url = context["main"]["github_repo_url"]
206-
resp = requests.get(
207-
f"https://github.com/api/repos/{github_repo_url}/releases",
208-
headers=GITHUB_API_HEADERS,
209-
)
210-
if context["ignore_io_errors"] and resp.status_code == 403:
211-
return context
212-
resp.raise_for_status()
202+
resp = requests.get(f"https://github.com/api/repos/{github_repo_url}/releases")
203+
if resp.status_code == 403:
204+
sys.stderr.write("WARN: GitHub API quota exceeded when fetching releases\n")
205+
resp_bkp = requests.get(context["main"]["production_url"] + "releases.json")
206+
resp_bkp.raise_for_status()
207+
releases = resp_bkp.json()
208+
else:
209+
resp.raise_for_status()
210+
releases = resp.json()
211+
212+
with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f:
213+
json.dump(releases, f, default=datetime.datetime.isoformat)
213214

214-
for release in resp.json():
215+
for release in releases:
215216
if release["prerelease"]:
216217
continue
217218
published = datetime.datetime.strptime(
@@ -229,6 +230,7 @@ def home_add_releases(context):
229230
),
230231
}
231232
)
233+
232234
return context
233235

234236
@staticmethod
@@ -273,15 +275,22 @@ def roadmap_pdeps(context):
273275
github_repo_url = context["main"]["github_repo_url"]
274276
resp = requests.get(
275277
"https://github.com/api/search/issues?"
276-
f"q=is:pr is:open label:PDEP repo:{github_repo_url}",
277-
headers=GITHUB_API_HEADERS,
278+
f"q=is:pr is:open label:PDEP repo:{github_repo_url}"
278279
)
279-
if context["ignore_io_errors"] and resp.status_code == 403:
280-
return context
281-
resp.raise_for_status()
280+
if resp.status_code == 403:
281+
sys.stderr.write("WARN: GitHub API quota exceeded when fetching pdeps\n")
282+
resp_bkp = requests.get(context["main"]["production_url"] + "pdeps.json")
283+
resp_bkp.raise_for_status()
284+
pdeps = resp_bkp.json()
285+
else:
286+
resp.raise_for_status()
287+
pdeps = resp.json()
282288

283-
for pdep in resp.json()["items"]:
284-
context["pdeps"]["under_discussion"].append(
289+
with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f:
290+
json.dump(pdeps, f)
291+
292+
for pdep in pdeps["items"]:
293+
context["pdeps"]["Under discussion"].append(
285294
{"title": pdep["title"], "url": pdep["url"]}
286295
)
287296

@@ -314,7 +323,7 @@ def get_callable(obj_as_str: str) -> object:
314323
return obj
315324

316325

317-
def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
326+
def get_context(config_fname: str, **kwargs):
318327
"""
319328
Load the config yaml as the base context, and enrich it with the
320329
information added by the context preprocessors defined in the file.
@@ -323,7 +332,6 @@ def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
323332
context = yaml.safe_load(f)
324333

325334
context["source_path"] = os.path.dirname(config_fname)
326-
context["ignore_io_errors"] = ignore_io_errors
327335
context.update(kwargs)
328336

329337
preprocessors = (
@@ -361,7 +369,9 @@ def extend_base_template(content: str, base_template: str) -> str:
361369

362370

363371
def main(
364-
source_path: str, target_path: str, base_url: str, ignore_io_errors: bool
372+
source_path: str,
373+
target_path: str,
374+
base_url: str,
365375
) -> int:
366376
"""
367377
Copy every file in the source directory to the target directory.
@@ -375,7 +385,7 @@ def main(
375385
os.makedirs(target_path, exist_ok=True)
376386

377387
sys.stderr.write("Generating context...\n")
378-
context = get_context(config_fname, ignore_io_errors, base_url=base_url)
388+
context = get_context(config_fname, base_url=base_url, target_path=target_path)
379389
sys.stderr.write("Context generated\n")
380390

381391
templates_path = os.path.join(source_path, context["main"]["templates_path"])
@@ -419,15 +429,5 @@ def main(
419429
parser.add_argument(
420430
"--base-url", default="", help="base url where the website is served from"
421431
)
422-
parser.add_argument(
423-
"--ignore-io-errors",
424-
action="store_true",
425-
help="do not fail if errors happen when fetching "
426-
"data from http sources, and those fail "
427-
"(mostly useful to allow GitHub quota errors "
428-
"when running the script locally)",
429-
)
430432
args = parser.parse_args()
431-
sys.exit(
432-
main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors)
433-
)
433+
sys.exit(main(args.source_path, args.target_path, args.base_url))

0 commit comments

Comments
 (0)