Skip to content

Consistent logging #270

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 16, 2025
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ See [GitHub releases](https://github.com/pyOpenSci/pyosMeta/releases) page for a

## [Unreleased]

* Use a consistent logger for informational/debug outputs. Using print statements can make it tough to track down which line of code emitted the message and using the `warnings` module will suppress recurring warnings.
* Added `tqdm` as a dependency to improve progress monitoring when running data processing scripts (@banesullivan)

## [v1.6] - 2025-02-17

## What's Changed
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"python-dotenv",
"requests",
"ruamel-yaml>=0.17.21",
"tqdm",
]
# This is metadata that pip reads to understand what Python versions your package supports
requires-python = ">=3.10"
Expand All @@ -42,7 +43,7 @@ dev = [
"pre-commit",
"pytest",
"pytest-cov",
"pytest-mock"
"pytest-mock",
]

[project.urls]
Expand Down
13 changes: 8 additions & 5 deletions src/pyosmeta/cli/process_reviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from pyosmeta import ProcessIssues
from pyosmeta.github_api import GitHubAPI
from pyosmeta.logging import logger


def main():
Expand All @@ -40,15 +41,17 @@ def main():
# Get all issues for approved packages - load as dict
issues = process_review.get_issues()
accepted_reviews, errors = process_review.parse_issues(issues)
for url, error in errors.items():
print(f"Error in review at url: {url}")
print(error)
print("-" * 20)
if len(errors):
if errors:
logger.error("Errors found when parsing reviews (printed to stdout):")
for url, error in errors.items():
print(f"Error in review at url: {url}")
print(error)
print("-" * 20)
raise RuntimeError("Errors in parsing reviews, see printout above")

# Update gh metrics via api for all packages
# Contrib count is only available via rest api
logger.info("Getting GitHub metrics for all packages...")
repo_paths = process_review.get_repo_paths(accepted_reviews)
all_reviews = github_api.get_gh_metrics(repo_paths, accepted_reviews)

Expand Down
38 changes: 21 additions & 17 deletions src/pyosmeta/cli/update_contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
from datetime import datetime

from pydantic import ValidationError
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import create_paths, load_pickle, open_yml_file
from pyosmeta.github_api import GitHubAPI
from pyosmeta.logging import logger
from pyosmeta.models import PersonModel


Expand Down Expand Up @@ -64,29 +67,28 @@ def main():

# Populate all existing contribs into model objects
all_contribs = {}
for a_contrib in web_contribs:
print(a_contrib["github_username"])
try:
all_contribs[a_contrib["github_username"].lower()] = PersonModel(
**a_contrib
)
except ValidationError as ve:
print(a_contrib["github_username"])
print(ve)

print("Done processing all-contribs")
for a_contrib in tqdm(web_contribs, desc="Processing all-contribs"):
username = a_contrib["github_username"]
tqdm.write(f"Processing {username}")
with logging_redirect_tqdm():
try:
all_contribs[username.lower()] = PersonModel(**a_contrib)
except ValidationError:
logger.error(f"Error processing {username}", exc_info=True)

# Create a list of all contributors across repositories
github_api = GitHubAPI()
process_contribs = ProcessContributors(github_api, json_files)
bot_all_contribs = process_contribs.combine_json_data()

print("Updating contrib types and searching for new users now")
for key, users in bot_all_contribs.items():
for key, users in tqdm(
bot_all_contribs.items(),
desc="Updating contrib types and searching for new users",
):
for gh_user in users:
# Find and populate data for any new contributors
if gh_user not in all_contribs.keys():
print("Missing", gh_user, "Adding them now")
logger.info(f"Missing {gh_user}, adding them now")
new_contrib = process_contribs.return_user_info(gh_user)
new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
all_contribs[gh_user] = PersonModel(**new_contrib)
Expand All @@ -95,8 +97,8 @@ def main():
all_contribs[gh_user].add_unique_value("contributor_type", key)

if update_all:
for user in all_contribs.keys():
print("Updating all user info from github", user)
for user in tqdm(all_contribs.keys(), desc="Updating all user info"):
tqdm.write(f"Updating all user info from github for {user}")
new_gh_data = process_contribs.return_user_info(user)

# TODO: turn this into a small update method
Expand Down Expand Up @@ -127,7 +129,9 @@ def main():
try:
setattr(data, "date_added", history[user])
except KeyError:
print(f"Username {user} must be new, skipping")
logger.error(
f"Username {user} must be new, skipping", exc_info=True
)

# Export to pickle which supports updates after parsing reviews
with open("all_contribs.pickle", "wb") as f:
Expand Down
69 changes: 42 additions & 27 deletions src/pyosmeta/cli/update_review_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@
from datetime import datetime

from pydantic import ValidationError
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm

from pyosmeta.contributors import ProcessContributors
from pyosmeta.file_io import clean_export_yml, load_pickle
from pyosmeta.github_api import GitHubAPI
from pyosmeta.logging import logger
from pyosmeta.models import PersonModel, ReviewModel, ReviewUser
from pyosmeta.utils_clean import get_clean_user

Expand Down Expand Up @@ -106,13 +109,16 @@ def process_user(
if gh_user not in contribs.keys():
# If they aren't in the existing contribs.yml data, add them by using
# their github username and hitting the github api
print("Found a new contributor!", gh_user)
logger.info(f"Found a new contributor: {gh_user}")
new_contrib = processor.return_user_info(gh_user)
new_contrib["date_added"] = datetime.now().strftime("%Y-%m-%d")
try:
contribs[gh_user] = PersonModel(**new_contrib)
except ValidationError as ve:
print(ve)
except ValidationError:
logger.error(
f"Error processing new contributor {gh_user}. Skipping this user.",
exc_info=True,
)

# Update user the list of contribution types if there are new types to add
# for instance a new reviewer would have a "Reviewer" contributor type
Expand Down Expand Up @@ -143,33 +149,42 @@ def main():

contrib_types = process_contribs.contrib_types

for pkg_name, review in packages.items():
print("Processing review team for:", pkg_name)
for role in contrib_types.keys():
user: list[ReviewUser] | ReviewUser = getattr(review, role)

# Eic is a newer field, so in some instances it will be empty
# if it's empty print a message noting the data are missing
if user:
# Handle lists or single users separately
if isinstance(user, list):
for i, a_user in enumerate(user):
a_user, contribs = process_user(
a_user, role, pkg_name, contribs, process_contribs
for pkg_name, review in tqdm(
packages.items(), desc="Processing review teams"
):
with logging_redirect_tqdm():
tqdm.write(f"Processing review team for: {pkg_name}")
for role in contrib_types.keys():
user: list[ReviewUser] | ReviewUser = getattr(review, role)

# Eic is a newer field, so in some instances it will be empty
# if it's empty log a message noting the data are missing
if user:
# Handle lists or single users separately
if isinstance(user, list):
for i, a_user in enumerate(user):
a_user, contribs = process_user(
a_user,
role,
pkg_name,
contribs,
process_contribs,
)
# Update individual user in reference to issue list
user[i] = a_user
elif isinstance(user, ReviewUser):
user, contribs = process_user(
user, role, pkg_name, contribs, process_contribs
)
setattr(review, role, user)
else:
raise TypeError(
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
)
# Update individual user in reference to issue list
user[i] = a_user
elif isinstance(user, ReviewUser):
user, contribs = process_user(
user, role, pkg_name, contribs, process_contribs
)
setattr(review, role, user)
else:
raise TypeError(
"Keys in the `contrib_types` map must be a `ReviewUser` or `list[ReviewUser]` in the `ReviewModel`"
logger.warning(
f"I can't find a username for {role} under {pkg_name}. Moving on."
)
else:
print(f"I can't find a username for {role}. Moving on.")

# Export to yaml
contribs_ls = [model.model_dump() for model in contribs.values()]
Expand Down
15 changes: 10 additions & 5 deletions src/pyosmeta/contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import requests

from .github_api import GitHubAPI
from .logging import logger


@dataclass
Expand Down Expand Up @@ -102,8 +103,10 @@
"""
try:
response = requests.get(json_path)
except Exception as ae:
print(ae)
except Exception:
logger.error(

Check warning on line 107 in src/pyosmeta/contributors.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/contributors.py#L106-L107

Added lines #L106 - L107 were not covered by tests
f"Error loading json file: {json_path}", exc_info=True
)
return json.loads(response.text)

def process_json_file(self, json_file: str) -> Tuple[str, List]:
Expand Down Expand Up @@ -150,8 +153,10 @@
try:
key, users = self.process_json_file(json_file)
combined_data[key] = users
except Exception as e:
print("Oops - can't process", json_file, e)
except Exception:
logger.error(

Check warning on line 157 in src/pyosmeta/contributors.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/contributors.py#L156-L157

Added lines #L156 - L157 were not covered by tests
f"Oops - can't process: {json_file}", exc_info=True
)
return combined_data

def return_user_info(
Expand Down Expand Up @@ -269,6 +274,6 @@

# If the user is not in the web dict, add them
else:
print("New user found. Adding: ", gh_user)
logger.info(f"New user found. Adding: {gh_user}")
webDict[gh_user] = repoDict[gh_user]
return webDict
6 changes: 4 additions & 2 deletions src/pyosmeta/file_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import ruamel.yaml
from ruamel.yaml import YAML

from .logging import logger


def load_pickle(filename):
"""Opens a pickle"""
Expand Down Expand Up @@ -84,8 +86,8 @@
with urllib.request.urlopen(file_path) as f:
yaml = YAML(typ="safe", pure=True)
return yaml.load(f)
except urllib.error.URLError as url_error:
print("Oops - can find the url", file_path, url_error)
except urllib.error.URLError:
logger.error(f"Oops - can't find the url: {file_path}", exc_info=True)

Check warning on line 90 in src/pyosmeta/file_io.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/file_io.py#L89-L90

Added lines #L89 - L90 were not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
logger.error(f"Oops - can find the url: {file_path}", exc_info=True)
logger.error(f"Oops - I can't find the url: {file_path}", exc_info=True)



def export_yaml(filename: str, data_list: list):
Expand Down
13 changes: 7 additions & 6 deletions src/pyosmeta/github_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
numbers, stars and more "health & stability" related metrics
"""

import logging
import os
import time
from dataclasses import dataclass
Expand All @@ -20,6 +19,8 @@

from pyosmeta.models import ReviewModel

from .logging import logger


@dataclass
class GitHubAPI:
Expand Down Expand Up @@ -172,7 +173,7 @@

except requests.HTTPError as exception:
if exception.response.status_code == 401:
logging.error(
logger.error(
"Unauthorized request. Your token may be expired or invalid. Please refresh your token."
)
else:
Expand Down Expand Up @@ -237,7 +238,7 @@
contributors = self._get_response_rest(repo_contribs_url)

if not contributors:
logging.warning(
logger.warning(

Check warning on line 241 in src/pyosmeta/github_api.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/github_api.py#L241

Added line #L241 was not covered by tests
f"Repository not found: {repo_contribs_url}. Did the repo URL change?"
)
return None
Expand Down Expand Up @@ -339,19 +340,19 @@
]["edges"][0]["node"]["committedDate"],
}
elif response.status_code == 404:
logging.warning(
logger.warning(

Check warning on line 343 in src/pyosmeta/github_api.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/github_api.py#L343

Added line #L343 was not covered by tests
f"Repository not found: {repo_info['owner']}/{repo_info['repo_name']}. Did the repo URL change?"
)
return None
elif response.status_code == 403:
logging.warning(
logger.warning(

Check warning on line 348 in src/pyosmeta/github_api.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/github_api.py#L348

Added line #L348 was not covered by tests
f"Oops! You may have hit an API limit for repository: {repo_info['owner']}/{repo_info['repo_name']}.\n"
f"API Response Text: {response.text}\n"
f"API Response Headers: {response.headers}"
)
return None
else:
logging.warning(
logger.warning(

Check warning on line 355 in src/pyosmeta/github_api.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/github_api.py#L355

Added line #L355 was not covered by tests
f"Unexpected HTTP error: {response.status_code} for repository: {repo_info['owner']}/{repo_info['repo_name']}"
)
return None
Expand Down
4 changes: 4 additions & 0 deletions src/pyosmeta/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import logging

# Shared package-wide logger so all pyosmeta modules and CLI scripts emit
# through one configurable channel instead of scattered print() calls.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Without an explicit handler, records fall through to logging.lastResort,
# which filters at WARNING and would silently drop the INFO messages the
# CLI scripts rely on.  Attach a stderr handler once; the guard keeps
# repeated imports/reloads from duplicating output.
# NOTE(review): if an application later calls logging.basicConfig(), records
# will also propagate to root — confirm whether propagation should be off.
if not logger.handlers:
    _handler = logging.StreamHandler()
    _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
    logger.addHandler(_handler)
11 changes: 8 additions & 3 deletions src/pyosmeta/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
field_validator,
)

from pyosmeta.logging import logger
from pyosmeta.models.github import Labels
from pyosmeta.utils_clean import (
check_url,
Expand Down Expand Up @@ -58,15 +59,19 @@
return url # Returns empty string if url is empty
else:
if url.startswith("http://"):
print(f"{url} 'http://' replacing w 'https://'")
logger.warning(

Check warning on line 62 in src/pyosmeta/models/base.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/models/base.py#L62

Added line #L62 was not covered by tests
f"Oops, http protocol for {url}, changing to https"
)
url = url.replace("http://", "https://")
elif not url.startswith("http"):
print("Oops, missing http")
logger.warning(

Check warning on line 67 in src/pyosmeta/models/base.py

View check run for this annotation

Codecov / codecov/patch

src/pyosmeta/models/base.py#L67

Added line #L67 was not covered by tests
f"Oops, missing http protocol for {url}, adding it"
)
url = "https://" + url
if check_url(url=url):
return url
else: # pragma: no cover
print(f"Oops, url `{url}` is not valid, removing it")
logger.warning(f"Oops, url `{url}` is not valid, removing it")
return None


Expand Down
Loading