Skip to content

docs: Make minor edits and fix typos in sample comments #1853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions samples/snippets/data_visualization_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def test_data_visualization() -> None:

noaa_surface = bpd.read_gbq("bigquery-public-data.noaa_gsod.gsod2021")

- # Calculate median temperature for each day
+ # Calculate the median temperature for each day.
noaa_surface_median_temps = noaa_surface[["date", "temp"]].groupby("date").median()

noaa_surface_median_temps.plot.line()
Expand All @@ -37,14 +37,14 @@ def test_data_visualization() -> None:

usa_names = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

- # Count the occurences of the target names each year. The result is a dataframe with a multi-index.
+ # Count the occurrences of the target names each year. The result is a DataFrame with a multi-index.
name_counts = (
usa_names[usa_names["name"].isin(("Mary", "Emily", "Lisa"))]
.groupby(("year", "name"))["number"]
.sum()
)

- # Flatten the index of the dataframe so that the counts for each name has their own columns.
+ # Flatten the index of the DataFrame so that the counts for each name have their own columns.
name_counts = name_counts.unstack(level=1).fillna(0)

name_counts.plot.area(stacked=False, alpha=0.5)
Expand All @@ -70,13 +70,13 @@ def test_data_visualization() -> None:
"bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021"
).dropna()

- # Data Cleaning
+ # Data cleaning.
taxi_trips = taxi_trips[
taxi_trips["trip_distance"].between(0, 10, inclusive="right")
]
taxi_trips = taxi_trips[taxi_trips["fare_amount"].between(0, 50, inclusive="right")]

- # If you are using partial ordering mode, you will also need to assign an order to your dataset.
+ # If you're using partial ordering mode, you also need to assign an order to your dataset.
# Otherwise, the next line can be skipped.
taxi_trips = taxi_trips.sort_values("pickup_datetime")

Expand All @@ -88,7 +88,7 @@ def test_data_visualization() -> None:

noaa_surface = bpd.read_gbq("bigquery-public-data.noaa_gsod.gsod2021")

- # Calculate median temperature for each day
+ # Calculate the median temperature for each day.
noaa_surface_median_temps = noaa_surface[["date", "temp"]].groupby("date").median()

noaa_surface_median_temps.plot.line(sampling_n=40)
Expand All @@ -99,14 +99,14 @@ def test_data_visualization() -> None:

usa_names = bpd.read_gbq("bigquery-public-data.usa_names.usa_1910_2013")

- # Count the occurences of the target names each year. The result is a dataframe with a multi-index.
+ # Count the occurrences of the target names each year. The result is a DataFrame with a multi-index.
name_counts = (
usa_names[usa_names["name"].isin(("Mary", "Emily", "Lisa"))]
.groupby(("year", "name"))["number"]
.sum()
)

- # Flatten the index of the dataframe so that the counts for each name has their own columns.
+ # Flatten the index of the DataFrame so that the counts for each name have their own columns.
name_counts = name_counts.unstack(level=1).fillna(0)

name_counts.plot.area(subplots=True, alpha=0.5)
Expand All @@ -119,13 +119,13 @@ def test_data_visualization() -> None:
"bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2021"
).dropna()

- # Data Cleaning
+ # Data cleaning.
taxi_trips = taxi_trips[
taxi_trips["trip_distance"].between(0, 10, inclusive="right")
]
taxi_trips = taxi_trips[taxi_trips["fare_amount"].between(0, 50, inclusive="right")]

- # If you are using partial ordering mode, you also need to assign an order to your dataset.
+ # If you're using partial ordering mode, you also need to assign an order to your dataset.
# Otherwise, the next line can be skipped.
taxi_trips = taxi_trips.sort_values("pickup_datetime")

Expand Down
Loading