Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit c97b0fe

Browse files
Merge pull request #375 from openclimatefix/issue/304-gsp-speed-up
Speed up gsp locations
2 parents 54acae1 + 5891f39 commit c97b0fe

File tree

1 file changed

+41
-22
lines changed

1 file changed

+41
-22
lines changed

nowcasting_dataset/data_sources/gsp/gsp_data_source.py

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -124,35 +124,54 @@ def get_locations(self, t0_datetimes: pd.DatetimeIndex) -> Tuple[List[Number], L
124124
Returns: list of x and y locations
125125
126126
"""
127-
# Pick a random GSP for each t0_datetime, and then grab
128-
# their geographical location.
129-
x_locations = []
130-
y_locations = []
127+
total_gsp_nan_count = self.gsp_power.isna().sum().sum()
128+
if total_gsp_nan_count == 0:
131129

132-
# TODO: Issue 305: Speed up this function by removing this for loop?
133-
for t0_dt in t0_datetimes:
130+
# get random GSP metadata
131+
indexes = list(
132+
self.rng.integers(low=0, high=len(self.metadata), size=len(t0_datetimes))
133+
)
134+
metadata = self.metadata.iloc[indexes]
135+
136+
# get x, y locations
137+
x_centers_osgb = list(metadata.location_x)
138+
y_centers_osgb = list(metadata.location_y)
139+
140+
else:
141+
142+
logger.warning(
143+
"There are some nans in the gsp data, "
144+
"so to get x,y locations we have to do a big loop"
145+
)
146+
147+
# Pick a random GSP for each t0_datetime, and then grab
148+
# their geographical location.
149+
x_centers_osgb = []
150+
y_centers_osgb = []
151+
152+
for t0_dt in t0_datetimes:
134153

135-
# Choose start and end times
136-
start_dt = self._get_start_dt(t0_dt)
137-
end_dt = self._get_end_dt(t0_dt)
154+
# Choose start and end times
155+
start_dt = self._get_start_dt(t0_dt)
156+
end_dt = self._get_end_dt(t0_dt)
138157

139-
# remove any nans
140-
gsp_power = self.gsp_power.loc[start_dt:end_dt].dropna(axis="columns", how="any")
158+
# remove any nans
159+
gsp_power = self.gsp_power.loc[start_dt:end_dt].dropna(axis="columns", how="any")
141160

142-
# get random index
143-
random_gsp_id = self.rng.choice(gsp_power.columns)
144-
meta_data = self.metadata[(self.metadata["gsp_id"] == random_gsp_id)]
161+
# get random index
162+
random_gsp_id = self.rng.choice(gsp_power.columns)
163+
meta_data = self.metadata[(self.metadata["gsp_id"] == random_gsp_id)]
145164

146-
# Make sure there is only one GSP.
147-
# Sometimes there are multiple gsp_ids at one location e.g. 'SELL_1'.
148-
# TODO: Issue #272: Further investigation on multiple GSPs may be needed.
149-
metadata_for_gsp = meta_data.iloc[0]
165+
# Make sure there is only one GSP.
166+
# Sometimes there are multiple gsp_ids at one location e.g. 'SELL_1'.
167+
# TODO: Issue #272: Further investigation on multiple GSPs may be needed.
168+
metadata_for_gsp = meta_data.iloc[0]
150169

151-
# Get metadata for GSP
152-
x_locations.append(metadata_for_gsp.location_x)
153-
y_locations.append(metadata_for_gsp.location_y)
170+
# Get metadata for GSP
171+
x_centers_osgb.append(metadata_for_gsp.location_x)
172+
y_centers_osgb.append(metadata_for_gsp.location_y)
154173

155-
return x_locations, y_locations
174+
return x_centers_osgb, y_centers_osgb
156175

157176
def get_example(
158177
self, t0_dt: pd.Timestamp, x_meters_center: Number, y_meters_center: Number

0 commit comments

Comments
 (0)