Skip to content

Commit 9da3f7d

Browse files
[sliding window] revert and deprecate (#39301)
* bring back and deprecate
* oops

---------

Co-authored-by: Cyril Vallez <[email protected]>
1 parent b2fbc02 commit 9da3f7d

File tree

4 files changed

+71
-18
lines changed

4 files changed

+71
-18
lines changed

src/transformers/models/cohere2/configuration_cohere2.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@
1919
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2020
# See the License for the specific language governing permissions and
2121
# limitations under the License.
22+
import warnings
23+
2224
from ...configuration_utils import PretrainedConfig, layer_type_validation
2325
from ...modeling_rope_utils import rope_config_validation
2426

@@ -216,14 +218,29 @@ def __init__(
216218
**kwargs,
217219
)
218220

221+
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
222+
self._sliding_window_pattern = kwargs.get("sliding_window_pattern", 4)
223+
219224
if self.layer_types is None:
220225
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
221-
sliding_window_pattern = getattr(self, "sliding_window_pattern", 4)
226+
self._sliding_window_pattern = getattr(self, "sliding_window_pattern", 4)
222227
self.layer_types = [
223-
"sliding_attention" if bool((i + 1) % sliding_window_pattern) else "full_attention"
228+
"sliding_attention" if bool((i + 1) % self._sliding_window_pattern) else "full_attention"
224229
for i in range(self.num_hidden_layers)
225230
]
226231
layer_type_validation(self.layer_types)
227232

233+
@property
234+
def sliding_window_pattern(self):
235+
warnings.warn(
236+
"The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
237+
FutureWarning,
238+
)
239+
return self._sliding_window_pattern
240+
241+
@sliding_window_pattern.setter
242+
def sliding_window_pattern(self, value):
243+
self._sliding_window_pattern = value
244+
228245

229246
__all__ = ["Cohere2Config"]

src/transformers/models/cohere2/modular_cohere2.py

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16+
import warnings
1617
from typing import Callable, Optional
1718

1819
import torch
@@ -238,15 +239,30 @@ def __init__(
238239
**kwargs,
239240
)
240241

242+
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
243+
self._sliding_window_pattern = kwargs.get("sliding_window_pattern", 4)
244+
241245
if self.layer_types is None:
242246
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
243-
sliding_window_pattern = getattr(self, "sliding_window_pattern", 4)
247+
self._sliding_window_pattern = getattr(self, "sliding_window_pattern", 4)
244248
self.layer_types = [
245-
"sliding_attention" if bool((i + 1) % sliding_window_pattern) else "full_attention"
249+
"sliding_attention" if bool((i + 1) % self._sliding_window_pattern) else "full_attention"
246250
for i in range(self.num_hidden_layers)
247251
]
248252
layer_type_validation(self.layer_types)
249253

254+
@property
255+
def sliding_window_pattern(self):
256+
warnings.warn(
257+
"The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
258+
FutureWarning,
259+
)
260+
return self._sliding_window_pattern
261+
262+
@sliding_window_pattern.setter
263+
def sliding_window_pattern(self, value):
264+
self._sliding_window_pattern = value
265+
250266

251267
class Cohere2RotaryEmbedding(CohereRotaryEmbedding):
252268
pass

src/transformers/models/gemma3/configuration_gemma3.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
2020
# See the License for the specific language governing permissions and
2121
# limitations under the License.
22+
import warnings
2223
from typing import Any, Optional, Union
2324

2425
from ...configuration_utils import PretrainedConfig, layer_type_validation
@@ -145,10 +146,6 @@ class Gemma3TextConfig(PretrainedConfig):
145146
>>> # Accessing the model configuration
146147
>>> configuration = model.config
147148
```
148-
rope_local_base_freq (float, *optional*, defaults to 10000.0):
149-
The base period of the RoPE embeddings for local attention.
150-
sliding_window_pattern (`int`, *optional*, defaults to 6):
151-
Pattern for the sliding window attention.
152149
"""
153150

154151
model_type = "gemma3_text"
@@ -230,15 +227,28 @@ def __init__(
230227
self.rope_scaling = rope_scaling
231228
rope_config_validation(self)
232229

230+
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
231+
self._sliding_window_pattern = kwargs.get("sliding_window_pattern", 6)
232+
233233
if self.layer_types is None:
234-
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
235-
sliding_window_pattern = getattr(self, "sliding_window_pattern", 6)
236234
self.layer_types = [
237-
"sliding_attention" if bool((i + 1) % sliding_window_pattern) else "full_attention"
235+
"sliding_attention" if bool((i + 1) % self._sliding_window_pattern) else "full_attention"
238236
for i in range(self.num_hidden_layers)
239237
]
240238
layer_type_validation(self.layer_types)
241239

240+
@property
241+
def sliding_window_pattern(self):
242+
warnings.warn(
243+
"The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
244+
FutureWarning,
245+
)
246+
return self._sliding_window_pattern
247+
248+
@sliding_window_pattern.setter
249+
def sliding_window_pattern(self, value):
250+
self._sliding_window_pattern = value
251+
242252

243253
class Gemma3Config(PretrainedConfig):
244254
r"""

src/transformers/models/gemma3/modular_gemma3.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
import copy
17+
import warnings
1718
from collections.abc import Callable
1819
from typing import Any, Optional, Union
1920

@@ -171,10 +172,6 @@ class Gemma3TextConfig(Gemma2Config, PretrainedConfig):
171172
>>> # Accessing the model configuration
172173
>>> configuration = model.config
173174
```
174-
rope_local_base_freq (float, *optional*, defaults to 10000.0):
175-
The base period of the RoPE embeddings for local attention.
176-
sliding_window_pattern (`int`, *optional*, defaults to 6):
177-
Pattern for the sliding window attention.
178175
"""
179176

180177
model_type = "gemma3_text"
@@ -241,15 +238,28 @@ def __init__(
241238
self.rope_scaling = rope_scaling
242239
rope_config_validation(self)
243240

241+
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
242+
self._sliding_window_pattern = kwargs.get("sliding_window_pattern", 6)
243+
244244
if self.layer_types is None:
245-
# BC -> the pattern used to be a simple int, and it's still present in configs on the Hub
246-
sliding_window_pattern = getattr(self, "sliding_window_pattern", 6)
247245
self.layer_types = [
248-
"sliding_attention" if bool((i + 1) % sliding_window_pattern) else "full_attention"
246+
"sliding_attention" if bool((i + 1) % self._sliding_window_pattern) else "full_attention"
249247
for i in range(self.num_hidden_layers)
250248
]
251249
layer_type_validation(self.layer_types)
252250

251+
@property
252+
def sliding_window_pattern(self):
253+
warnings.warn(
254+
"The `sliding_window_pattern` attribute is deprecated and will be removed in v4.55.0.",
255+
FutureWarning,
256+
)
257+
return self._sliding_window_pattern
258+
259+
@sliding_window_pattern.setter
260+
def sliding_window_pattern(self, value):
261+
self._sliding_window_pattern = value
262+
253263

254264
class Gemma3Config(PretrainedConfig):
255265
r"""

0 commit comments

Comments
 (0)