@@ -41,21 +41,42 @@ impl<'a> StringFormat<'a> {
41
41
config,
42
42
}
43
43
}
44
+
45
+ /// Returns the maximum number of graphemes that is possible on a line while taking the
46
+ /// indentation into account.
47
+ ///
48
+ /// If we cannot put at least a single character per line, the rewrite won't succeed.
49
+ fn max_chars_with_indent ( & self ) -> Option < usize > {
50
+ Some (
51
+ self . shape
52
+ . width
53
+ . checked_sub ( self . opener . len ( ) + self . line_end . len ( ) + 1 ) ? + 1 ,
54
+ )
55
+ }
56
+
57
+ /// Like max_chars_with_indent but the indentation is not substracted.
58
+ /// This allows to fit more graphemes from the string on a line when
59
+ /// SnippetState::Overflow.
60
+ fn max_chars_without_indent ( & self ) -> Option < usize > {
61
+ Some ( self . config . max_width ( ) . checked_sub ( self . line_end . len ( ) ) ?)
62
+ }
44
63
}
45
64
46
- // FIXME: simplify this!
47
65
pub fn rewrite_string < ' a > (
48
66
orig : & str ,
49
67
fmt : & StringFormat < ' a > ,
50
68
max_width : Option < usize > ,
51
69
) -> Option < String > {
70
+ let max_chars_with_indent = fmt. max_chars_with_indent ( ) ?;
71
+ let max_chars_without_indent = fmt. max_chars_without_indent ( ) ?;
72
+ let indent = fmt. shape . indent . to_string_with_newline ( fmt. config ) ;
73
+
52
74
// Strip line breaks.
53
- let re = Regex :: new ( r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" ) . unwrap ( ) ;
54
- let stripped_str = re. replace_all ( orig, "$1" ) ;
75
+ // With this regex applied, all remaining whitespaces are significant
76
+ let strip_line_breaks_re = Regex :: new ( r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" ) . unwrap ( ) ;
77
+ let stripped_str = strip_line_breaks_re. replace_all ( orig, "$1" ) ;
55
78
56
79
let graphemes = UnicodeSegmentation :: graphemes ( & * stripped_str, false ) . collect :: < Vec < & str > > ( ) ;
57
- let shape = fmt. shape ;
58
- let indent = shape. indent . to_string_with_newline ( fmt. config ) ;
59
80
60
81
// `cur_start` is the position in `orig` of the start of the current line.
61
82
let mut cur_start = 0 ;
@@ -67,84 +88,129 @@ pub fn rewrite_string<'a>(
67
88
) ;
68
89
result. push_str ( fmt. opener ) ;
69
90
70
- let ender_length = fmt. line_end . len ( ) ;
71
- // If we cannot put at least a single character per line, the rewrite won't
72
- // succeed.
73
- let mut max_chars = shape
74
- . width
75
- . checked_sub ( fmt. opener . len ( ) + ender_length + 1 ) ? + 1 ;
76
-
77
- // Snip a line at a time from `orig` until it is used up. Push the snippet
91
+ // Snip a line at a time from `stripped_str` until it is used up. Push the snippet
78
92
// onto result.
79
- ' outer: loop {
80
- // `cur_end` will be where we break the line, as an offset into `orig`.
81
- // Initialised to the maximum it could be (which may be beyond `orig`).
82
- let mut cur_end = cur_start + max_chars;
83
-
84
- // We can fit the rest of the string on this line, so we're done.
85
- if cur_end >= graphemes. len ( ) {
86
- let line = & graphemes[ cur_start..] . join ( "" ) ;
87
- result. push_str ( line) ;
88
- break ' outer;
93
+ let mut cur_max_chars = max_chars_with_indent;
94
+ loop {
95
+ // All the input starting at cur_start fits on the current line
96
+ if graphemes. len ( ) - cur_start <= cur_max_chars {
97
+ result. push_str ( & graphemes[ cur_start..] . join ( "" ) ) ;
98
+ break ;
89
99
}
90
100
91
- // Push cur_end left until we reach whitespace (or the line is too small).
92
- while !is_whitespace ( graphemes[ cur_end - 1 ] ) {
93
- cur_end -= 1 ;
94
- if cur_end < cur_start + MIN_STRING {
95
- // We couldn't find whitespace before the string got too small.
96
- // So start again at the max length and look for punctuation.
97
- cur_end = cur_start + max_chars;
98
- while !is_punctuation ( graphemes[ cur_end - 1 ] ) {
99
- cur_end -= 1 ;
100
-
101
- // If we can't break at whitespace or punctuation, grow the string instead.
102
- if cur_end < cur_start + MIN_STRING {
103
- cur_end = cur_start + max_chars;
104
- while !( is_punctuation ( graphemes[ cur_end - 1 ] )
105
- || is_whitespace ( graphemes[ cur_end - 1 ] ) )
106
- {
107
- cur_end += 1 ;
108
- if cur_end == graphemes. len ( ) {
109
- let line = & graphemes[ cur_start..] . join ( "" ) ;
110
- result. push_str ( line) ;
111
- break ' outer;
112
- }
113
- }
114
- break ;
115
- }
116
- }
101
+ // The input starting at cur_start needs to be broken
102
+ match break_string ( cur_max_chars, fmt. trim_end , & graphemes[ cur_start..] ) {
103
+ SnippetState :: LineEnd ( line, len) => {
104
+ result. push_str ( & line) ;
105
+ result. push_str ( fmt. line_end ) ;
106
+ result. push_str ( & indent) ;
107
+ result. push_str ( fmt. line_start ) ;
108
+ cur_max_chars = max_chars_with_indent;
109
+ cur_start += len;
110
+ }
111
+ SnippetState :: Overflow ( line, len) => {
112
+ result. push_str ( & line) ;
113
+ cur_max_chars = max_chars_without_indent;
114
+ cur_start += len;
115
+ }
116
+ SnippetState :: EndOfInput ( line) => {
117
+ result. push_str ( & line) ;
117
118
break ;
118
119
}
119
120
}
120
- // Make sure there is no whitespace to the right of the break.
121
- while cur_end < stripped_str. len ( ) && is_whitespace ( graphemes[ cur_end] ) {
122
- cur_end += 1 ;
123
- }
121
+ }
124
122
125
- // Make the current line and add it on to result.
126
- let raw_line = graphemes[ cur_start..cur_end] . join ( "" ) ;
127
- let line = if fmt. trim_end {
128
- raw_line. trim ( )
129
- } else {
130
- raw_line. as_str ( )
131
- } ;
123
+ result. push_str ( fmt. closer ) ;
124
+ wrap_str ( result, fmt. config . max_width ( ) , fmt. shape )
125
+ }
132
126
133
- result. push_str ( line) ;
134
- result. push_str ( fmt. line_end ) ;
135
- result. push_str ( & indent) ;
136
- result. push_str ( fmt. line_start ) ;
127
+ /// Result of breaking a string so it fits in a line and the state it ended in.
128
+ /// The state informs about what to do with the snippet and how to continue the breaking process.
129
+ #[ derive( Debug , PartialEq ) ]
130
+ enum SnippetState {
131
+ /// The input could not be broken and so rewriting the string is finished.
132
+ EndOfInput ( String ) ,
133
+ /// The input could be broken and the returned snippet should be ended with a
134
+ /// `[StringFormat::line_end]`. The next snippet needs to be indented.
135
+ LineEnd ( String , usize ) ,
136
+ /// The input could be broken but the returned snippet should not be ended with a
137
+ /// `[StringFormat::line_end]` because the whitespace is significant. Therefore, the next
138
+ /// snippet should not be indented.
139
+ Overflow ( String , usize ) ,
140
+ }
137
141
138
- // The next line starts where the current line ends.
139
- cur_start = cur_end;
142
+ /// Break the input string at a boundary character around the offset `max_chars`. A boundary
143
+ /// character is either a punctuation or a whitespace.
144
+ fn break_string ( max_chars : usize , trim_end : bool , input : & [ & str ] ) -> SnippetState {
145
+ let break_at = |index /* grapheme at index is included */ | {
146
+ // Take in any whitespaces to the left/right of `input[index]` and
147
+ // check if there is a line feed, in which case whitespaces needs to be kept.
148
+ let mut index_minus_ws = index;
149
+ for ( i, grapheme) in input[ 0 ..=index] . iter ( ) . enumerate ( ) . rev ( ) {
150
+ if !trim_end && is_line_feed ( grapheme) {
151
+ return SnippetState :: Overflow ( input[ 0 ..=i] . join ( "" ) . to_string ( ) , i + 1 ) ;
152
+ } else if !is_whitespace ( grapheme) {
153
+ index_minus_ws = i;
154
+ break ;
155
+ }
156
+ }
157
+ let mut index_plus_ws = index;
158
+ for ( i, grapheme) in input[ index + 1 ..] . iter ( ) . enumerate ( ) {
159
+ if !trim_end && is_line_feed ( grapheme) {
160
+ return SnippetState :: Overflow (
161
+ input[ 0 ..=index + 1 + i] . join ( "" ) . to_string ( ) ,
162
+ index + 2 + i,
163
+ ) ;
164
+ } else if !is_whitespace ( grapheme) {
165
+ index_plus_ws = index + i;
166
+ break ;
167
+ }
168
+ }
140
169
141
- if let Some ( new_max_chars) = max_width {
142
- max_chars = new_max_chars. checked_sub ( fmt. opener . len ( ) + ender_length + 1 ) ? + 1 ;
170
+ if trim_end {
171
+ SnippetState :: LineEnd (
172
+ input[ 0 ..=index_minus_ws] . join ( "" ) . to_string ( ) ,
173
+ index_plus_ws + 1 ,
174
+ )
175
+ } else {
176
+ SnippetState :: LineEnd (
177
+ input[ 0 ..=index_plus_ws] . join ( "" ) . to_string ( ) ,
178
+ index_plus_ws + 1 ,
179
+ )
143
180
}
181
+ } ;
182
+
183
+ // Find the position in input for breaking the string
184
+ match input[ 0 ..max_chars]
185
+ . iter ( )
186
+ . rposition ( |grapheme| is_whitespace ( grapheme) )
187
+ {
188
+ // Found a whitespace and what is on its left side is big enough.
189
+ Some ( index) if index >= MIN_STRING => break_at ( index) ,
190
+ // No whitespace found, try looking for a punctuation instead
191
+ _ => match input[ 0 ..max_chars]
192
+ . iter ( )
193
+ . rposition ( |grapheme| is_punctuation ( grapheme) )
194
+ {
195
+ // Found a punctuation and what is on its left side is big enough.
196
+ Some ( index) if index >= MIN_STRING => break_at ( index) ,
197
+ // Either no boundary character was found to the left of `input[max_chars]`, or the line
198
+ // got too small. We try searching for a boundary character to the right.
199
+ _ => match input[ max_chars..]
200
+ . iter ( )
201
+ . position ( |grapheme| is_whitespace ( grapheme) || is_punctuation ( grapheme) )
202
+ {
203
+ // A boundary was found after the line limit
204
+ Some ( index) => break_at ( max_chars + index) ,
205
+ // No boundary to the right, the input cannot be broken
206
+ None => SnippetState :: EndOfInput ( input. join ( "" ) . to_string ( ) ) ,
207
+ } ,
208
+ } ,
144
209
}
210
+ }
145
211
146
- result . push_str ( fmt . closer ) ;
147
- wrap_str ( result , fmt . config . max_width ( ) , fmt . shape )
212
+ fn is_line_feed ( grapheme : & str ) -> bool {
213
+ grapheme . as_bytes ( ) [ 0 ] == b'\n'
148
214
}
149
215
150
216
fn is_whitespace ( grapheme : & str ) -> bool {
@@ -160,13 +226,99 @@ fn is_punctuation(grapheme: &str) -> bool {
160
226
161
227
#[ cfg( test) ]
162
228
mod test {
163
- use super :: { rewrite_string, StringFormat } ;
229
+ use super :: { break_string , rewrite_string, SnippetState , StringFormat } ;
164
230
use shape:: { Indent , Shape } ;
231
+ use unicode_segmentation:: UnicodeSegmentation ;
165
232
166
233
#[ test]
167
234
fn issue343 ( ) {
168
235
let config = Default :: default ( ) ;
169
236
let fmt = StringFormat :: new ( Shape :: legacy ( 2 , Indent :: empty ( ) ) , & config) ;
170
237
rewrite_string ( "eq_" , & fmt, None ) ;
171
238
}
239
+
240
+ #[ test]
241
+ fn should_break_on_whitespace ( ) {
242
+ let string = "Placerat felis. Mauris porta ante sagittis purus." ;
243
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
244
+ assert_eq ! (
245
+ break_string( 20 , false , & graphemes[ ..] ) ,
246
+ SnippetState :: LineEnd ( "Placerat felis. " . to_string( ) , 16 )
247
+ ) ;
248
+ assert_eq ! (
249
+ break_string( 20 , true , & graphemes[ ..] ) ,
250
+ SnippetState :: LineEnd ( "Placerat felis." . to_string( ) , 16 )
251
+ ) ;
252
+ }
253
+
254
+ #[ test]
255
+ fn should_break_on_punctuation ( ) {
256
+ let string = "Placerat_felis._Mauris_porta_ante_sagittis_purus." ;
257
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
258
+ assert_eq ! (
259
+ break_string( 20 , false , & graphemes[ ..] ) ,
260
+ SnippetState :: LineEnd ( "Placerat_felis." . to_string( ) , 15 )
261
+ ) ;
262
+ }
263
+
264
+ #[ test]
265
+ fn should_break_forward ( ) {
266
+ let string = "Venenatis_tellus_vel_tellus. Aliquam aliquam dolor at justo." ;
267
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
268
+ assert_eq ! (
269
+ break_string( 20 , false , & graphemes[ ..] ) ,
270
+ SnippetState :: LineEnd ( "Venenatis_tellus_vel_tellus. " . to_string( ) , 29 )
271
+ ) ;
272
+ assert_eq ! (
273
+ break_string( 20 , true , & graphemes[ ..] ) ,
274
+ SnippetState :: LineEnd ( "Venenatis_tellus_vel_tellus." . to_string( ) , 29 )
275
+ ) ;
276
+ }
277
+
278
+ #[ test]
279
+ fn nothing_to_break ( ) {
280
+ let string = "Venenatis_tellus_vel_tellus" ;
281
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
282
+ assert_eq ! (
283
+ break_string( 20 , false , & graphemes[ ..] ) ,
284
+ SnippetState :: EndOfInput ( "Venenatis_tellus_vel_tellus" . to_string( ) )
285
+ ) ;
286
+ }
287
+
288
+ #[ test]
289
+ fn significant_whitespaces ( ) {
290
+ let string = "Neque in sem. \n Pellentesque tellus augue." ;
291
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
292
+ assert_eq ! (
293
+ break_string( 15 , false , & graphemes[ ..] ) ,
294
+ SnippetState :: Overflow ( "Neque in sem. \n " . to_string( ) , 20 )
295
+ ) ;
296
+ assert_eq ! (
297
+ break_string( 25 , false , & graphemes[ ..] ) ,
298
+ SnippetState :: Overflow ( "Neque in sem. \n " . to_string( ) , 20 )
299
+ ) ;
300
+ // if `StringFormat::line_end` is true, then the line feed does not matter anymore
301
+ assert_eq ! (
302
+ break_string( 15 , true , & graphemes[ ..] ) ,
303
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 26 )
304
+ ) ;
305
+ assert_eq ! (
306
+ break_string( 25 , true , & graphemes[ ..] ) ,
307
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 26 )
308
+ ) ;
309
+ }
310
+
311
+ #[ test]
312
+ fn big_whitespace ( ) {
313
+ let string = "Neque in sem. Pellentesque tellus augue." ;
314
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
315
+ assert_eq ! (
316
+ break_string( 20 , false , & graphemes[ ..] ) ,
317
+ SnippetState :: LineEnd ( "Neque in sem. " . to_string( ) , 25 )
318
+ ) ;
319
+ assert_eq ! (
320
+ break_string( 20 , true , & graphemes[ ..] ) ,
321
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 25 )
322
+ ) ;
323
+ }
172
324
}
0 commit comments