@@ -41,21 +41,39 @@ impl<'a> StringFormat<'a> {
41
41
config,
42
42
}
43
43
}
44
+
45
+ /// Returns the maximum number of graphemes that is possible on a line while taking the
46
+ /// indentation into account.
47
+ ///
48
+ /// If we cannot put at least a single character per line, the rewrite won't succeed.
49
+ fn max_chars_with_indent ( & self ) -> Option < usize > {
50
+ Some (
51
+ self . shape
52
+ . width
53
+ . checked_sub ( self . opener . len ( ) + self . line_end . len ( ) + 1 ) ?
54
+ + 1 ,
55
+ )
56
+ }
57
+
58
+ /// Like max_chars_with_indent but the indentation is not substracted.
59
+ /// This allows to fit more graphemes from the string on a line when
60
+ /// SnippetState::Overflow.
61
+ fn max_chars_without_indent ( & self ) -> Option < usize > {
62
+ Some ( self . config . max_width ( ) . checked_sub ( self . line_end . len ( ) ) ?)
63
+ }
44
64
}
45
65
46
- // FIXME: simplify this!
47
- pub fn rewrite_string < ' a > (
48
- orig : & str ,
49
- fmt : & StringFormat < ' a > ,
50
- max_width : Option < usize > ,
51
- ) -> Option < String > {
66
+ pub fn rewrite_string < ' a > ( orig : & str , fmt : & StringFormat < ' a > ) -> Option < String > {
67
+ let max_chars_with_indent = fmt. max_chars_with_indent ( ) ?;
68
+ let max_chars_without_indent = fmt. max_chars_without_indent ( ) ?;
69
+ let indent = fmt. shape . indent . to_string_with_newline ( fmt. config ) ;
70
+
52
71
// Strip line breaks.
53
- let re = Regex :: new ( r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" ) . unwrap ( ) ;
54
- let stripped_str = re. replace_all ( orig, "$1" ) ;
72
+ // With this regex applied, all remaining whitespaces are significant
73
+ let strip_line_breaks_re = Regex :: new ( r"([^\\](\\\\)*)\\[\n\r][[:space:]]*" ) . unwrap ( ) ;
74
+ let stripped_str = strip_line_breaks_re. replace_all ( orig, "$1" ) ;
55
75
56
76
let graphemes = UnicodeSegmentation :: graphemes ( & * stripped_str, false ) . collect :: < Vec < & str > > ( ) ;
57
- let shape = fmt. shape ;
58
- let indent = shape. indent . to_string_with_newline ( fmt. config ) ;
59
77
60
78
// `cur_start` is the position in `orig` of the start of the current line.
61
79
let mut cur_start = 0 ;
@@ -67,85 +85,129 @@ pub fn rewrite_string<'a>(
67
85
) ;
68
86
result. push_str ( fmt. opener ) ;
69
87
70
- let ender_length = fmt. line_end . len ( ) ;
71
- // If we cannot put at least a single character per line, the rewrite won't
72
- // succeed.
73
- let mut max_chars = shape
74
- . width
75
- . checked_sub ( fmt. opener . len ( ) + ender_length + 1 ) ?
76
- + 1 ;
77
-
78
- // Snip a line at a time from `orig` until it is used up. Push the snippet
88
+ // Snip a line at a time from `stripped_str` until it is used up. Push the snippet
79
89
// onto result.
80
- ' outer: loop {
81
- // `cur_end` will be where we break the line, as an offset into `orig`.
82
- // Initialised to the maximum it could be (which may be beyond `orig`).
83
- let mut cur_end = cur_start + max_chars;
84
-
85
- // We can fit the rest of the string on this line, so we're done.
86
- if cur_end >= graphemes. len ( ) {
87
- let line = & graphemes[ cur_start..] . join ( "" ) ;
88
- result. push_str ( line) ;
89
- break ' outer;
90
+ let mut cur_max_chars = max_chars_with_indent;
91
+ loop {
92
+ // All the input starting at cur_start fits on the current line
93
+ if graphemes. len ( ) - cur_start <= cur_max_chars {
94
+ result. push_str ( & graphemes[ cur_start..] . join ( "" ) ) ;
95
+ break ;
90
96
}
91
97
92
- // Push cur_end left until we reach whitespace (or the line is too small).
93
- while !is_whitespace ( graphemes[ cur_end - 1 ] ) {
94
- cur_end -= 1 ;
95
- if cur_end < cur_start + MIN_STRING {
96
- // We couldn't find whitespace before the string got too small.
97
- // So start again at the max length and look for punctuation.
98
- cur_end = cur_start + max_chars;
99
- while !is_punctuation ( graphemes[ cur_end - 1 ] ) {
100
- cur_end -= 1 ;
101
-
102
- // If we can't break at whitespace or punctuation, grow the string instead.
103
- if cur_end < cur_start + MIN_STRING {
104
- cur_end = cur_start + max_chars;
105
- while !( is_punctuation ( graphemes[ cur_end - 1 ] )
106
- || is_whitespace ( graphemes[ cur_end - 1 ] ) )
107
- {
108
- cur_end += 1 ;
109
- if cur_end == graphemes. len ( ) {
110
- let line = & graphemes[ cur_start..] . join ( "" ) ;
111
- result. push_str ( line) ;
112
- break ' outer;
113
- }
114
- }
115
- break ;
116
- }
117
- }
98
+ // The input starting at cur_start needs to be broken
99
+ match break_string ( cur_max_chars, fmt. trim_end , & graphemes[ cur_start..] ) {
100
+ SnippetState :: LineEnd ( line, len) => {
101
+ result. push_str ( & line) ;
102
+ result. push_str ( fmt. line_end ) ;
103
+ result. push_str ( & indent) ;
104
+ result. push_str ( fmt. line_start ) ;
105
+ cur_max_chars = max_chars_with_indent;
106
+ cur_start += len;
107
+ }
108
+ SnippetState :: Overflow ( line, len) => {
109
+ result. push_str ( & line) ;
110
+ cur_max_chars = max_chars_without_indent;
111
+ cur_start += len;
112
+ }
113
+ SnippetState :: EndOfInput ( line) => {
114
+ result. push_str ( & line) ;
118
115
break ;
119
116
}
120
117
}
121
- // Make sure there is no whitespace to the right of the break.
122
- while cur_end < stripped_str. len ( ) && is_whitespace ( graphemes[ cur_end] ) {
123
- cur_end += 1 ;
124
- }
118
+ }
125
119
126
- // Make the current line and add it on to result.
127
- let raw_line = graphemes[ cur_start..cur_end] . join ( "" ) ;
128
- let line = if fmt. trim_end {
129
- raw_line. trim ( )
130
- } else {
131
- raw_line. as_str ( )
132
- } ;
120
+ result. push_str ( fmt. closer ) ;
121
+ wrap_str ( result, fmt. config . max_width ( ) , fmt. shape )
122
+ }
133
123
134
- result. push_str ( line) ;
135
- result. push_str ( fmt. line_end ) ;
136
- result. push_str ( & indent) ;
137
- result. push_str ( fmt. line_start ) ;
124
+ /// Result of breaking a string so it fits in a line and the state it ended in.
125
+ /// The state informs about what to do with the snippet and how to continue the breaking process.
126
+ #[ derive( Debug , PartialEq ) ]
127
+ enum SnippetState {
128
+ /// The input could not be broken and so rewriting the string is finished.
129
+ EndOfInput ( String ) ,
130
+ /// The input could be broken and the returned snippet should be ended with a
131
+ /// `[StringFormat::line_end]`. The next snippet needs to be indented.
132
+ LineEnd ( String , usize ) ,
133
+ /// The input could be broken but the returned snippet should not be ended with a
134
+ /// `[StringFormat::line_end]` because the whitespace is significant. Therefore, the next
135
+ /// snippet should not be indented.
136
+ Overflow ( String , usize ) ,
137
+ }
138
138
139
- // The next line starts where the current line ends.
140
- cur_start = cur_end;
139
+ /// Break the input string at a boundary character around the offset `max_chars`. A boundary
140
+ /// character is either a punctuation or a whitespace.
141
+ fn break_string ( max_chars : usize , trim_end : bool , input : & [ & str ] ) -> SnippetState {
142
+ let break_at = |index /* grapheme at index is included */ | {
143
+ // Take in any whitespaces to the left/right of `input[index]` and
144
+ // check if there is a line feed, in which case whitespaces needs to be kept.
145
+ let mut index_minus_ws = index;
146
+ for ( i, grapheme) in input[ 0 ..=index] . iter ( ) . enumerate ( ) . rev ( ) {
147
+ if !trim_end && is_line_feed ( grapheme) {
148
+ return SnippetState :: Overflow ( input[ 0 ..=i] . join ( "" ) . to_string ( ) , i + 1 ) ;
149
+ } else if !is_whitespace ( grapheme) {
150
+ index_minus_ws = i;
151
+ break ;
152
+ }
153
+ }
154
+ let mut index_plus_ws = index;
155
+ for ( i, grapheme) in input[ index + 1 ..] . iter ( ) . enumerate ( ) {
156
+ if !trim_end && is_line_feed ( grapheme) {
157
+ return SnippetState :: Overflow (
158
+ input[ 0 ..=index + 1 + i] . join ( "" ) . to_string ( ) ,
159
+ index + 2 + i,
160
+ ) ;
161
+ } else if !is_whitespace ( grapheme) {
162
+ index_plus_ws = index + i;
163
+ break ;
164
+ }
165
+ }
141
166
142
- if let Some ( new_max_chars) = max_width {
143
- max_chars = new_max_chars. checked_sub ( fmt. opener . len ( ) + ender_length + 1 ) ? + 1 ;
167
+ if trim_end {
168
+ SnippetState :: LineEnd (
169
+ input[ 0 ..=index_minus_ws] . join ( "" ) . to_string ( ) ,
170
+ index_plus_ws + 1 ,
171
+ )
172
+ } else {
173
+ SnippetState :: LineEnd (
174
+ input[ 0 ..=index_plus_ws] . join ( "" ) . to_string ( ) ,
175
+ index_plus_ws + 1 ,
176
+ )
144
177
}
178
+ } ;
179
+
180
+ // Find the position in input for breaking the string
181
+ match input[ 0 ..max_chars]
182
+ . iter ( )
183
+ . rposition ( |grapheme| is_whitespace ( grapheme) )
184
+ {
185
+ // Found a whitespace and what is on its left side is big enough.
186
+ Some ( index) if index >= MIN_STRING => break_at ( index) ,
187
+ // No whitespace found, try looking for a punctuation instead
188
+ _ => match input[ 0 ..max_chars]
189
+ . iter ( )
190
+ . rposition ( |grapheme| is_punctuation ( grapheme) )
191
+ {
192
+ // Found a punctuation and what is on its left side is big enough.
193
+ Some ( index) if index >= MIN_STRING => break_at ( index) ,
194
+ // Either no boundary character was found to the left of `input[max_chars]`, or the line
195
+ // got too small. We try searching for a boundary character to the right.
196
+ _ => match input[ max_chars..]
197
+ . iter ( )
198
+ . position ( |grapheme| is_whitespace ( grapheme) || is_punctuation ( grapheme) )
199
+ {
200
+ // A boundary was found after the line limit
201
+ Some ( index) => break_at ( max_chars + index) ,
202
+ // No boundary to the right, the input cannot be broken
203
+ None => SnippetState :: EndOfInput ( input. join ( "" ) . to_string ( ) ) ,
204
+ } ,
205
+ } ,
145
206
}
207
+ }
146
208
147
- result . push_str ( fmt . closer ) ;
148
- wrap_str ( result , fmt . config . max_width ( ) , fmt . shape )
209
+ fn is_line_feed ( grapheme : & str ) -> bool {
210
+ grapheme . as_bytes ( ) [ 0 ] == b'\n'
149
211
}
150
212
151
213
fn is_whitespace ( grapheme : & str ) -> bool {
@@ -161,13 +223,99 @@ fn is_punctuation(grapheme: &str) -> bool {
161
223
162
224
#[ cfg( test) ]
163
225
mod test {
164
- use super :: { rewrite_string, StringFormat } ;
226
+ use super :: { break_string , rewrite_string, SnippetState , StringFormat } ;
165
227
use shape:: { Indent , Shape } ;
228
+ use unicode_segmentation:: UnicodeSegmentation ;
166
229
167
230
#[ test]
168
231
fn issue343 ( ) {
169
232
let config = Default :: default ( ) ;
170
233
let fmt = StringFormat :: new ( Shape :: legacy ( 2 , Indent :: empty ( ) ) , & config) ;
171
- rewrite_string ( "eq_" , & fmt, None ) ;
234
+ rewrite_string ( "eq_" , & fmt) ;
235
+ }
236
+
237
+ #[ test]
238
+ fn should_break_on_whitespace ( ) {
239
+ let string = "Placerat felis. Mauris porta ante sagittis purus." ;
240
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
241
+ assert_eq ! (
242
+ break_string( 20 , false , & graphemes[ ..] ) ,
243
+ SnippetState :: LineEnd ( "Placerat felis. " . to_string( ) , 16 )
244
+ ) ;
245
+ assert_eq ! (
246
+ break_string( 20 , true , & graphemes[ ..] ) ,
247
+ SnippetState :: LineEnd ( "Placerat felis." . to_string( ) , 16 )
248
+ ) ;
249
+ }
250
+
251
+ #[ test]
252
+ fn should_break_on_punctuation ( ) {
253
+ let string = "Placerat_felis._Mauris_porta_ante_sagittis_purus." ;
254
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
255
+ assert_eq ! (
256
+ break_string( 20 , false , & graphemes[ ..] ) ,
257
+ SnippetState :: LineEnd ( "Placerat_felis." . to_string( ) , 15 )
258
+ ) ;
259
+ }
260
+
261
+ #[ test]
262
+ fn should_break_forward ( ) {
263
+ let string = "Venenatis_tellus_vel_tellus. Aliquam aliquam dolor at justo." ;
264
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
265
+ assert_eq ! (
266
+ break_string( 20 , false , & graphemes[ ..] ) ,
267
+ SnippetState :: LineEnd ( "Venenatis_tellus_vel_tellus. " . to_string( ) , 29 )
268
+ ) ;
269
+ assert_eq ! (
270
+ break_string( 20 , true , & graphemes[ ..] ) ,
271
+ SnippetState :: LineEnd ( "Venenatis_tellus_vel_tellus." . to_string( ) , 29 )
272
+ ) ;
273
+ }
274
+
275
+ #[ test]
276
+ fn nothing_to_break ( ) {
277
+ let string = "Venenatis_tellus_vel_tellus" ;
278
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
279
+ assert_eq ! (
280
+ break_string( 20 , false , & graphemes[ ..] ) ,
281
+ SnippetState :: EndOfInput ( "Venenatis_tellus_vel_tellus" . to_string( ) )
282
+ ) ;
283
+ }
284
+
285
+ #[ test]
286
+ fn significant_whitespaces ( ) {
287
+ let string = "Neque in sem. \n Pellentesque tellus augue." ;
288
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
289
+ assert_eq ! (
290
+ break_string( 15 , false , & graphemes[ ..] ) ,
291
+ SnippetState :: Overflow ( "Neque in sem. \n " . to_string( ) , 20 )
292
+ ) ;
293
+ assert_eq ! (
294
+ break_string( 25 , false , & graphemes[ ..] ) ,
295
+ SnippetState :: Overflow ( "Neque in sem. \n " . to_string( ) , 20 )
296
+ ) ;
297
+ // if `StringFormat::line_end` is true, then the line feed does not matter anymore
298
+ assert_eq ! (
299
+ break_string( 15 , true , & graphemes[ ..] ) ,
300
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 26 )
301
+ ) ;
302
+ assert_eq ! (
303
+ break_string( 25 , true , & graphemes[ ..] ) ,
304
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 26 )
305
+ ) ;
306
+ }
307
+
308
+ #[ test]
309
+ fn big_whitespace ( ) {
310
+ let string = "Neque in sem. Pellentesque tellus augue." ;
311
+ let graphemes = UnicodeSegmentation :: graphemes ( & * string, false ) . collect :: < Vec < & str > > ( ) ;
312
+ assert_eq ! (
313
+ break_string( 20 , false , & graphemes[ ..] ) ,
314
+ SnippetState :: LineEnd ( "Neque in sem. " . to_string( ) , 25 )
315
+ ) ;
316
+ assert_eq ! (
317
+ break_string( 20 , true , & graphemes[ ..] ) ,
318
+ SnippetState :: LineEnd ( "Neque in sem." . to_string( ) , 25 )
319
+ ) ;
172
320
}
173
321
}
0 commit comments