@@ -2,8 +2,6 @@ module RSpec
2
2
module Support
3
3
# @private
4
4
class EncodedString
5
- MRI_UNICODE_UNKOWN_CHARACTER = "\xEF \xBF \xBD "
6
-
7
5
def initialize ( string , encoding = nil )
8
6
@encoding = encoding
9
7
@source_encoding = detect_source_encoding ( string )
@@ -34,19 +32,18 @@ def to_s
34
32
private
35
33
36
34
# Re-encodes +string+ into @encoding, substituting '?' for byte sequences
# that are invalid in the source encoding or undefined in the target.
#
# The argument itself is never modified; every branch works on or returns
# a separate String object.
#
# @param string [String] the text to convert
# @return [String] a converted copy of +string+
def matching_encoding(string)
  # Converting to a wider character set (UTF-16) and then back to the
  # target encoding strips away invalid or undefined byte sequences.
  string.
    encode(::Encoding::UTF_16LE,
           :invalid => :replace, :undef => :replace, :replace => '?').
    encode(@encoding)
rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError, Encoding::CompatibilityError
  # The round trip produced characters the target cannot represent (or the
  # encodings clashed): convert directly, replacing whatever does not fit.
  string.encode(@encoding,
                :invalid => :replace, :undef => :replace, :replace => '?')
rescue Encoding::ConverterNotFoundError
  # No transcoder is available for this encoding pair, so the bytes are
  # relabelled instead. force_encoding mutates its receiver, so operate on
  # a copy: the caller's string keeps its encoding and frozen input works.
  string.dup.force_encoding(@encoding).encode(
    :invalid => :replace, :replace => '?')
end
51
48
52
49
def detect_source_encoding ( string )
0 commit comments