@@ -3242,6 +3242,134 @@ def seekable(self): return True
3242
3242
F .tell = lambda x : 0
3243
3243
t = self .TextIOWrapper (F (), encoding = 'utf-8' )
3244
3244
3245
def test_set_encoding_same_codec(self):
    # Checks what the `encoding` attribute reports after set_encoding():
    # unchanged for an alias of the current codec, updated for a
    # different codec.
    payload = 'foobar\n '.encode('latin1')
    wrapper = self.TextIOWrapper(self.BytesIO(payload), encoding='latin1')
    self.assertEqual(wrapper.encoding, 'latin1')

    # 'ISO-8859-1' is only another name for latin1, so the reported
    # encoding is expected to stay as it was.
    wrapper.set_encoding('ISO-8859-1')
    self.assertEqual(wrapper.encoding, 'latin1')

    # A genuinely different codec must show up in the attribute.
    wrapper.set_encoding('iso8859-15')
    self.assertEqual(wrapper.encoding, 'iso8859-15')
3258
+
3259
def test_set_encoding_read(self):
    # After a line has been read, asking for a different encoding is
    # expected to raise UnsupportedOperation.
    #
    # latin1 -> utf8
    # (latin1 can decode utf-8 encoded string)
    data = 'abc\xe9\n'.encode('latin1') + 'd\xe9f\n'.encode('utf8')
    raw = self.BytesIO(data)
    txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
    # The first line is pure latin1 and must decode as written.
    self.assertEqual(txt.readline(), 'abc\xe9\n')
    with self.assertRaises(self.UnsupportedOperation):
        txt.set_encoding('utf-8')
3268
+
3269
def test_set_encoding_write_fromascii(self):
    # ascii has a specific encodefunc in the C implementation,
    # but utf-8-sig has not. Make sure that we get rid of the
    # cached encodefunc when we switch encoders.
    raw = self.BytesIO()
    txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
    txt.write('foo\n')
    txt.set_encoding('utf-8-sig')
    # U+00E9 is not encodable as ascii; it can only come out as the
    # two bytes C3 A9 if the new encoder is really in use.
    txt.write('\xe9\n')
    txt.flush()
    self.assertEqual(raw.getvalue(), b'foo\n\xc3\xa9\n')
3280
+
3281
def test_set_encoding_write(self):
    # Text written before set_encoding() must be encoded with the old
    # codec and text written afterwards with the new one.
    #
    # latin -> utf8
    raw = self.BytesIO()
    txt = self.TextIOWrapper(raw, encoding='latin1', newline='\n')
    txt.write('abc\xe9\n')
    txt.set_encoding('utf-8')
    # No explicit flush here: the pending latin1 text is expected to be
    # in the raw stream already once set_encoding() returns.
    self.assertEqual(raw.getvalue(), b'abc\xe9\n')
    txt.write('d\xe9f\n')
    txt.flush()
    self.assertEqual(raw.getvalue(), b'abc\xe9\nd\xc3\xa9f\n')

    # ascii -> utf-8-sig: ensure that no BOM is written in the middle of
    # the file
    raw = self.BytesIO()
    txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
    txt.write('abc\n')
    txt.set_encoding('utf-8-sig')
    txt.write('d\xe9f\n')
    txt.flush()
    self.assertEqual(raw.getvalue(), b'abc\nd\xc3\xa9f\n')
3301
+
3302
def test_set_encoding_write_non_seekable(self):
    # Same ascii -> utf-8-sig switch as on a seekable stream, but with a
    # raw stream that reports itself as non-seekable.
    raw = self.BytesIO()
    # Make the in-memory stream look non-seekable: seekable() says no
    # and seek is no longer callable.
    raw.seekable = lambda: False
    raw.seek = None
    txt = self.TextIOWrapper(raw, encoding='ascii', newline='\n')
    txt.write('abc\n')
    txt.set_encoding('utf-8-sig')
    txt.write('d\xe9f\n')
    txt.flush()

    # If the raw stream is not seekable, there'll be a BOM
    self.assertEqual(raw.getvalue(), b'abc\n\xef\xbb\xbfd\xc3\xa9f\n')
3314
+
3315
def test_set_encoding_defaults(self):
    # Checks which settings set_encoding() keeps and which it resets
    # when arguments are omitted or passed as None.
    txt = self.TextIOWrapper(self.BytesIO(), 'ascii', 'replace', '\n')

    # Explicit None for both encoding and errors: nothing changes.
    txt.set_encoding(None, None)
    self.assertEqual(txt.encoding, 'ascii')
    self.assertEqual(txt.errors, 'replace')
    # Written while newline is still '\n'.
    txt.write('LF\n')

    # Changing only the newline leaves encoding and errors untouched.
    txt.set_encoding(newline='\r\n')
    self.assertEqual(txt.encoding, 'ascii')
    self.assertEqual(txt.errors, 'replace')

    # Changing only the error handler leaves the encoding untouched.
    txt.set_encoding(errors='ignore')
    self.assertEqual(txt.encoding, 'ascii')
    # Written after newline was switched to '\r\n'.
    txt.write('CRLF\n')

    # Giving an encoding without errors is expected to reset the error
    # handler to 'strict'.
    txt.set_encoding(encoding='utf-8', newline=None)
    self.assertEqual(txt.errors, 'strict')
    txt.seek(0)
    # With newline=None both line endings read back as '\n'.
    self.assertEqual(txt.read(), 'LF\nCRLF\n')

    # The raw bytes show the newline setting in force at each write.
    self.assertEqual(txt.detach().getvalue(), b'LF\nCRLF\r\n')
3336
+
3337
def test_set_encoding_newline(self):
    # Changing only the newline setting through set_encoding() must take
    # effect for later reads and later writes.

    # Reading: (raw bytes, initial newline, new newline, expected first
    # line returned by readline() after the switch).
    read_cases = [
        (b'CR\rEOF', '\n', None, 'CR\n'),
        (b'CR\rEOF', '\n', '', 'CR\r'),
        (b'CR\rLF\nEOF', '\r', '\n', 'CR\rLF\n'),
        (b'LF\nCR\rEOF', '\n', '\r', 'LF\nCR\r'),
        (b'CR\rCRLF\r\nEOF', '\r', '\r\n', 'CR\rCRLF\r\n'),
    ]
    for payload, initial, changed, first_line in read_cases:
        raw = self.BytesIO(payload)
        txt = self.TextIOWrapper(raw, 'ascii', newline=initial)
        txt.set_encoding(newline=changed)
        # The message identifies the failing table row.
        self.assertEqual(txt.readline(), first_line,
                         (payload, initial, changed))

    # Writing: each write() must use the newline setting in force at the
    # time of the call.
    txt = self.TextIOWrapper(self.BytesIO(), 'ascii', newline='\r')
    txt.set_encoding(newline=None)
    txt.write('linesep\n')
    txt.set_encoding(newline='')
    txt.write('LF\n')
    txt.set_encoding(newline='\n')
    txt.write('LF\n')
    txt.set_encoding(newline='\r')
    txt.write('CR\n')
    txt.set_encoding(newline='\r\n')
    txt.write('CRLF\n')
    # newline=None writes the platform line separator, hence os.linesep.
    expected = 'linesep' + os.linesep + 'LF\nLF\nCR\rCRLF\r\n'
    self.assertEqual(txt.detach().getvalue().decode('ascii'), expected)
3372
+
3245
3373
3246
3374
class MemviewBytesIO (io .BytesIO ):
3247
3375
'''A BytesIO object whose read method returns memoryviews
0 commit comments