Skip to content

Commit cfd68b7

Browse files
StringUtil#convertFileToLinuxStyle safe for non-text files
1 parent dc39fb6 commit cfd68b7

File tree

2 files changed

+86
-15
lines changed

2 files changed

+86
-15
lines changed

code/src/main/java/com/codeforces/commons/text/StringUtil.java

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
import com.codeforces.commons.collection.MapBuilder;
44
import com.codeforces.commons.holder.Holders;
55
import com.codeforces.commons.io.FileUtil;
6-
import com.codeforces.commons.io.IoUtil;
76
import com.codeforces.commons.pair.*;
87
import com.codeforces.commons.properties.internal.CommonsPropertiesUtil;
98
import com.codeforces.commons.reflection.ReflectionUtil;
109
import com.codeforces.commons.text.similarity.SimilarityChecker;
1110
import org.apache.commons.codec.binary.Base64;
1211
import org.apache.commons.codec.binary.Hex;
1312
import org.apache.commons.codec.digest.DigestUtils;
13+
import org.apache.commons.io.FileUtils;
1414
import org.apache.commons.lang3.ArrayUtils;
1515
import org.apache.commons.lang3.mutable.MutableBoolean;
1616
import org.apache.commons.text.translate.CharSequenceTranslator;
@@ -36,14 +36,17 @@
3636
import java.awt.event.KeyEvent;
3737
import java.awt.font.FontRenderContext;
3838
import java.awt.geom.Rectangle2D;
39-
import java.io.*;
39+
import java.io.ByteArrayOutputStream;
40+
import java.io.File;
41+
import java.io.IOException;
42+
import java.io.StringReader;
4043
import java.lang.reflect.Array;
4144
import java.nio.charset.StandardCharsets;
4245
import java.security.InvalidKeyException;
4346
import java.security.MessageDigest;
4447
import java.security.NoSuchAlgorithmException;
45-
import java.util.List;
4648
import java.util.*;
49+
import java.util.List;
4750
import java.util.concurrent.locks.Lock;
4851
import java.util.concurrent.locks.ReadWriteLock;
4952
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -1206,25 +1209,46 @@ private static String wellformSingleLineForWindows(String line) {
12061209
return trim(sb.toString());
12071210
}
12081211

1212+
/**
1213+
* Removes every \r (0x0D) byte from the file, but only if the content looks like a Windows-style
* (CRLF) text file: each \r must be immediately followed by \n and each \n must be immediately
* preceded by \r. If either rule is violated, or the file has no \r at all, nothing is written.
1214+
*
1215+
* @param file File to process; it is overwritten in place when the conversion applies
1216+
* @throws IOException On any IO error
1217+
*/
12091218
public static void convertFileToLinuxStyle(File file) throws IOException {
1210-
byte[] bytes = FileUtil.getBytes(file);
1211-
BufferedReader reader = null;
1212-
BufferedWriter writer = null;
1219+
byte[] content = FileUtil.getBytes(file);
12131220

1214-
try {
1215-
reader = new BufferedReader(new InputStreamReader(new ByteArrayInputStream(bytes), UTF_8));
1216-
writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), UTF_8));
1221+
boolean validCrLf = true;
1222+
int crCount = 0;
1223+
1224+
for (int i = 0; i < content.length; i++) {
1225+
if (content[i] == 0x0D) { // \r
1226+
crCount++;
1227+
if (i + 1 >= content.length || content[i + 1] != 0x0A) { // \r not followed by \n
1228+
validCrLf = false;
1229+
break;
1230+
}
1231+
} else if (content[i] == 0x0A) { // \n
1232+
if (i == 0 || content[i - 1] != 0x0D) { // \n not preceded by \r
1233+
validCrLf = false;
1234+
break;
1235+
}
1236+
}
1237+
}
12171238

1218-
String line;
1219-
while ((line = reader.readLine()) != null) {
1220-
writer.write(line);
1221-
writer.write(10);
1239+
if (validCrLf && crCount > 0) { // rewrite only a strict CRLF file that contains at least one \r
1240+
// Remove all \r (0x0D) bytes
1241+
ByteArrayOutputStream out = new ByteArrayOutputStream(content.length - crCount);
1242+
for (byte b : content) {
1243+
if (b != 0x0D) {
1244+
out.write(b);
1245+
}
12221246
}
1223-
} finally {
1224-
IoUtil.closeQuietly(reader, writer);
1247+
FileUtils.writeByteArrayToFile(file, out.toByteArray());
12251248
}
12261249
}
12271250

1251+
12281252
/**
12291253
* @param s Given string.
12301254
* @param maxLength Maximal length.

code/src/test/java/com/codeforces/commons/text/StringUtilTest.java

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
package com.codeforces.commons.text;
22

3+
import com.codeforces.commons.io.FileUtil;
4+
import com.codeforces.commons.io.IoUtil;
35
import com.codeforces.commons.text.similarity.SimilarityChecker;
46
import org.apache.commons.lang3.StringUtils;
7+
import org.junit.Assert;
58
import org.junit.Test;
69

10+
import java.io.File;
11+
import java.io.IOException;
12+
import java.io.InputStream;
713
import java.util.*;
814
import java.util.List;
915
import java.util.function.Predicate;
@@ -356,4 +362,45 @@ public void testWellformForWindows() {
356362
assertEquals("a b c\r\nd e\r\n", StringUtil.wellformForWindows("\r\n\r\n a b c \r\n d e"));
357363
assertEquals("a b c\r\n\r\nd e\r\n", StringUtil.wellformForWindows("a b c\r\n\r\nd e\r\n"));
358364
}
365+
366+
@Test
367+
public void testConvertFileToLinuxStyle() throws IOException {
368+
testConvertFileToLinuxStyle("01");
369+
testConvertFileToLinuxStyle("02");
370+
testConvertFileToLinuxStyle("03");
371+
testConvertFileToLinuxStyle("04");
372+
testConvertFileToLinuxStyle("05");
373+
testConvertFileToLinuxStyle("06");
374+
}
375+
376+
private void testConvertFileToLinuxStyle(String fileName) throws IOException {
377+
byte[] inputBytes = getConvertFileToLinuxStyleResourceBytes(fileName + ".in");
378+
byte[] answerBytes = getConvertFileToLinuxStyleResourceBytes(fileName + ".out");
379+
380+
File inputFile = File.createTempFile("convertFileToLinuxStyle", fileName + ".in");
381+
FileUtil.writeFile(inputFile, inputBytes);
382+
383+
StringUtil.convertFileToLinuxStyle(inputFile);
384+
byte[] outputBytes = FileUtil.getBytes(inputFile);
385+
FileUtil.deleteTotally(inputFile);
386+
387+
Assert.assertArrayEquals("Subtest '" + fileName + "' failed.",
388+
answerBytes, outputBytes);
389+
}
390+
391+
private static byte[] getConvertFileToLinuxStyleResourceBytes(String resourceName) throws IOException {
392+
String resource = "/com/codeforces/commons/text/convertFileToLinuxStyle/" + resourceName;
393+
394+
InputStream resourceStream = StringUtilTest.class.getResourceAsStream(
395+
resource);
396+
if (resourceStream == null) {
397+
throw new IOException("Can't find resource '" + resource + "'.");
398+
}
399+
400+
try {
401+
return IoUtil.toByteArray(resourceStream);
402+
} finally {
403+
IoUtil.closeQuietly(resourceStream);
404+
}
405+
}
359406
}

0 commit comments

Comments
 (0)