From 7fdc26ad782e1a80c0b7cd56f8ea5907b356add9 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Mar 2023 18:22:09 +0800 Subject: [PATCH 1/4] fix --- modules/typesniffer/typesniffer.go | 10 ++++++++++ modules/typesniffer/typesniffer_test.go | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index c9fef953ce703..99684def0364d 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -4,6 +4,7 @@ package typesniffer import ( + "bytes" "fmt" "io" "net/http" @@ -97,6 +98,15 @@ func DetectContentType(data []byte) SniffedType { ct = SvgMimeType } + if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { + // the MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg" + // so remove the "ID3" prefix and detect again, if result is text, then it must be text content. + ct2 := http.DetectContentType(data[3:]) + if strings.HasPrefix(ct2, "text/") { + ct = ct2 + } + } + return SniffedType{ct} } diff --git a/modules/typesniffer/typesniffer_test.go b/modules/typesniffer/typesniffer_test.go index dbce94fc3bdd9..83a428d934dfb 100644 --- a/modules/typesniffer/typesniffer_test.go +++ b/modules/typesniffer/typesniffer_test.go @@ -86,6 +86,10 @@ func TestIsAudio(t *testing.T) { mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") assert.True(t, DetectContentType(mp3).IsAudio()) assert.False(t, DetectContentType([]byte("plain text")).IsAudio()) + + assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio()) + assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text + assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char } func TestDetectContentTypeFromReader(t *testing.T) { From 3568dd8b3efc27c8a6e9c42658c604a8670ef55c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Mar 2023 19:07:49 +0800 Subject: [PATCH 2/4] Update modules/typesniffer/typesniffer.go Co-authored-by: delvh --- modules/typesniffer/typesniffer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index 99684def0364d..4631f8ce98fc3 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -101,6 +101,7 @@ func DetectContentType(data []byte) SniffedType { if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { // the MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg" // so remove the "ID3" prefix and detect again, if result is text, then it must be text content. + // This works especially because audio files contain many unprintable/invalid characters like "\000" ct2 := http.DetectContentType(data[3:]) if strings.HasPrefix(ct2, "text/") { ct = ct2 From b4de06ca139a81fa2ae42fc4641473e7550c20f1 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Mar 2023 19:35:51 +0800 Subject: [PATCH 3/4] update comment --- modules/typesniffer/typesniffer.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index 4631f8ce98fc3..554f72c5b72fb 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -99,9 +99,9 @@ func DetectContentType(data []byte) SniffedType { } if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { - // the MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg" - // so remove the "ID3" prefix and detect again, if result is text, then it must be text content. - // This works especially because audio files contain many unprintable/invalid characters like "\000" + // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg". + // So remove the "ID3" prefix and detect again, if result is text, then it must be text content. + // This works especially because audio files contain many unprintable/invalid characters like `byte(0)` ct2 := http.DetectContentType(data[3:]) if strings.HasPrefix(ct2, "text/") { ct = ct2 From 58ef5c7bec9323e5b913d38ac1cd0ccb5c3b128f Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 7 Mar 2023 19:37:03 +0800 Subject: [PATCH 4/4] update comment --- modules/typesniffer/typesniffer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/typesniffer/typesniffer.go b/modules/typesniffer/typesniffer.go index 554f72c5b72fb..89e23c0f6acd4 100644 --- a/modules/typesniffer/typesniffer.go +++ b/modules/typesniffer/typesniffer.go @@ -101,7 +101,7 @@ func DetectContentType(data []byte) SniffedType { if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg". // So remove the "ID3" prefix and detect again, if result is text, then it must be text content. - // This works especially because audio files contain many unprintable/invalid characters like `byte(0)` + // This works especially because audio files contain many unprintable/invalid characters like `0x00` ct2 := http.DetectContentType(data[3:]) if strings.HasPrefix(ct2, "text/") { ct = ct2