Skip to content

Commit 298e669

Browse files
committed
Convert RTF to HTML respecting original code page
1 parent 34fb32e commit 298e669

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

lib/mapi/mime.rb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ def initialize str, ignore_body=false
6969
end
7070
end
7171

72+
# Returns +x+ as a UTF-8 string.
#
# Strings already tagged as UTF-8 are returned unchanged (same object). Any
# other string has its bytes interpreted as Windows-1252 (cp1252) and is
# transcoded to UTF-8, regardless of the encoding it is currently tagged with.
#
# cp1252 leaves a few byte values unassigned (e.g. 0x81). Those are replaced
# with the Unicode replacement character instead of raising
# Encoding::UndefinedConversionError, so a single stray byte in a header or
# part does not abort serialising the whole message.
def encode(x)
  return x if x.encoding == Encoding::UTF_8

  x.encode('utf-8', 'cp1252', :invalid => :replace, :undef => :replace)
end
75+
7276
# Returns +true+ when @content_type names a multipart media type, +false+
# otherwise (including when @content_type is nil).
#
# The match is anchored with \A so that only the very start of the value
# counts (^ would also match after an embedded newline), and it ignores case
# because MIME media type names are case-insensitive (RFC 2045).
def multipart?
  @content_type && @content_type =~ /\Amultipart/i ? true : false
end
@@ -97,7 +101,7 @@ def to_s opts={}
97101
opts = {:boundary_counter => 0}.merge opts
98102
if multipart?
99103
boundary = Mime.make_boundary opts[:boundary_counter] += 1, self
100-
@body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue].
104+
@body = [preamble, parts.map { |part| "\r\n" + encode(part.to_s(opts)) + "\r\n" }, "--\r\n" + epilogue].
101105
flatten.join("\r\n--" + boundary)
102106
content_type, attrs = Mime.split_header @headers['Content-Type'][0]
103107
attrs['boundary'] = boundary
@@ -106,7 +110,7 @@ def to_s opts={}
106110

107111
str = ''
108112
@headers.each do |key, vals|
109-
vals.each { |val| str << "#{key}: #{val}\r\n" }
113+
vals.each { |val| str << "#{encode(key)}: #{encode(val)}\r\n" }
110114
end
111115
str << "\r\n" + @body
112116
end

lib/mapi/rtf.rb

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def self.rtf2text str, format=:text
122122
end
123123
end
124124

125-
RTF_PREBUF =
125+
RTF_PREBUF =
126126
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
127127
"{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
128128
"\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
@@ -175,7 +175,7 @@ def rtfdecompr data
175175
else # unknown magic number
176176
raise "Unknown compression type (magic number 0x%08x)" % magic
177177
end
178-
178+
179179
# not sure if its due to a bug in the above code. doesn't seem to be
180180
# in my tests, but sometimes there's a trailing null. we chomp it here,
181181
# which actually makes the resultant rtf smaller than its advertised
@@ -189,7 +189,7 @@ def rtfdecompr data
189189
#
190190
# Returns +nil+ if it doesn't look like an rtf encapsulated rtf.
191191
#
192-
# Some cases that the original didn't deal with have been patched up, eg from
192+
# Some cases that the original didn't deal with have been patched up, eg from
193193
# this chunk, where there are tags outside of the htmlrtf ignore block.
194194
#
195195
# "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
@@ -229,8 +229,14 @@ def rtfdecompr data
229229
def rtf2html rtf
230230
scan = StringScanner.new rtf
231231
# require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
232-
# was converted from plain text.
232+
# was converted from plain text.
233233
return nil unless rtf["\\fromhtml"]
234+
if scan.scan_until(/\\ansicpg/)
235+
code_page = "cp" + scan.scan(/\d+/)
236+
scan.pos = 0
237+
else
238+
code_page = 'ascii'
239+
end
234240
html = ''
235241
ignore_tag = nil
236242
# skip up to the first htmltag. return nil if we don't ever find one
@@ -270,7 +276,7 @@ def rtf2html rtf
270276
p :wtf
271277
end
272278
end
273-
html.strip.empty? ? nil : html
279+
html.strip.empty? ? nil : html.encode('utf-8', code_page)
274280
end
275281

276282
module_function :rtf2html, :rtfdecompr

0 commit comments

Comments
 (0)