Skip to content

Commit a18fb2e

Browse files
committed
Respect MIME type aliases
* Fixes `audio/x-wav`, which was incorrectly defined as a subtype of `audio/vnd.wav`; it is now an alias of `audio/vnd.wave`.
1 parent b78b567 commit a18fb2e

14 files changed

+253
-16
lines changed

lib/marcel/magic.rb

+32-5
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,34 @@ def initialize(type)
2525
# Option keys:
2626
# * <i>:extensions</i>: String list or single string of file extensions
2727
# * <i>:parents</i>: String list or single string of parent mime types
28+
# * <i>:aliases</i>: String list or single string of aliased mime types
2829
# * <i>:magic</i>: Mime magic specification
2930
# * <i>:comment</i>: Comment string
3031
def self.add(type, options)
3132
extensions = [options[:extensions]].flatten.compact
33+
extensions.each {|ext| EXTENSIONS[ext] = type }
3234
TYPE_EXTS[type] = extensions
35+
36+
TYPE_ALIASES.delete(type)
37+
[options[:aliases]].flatten.compact.each do |aliased|
38+
TYPE_ALIASES[aliased] = type
39+
end
40+
3341
parents = [options[:parents]].flatten.compact
3442
TYPE_PARENTS[type] = parents unless parents.empty?
35-
extensions.each {|ext| EXTENSIONS[ext] = type }
43+
3644
MAGIC.unshift [type, options[:magic]] if options[:magic]
3745
end
3846

39-
# Removes a mime type from the dictionary. You might want to do this if
47+
# Removes a mime type from the dictionary. You might want to do this if
4048
# you're seeing impossible conflicts (for instance, application/x-gmc-link).
41-
# * <i>type</i>: The mime type to remove. All associated extensions and magic are removed too.
49+
# * <i>type</i>: The mime type to remove. Its associated extensions, magic, parents and aliases are removed too.
4250
def self.remove(type)
43-
EXTENSIONS.delete_if {|ext, t| t == type }
44-
MAGIC.delete_if {|t, m| t == type }
51+
EXTENSIONS.delete_if { |ext, t| t == type }
52+
MAGIC.delete_if { |t, m| t == type }
4553
TYPE_EXTS.delete(type)
4654
TYPE_PARENTS.delete(type)
55+
TYPE_ALIASES.delete_if { |aliased, canonical| aliased == type || canonical == type }
4756
end
4857

4958
# Returns true if type is a text format
@@ -64,11 +73,24 @@ def extensions
6473
TYPE_EXTS[type] || []
6574
end
6675

76+
def canonical
77+
if to = TYPE_ALIASES[type]
78+
self.class.new(to)
79+
else
80+
self
81+
end
82+
end
83+
6784
# Get mime comment
6885
def comment
6986
nil # deprecated
7087
end
7188

89+
# Lookup canonical mime type by mime type string
90+
def self.by_type(type)
91+
new(type.downcase).canonical if type
92+
end
93+
7294
# Lookup mime type by file extension
7395
def self.by_extension(ext)
7496
ext = ext.to_s.downcase
@@ -111,9 +133,14 @@ def hash
111133
alias == eql?
112134

113135
def self.child?(child, parent)
136+
child, parent = canonical(child), canonical(parent)
114137
child == parent || TYPE_PARENTS[child]&.any? {|p| child?(p, parent) }
115138
end
116139

140+
def self.canonical(aliased_type)
141+
by_type(aliased_type)&.type
142+
end
143+
117144
def self.magic_match(io, method)
118145
return magic_match(StringIO.new(io.to_s), method) unless io.respond_to?(:read)
119146

lib/marcel/mime_type.rb

+9-4
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@ class MimeType
55
BINARY = "application/octet-stream"
66

77
class << self
8-
def extend(type, extensions: [], parents: [], magic: nil)
8+
def extend(type, extensions: [], aliases: [], parents: [], magic: nil)
9+
if canonical = Marcel::TYPE_ALIASES[type]
10+
warn "#{type} was already aliased to #{canonical}"
11+
end
12+
913
extensions = (Array(extensions) + Array(Marcel::TYPE_EXTS[type])).uniq
14+
existing_aliases = Marcel::TYPE_ALIASES.select { |_, t| t == type }.keys
15+
aliases = (Array(aliases) + existing_aliases).uniq
1016
parents = (Array(parents) + Array(Marcel::TYPE_PARENTS[type])).uniq
11-
Magic.add(type, extensions: extensions, magic: magic, parents: parents)
17+
Magic.add(type, extensions: extensions, magic: magic, aliases: aliases, parents: parents)
1218
end
1319

1420
# Returns the most appropriate content type for the given file.
@@ -32,7 +38,6 @@ def for(pathname_or_io = nil, name: nil, extension: nil, declared_type: nil)
3238
end
3339

3440
private
35-
3641
def for_data(pathname_or_io)
3742
if pathname_or_io
3843
with_io(pathname_or_io) do |io|
@@ -60,7 +65,7 @@ def for_extension(extension)
6065
end
6166

6267
def for_declared_type(declared_type)
63-
type = parse_media_type(declared_type)
68+
type = Marcel::Magic.canonical(parse_media_type(declared_type))
6469

6570
# application/octet-stream is treated as an undeclared/missing type,
6671
# allowing the type to be inferred from the filename. If there's no

lib/marcel/mime_type/definitions.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,14 @@
3232
Marcel::MimeType.extend "application/vnd.apple.numbers", extensions: %w( numbers ), parents: "application/zip"
3333
Marcel::MimeType.extend "application/vnd.apple.keynote", extensions: %w( key ), parents: "application/zip"
3434

35+
# Upstream aliases application/x-x509-ca-cert to application/x-x509-cert. Override that alias by defining it as a subtype of application/x-x509-cert;format=pem.
36+
Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem"
37+
3538
Marcel::MimeType.extend "audio/aac", extensions: %w( aac ), parents: "audio/x-aac"
3639
Marcel::MimeType.extend("audio/ogg", extensions: %w( ogg oga ), magic: [[0, 'OggS', [[29, 'vorbis']]]])
3740

3841
Marcel::MimeType.extend "image/vnd.dwg", magic: [[0, "AC10"]]
3942

40-
Marcel::MimeType.extend "application/x-x509-ca-cert", magic: [[0, '-----BEGIN CERTIFICATE-----']], extensions: %w( pem ), parents: "application/x-x509-cert;format=pem"
41-
4243
Marcel::MimeType.extend "image/avif", magic: [[4, "ftypavif"]], extensions: %w( avif )
4344
Marcel::MimeType.extend "image/heif", magic: [[4, "ftypmif1"]], extensions: %w( heif )
4445
Marcel::MimeType.extend "image/heic", magic: [[4, "ftypheic"]], extensions: %w( heic )
@@ -49,7 +50,6 @@
4950
Marcel::MimeType.extend "video/mp4", magic: [[4, "ftypisom"], [4, "ftypM4V "]], extensions: %w( mp4 m4v )
5051

5152
Marcel::MimeType.extend "audio/flac", magic: [[0, 'fLaC']], extensions: %w( flac ), parents: "audio/x-flac"
52-
Marcel::MimeType.extend "audio/x-wav", magic: [[0, 'RIFF', [[8, 'WAVE']]]], extensions: %w( wav ), parents: "audio/vnd.wav"
5353
Marcel::MimeType.extend "audio/mpc", magic: [[0, "MPCKSH"]], extensions: %w( mpc )
5454

5555
Marcel::MimeType.extend "font/ttf", magic: [[0, "\x00\x01\x00\x00"]], extensions: %w( ttf ttc )

lib/marcel/tables.rb

+143
Original file line numberDiff line numberDiff line change
@@ -2148,6 +2148,149 @@ module Marcel
21482148
'video/x-sgi-movie' => %w(movie),
21492149
'x-conference/x-cooltalk' => %w(ice), # Cooltalk Audio
21502150
}
2151+
TYPE_ALIASES = {
2152+
'application/bat' => 'application/x-bat',
2153+
'application/x-coreldraw' => 'application/coreldraw',
2154+
'application/x-cdr' => 'application/coreldraw',
2155+
'application/cdr' => 'application/coreldraw',
2156+
'image/x-cdr' => 'application/coreldraw',
2157+
'image/cdr' => 'application/coreldraw',
2158+
'application/x-setupscript' => 'application/inf',
2159+
'application/x-wine-extension-inf' => 'application/inf',
2160+
'application/x-javascript' => 'application/javascript',
2161+
'text/javascript' => 'application/javascript',
2162+
'application/x-java-vm' => 'application/java-vm',
2163+
'application/x-java' => 'application/java-vm',
2164+
'application/mac-binhex' => 'application/mac-binhex40',
2165+
'application/binhex' => 'application/mac-binhex40',
2166+
'application/vnd.ms-word' => 'application/msword',
2167+
'application/x-ogg' => 'audio/vorbis',
2168+
'application/msonenote' => 'application/onenote',
2169+
'application/x-pdf' => 'application/pdf',
2170+
'application/pgp' => 'application/pgp-encrypted',
2171+
'text/rss' => 'application/rss+xml',
2172+
'text/rtf' => 'application/rtf',
2173+
'application/smil' => 'application/smil+xml',
2174+
'application/x-kchart' => 'application/vnd.kde.kchart',
2175+
'application/x-kpresenter' => 'application/vnd.kde.kpresenter',
2176+
'application/x-kspread' => 'application/vnd.kde.kspread',
2177+
'application/x-kword' => 'application/vnd.kde.kword',
2178+
'application/x-koan' => 'application/vnd.koan',
2179+
'application/x-123' => 'application/vnd.lotus-1-2-3',
2180+
'application/x-mif' => 'application/vnd.mif',
2181+
'application/x-frame' => 'application/vnd.mif',
2182+
'application/msexcel' => 'application/vnd.ms-excel',
2183+
'application/mspowerpoint' => 'application/vnd.ms-powerpoint',
2184+
'application/ms-tnef' => 'application/vnd.ms-tnef',
2185+
'application/oxps' => 'application/vnd.ms-xpsdocument',
2186+
'application/x-vnd.oasis.opendocument.chart' => 'application/vnd.oasis.opendocument.chart',
2187+
'application/x-vnd.oasis.opendocument.chart-template' => 'application/vnd.oasis.opendocument.chart-template',
2188+
'application/vnd.oasis.opendocument.database' => 'application/vnd.oasis.opendocument.base',
2189+
'application/x-vnd.oasis.opendocument.formula' => 'application/vnd.oasis.opendocument.formula',
2190+
'application/x-vnd.oasis.opendocument.formula-template' => 'application/vnd.oasis.opendocument.formula-template',
2191+
'application/x-vnd.oasis.opendocument.graphics' => 'application/vnd.oasis.opendocument.graphics',
2192+
'application/x-vnd.oasis.opendocument.graphics-template' => 'application/vnd.oasis.opendocument.graphics-template',
2193+
'application/x-vnd.oasis.opendocument.image' => 'application/vnd.oasis.opendocument.image',
2194+
'application/x-vnd.oasis.opendocument.image-template' => 'application/vnd.oasis.opendocument.image-template',
2195+
'application/x-vnd.oasis.opendocument.presentation' => 'application/vnd.oasis.opendocument.presentation',
2196+
'application/x-vnd.oasis.opendocument.presentation-template' => 'application/vnd.oasis.opendocument.presentation-template',
2197+
'application/x-vnd.oasis.opendocument.spreadsheet' => 'application/vnd.oasis.opendocument.spreadsheet',
2198+
'application/x-vnd.oasis.opendocument.spreadsheet-template' => 'application/vnd.oasis.opendocument.spreadsheet-template',
2199+
'application/x-vnd.oasis.opendocument.text' => 'application/vnd.oasis.opendocument.text',
2200+
'application/x-vnd.oasis.opendocument.text-master' => 'application/vnd.oasis.opendocument.text-master',
2201+
'application/x-vnd.oasis.opendocument.text-template' => 'application/vnd.oasis.opendocument.text-template',
2202+
'application/x-vnd.oasis.opendocument.text-web' => 'application/vnd.oasis.opendocument.text-web',
2203+
'application/x-vnd.sun.xml.writer' => 'application/vnd.sun.xml.writer',
2204+
'application/vnd.ms-visio' => 'application/vnd.visio',
2205+
'image/x-targa' => 'image/x-tga',
2206+
'application/x-unix-archive' => 'application/x-archive',
2207+
'application/x-arj-compressed' => 'application/x-arj',
2208+
'application/x-dbm' => 'application/x-berkeley-db',
2209+
'application/vnd.debian.binary-package' => 'application/x-debian-package',
2210+
'application/x-Gnumeric-spreadsheet' => 'application/x-gnumeric',
2211+
'application/x-gzip' => 'application/gzip',
2212+
'application/x-gunzip' => 'application/gzip',
2213+
'application/gzipped' => 'application/gzip',
2214+
'application/gzip-compressed' => 'application/gzip',
2215+
'application/x-gzip-compressed' => 'application/gzip',
2216+
'gzip/document' => 'application/gzip',
2217+
'application/x-windows-installer' => 'application/x-ms-installer',
2218+
'application/x-msi' => 'application/x-ms-installer',
2219+
'application/x-rar' => 'application/x-rar-compressed',
2220+
'text/x-tex' => 'application/x-tex',
2221+
'text/x-texinfo' => 'application/x-texinfo',
2222+
'application/x-x509-ca-cert' => 'application/x-x509-cert',
2223+
'application/x-x509-user-cert' => 'application/x-x509-cert',
2224+
'text/xml' => 'application/xml',
2225+
'application/x-xml' => 'application/xml',
2226+
'text/x-dtd' => 'application/xml-dtd',
2227+
'text/xml-external-parsed-entity' => 'application/xml-external-parsed-entity',
2228+
'text/xsl' => 'application/xslt+xml',
2229+
'application/x-zip-compressed' => 'application/zip',
2230+
'application/x-deflate' => 'application/zlib',
2231+
'audio/x-m4a' => 'audio/mp4',
2232+
'audio/x-mp4a' => 'audio/mp4',
2233+
'audio/x-mpeg' => 'audio/mpeg',
2234+
'audio/x-ogg-flac' => 'audio/x-oggflac',
2235+
'audio/x-ogg-pcm' => 'audio/x-oggpcm',
2236+
'application/x-speex' => 'audio/speex',
2237+
'audio/aiff' => 'audio/x-aiff',
2238+
'audio/x-realaudio' => 'audio/x-pn-realaudio',
2239+
'audio/x-wav' => 'audio/vnd.wave',
2240+
'audio/wave' => 'audio/vnd.wave',
2241+
'audio/wav' => 'audio/vnd.wave',
2242+
'image/x-bmp' => 'image/bmp',
2243+
'image/x-ms-bmp' => 'image/bmp',
2244+
'image/x-emf' => 'image/emf',
2245+
'application/x-emf' => 'image/emf',
2246+
'application/x-ms-emz' => 'image/x-emf-compressed',
2247+
'image/hevc' => 'image/heic',
2248+
'image/hevc-sequence' => 'image/heic-sequence',
2249+
'video/jpm' => 'image/jpm',
2250+
'image/ntf' => 'image/nitf',
2251+
'image/x-psd' => 'image/vnd.adobe.photoshop',
2252+
'application/photoshop' => 'image/vnd.adobe.photoshop',
2253+
'image/x-dwg' => 'image/vnd.dwg',
2254+
'application/acad' => 'image/vnd.dwg',
2255+
'application/x-acad' => 'image/vnd.dwg',
2256+
'application/autocad_dwg' => 'image/vnd.dwg',
2257+
'application/dwg' => 'image/vnd.dwg',
2258+
'application/x-dwg' => 'image/vnd.dwg',
2259+
'application/x-autocad' => 'image/vnd.dwg',
2260+
'drawing/dwg' => 'image/vnd.dwg',
2261+
'image/x-icon' => 'image/vnd.microsoft.icon',
2262+
'image/x-dcx' => 'image/vnd.zbrush.dcx',
2263+
'image/x-pcx' => 'image/vnd.zbrush.pcx',
2264+
'image/x-pc-paintbrush' => 'image/vnd.zbrush.pcx',
2265+
'image/x-wmf' => 'image/wmf',
2266+
'application/x-msmetafile' => 'image/wmf',
2267+
'image/x-jb2' => 'image/x-jbig2',
2268+
'image/xcf' => 'image/x-xcf',
2269+
'application/x-mimearchive' => 'multipart/related',
2270+
'message/rfc2557' => 'multipart/related',
2271+
'drawing/x-dwf' => 'model/vnd.dwf',
2272+
'text/x-asm' => 'text/x-assembly',
2273+
'application/x-troff' => 'text/troff',
2274+
'application/x-troff-man' => 'text/troff',
2275+
'application/x-troff-me' => 'text/troff',
2276+
'application/x-troff-ms' => 'text/troff',
2277+
'text/x-c' => 'text/x-csrc',
2278+
'text/x-java' => 'text/x-java-source',
2279+
'text/x-properties' => 'text/x-java-properties',
2280+
'text/properties' => 'text/x-java-properties',
2281+
'application/x-httpd-jsp' => 'text/x-jsp',
2282+
'application/matlab-mat' => 'application/x-matlab-data',
2283+
'application/x-tcl' => 'text/x-tcl',
2284+
'video/x-daala' => 'video/daala',
2285+
'video/x-theora' => 'video/theora',
2286+
'video/x-ogg-uvs' => 'video/x-ogguvs',
2287+
'video/x-ogg-yuv' => 'video/x-oggyuv',
2288+
'video/x-ogg-rgb' => 'video/x-oggrgb',
2289+
'video/avi' => 'video/x-msvideo',
2290+
'video/msvideo' => 'video/x-msvideo',
2291+
'application/font-woff' => 'font/woff',
2292+
'application/font-woff2' => 'font/woff2',
2293+
}
21512294
TYPE_PARENTS = {
21522295
'application/bizagi-modeler' => %w(application/zip),
21532296
'application/dash+xml' => %w(application/xml),

script/generate_tables.rb

+7
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ def get_matches(mime, parent)
128128

129129
extensions = {}
130130
types = {}
131+
aliases = {}
131132
magics = []
132133

133134
ARGV.each do |path|
@@ -137,6 +138,7 @@ def get_matches(mime, parent)
137138
(doc/'mime-info/mime-type').each do |mime|
138139
comments = Hash[*(mime/'_comment').map {|comment| [comment['xml:lang'], comment.inner_text] }.flatten]
139140
type = mime['type']
141+
(mime/'alias').each { |x| aliases[x['type']] = type }
140142
subclass = (mime/'sub-class-of').map{|x| x['type']}
141143
exts = (mime/'glob').map{|x| x['pattern'] =~ /^\*\.([^\[\]]+)$/ ? $1.downcase : nil }.compact
142144
(mime/'magic').each do |magic|
@@ -222,6 +224,11 @@ def get_matches(mime, parent)
222224
puts " '#{key}' => %w(#{exts}),#{comment}"
223225
end
224226
puts " }"
227+
puts " TYPE_ALIASES = {"
228+
aliases.each do |aliased, type|
229+
puts " '#{aliased}' => '#{type}',"
230+
end
231+
puts " }"
225232
puts " TYPE_PARENTS = {"
226233
types.keys.sort.each do |key|
227234
parents = types[key][1].sort.join(' ')

test/declared_type_test.rb

+5
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,9 @@ class Marcel::MimeType::DeclaredTypeTest < Marcel::TestCase
1919
test "ignores charset declarations" do
2020
assert_equal "text/html", Marcel::MimeType.for(declared_type: "text/html; charset=utf-8")
2121
end
22+
23+
test "resolves declared type to a canonical MIME type" do
24+
aliased, canonical = Marcel::TYPE_ALIASES.first
25+
assert_equal canonical, Marcel::MimeType.for(declared_type: aliased)
26+
end
2227
end
File renamed without changes.
File renamed without changes.

test/illustrator_test.rb

+5
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
require 'rack'
33

44
class Marcel::MimeType::IllustratorTest < Marcel::TestCase
5+
test ".ai uploaded as application/illustrator" do
6+
file = files("name/application/illustrator/illustrator.ai")
7+
assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/illustrator")
8+
end
9+
510
test ".ai uploaded as application/postscript" do
611
file = files("name/application/illustrator/illustrator.ai")
712
assert_equal "application/illustrator", Marcel::MimeType.for(file, name: "illustrator.ai", declared_type: "application/postscript")

test/magic_and_declared_type_test.rb

+7-1
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,14 @@
33

44
class Marcel::MimeType::MagicAndDeclaredTypeTest < Marcel::TestCase
55
each_content_type_fixture('name') do |file, name, content_type|
6-
test "correctly returns #{content_type} for #{name} given both file and declared type" do
6+
test "detects #{content_type} given magic bytes from #{name} and declared type" do
77
assert_equal content_type, Marcel::MimeType.for(file, declared_type: content_type)
88
end
9+
10+
ALIASED[content_type].each do |aliased|
11+
test "detects #{content_type} given magic bytes from #{name} and aliased type #{aliased}" do
12+
assert_equal content_type, Marcel::MimeType.for(file, declared_type: aliased)
13+
end
14+
end
915
end
1016
end

test/magic_and_name_test.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ class Marcel::MimeType::MagicAndNameTest < Marcel::TestCase
66
# the file contents and the name. In some cases, the file contents will point to a
77
# generic type, while the name will choose a more specific subclass
88
each_content_type_fixture('name') do |file, name, content_type|
9-
test "correctly returns #{content_type} for #{name} given both file and name" do
9+
test "detects #{content_type} given filename #{name} and its magic bytes" do
1010
assert_equal content_type, Marcel::MimeType.for(file, name: name)
1111
end
1212
end

0 commit comments

Comments
 (0)