Data URL Header
To extract the specific [<mediatype>][;base64]
portions of Data URLs — per MDN's doc — the following aspects were explored:
Hard-coding a specific list of allowed <type>/<subtype>
pairs into the expression versus hard-coding only the <type>.
A more complete capture of the *( ";" parameter )
portion — per RFC 2397 — returning the attribute-value pairs and the trailing ;base64
marker as separate captures.
The recommended expression for #28614 would roughly be the following (pending obvious refinements if it is to be used):
/^(?:((?:text|application)\/(?:[A-Z][-.0-9A-Z]*)?[A-Z]+)((?:;[A-Z][!%'()*\-.0-9A-Z_~]*=[!%'()*\-.0-9A-Z_~]*)*)(;base64)?),/i;
Note: See the annotated code snippet for more details.
This is expected to work as follows:
matcher.exec('text/javascript,')
NOTE: Assuming
text/javascript;,
to be invalid
[
// 0: valid data-uri head
'text/javascript,',
// 1: mime
'text/javascript',
// 2: attributes
'',
// 3: base64
undefined,
];
matcher.exec('text/javascript;base64,')
NOTE: Assuming
;base64,
and base64,
to be invalid
[
// 0: valid data-uri head
'text/javascript;base64,',
// 1: mime
'text/javascript',
// 2: attributes
'',
// 3: base64
';base64',
];
matcher.exec('text/javascript;a=b;base64,')
[
// 0: valid data-uri head
'text/javascript;a=b;base64,',
// 1: mime
'text/javascript',
// 2: attributes
';a=b',
// 3: base64
';base64',
];
// Build a lookup of sample data-URL headers -> the matcher's exec() result.
// Each media type is tried bare (no trailing comma), with a trailing comma,
// with `;base64,` and with a `;charset=US-ASCII,` parameter.
Object.fromEntries(
  ['text/javascript', 'application/json', 'application/wasm']
    .flatMap(type => [type, `${type},`, `${type};base64,`, `${type};charset=US-ASCII,`])
    .map(header => [
      header,
      /^(?:((?:text|application)\/(?:[A-Z][-.0-9A-Z]*)?[A-Z]+)((?:;[A-Z][!%'()*\-.0-9A-Z_~]*=[!%'()*\-.0-9A-Z_~]*)*)(;base64)?),/i.exec(header),
    ]),
);
result: ({
"text/javascript": null,
"text/javascript,": ["text/javascript,", "text/javascript", "", undefined],
"text/javascript;base64,": ["text/javascript;base64,", "text/javascript", "", ";base64"],
"text/javascript;charset=US-ASCII,": [
"text/javascript;charset=US-ASCII,",
"text/javascript",
";charset=US-ASCII",
undefined
],
"application/json": null,
"application/json,": ["application/json,", "application/json", "", undefined],
"application/json;base64,": ["application/json;base64,", "application/json", "", ";base64"],
"application/json;charset=US-ASCII,": [
"application/json;charset=US-ASCII,",
"application/json",
";charset=US-ASCII",
undefined
],
"application/wasm": null,
"application/wasm,": ["application/wasm,", "application/wasm", "", undefined],
"application/wasm;base64,": ["application/wasm;base64,", "application/wasm", "", ";base64"],
"application/wasm;charset=US-ASCII,": [
"application/wasm;charset=US-ASCII,",
"application/wasm",
";charset=US-ASCII",
undefined
]
})
/**
 * Creates a case-insensitive RegExp that matches a data-URL header of the
 * form `<mediatype>[;attribute=value]*[;base64],` at the start of a string.
 *
 * Capture groups of a successful match:
 *   1. the media type (`text/<subtype>` or `application/<subtype>` only)
 *   2. the `;attribute=value` parameters ('' when there are none)
 *   3. `;base64` when present, otherwise `undefined`
 *
 * @param {object} [options] Optional settings. `options.formats` is merged
 *   into the local `formats` table; the returned matcher does not currently
 *   depend on it.
 * @returns {RegExp} The header matcher (flags: `i`).
 */
const createDataURLHeaderMatcher = options => {
  // Null-prototype table of media type -> format name. Caller-supplied
  // entries are spread first, so the two built-in entries below win on
  // conflict. Only the commented "strict alternative" further down would
  // consume this table; the pattern in use does not read it.
  const formats = {
    ...{...options}.formats,
    __proto__: null,
    'text/javascript': 'module',
    'application/json': 'json',
  };

  // <MEDIATYPE> data-uri forms only
  //
  // SEE: https://tools.ietf.org/html/rfc2045#section-5.1
  //
  // <TYPE>/<SUBTYPE> "text" or "application" only
  const mediaTypeSource = /(?:text|application)\/(?:[A-Z][-.0-9A-Z]*)?[A-Z]+/i.source;
  //   Strict alternative: (keyof formats only)
  //     Object.getOwnPropertyNames(formats)
  //       .filter(t=>/(?:[A-Z]+)\/(?:[A-Z][-.0-9A-Z]*)?[A-Z]+$/i.test(t))
  //       .map(t=>t.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&'))
  //       .join('|')

  // <PARAMETER> "safe" uri tokens only (no quotes/spaces)
  const parametersSource = /(?:;[A-Z][!%'()*\-.0-9A-Z_~]*=[!%'()*\-.0-9A-Z_~]*)*/i.source;
  //   Character set derived from the characters encodeURIComponent leaves as-is:
  //     Array(255)
  //       .fill(undefined)
  //       .map((v,i)=>encodeURIComponent(v=String.fromCodePoint(i))===v&&v)
  //       .filter(Boolean)

  // Optional trailing base64 marker, captured as its own group.
  const base64Source = '(;base64)?';

  // One capture group per header part; the header must end with a comma.
  const headerSource = `(${mediaTypeSource})(${parametersSource})${base64Source}`;
  return new RegExp(`^(?:${headerSource}),`, 'i');
};