summaryrefslogtreecommitdiff
path: root/parser/html/nsIParserUtils.idl
blob: 27306df2534b34610440758d465b1a3bb6501baa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"

interface nsIDOMElement;
interface nsIDOMDocumentFragment;
interface nsIURI;

/**
 * Exposes non-Web HTML parser functionality (sanitizing, plain-text
 * conversion and fragment parsing) to extensions and applications.
 * Don't use this from within Gecko--use nsContentUtils, nsTreeSanitizer, etc.
 * directly instead.
 */
[scriptable, uuid(a1101145-0025-411e-8873-fdf57bf28128)]
interface nsIParserUtils : nsISupports
{

  /**
   * Flag for sanitizer: Allow comment nodes.
   * (Comments are removed unless this flag is set.)
   */
  const unsigned long SanitizerAllowComments = (1 << 0);

  /**
   * Flag for sanitizer: Allow <style> and style="" (with contents sanitized
   * in case of -moz-binding). Note! If -moz-binding is absent, properties
   * that might be XSS risks in other Web engines are preserved!
   */
  const unsigned long SanitizerAllowStyle = (1 << 1);

  /**
   * Flag for sanitizer: Only allow cid: URLs for embedded content.
   *
   * At present, sanitizing CSS backgrounds, etc., is not supported, so setting
   * this together with SanitizerAllowStyle doesn't make sense.
   *
   * At present, sanitizing CSS syntax in SVG presentational attributes is not
   * supported, so this option flattens out SVG.
   */
  const unsigned long SanitizerCidEmbedsOnly = (1 << 2);

  /**
   * Flag for sanitizer: Drop non-CSS presentational HTML elements and
   * attributes, such as <font>, <center> and bgcolor="".
   */
  const unsigned long SanitizerDropNonCSSPresentation = (1 << 3);

  /**
   * Flag for sanitizer: Drop forms and form controls (excluding
   * fieldset/legend).
   */
  const unsigned long SanitizerDropForms = (1 << 4);

  /**
   * Flag for sanitizer: Drop <img>, <video>, <audio> and <source> and flatten
   * out SVG.
   */
  const unsigned long SanitizerDropMedia = (1 << 5);

  /**
   * Parses a string into an HTML document, sanitizes the document and
   * returns the result serialized to a string.
   *
   * The sanitizer is designed to protect against XSS when sanitized content
   * is inserted into a different-origin context without an iframe-equivalent
   * sandboxing mechanism.
   *
   * By default, the sanitizer doesn't try to avoid leaking information that
   * the content was viewed to third parties. That is, by default, e.g.
   * <img src> pointing to an HTTP server potentially controlled by a third
   * party is not removed. To avoid ambient information leakage upon loading
   * the sanitized content, use the SanitizerCidEmbedsOnly flag. In that
   * case, <a href> links (and similar) to other content are preserved, so an
   * explicit user action (following a link) after the content has been loaded
   * can still leak information.
   *
   * By default, non-dangerous non-CSS presentational HTML elements and
   * attributes or forms are not removed. To remove these, use
   * SanitizerDropNonCSSPresentation and/or SanitizerDropForms.
   *
   * By default, comments and CSS are removed. To preserve comments, use
   * SanitizerAllowComments. To preserve <style> and style="", use
   * SanitizerAllowStyle. -moz-binding is removed from <style> and style="" if
   * present. In this case, properties that Gecko doesn't recognize can get
   * removed as a side effect. Note! If SanitizerAllowStyle is specified and
   * -moz-binding is not present in <style> and style="", the sanitized
   * content may still be XSS dangerous if loaded into a non-Gecko Web engine!
   *
   * @param src the HTML source to parse (C++ callers are allowed but not
   *            required to use the same string for the return value.)
   * @param flags sanitization option flags defined above (a combination of
   *              the Sanitizer* constants, each of which is a distinct bit)
   * @return the sanitized document serialized to a string
   */
  AString sanitize(in AString src, in unsigned long flags);

  /**
   * Convert HTML to plain text.
   *
   * @param src the HTML source to parse (C++ callers are allowed but not
   *            required to use the same string for the return value.)
   * @param flags conversion option flags defined in nsIDocumentEncoder
   *              (note: these are NOT the Sanitizer* flags of this interface)
   * @param wrapCol number of characters per line; 0 for no auto-wrapping
   * @return the plain-text rendition of |src|
   */
  AString convertToPlainText(in AString src,
                             in unsigned long flags,
                             in unsigned long wrapCol);

  /**
   * Parses markup into a sanitized document fragment.
   *
   * @param fragment the input markup
   * @param flags sanitization option flags defined above
   * @param isXML true if |fragment| is XML and false if HTML
   * @param baseURI the base URL for this fragment
   * @param element the context node for the fragment parsing algorithm
   * @return the document fragment produced by parsing and sanitizing
   *         |fragment|
   */
  nsIDOMDocumentFragment parseFragment(in AString fragment,
                                       in unsigned long flags,
                                       in boolean isXML,
                                       in nsIURI baseURI,
                                       in nsIDOMElement element);

};

/*
 * C++-only identifiers, placed in a raw C++ section of this IDL file:
 * a contract ID string and a class ID (CID) initializer. Judging by their
 * names they identify the component implementing nsIParserUtils -- confirm
 * against the component registration. Note that the CID below is distinct
 * from the interface uuid declared on nsIParserUtils above.
 */
%{ C++
#define NS_PARSERUTILS_CONTRACTID \
    "@mozilla.org/parserutils;1"
#define NS_PARSERUTILS_CID  \
{ 0xaf7b24cb, 0x893f, 0x41bb, { 0x96, 0x1f, 0x5a, 0x69, 0x38, 0x8e, 0x27, 0xc3 } }
%}