summaryrefslogtreecommitdiff
path: root/parser/html/nsHtml5Highlighter.h
blob: e9474869e68f3735975274bcf6c67d2cf4a04470 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef nsHtml5Highlighter_h
#define nsHtml5Highlighter_h

#include "nsCOMPtr.h"
#include "nsHtml5TreeOperation.h"
#include "nsHtml5UTF16Buffer.h"
#include "nsHtml5TreeOperation.h"
#include "nsAHtml5TreeOpSink.h"

#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512

/**
 * A state machine for generating HTML for display in View Source based on
 * the transitions the tokenizer makes on the source being viewed.
 */
class nsHtml5Highlighter
{
  public:
    /**
     * The constructor.
     *
     * @param aOpSink the sink for the tree ops generated by this highlighter
     */
    explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);

    /**
     * The destructor.
     */
    ~nsHtml5Highlighter();

    /**
     * Starts the generated document.
     */
    void Start(const nsAutoString& aTitle);

    /**
     * Report a tokenizer state transition.
     *
     * @param aState the state being transitioned to
     * @param aReconsume whether this is a reconsuming transition
     * @param aPos the tokenizer's current position into the buffer
     */
    int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);

    /**
     * Report end of file.
     */
    void End();

    /**
     * Set the current buffer being tokenized
     */
    void SetBuffer(nsHtml5UTF16Buffer* aBuffer);

    /**
     * Let go of the buffer being tokenized but first, flush text from it.
     *
     * @param aPos the first UTF-16 code unit not to flush
     */
    void DropBuffer(int32_t aPos);

    /**
     * Flush the tree ops into the sink.
     *
     * @return true if there were ops to flush
     */
    bool FlushOps();

    /**
     * Linkify the current attribute value if the attribute name is one of
     * known URL attributes. (When executing tree ops, javascript: URLs will
     * not be linkified, though.)
     *
     * @param aName the name of the attribute
     * @param aValue the value of the attribute
     */
    void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
                                    nsString* aValue);

    /**
     * Inform the highlighter that the tokenizer successfully completed a
     * named character reference.
     */
    void CompletedNamedCharacterReference();

    /**
     * Adds an error annotation to the node that's currently on top of
     * mStack.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentNode(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentRun(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text with one atom to use when formatting the message.
     *
     * @param aMsgId the id of the message in the property file
     * @param aName the atom
     */
    void AddErrorToCurrentRun(const char* aMsgId, nsIAtom* aName);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recently opened markup declaration/tag span, character reference or
     * run of text with two atoms to use when formatting the message.
     *
     * @param aMsgId the id of the message in the property file
     * @param aName the first atom
     * @param aOther the second atom
     */
    void AddErrorToCurrentRun(const char* aMsgId,
                              nsIAtom* aName,
                              nsIAtom* aOther);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recent potentially character reference-starting ampersand.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentAmpersand(const char* aMsgId);

    /**
     * Adds an error annotation to the node that corresponds to the most
     * recent potentially self-closing slash.
     *
     * @param aMsgId the id of the message in the property file
     */
    void AddErrorToCurrentSlash(const char* aMsgId);
    
    /**
     * Enqueues a tree op for adding base to the urls with the view-source:
     *
     * @param aValue the base URL to add
     */
    void AddBase(const nsString& aValue);

  private:

    /**
     * Starts a span with no class.
     */
    void StartSpan();

    /**
     * Starts a <span> and sets the class attribute on it.
     *
     * @param aClass the class to set (MUST be a static string that does not
     *        need to be released!)
     */
    void StartSpan(const char16_t* aClass);

    /**
     * End the current <span> or <a> in the highlighter output.
     */
    void EndSpanOrA();

    /**
     * Starts a wrapper around a run of characters.
     */
    void StartCharacters();

    /**
     * Ends a wrapper around a run of characters.
     */
    void EndCharactersAndStartMarkupRun();

    /**
     * Starts an <a>.
     */
    void StartA();

    /**
     * Flushes characters up to but not including the current one.
     */
    void FlushChars();

    /**
     * Flushes characters up to and including the current one.
     */
    void FlushCurrent();

    /**
     * Finishes highlighting a tag in the input data by closing the open
     * <span> and <a> elements in the highlighter output and then starts
     * another <span> for potentially highlighting characters potentially
     * appearing next.
     */
    void FinishTag();

    /**
     * Adds a class attribute to the current node.
     *
     * @param aClass the class to set (MUST be a static string that does not
     *        need to be released!)
     */
    void AddClass(const char16_t* aClass);

    /**
     * Allocates a handle for an element.
     *
     * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
     * in nsHtml5TreeBuilderHSupplement.h.
     *
     * @return the handle
     */
    nsIContent** AllocateContentHandle();

    /**
     * Enqueues an element creation tree operation.
     *
     * @param aName the name of the element
     * @param aAttributes the attribute holder (ownership will be taken) or
     *        nullptr for no attributes
     * @param aIntendedParent the intended parent node for the created element
     * @return the handle for the element that will be created
     */
    nsIContent** CreateElement(nsIAtom* aName,
                               nsHtml5HtmlAttributes* aAttributes,
                               nsIContent** aIntendedParent);

    /**
     * Gets the handle for the current node. May be called only after the
     * root element has been set.
     *
     * @return the handle for the current node
     */
    nsIContent** CurrentNode();

    /**
     * Create an element and push it (its handle) on the stack.
     *
     * @param aName the name of the element
     * @param aAttributes the attribute holder (ownership will be taken) or
     *        nullptr for no attributes
     */
    void Push(nsIAtom* aName, nsHtml5HtmlAttributes* aAttributes);

    /**
     * Pops the current node off the stack.
     */
    void Pop();

    /**
     * Appends text content to the current node.
     *
     * @param aBuffer the buffer to copy from
     * @param aStart the index of the first code unit to copy
     * @param aLength the number of code units to copy
     */
    void AppendCharacters(const char16_t* aBuffer,
                          int32_t aStart,
                          int32_t aLength);

    /**
     * Enqueues a tree op for adding an href attribute with the view-source:
     * URL scheme to the current node.
     *
     * @param aValue the (potentially relative) URL to link to
     */
    void AddViewSourceHref(const nsString& aValue);
    
    /**
     * The state we are transitioning away from.
     */
    int32_t mState;

    /**
     * The index of the first UTF-16 code unit in mBuffer that hasn't been
     * flushed yet.
     */
    int32_t mCStart;

    /**
     * The position of the code unit in mBuffer that caused the current
     * transition.
     */
    int32_t mPos;

    /**
     * The current line number.
     */
    int32_t mLineNumber;

    /**
     * The number of inline elements open inside the <pre> excluding the
     * span potentially wrapping a run of characters.
     */
    int32_t mInlinesOpen;

    /**
     * Whether there's a span wrapping a run of characters (excluding CDATA
     * section) open.
     */
    bool mInCharacters;

    /**
     * The current buffer being tokenized.
     */
    nsHtml5UTF16Buffer* mBuffer;

    /**
     * The outgoing tree op queue.
     */
    nsTArray<nsHtml5TreeOperation> mOpQueue;

    /**
     * The tree op stage for the tree op executor.
     */
    nsAHtml5TreeOpSink* mOpSink;

    /**
     * The most recently opened markup declaration/tag or run of characters.
     */
    nsIContent** mCurrentRun;

    /**
     * The most recent ampersand in a place where character references were
     * allowed.
     */
    nsIContent** mAmpersand;

    /**
     * The most recent slash that might become a self-closing slash.
     */
    nsIContent** mSlash;

    /**
     * Memory for element handles.
     */
    mozilla::UniquePtr<nsIContent*[]> mHandles;

    /**
     * Number of handles used in mHandles
     */
    int32_t mHandlesUsed;

    /**
     * A holder for old contents of mHandles
     */
    nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;

    /**
     * The element stack.
     */
    nsTArray<nsIContent**> mStack;

    /**
     * The string "comment"
     */
    static char16_t sComment[];

    /**
     * The string "cdata"
     */
    static char16_t sCdata[];

    /**
     * The string "start-tag"
     */
    static char16_t sStartTag[];

    /**
     * The string "attribute-name"
     */
    static char16_t sAttributeName[];

    /**
     * The string "attribute-value"
     */
    static char16_t sAttributeValue[];

    /**
     * The string "end-tag"
     */
    static char16_t sEndTag[];

    /**
     * The string "doctype"
     */
    static char16_t sDoctype[];

    /**
     * The string "entity"
     */
    static char16_t sEntity[];

    /**
     * The string "pi"
     */
    static char16_t sPi[];
    
    /**
     * Whether base is already visited once.
     */
     bool mSeenBase;
};

#endif // nsHtml5Highlighter_h