1 files changed, 907 insertions, 0 deletions
diff --git a/mailnews/db/gloda/modules/fundattr.js b/mailnews/db/gloda/modules/fundattr.js
new file mode 100644
index 0000000000..75a424adbc
--- /dev/null
+++ b/mailnews/db/gloda/modules/fundattr.js
@@ -0,0 +1,907 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+this.EXPORTED_SYMBOLS = ['GlodaFundAttr'];
+
+var Cc = Components.classes;
+var Ci = Components.interfaces;
+var Cr = Components.results;
+var Cu = Components.utils;
+
+Cu.import("resource:///modules/gloda/log4moz.js");
+Cu.import("resource:///modules/StringBundle.js");
+
+Cu.import("resource:///modules/gloda/utils.js");
+Cu.import("resource:///modules/gloda/gloda.js");
+Cu.import("resource:///modules/gloda/datastore.js");
+Cu.import("resource:///modules/gloda/datamodel.js"); // for GlodaAttachment
+
+Cu.import("resource:///modules/gloda/noun_mimetype.js");
+Cu.import("resource:///modules/gloda/connotent.js");
+
+/**
+ * @namespace The Gloda Fundamental Attribute provider is a special attribute
+ *  provider; it provides attributes that the rest of the providers should be
+ *  able to assume exist.  Also, it may end up accessing things at a lower level
+ *  than most extension providers should do.  In summary, don't mimic this code
+ *  unless you won't complain when your code breaks.
+ */
+var GlodaFundAttr = {
+  providerName: "gloda.fundattr",
+  strings: new StringBundle("chrome://messenger/locale/gloda.properties"),
+  _log: null,
+
+  init: function gloda_explattr_init() {
+    this._log =  Log4Moz.repository.getLogger("gloda.fundattr");
+
+    try {
+      this.defineAttributes();
+    }
+    catch (ex) {
+      this._log.error("Error in init: " + ex);
+      throw ex;
+    }
+  },
+
+  // Popularity boosts.  optimize() adds these to a contact's popularity, and
+  //  only when the message being processed is new.
+  /** Added to each 'to' recipient's contact when 'I' am the author. */
+  POPULARITY_FROM_ME_TO: 10,
+  /** Added to each 'cc' recipient's contact when 'I' am the author. */
+  POPULARITY_FROM_ME_CC: 4,
+  /** Added to each 'bcc' recipient's contact when 'I' am the author. */
+  POPULARITY_FROM_ME_BCC: 3,
+  /** Added to the author's contact for each 'to' recipient that is 'me'. */
+  POPULARITY_TO_ME: 5,
+  /** Added to the author's contact for each 'cc' recipient that is 'me'. */
+  POPULARITY_CC_ME: 1,
+  /** Added to the author's contact for each 'bcc' recipient that is 'me'. */
+  POPULARITY_BCC_ME: 1,
+
+  // Notability boosts.  Used by both optimize() and score().
+  /** Boost for messages 'I' sent */
+  NOTABILITY_FROM_ME: 10,
+  /** Boost for messages involving 'me'. */
+  NOTABILITY_INVOLVING_ME: 1,
+  /** Boost for message from someone in 'my' address book. */
+  NOTABILITY_FROM_IN_ADDR_BOOK: 10,
+  /** Boost for the first person involved in my address book. */
+  NOTABILITY_INVOLVING_ADDR_BOOK_FIRST: 8,
+  /** Boost for each additional person involved in my address book. */
+  NOTABILITY_INVOLVING_ADDR_BOOK_ADDL: 2,
+
+  /**
+   * Register every attribute this provider supplies by calling
+   *  Gloda.defineAttribute once per attribute.  Each returned definition is
+   *  kept on `this._attr*`, except for the "_deleted" attribute whose
+   *  definition is not retained.
+   *
+   * Called once from init().
+   */
+  defineAttributes: function gloda_fundattr_defineAttributes() {
+    /* ***** Conversations ***** */
+    // conversation: subjectMatches
+    this._attrConvSubject = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "subjectMatches",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "subject",
+      subjectNouns: [Gloda.NOUN_CONVERSATION],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    /* ***** Messages ***** */
+    // folder
+    this._attrFolder = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "folder",
+      singular: true,
+      facet: true,
+      special: Gloda.kSpecialColumn,
+      specialColumnName: "folderID",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FOLDER,
+      }); // tested-by: test_attributes_fundamental
+    // account
+    this._attrAccount = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "account",
+      canQuery: "memory",
+      singular: true,
+      facet: true,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_ACCOUNT
+      });
+    // messageKey
+    this._attrMessageKey = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "messageKey",
+      singular: true,
+      special: Gloda.kSpecialColumn,
+      specialColumnName: "messageKey",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_NUMBER,
+      canQuery: true,
+      }); // tested-by: test_attributes_fundamental
+
+    // We need to surface the deleted attribute for querying, but there is no
+    //  reason for user code, so let's call it "_deleted" rather than deleted.
+    // (In fact, our validity constraints require a special query formulation
+    //  that user code should have no clue exists.  That's right user code,
+    //  that's a dare.)
+    Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "_deleted",
+      singular: true,
+      special: Gloda.kSpecialColumn,
+      specialColumnName: "deleted",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_NUMBER,
+      });
+
+
+    // -- fulltext search helpers
+    // fulltextMatches.  Match over message subject, body, and attachments
+    // @testpoint gloda.noun.message.attr.fulltextMatches
+    this._attrFulltext = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "fulltextMatches",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "messagesText",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // subjectMatches.  Fulltext match on subject
+    // @testpoint gloda.noun.message.attr.subjectMatches
+    this._attrSubjectText = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "subjectMatches",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "subject",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // bodyMatches. super-synthetic full-text matching...
+    // @testpoint gloda.noun.message.attr.bodyMatches
+    this._attrBody = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "bodyMatches",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "body",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // attachmentNamesMatch
+    // @testpoint gloda.noun.message.attr.attachmentNamesMatch
+    this._attrAttachmentNames = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "attachmentNamesMatch",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "attachmentNames",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // authorMatches.  Fulltext match on the "author" column
+    // @testpoint gloda.noun.message.attr.authorMatches
+    this._attrAuthorFulltext = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "authorMatches",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "author",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // recipientsMatch.  Fulltext match on the "recipients" column
+    // @testpoint gloda.noun.message.attr.recipientsMatch
+    this._attrRecipientsFulltext = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrDerived,
+      attributeName: "recipientsMatch",
+      singular: true,
+      special: Gloda.kSpecialFulltext,
+      specialColumnName: "recipients",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_FULLTEXT,
+      });
+
+    // --- synthetic stuff for some reason
+    // conversation
+    // @testpoint gloda.noun.message.attr.conversation
+    this._attrConversation = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "conversation",
+      singular: true,
+      special: Gloda.kSpecialColumnParent,
+      specialColumnName: "conversationID",
+      idStorageAttributeName: "_conversationID",
+      valueStorageAttributeName: "_conversation",
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_CONVERSATION,
+      canQuery: true,
+      });
+
+    // --- Fundamental
+    // From
+    this._attrFrom = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "from",
+                        singular: true,
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_IDENTITY,
+                        }); // tested-by: test_attributes_fundamental
+    // To
+    this._attrTo = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "to",
+                        singular: false,
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_IDENTITY,
+                        }); // tested-by: test_attributes_fundamental
+    // Cc
+    this._attrCc = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "cc",
+                        singular: false,
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_IDENTITY,
+                        }); // not-tested
+    /**
+     * Bcc'ed recipients; only makes sense for sent messages.
+     */
+    this._attrBcc = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "bcc",
+      singular: false,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_IDENTITY,
+    }); // not-tested
+
+    // Date.  now lives on the row.
+    this._attrDate = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "date",
+                        singular: true,
+                        facet: {
+                          type: "date",
+                        },
+                        special: Gloda.kSpecialColumn,
+                        specialColumnName: "date",
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_DATE,
+                        }); // tested-by: test_attributes_fundamental
+
+    // Header message ID.
+    this._attrHeaderMessageID = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "headerMessageID",
+                        singular: true,
+                        special: Gloda.kSpecialString,
+                        specialColumnName: "headerMessageID",
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_STRING,
+                        canQuery: true,
+                        }); // tested-by: test_attributes_fundamental
+
+    // Attachment MIME Types
+    this._attrAttachmentTypes = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "attachmentTypes",
+      singular: false,
+      emptySetIsSignificant: true,
+      facet: {
+        type: "default",
+        // This will group the MIME types by their category.
+        groupIdAttr: "category",
+        queryHelper: "Category",
+      },
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_MIME_TYPE,
+      });
+
+    // Encryption flag.  Declared as a number noun; process() assigns it a
+    //  boolean-ish value.
+    this._attrIsEncrypted = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "isEncrypted",
+      singular: true,
+      emptySetIsSignificant: false,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_NUMBER,
+      });
+
+    // Attachment infos
+    this._attrAttachmentInfos = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "attachmentInfos",
+      singular: false,
+      emptySetIsSignificant: false,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_ATTACHMENT,
+      });
+
+    // --- Optimization
+    /**
+     * Involves means any of from/to/cc/bcc.  The queries get ugly enough
+     *  without this that it seems to justify the cost, especially given the
+     *  frequent use case.  (In fact, post-filtering for the specific from/to/cc
+     *  is probably justifiable rather than losing this attribute...)
+     */
+    this._attrInvolves = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrOptimization,
+      attributeName: "involves",
+      singular: false,
+      facet: {
+        type: "default",
+        /**
+         * Filter out 'me', as we have other facets that deal with that, and the
+         *  'me' identities are so likely that they distort things.
+         *
+         * @return true if the identity is not one of my identities, false if it
+         *   is.
+         */
+        filter: function gloda_explattr_involves_filter(aItem) {
+          return (!(aItem.id in Gloda.myIdentities));
+        }
+      },
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_IDENTITY,
+      }); // not-tested
+
+    /**
+     * Any of to/cc/bcc.
+     */
+    this._attrRecipients = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrOptimization,
+      attributeName: "recipients",
+      singular: false,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_IDENTITY,
+      }); // not-tested
+
+    // From Me (To/Cc/Bcc)
+    this._attrFromMe = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrOptimization,
+      attributeName: "fromMe",
+      singular: false,
+      // The interesting thing to a facet is whether the message is from me.
+      facet: {
+        type: "nonempty?"
+      },
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_PARAM_IDENTITY,
+      }); // not-tested
+    // To/Cc/Bcc Me
+    // NOTE(review): typed kAttrFundamental although optimize() is what
+    //  populates toMe, and its sibling fromMe is kAttrOptimization -- confirm
+    //  this is intentional.
+    this._attrToMe = Gloda.defineAttribute({
+      provider: this,
+      extensionName: Gloda.BUILT_IN,
+      attributeType: Gloda.kAttrFundamental,
+      attributeName: "toMe",
+      // The interesting thing to a facet is whether the message is to me.
+      facet: {
+        type: "nonempty?"
+      },
+      singular: false,
+      subjectNouns: [Gloda.NOUN_MESSAGE],
+      objectNoun: Gloda.NOUN_PARAM_IDENTITY,
+      }); // not-tested
+
+
+    // -- Mailing List
+    // Non-singular, but a hard call.  Namely, it is obvious that a message can
+    //  be addressed to multiple mailing lists.  However, I don't see how you
+    //  could receive a message with more than one set of List-* headers,
+    //  since each list-serve would each send you a copy.  Based on our current
+    //  decision to treat each physical message as separate, it almost seems
+    //  right to limit the list attribute to the copy that originated at the
+    //  list.  That may sound entirely wrong, but keep in mind that until we
+    //  have seen a message from the list with the List headers, we can't
+    //  definitely know it's a mailing list (although heuristics could take us
+    //  pretty far).  As such, the quasi-singular thing is appealing.
+    // Of course, the reality is that we really want to know if a message was
+    //  sent to multiple mailing lists and be able to query on that.
+    //  Additionally, our implicit-to logic needs to work on messages that
+    //  weren't relayed by the list-serve, especially messages sent to the list
+    //  by the user.
+    this._attrList = Gloda.defineAttribute({
+                        provider: this,
+                        extensionName: Gloda.BUILT_IN,
+                        attributeType: Gloda.kAttrFundamental,
+                        attributeName: "mailing-list",
+                        bindName: "mailingLists",
+                        singular: false,
+                        emptySetIsSignificant: true,
+                        facet: true,
+                        subjectNouns: [Gloda.NOUN_MESSAGE],
+                        objectNoun: Gloda.NOUN_IDENTITY,
+                        }); // not-tested, not-implemented
+  },
+
+  // Captures the address inside the first "<mailto:...>" found in a
+  //  List-Post header value; process() re-wraps the capture in angle brackets.
+  RE_LIST_POST: /<mailto:([^>]+)>/,
+
+  /**
+   *
+   * Specializations:
+   * - Mailing Lists.  Replies to a message on a mailing list frequently only
+   *   have the list-serve as the 'to', so we try to generate a synthetic 'to'
+   *   based on the author of the parent message when possible.  (The 'possible'
+   *   part is that we may not have a copy of the parent message at the time of
+   *   processing.)
+   * - Newsgroups.  Same deal as mailing lists.
+   */
+  process: function* gloda_fundattr_process(aGlodaMessage, aRawReps,
+                                            aIsNew, aCallbackHandle) {
+    let aMsgHdr = aRawReps.header;
+    let aMimeMsg = aRawReps.mime;
+
+    // -- From
+    // Let's use replyTo if available.
+    // er, since we are just dealing with mailing lists for now, forget the
+    //  reply-to...
+    // TODO: deal with default charset issues
+    let author = null;
+    /*
+    try {
+      author = aMsgHdr.getStringProperty("replyTo");
+    }
+    catch (ex) {
+    }
+    */
+    if (author == null || author == "")
+      author = aMsgHdr.author;
+
+    let normalizedListPost = "";
+    if (aMimeMsg && aMimeMsg.has("list-post")) {
+      let match = this.RE_LIST_POST.exec(aMimeMsg.get("list-post"));
+      if (match)
+        normalizedListPost = "<" + match[1] + ">";
+    }
+
+    // Do not use the MIME decoded variants of any of the email addresses
+    //  because if name is encoded and has a comma in it, it will break the
+    //  address parser (which already knows how to do the decoding anyways).
+    let [authorIdentities, toIdentities, ccIdentities, bccIdentities,
+         listIdentities] =
+      yield aCallbackHandle.pushAndGo(
+        Gloda.getOrCreateMailIdentities(aCallbackHandle,
+                                        author, aMsgHdr.recipients,
+                                        aMsgHdr.ccList, aMsgHdr.bccList,
+                                        normalizedListPost));
+
+    if (authorIdentities.length != 1) {
+      throw new Gloda.BadItemContentsError(
+        "Message with subject '" + aMsgHdr.mime2DecodedSubject +
+          "' somehow lacks a valid author.  Bailing.");
+    }
+    let authorIdentity = authorIdentities[0];
+    aGlodaMessage.from = authorIdentity;
+
+    // -- To, Cc, Bcc
+    aGlodaMessage.to = toIdentities;
+    aGlodaMessage.cc = ccIdentities;
+    aGlodaMessage.bcc = bccIdentities;
+
+    // -- Mailing List
+    if (listIdentities.length)
+      aGlodaMessage.mailingLists = listIdentities;
+
+    let findIsEncrypted = x =>
+      x.isEncrypted || (x.parts ? x.parts.some(findIsEncrypted) : false);
+
+    // -- Encryption
+    aGlodaMessage.isEncrypted = false;
+    if (aMimeMsg) {
+      aGlodaMessage.isEncrypted = findIsEncrypted(aMimeMsg);
+    }
+
+    // -- Attachments
+    if (aMimeMsg) {
+      // nsParseMailbox.cpp puts the attachment flag on msgHdrs as soon as it
+      // finds a multipart/mixed part. This is a good heuristic, but if it turns
+      // out the part has no filename, then we don't treat it as an attachment.
+      // We just streamed the message, and we have all the information to figure
+      // that out, so now is a good place to clear the flag if needed.
+      let foundRealAttachment = false;
+      let attachmentTypes = [];
+      for (let attachment of aMimeMsg.allAttachments) {
+        // We don't care about would-be attachments that are not user-intended
+        //  attachments but rather artifacts of the message content.
+        // We also want to avoid dealing with obviously bogus mime types.
+        //  (If you don't have a "/", you are probably bogus.)
+        if (attachment.isRealAttachment &&
+            attachment.contentType.includes("/")) {
+          attachmentTypes.push(MimeTypeNoun.getMimeType(attachment.contentType));
+        }
+        if (attachment.isRealAttachment)
+          foundRealAttachment = true;
+      }
+      if (attachmentTypes.length) {
+        aGlodaMessage.attachmentTypes = attachmentTypes;
+      }
+
+      let aMsgHdr = aRawReps.header;
+      let wasStreamed = aMsgHdr &&
+        !aGlodaMessage.isEncrypted &&
+        ((aMsgHdr.flags & Ci.nsMsgMessageFlags.Offline) ||
+        (aMsgHdr.folder instanceof Ci.nsIMsgLocalMailFolder));
+
+      // Clear the flag if it turns out there's no attachment after all and we
+      // streamed completely the message (if we didn't, then we have no
+      // knowledge of attachments, unless bug 673370 is fixed).
+      if (!foundRealAttachment && wasStreamed)
+        aMsgHdr.markHasAttachments(false);
+
+      // This is not the same kind of attachments as above. Now, we want to
+      // provide convenience attributes to Gloda consumers, so that they can run
+      // through the list of attachments of a given message, to possibly build a
+      // visualization on top of it. We still reject bogus mime types, which
+      // means yencode won't be supported. Oh, I feel really bad.
+      let attachmentInfos = [];
+      for (let att of aMimeMsg.allUserAttachments) {
+        attachmentInfos.push(this.glodaAttFromMimeAtt(aRawReps.trueGlodaRep,
+                                                      att));
+      }
+      aGlodaMessage.attachmentInfos = attachmentInfos;
+    }
+
+    // TODO: deal with mailing lists, including implicit-to.  this will require
+    //  convincing the indexer to pass us in the previous message if it is
+    //  available.  (which we'll simply pass to everyone... it can help body
+    //  logic for quoting purposes, etc. too.)
+
+    yield Gloda.kWorkDone;
+  },
+
+  glodaAttFromMimeAtt:
+      function gloda_fundattr_glodaAttFromMimeAtt(aGlodaMessage, aAtt) {
+    // So we don't want to store the URL because it can change over time if
+    // the message is moved. What we do is store the full URL if it's a
+    // detached attachment, otherwise just keep the part information, and
+    // rebuild the URL according to where the message is sitting.
+    let part, externalUrl;
+    if (aAtt.isExternal) {
+      externalUrl = aAtt.url;
+    } else {
+      let matches = aAtt.url.match(GlodaUtils.PART_RE);
+      if (matches && matches.length)
+        part = matches[1];
+      else
+        this._log.error("Error processing attachment: " + aAtt.url);
+    }
+    return new GlodaAttachment(aGlodaMessage,
+                               aAtt.name,
+                               aAtt.contentType,
+                               aAtt.size,
+                               part,
+                               externalUrl,
+                               aAtt.isExternal);
+  },
+
+  optimize: function* gloda_fundattr_optimize(aGlodaMessage, aRawReps,
+                                              aIsNew, aCallbackHandle) {
+
+    let aMsgHdr = aRawReps.header;
+
+    // for simplicity this is used for both involves and recipients
+    let involvesIdentities = {};
+    let involves = aGlodaMessage.involves || [];
+    let recipients = aGlodaMessage.recipients || [];
+
+    // 'me' specialization optimizations
+    let toMe = aGlodaMessage.toMe || [];
+    let fromMe = aGlodaMessage.fromMe || [];
+
+    let myIdentities = Gloda.myIdentities; // needless optimization?
+    let authorIdentity = aGlodaMessage.from;
+    let isFromMe = authorIdentity.id in myIdentities;
+
+    // The fulltext search column for the author.  We want to have in here:
+    // - The e-mail address and display name as enclosed on the message.
+    // - The name per the address book card for this e-mail address, if we have
+    //   one.
+    aGlodaMessage._indexAuthor = aMsgHdr.mime2DecodedAuthor;
+    // The fulltext search column for the recipients. (same deal)
+    aGlodaMessage._indexRecipients = aMsgHdr.mime2DecodedRecipients;
+
+    if (isFromMe)
+      aGlodaMessage.notability += this.NOTABILITY_FROM_ME;
+    else {
+      let authorCard = authorIdentity.abCard;
+      if (authorCard) {
+        aGlodaMessage.notability += this.NOTABILITY_FROM_IN_ADDR_BOOK;
+        // @testpoint gloda.noun.message.attr.authorMatches
+        aGlodaMessage._indexAuthor += ' ' + authorCard.displayName;
+      }
+    }
+
+    involves.push(authorIdentity);
+    involvesIdentities[authorIdentity.id] = true;
+
+    let involvedAddrBookCount = 0;
+
+    for (let toIdentity of aGlodaMessage.to) {
+      if (!(toIdentity.id in involvesIdentities)) {
+        involves.push(toIdentity);
+        recipients.push(toIdentity);
+        involvesIdentities[toIdentity.id] = true;
+        let toCard = toIdentity.abCard;
+        if (toCard) {
+          involvedAddrBookCount++;
+          // @testpoint gloda.noun.message.attr.recipientsMatch
+          aGlodaMessage._indexRecipients += ' ' + toCard.displayName;
+        }
+      }
+
+      // optimization attribute to-me ('I' am the parameter)
+      if (toIdentity.id in myIdentities) {
+        toMe.push([toIdentity, authorIdentity]);
+        if (aIsNew)
+          authorIdentity.contact.popularity += this.POPULARITY_TO_ME;
+      }
+      // optimization attribute from-me-to ('I' am the parameter)
+      if (isFromMe) {
+        fromMe.push([authorIdentity, toIdentity]);
+        // also, popularity
+        if (aIsNew)
+          toIdentity.contact.popularity += this.POPULARITY_FROM_ME_TO;
+      }
+    }
+    for (let ccIdentity of aGlodaMessage.cc) {
+      if (!(ccIdentity.id in involvesIdentities)) {
+        involves.push(ccIdentity);
+        recipients.push(ccIdentity);
+        involvesIdentities[ccIdentity.id] = true;
+        let ccCard = ccIdentity.abCard;
+        if (ccCard) {
+          involvedAddrBookCount++;
+          // @testpoint gloda.noun.message.attr.recipientsMatch
+          aGlodaMessage._indexRecipients += ' ' + ccCard.displayName;
+        }
+      }
+      // optimization attribute cc-me ('I' am the parameter)
+      if (ccIdentity.id in myIdentities) {
+        toMe.push([ccIdentity, authorIdentity]);
+        if (aIsNew)
+          authorIdentity.contact.popularity += this.POPULARITY_CC_ME;
+      }
+      // optimization attribute from-me-to ('I' am the parameter)
+      if (isFromMe) {
+        fromMe.push([authorIdentity, ccIdentity]);
+        // also, popularity
+        if (aIsNew)
+          ccIdentity.contact.popularity += this.POPULARITY_FROM_ME_CC;
+      }
+    }
+    // just treat bcc like cc; the intent is the same although the exact
+    //  semantics differ.
+    for (let bccIdentity of aGlodaMessage.bcc) {
+      if (!(bccIdentity.id in involvesIdentities)) {
+        involves.push(bccIdentity);
+        recipients.push(bccIdentity);
+        involvesIdentities[bccIdentity.id] = true;
+        let bccCard = bccIdentity.abCard;
+        if (bccCard) {
+          involvedAddrBookCount++;
+          // @testpoint gloda.noun.message.attr.recipientsMatch
+          aGlodaMessage._indexRecipients += ' ' + bccCard.displayName;
+        }
+      }
+      // optimization attribute cc-me ('I' am the parameter)
+      if (bccIdentity.id in myIdentities) {
+        toMe.push([bccIdentity, authorIdentity]);
+        if (aIsNew)
+          authorIdentity.contact.popularity += this.POPULARITY_BCC_ME;
+      }
+      // optimization attribute from-me-to ('I' am the parameter)
+      if (isFromMe) {
+        fromMe.push([authorIdentity, bccIdentity]);
+        // also, popularity
+        if (aIsNew)
+          bccIdentity.contact.popularity += this.POPULARITY_FROM_ME_BCC;
+      }
+    }
+
+    if (involvedAddrBookCount)
+      aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST +
+        (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL;
+
+    aGlodaMessage.involves = involves;
+    aGlodaMessage.recipients = recipients;
+    if (toMe.length) {
+      aGlodaMessage.toMe = toMe;
+      aGlodaMessage.notability += this.NOTABILITY_INVOLVING_ME;
+    }
+    if (fromMe.length)
+      aGlodaMessage.fromMe = fromMe;
+
+    // Content
+    if (aRawReps.bodyLines) {
+      aGlodaMessage._content = aRawReps.content = new GlodaContent();
+      if (this.contentWhittle({}, aRawReps.bodyLines, aGlodaMessage._content)) {
+        // we were going to do something here?
+      }
+    }
+    else {
+      aRawReps.content = null;
+    }
+
+    yield Gloda.kWorkDone;
+  },
+
+  /**
+   * Duplicates the notability logic from optimize().  Arguably optimize should
+   *  be factored to call us, grokNounItem should be factored to call us, or we
+   *  should get sufficiently fancy that our code wildly diverges.
+   */
+  score: function gloda_fundattr_score(aMessage, aContext) {
+    let score = 0;
+
+    let authorIdentity = aMessage.from;
+    if (authorIdentity.id in Gloda.myIdentities)
+      score += this.NOTABILITY_FROM_ME;
+    else if (authorIdentity.inAddressBook)
+      score += this.NOTABILITY_FROM_IN_ADDR_BOOK;
+    if (aMessage.toMe)
+      score += this.NOTABILITY_INVOLVING_ME;
+
+    let involvedAddrBookCount = 0;
+    for (let [, identity] in Iterator(aMessage.to))
+      if (identity.inAddressBook)
+        involvedAddrBookCount++;
+    for (let [, identity] in Iterator(aMessage.cc))
+      if (identity.inAddressBook)
+        involvedAddrBookCount++;
+    if (involvedAddrBookCount)
+      score += this.NOTABILITY_INVOLVING_ADDR_BOOK_FIRST +
+        (involvedAddrBookCount - 1) * this.NOTABILITY_INVOLVING_ADDR_BOOK_ADDL;
+    return score;
+  },
+
+  _countQuoteDepthAndNormalize:
+    function gloda_fundattr__countQuoteDepthAndNormalize(aLine) {
+    let count = 0;
+    let lastStartOffset = 0;
+
+    for (let i = 0; i < aLine.length; i++) {
+      let c = aLine[i];
+      if (c == ">") {
+        count++;
+        lastStartOffset = i+1;
+      }
+      else if (c == " ") {
+      }
+      else {
+        return [count,
+                lastStartOffset ? aLine.substring(lastStartOffset) : aLine];
+      }
+    }
+
+    return [count, lastStartOffset ? aLine.substring(lastStartOffset) : aLine];
+  },
+
+  /**
+   * Attempt to understand simple quoting constructs that use ">" with
+   * obvious phrases to enter the quoting block.  No support for other types
+   * of quoting at this time.  Also no support for piercing the wrapper of
+   * forwarded messages to actually be the content of the forwarded message.
+   *
+   * The body is split into alternating runs which are reported through
+   * aContent.content() (un-quoted text) and aContent.quoted() (quoted text,
+   * together with its quote depth).
+   *
+   * @param aMeta Not consulted by this method.
+   * @param aBodyLines The list of body lines; it is not modified.
+   * @param aContent The content object to feed; we call volunteerContent(),
+   *     content() and quoted() on it.
+   * @return false if aContent declined our offer to provide content (in which
+   *     case nothing is emitted), true otherwise.
+   */
+  contentWhittle: function gloda_fundattr_contentWhittle(aMeta,
+      aBodyLines, aContent) {
+    if (!aContent.volunteerContent(aContent.kPriorityBase))
+      return false;
+
+    // duplicate the list; we mutate somewhat...
+    // NOTE(review): the normalized lines are written into this copy, but every
+    //  slice handed to aContent below is taken from aBodyLines, so the
+    //  normalized text is never emitted -- confirm whether that is intended.
+    let bodyLines = aBodyLines.concat();
+
+    // lastNonBlankLine originally was just for detecting quoting idioms where
+    //  the "wrote" line was separated from the quoted block by a blank line.
+    // Now we also use it for whitespace suppression at the boundaries of
+    //  quoted and un-quoted text.  (We keep blank lines within the same
+    //  'block' of quoted or non-quoted text.)
+    // Because we now have two goals for it, and we still want to suppress blank
+    //  lines when there is a 'wrote' line involved, we introduce...
+    //  prevLastNonBlankLine!  This arguably suggests refactoring should be the
+    //  next step, but things work for now.
+    // rangeStart is the index of the first line of the run currently being
+    //  accumulated; inQuoteDepth is that run's quote depth (0 when un-quoted).
+    let rangeStart = 0, lastNonBlankLine = null, prevLastNonBlankLine = null;
+    let inQuoteDepth = 0;
+    for (let [iLine, line] of bodyLines.entries()) {
+      // blank lines never start or end a run and are not remembered as
+      //  lastNonBlankLine
+      if (!line || (line == "\xa0")) /* unicode non breaking space */
+        continue;
+
+      if (line.startsWith(">")) {
+        if (!inQuoteDepth) {
+          // transition: un-quoted text -> quoted text
+          let rangeEnd = iLine - 1;
+          let quoteRangeStart = iLine;
+          // see if the last non-blank-line was a lead-in...
+          if (lastNonBlankLine != null) {
+            // TODO: localize quote range start detection
+            if (aBodyLines[lastNonBlankLine].includes("wrote")) {
+              quoteRangeStart = lastNonBlankLine;
+              rangeEnd = lastNonBlankLine - 1;
+              // we 'used up' lastNonBlankLine, let's promote the prev guy to
+              //  be the new lastNonBlankLine for the next logic block
+              lastNonBlankLine = prevLastNonBlankLine;
+            }
+            // eat the trailing whitespace...
+            if (lastNonBlankLine != null)
+              rangeEnd = Math.min(rangeEnd, lastNonBlankLine);
+          }
+          // flush the un-quoted run, if it is non-empty
+          if (rangeEnd >= rangeStart)
+            aContent.content(aBodyLines.slice(rangeStart, rangeEnd+1));
+
+          [inQuoteDepth, line] = this._countQuoteDepthAndNormalize(line);
+          bodyLines[iLine] = line;
+          rangeStart = quoteRangeStart;
+        }
+        else {
+          // still quoted; a change in depth starts a new quoted run
+          let curQuoteDepth;
+          [curQuoteDepth, line] = this._countQuoteDepthAndNormalize(line);
+          bodyLines[iLine] = line;
+
+          if (curQuoteDepth != inQuoteDepth) {
+            // we could do some "wrote" compensation here, but it's not really
+            //  as important.  let's wait for a more clever algorithm.
+            aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth);
+            inQuoteDepth = curQuoteDepth;
+            rangeStart = iLine;
+          }
+        }
+      }
+      else {
+        // transition: quoted text -> un-quoted text
+        if (inQuoteDepth) {
+          aContent.quoted(aBodyLines.slice(rangeStart, iLine), inQuoteDepth);
+          inQuoteDepth = 0;
+          rangeStart = iLine;
+        }
+      }
+
+      prevLastNonBlankLine = lastNonBlankLine;
+      lastNonBlankLine = iLine;
+    }
+
+    // flush whatever run was still open when we ran out of lines
+    if (inQuoteDepth) {
+      aContent.quoted(aBodyLines.slice(rangeStart), inQuoteDepth);
+    }
+    else {
+      // NOTE(review): when no non-blank line was ever seen, lastNonBlankLine is
+      //  still null and null+1 evaluates to 1 -- confirm that emitting
+      //  slice(rangeStart, 1) is acceptable for an all-blank body.
+      aContent.content(aBodyLines.slice(rangeStart, lastNonBlankLine+1));
+    }
+
+    return true;
+  },
+};