// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.enterprise.adaptor;

import org.w3c.dom.*;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;


/**
 * Builds the XML feed files sent to the GSA: metadata-and-url feeds made from
 * DocId records and named-resource ACLs, and group-definition feeds.
 * This code is based on information provided by Google at
 * http://code.google.com/apis/searchappliance/documentation/64/feedsguide.html
 */
class GsaFeedFileMaker {
  // DateFormats are relatively expensive to create, and cannot be used from
  // multiple threads, so every thread lazily gets its own instance.
  private static final ThreadLocal<DateFormat> rfc822Format
      = new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
          DateFormat df = new SimpleDateFormat(
              "EEE, dd MMM yyyy HH:mm:ss Z", Locale.ENGLISH);
          df.setTimeZone(TimeZone.getTimeZone("GMT"));
          return df;
        }
      };

  private final DocIdEncoder idEncoder;
  private final boolean separateClosingRecordTagWorkaround;
  private final boolean useAuthMethodWorkaround;

  /**
   * Creates a maker with both GSA workarounds disabled.
   *
   * @param encoder converts DocIds into the URIs written to the feed
   */
  public GsaFeedFileMaker(DocIdEncoder encoder) {
    this(encoder, false, false);
  }

  /**
   * Creates a maker with explicit control over the GSA workarounds.
   *
   * @param encoder converts DocIds into the URIs written to the feed
   * @param separateClosingRecordTagWorkaround when {@code true}, records are
   *     given a text child so they never serialize as self-closing tags
   * @param useAuthMethodWorkaround when {@code true}, every record gets the
   *     attribute {@code authmethod="httpsso"}
   */
  public GsaFeedFileMaker(DocIdEncoder encoder,
      boolean separateClosingRecordTagWorkaround,
      boolean useAuthMethodWorkaround) {
    this.idEncoder = encoder;
    this.separateClosingRecordTagWorkaround
        = separateClosingRecordTagWorkaround;
    this.useAuthMethodWorkaround = useAuthMethodWorkaround;
  }

  /**
   * Creates an empty DOM document using the default JAXP builder factory.
   *
   * @throws ParserConfigurationException if no builder can be created
   */
  private static Document newDocument() throws ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    return factory.newDocumentBuilder().newDocument();
  }

  /**
   * Rebuilds {@code base} from its scheme, authority and path only, using
   * {@code query} as the query component and leaving the fragment unset.
   *
   * @throws AssertionError if the resulting URI is syntactically invalid
   */
  private static URI withQuery(URI base, String query) {
    try {
      return new URI(base.getScheme(), base.getAuthority(), base.getPath(),
          query, null);
    } catch (URISyntaxException ex) {
      throw new AssertionError(ex);
    }
  }

  /**
   * Adds the identifying comment and the feed header to the document's root.
   *
   * @param doc document used to create the nodes
   * @param root element that receives the comment and header
   * @param srcName used as the datasource name
   */
  private void constructMetadataAndUrlFeedFileHead(Document doc,
      Element root, String srcName) {
    root.appendChild(doc.createComment("GSA EasyConnector"));

    Element header = doc.createElement("header");
    root.appendChild(header);

    Element datasource = doc.createElement("datasource");
    datasource.appendChild(doc.createTextNode(srcName));
    header.appendChild(datasource);

    Element feedtype = doc.createElement("feedtype");
    feedtype.appendChild(doc.createTextNode("metadata-and-url"));
    header.appendChild(feedtype);
  }

  /**
   * Adds a single record element to the feed's group, communicating the
   * information carried by {@code docRecord}.
   */
  private void constructSingleMetadataAndUrlFeedFileRecord(
      Document doc, Element group, DocIdPusher.Record docRecord) {
    Element record = doc.createElement("record");
    group.appendChild(record);
    record.setAttribute("url",
        String.valueOf(idEncoder.encodeDocId(docRecord.getDocId())));
    // We are no longer automatically clearing the displayurl if unset. We are
    // moving the setting of displayurl to crawl-time and we don't want a lister
    // and retriever to fight.
    if (docRecord.getResultLink() != null) {
      record.setAttribute("displayurl", docRecord.getResultLink().toString());
    }
    if (docRecord.isToBeDeleted()) {
      record.setAttribute("action", "delete");
    }
    record.setAttribute("mimetype", "text/plain"); // Required but ignored :)
    if (docRecord.getLastModified() != null) {
      record.setAttribute("last-modified",
          rfc822Format.get().format(docRecord.getLastModified()));
    }
    if (docRecord.isToBeLocked()) {
      record.setAttribute("lock", "true");
    }
    if (docRecord.isToBeCrawledImmediately()) {
      record.setAttribute("crawl-immediately", "true");
    }
    if (docRecord.isToBeCrawledOnce()) {
      record.setAttribute("crawl-once", "true");
    }
    if (useAuthMethodWorkaround) {
      record.setAttribute("authmethod", "httpsso");
    }
    // TODO(pjo): record.setAttribute(no-follow,);

    if (separateClosingRecordTagWorkaround) {
      // GSA 6.14 has a feed parsing bug (fixed in patch 2) that fails to parse
      // self-closing record tags. Thus, here we force record to have a separate
      // close tag.
      record.appendChild(doc.createTextNode(" "));
    }
  }

  /**
   * Adds a single ACL tag to the provided group, communicating the named
   * resource's information provided in {@code docAcl}.
   */
  private void constructSingleMetadataAndUrlFeedFileAcl(
      Document doc, Element group, DocIdSender.AclItem docAcl) {
    Element aclElement = doc.createElement("acl");
    group.appendChild(aclElement);
    // Although it is named "fragment", we put the docIdFragment in the query
    // portion of the URI because the GSA removes fragments when it
    // "normalizes" the identifier.
    URI uri = withQuery(idEncoder.encodeDocId(docAcl.getDocId()),
        docAcl.getDocIdFragment());
    aclElement.setAttribute("url", uri.toString());

    Acl acl = docAcl.getAcl();
    if (acl.getInheritFrom() != null) {
      // Same fragment-as-query convention as for the ACL's own URL.
      URI inheritFrom = withQuery(idEncoder.encodeDocId(acl.getInheritFrom()),
          acl.getInheritFromFragment());
      aclElement.setAttribute("inherit-from", inheritFrom.toString());
    }
    // LEAF_NODE is not written out; every other type is stated explicitly.
    if (acl.getInheritanceType() != Acl.InheritanceType.LEAF_NODE) {
      aclElement.setAttribute("inheritance-type",
          acl.getInheritanceType().getCommonForm());
    }

    boolean noCase = acl.isEverythingCaseInsensitive();
    for (UserPrincipal permitUser : acl.getPermitUsers()) {
      constructMetadataAndUrlPrincipal(doc, aclElement, "permit",
          permitUser, noCase);
    }
    for (GroupPrincipal permitGroup : acl.getPermitGroups()) {
      constructMetadataAndUrlPrincipal(doc, aclElement, "permit",
          permitGroup, noCase);
    }
    for (UserPrincipal denyUser : acl.getDenyUsers()) {
      constructMetadataAndUrlPrincipal(doc, aclElement, "deny",
          denyUser, noCase);
    }
    for (GroupPrincipal denyGroup : acl.getDenyGroups()) {
      constructMetadataAndUrlPrincipal(doc, aclElement, "deny",
          denyGroup, noCase);
    }
  }

  /**
   * Appends one principal element to {@code acl}.
   *
   * @param access value of the "access" attribute ("permit" or "deny" as
   *     passed by the callers in this class)
   * @param principal user or group being described
   * @param everythingCaseInsensitive whether to mark the principal with
   *     {@code case-sensitivity-type="everything-case-insensitive"}
   */
  private void constructMetadataAndUrlPrincipal(Document doc, Element acl,
      String access, Principal principal, boolean everythingCaseInsensitive) {
    Element principalElement = doc.createElement("principal");
    principalElement.setAttribute("scope",
        principal.isUser() ? "user" : "group");
    principalElement.setAttribute("access", access);
    // The namespace attribute is only written when it is not the default one.
    if (!Principal.DEFAULT_NAMESPACE.equals(principal.getNamespace())) {
      principalElement.setAttribute("namespace", principal.getNamespace());
    }
    if (everythingCaseInsensitive) {
      principalElement.setAttribute(
          "case-sensitivity-type", "everything-case-insensitive");
    }
    principalElement.appendChild(doc.createTextNode(principal.getName()));
    acl.appendChild(principalElement);
  }

  /**
   * Adds every item to a new group element under {@code root}, one record or
   * ACL element at a time.
   *
   * @throws IllegalArgumentException if an item is neither a
   *     {@code DocIdPusher.Record} nor a {@code DocIdSender.AclItem}
   */
  private void constructMetadataAndUrlFeedFileBody(Document doc,
      Element root, List<? extends DocIdSender.Item> items) {
    Element group = doc.createElement("group");
    root.appendChild(group);
    for (DocIdSender.Item item : items) {
      if (item instanceof DocIdPusher.Record) {
        constructSingleMetadataAndUrlFeedFileRecord(doc, group,
            (DocIdPusher.Record) item);
      } else if (item instanceof DocIdSender.AclItem) {
        constructSingleMetadataAndUrlFeedFileAcl(doc, group,
            (DocIdSender.AclItem) item);
      } else {
        throw new IllegalArgumentException("Unable to process class: "
            + item.getClass().getName());
      }
    }
  }

  /** Fills {@code doc} with a complete metadata-and-url feed. */
  private void constructMetadataAndUrlFeedFile(Document doc,
      String srcName, List<? extends DocIdSender.Item> items) {
    Element root = doc.createElement("gsafeed");
    doc.appendChild(root);
    constructMetadataAndUrlFeedFileHead(doc, root, srcName);
    constructMetadataAndUrlFeedFileBody(doc, root, items);
  }

  /**
   * Serializes the feed document to a String, requesting indentation and the
   * GSA feed public DOCTYPE.
   *
   * @throws TransformerException if the transformer cannot be created or the
   *     transformation fails
   */
  private String documentToString(Document doc) throws TransformerException {
    Transformer trans = TransformerFactory.newInstance().newTransformer();
    trans.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC,
        "-//Google//DTD GSA Feeds//EN");
    trans.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "");
    trans.setOutputProperty(OutputKeys.INDENT, "yes");
    trans.setOutputProperty(OutputKeys.STANDALONE, "no");
    StringWriter sw = new StringWriter();
    trans.transform(new DOMSource(doc), new StreamResult(sw));
    return sw.toString();
  }

  /**
   * Makes a metadata-and-url feed file from the provided items and source
   * name. Is used by GsaCommunicationHandler.pushDocIds().
   *
   * @param srcName used as the feed's datasource name
   * @param items records and ACLs to place in the feed
   * @return the feed as an XML string
   * @throws IllegalStateException if the XML document cannot be built or
   *     serialized
   * @throws IllegalArgumentException if an item is of an unsupported class
   */
  public String makeMetadataAndUrlXml(String srcName,
      List<? extends DocIdSender.Item> items) {
    try {
      Document doc = newDocument();
      constructMetadataAndUrlFeedFile(doc, srcName, items);
      return documentToString(doc);
    } catch (TransformerException te) {
      // Also covers TransformerConfigurationException, which is a subclass.
      throw new IllegalStateException(te);
    } catch (ParserConfigurationException pce) {
      throw new IllegalStateException(pce);
    }
  }

  /** Creates single group definition of group principal key and members. */
  private void constructSingleMembership(Document doc, Element root,
      GroupPrincipal groupPrincipal, Collection<Principal> members,
      boolean caseSensitiveMembers) {
    Element membership = doc.createElement("membership");
    root.appendChild(membership);

    Element groupKey = doc.createElement("principal");
    membership.appendChild(groupKey);
    groupKey.setAttribute("namespace", groupPrincipal.getNamespace());
    groupKey.setAttribute("scope", "GROUP");
    groupKey.appendChild(doc.createTextNode(groupPrincipal.getName()));

    Element membersElement = doc.createElement("members");
    membership.appendChild(membersElement);
    // The same case-sensitivity value applies to every member of the group.
    String caseSensitivity = caseSensitiveMembers
        ? "EVERYTHING_CASE_SENSITIVE" : "EVERYTHING_CASE_INSENSITIVE";
    for (Principal member : members) {
      Element memberElement = doc.createElement("principal");
      memberElement.setAttribute("namespace", member.getNamespace());
      memberElement.setAttribute("scope", member.isUser() ? "USER" : "GROUP");
      memberElement.setAttribute("case-sensitivity-type", caseSensitivity);
      memberElement.appendChild(doc.createTextNode(member.getName()));
      membersElement.appendChild(memberElement);
    }
  }

  /** Adds all the groups' definitions into body. */
  private <T extends Collection<Principal>> void
      constructGroupsDefinitionsFileBody(Document doc, Element root,
      Collection<Map.Entry<GroupPrincipal, T>> items,
      boolean caseSensitiveMembers) {
    for (Map.Entry<GroupPrincipal, T> group : items) {
      constructSingleMembership(doc, root, group.getKey(), group.getValue(),
          caseSensitiveMembers);
    }
  }

  /** Puts all groups' definitions into document. */
  private <T extends Collection<Principal>> void
      constructGroupsDefinitionsFeedFile(Document doc,
      Collection<Map.Entry<GroupPrincipal, T>> items,
      boolean caseSensitiveMembers) {
    Element root = doc.createElement("xmlgroups");
    doc.appendChild(root);
    root.appendChild(doc.createComment("GSA EasyConnector"));
    constructGroupsDefinitionsFileBody(doc, root, items, caseSensitiveMembers);
  }

  // This and all the methods it calls with things from 'items' requires the
  // parameter T even though ? would normally suffice. See comment in
  // DocIdSender to learn about the Java limitation causing the need for T.
  /**
   * Makes feed file with groups and their definitions.
   *
   * @param items group principals paired with their member principals
   * @param caseSensitiveMembers whether members are marked
   *     EVERYTHING_CASE_SENSITIVE (otherwise EVERYTHING_CASE_INSENSITIVE)
   * @return the groups feed as an XML string
   * @throws IllegalStateException if the XML document cannot be built or
   *     serialized
   */
  public <T extends Collection<Principal>> String makeGroupsDefinitionsXml(
      Collection<Map.Entry<GroupPrincipal, T>> items,
      boolean caseSensitiveMembers) {
    try {
      Document doc = newDocument();
      constructGroupsDefinitionsFeedFile(doc, items, caseSensitiveMembers);
      return documentToString(doc);
    } catch (TransformerException te) {
      // Also covers TransformerConfigurationException, which is a subclass.
      throw new IllegalStateException(te);
    } catch (ParserConfigurationException pce) {
      throw new IllegalStateException(pce);
    }
  }
}
