| // Copyright 2008 Google Inc. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package com.google.common.labs.matcher; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import com.google.common.base.Preconditions; |
| |
| /** |
| * This class parses a Google URL pattern into an immutable representation that |
| * provides equivalent Java regexes, |
| * exact-match patterns and prefix patterns, as appropriate. For a description |
| * of Google URL patterns, see the |
| * documentation in <a |
| * href="http://code.google.com/apis/searchappliance/documentation/50/admin/URL_patterns.html"> |
| * this document</a>. |
| * <p> |
| * All Google URL patterns can be translated into an equivalent Java regex (with |
| * some exceptions and caveats, see below). This class provides access to an |
| * equivalent Java regex through {@link #getUrlRegex()}. |
| * <p> |
| * In addition, the class provides further analysis and special kinds of |
| * patterns, depending on these top-level predicates: |
| * <ul> |
| * <li> {@link #isHostPathType()}: Returns {@code true} if the parsed pattern |
| * is a "host-path" pattern. A "host-path" pattern is a pattern that can be |
| * parsed into two regexes, a host regex and a path regex, such that a subject |
| * URL matches the original URL pattern iff the host portion matches the host |
| * regex and the path portion matches the path regex. If |
| * {@code isHostPathType()} is true, then {@link #getHostRegex()} and |
| * {@link #getPathRegex()} return the corresponding regexes. </li> |
| * <li> {@link #isPathPrefixMatch()}: Returns {@code true} if the parsed |
| * pattern is a "host-path" pattern and the path portion of the pattern is |
| * simply a fixed string that must appear at the beginning of the path. In this |
| * case, {@link #getPathPrefixString()} returns a simple string (not a regex) |
| * that can be matched against the start of the subject URL's path. </li> |
| * <li> {@link #isPathExactMatch()}: Returns {@code true} if if the parsed |
| * pattern is a "host-path" pattern and the path portion of the pattern is an |
| * exact-match string. In this case, {@link #getPathExactString()} returns a |
| * simple string (not a regex) that can be matched exactly against the subject |
| * URL's path. </li> |
| * </ul> |
| * In summary: |
| * <ul> |
| * <li> {@code getUrlRegex()} provides an equivalent Java regex for the entire |
| * pattern. </li> |
| * <li> If {@code isHostPathType()} is true, then, {@code getHostRegex()} and |
| * {@code getPathRegex()} return regexes for the two portions.</li> |
| * <li> If {@code isPrefixPathMatch()} is true, then, |
| * {@code getPrefixPathMatchPattern()} returns a simple string pattern for |
| * prefix match.</li> |
| * <li> If {@code isPathExactMatch()} is true, then, in addition, |
| * {@code getPathExactMatchPattern()} returns a simple string pattern for exact |
| * match. |
| * </ul> |
| * <p> |
| * Note: the "path" portion is the hierarchical part, that is, everything |
| * following the first slash (not the {@code ://}). The "host" portion is |
| * everything before that. For example: for the URL |
| * {@code http://www.example.com/foo/bar}, the protocol-authority portion is |
| * {@code http://www.example.com/} and the file portion is {@code /foo/bar}. |
| * Note the the middle slash appears in both portions. |
| * <p> |
| * A parser is provided to separate a URL string into host and path portions: |
| * {@link AnalyzedUrl}. You can access the host and path portions through |
| * {@link AnalyzedUrl#getHostPart()} and {@link AnalyzedUrl#getPathPart()}. It |
| * is recommended that this parser be used rather than the standard |
| * {@code getHost()} and {@code getPath()} functions of {@link java.net.URL}, |
| * because this class and {@code AnalyzedUrl} share parsing infrastructure and |
| * at present, there is at least one significant difference: |
| * {@code AnalyzedUrl.getPathPart()} includes the leading slash but |
| * {@code java.net.URL.getPath()} does not. TODO: fix this. |
| * <p> |
| * Exceptions and caveats: not all forms of Google URL patterns are currently |
| * supported. At present, these exceptions and special cases apply: |
| * <ul> |
| * <li> {@code www?:} patterns are not supported </li> |
| * <li> {@code regexp:} and {@code regexpCase:} patterns are translated simply |
| * by removing those two prefixes. Thus, the remaining pattern is assumed to be |
| * a Java regex, not a GNU regex (as documented on the <a |
| * href="http://code.google.com/apis/searchappliance/documentation/50/admin/URL_patterns.html"> |
| * reference site</a>). </li> |
| * <li> {@code regexpIgnoreCase:} patterns are handled similarly. In this case, |
| * the prefix is removed and the pattern is enclosed in {@code (?i:}...{@code )}</li> |
| * <li> Exception patterns (patterns with leading {@code -} or {@code +-}) are |
| * not supported.</li> |
| * </ul> |
| */ |
| public class ParsedUrlPattern { |
| |
| private final String urlPattern; |
| private final String urlRegex; |
| |
| private final boolean hostPathType; |
| private final String hostRegex; |
| |
| private final String pathRegex; |
| private final boolean pathExactMatch; |
| private final String pathExactMatchPattern; |
| |
| private final boolean prefixPathMatch; |
| private final String prefixPathMatchPattern; |
| |
| /** |
| * Parses a Google URL pattern to Java regexes. Google URL patterns are |
| * publicly documented <a |
| * href="http://code.google.com/apis/searchappliance/documentation/50/admin/URL_patterns.html"> |
| * here </a>. |
| * |
| * @param urlPattern A Google URL pattern |
| * @throws IllegalArgumentException if the URL pattern is unsupported or can |
| * not be parsed |
| */ |
| public ParsedUrlPattern(String urlPattern) { |
| ParsedUrlPatternBuilder t = new ParsedUrlPatternBuilder(urlPattern); |
| this.urlPattern = t.urlPattern; |
| this.urlRegex = t.urlRegex; |
| this.hostPathType = t.hostPathType; |
| this.hostRegex = t.hostRegex; |
| this.pathRegex = t.pathRegex; |
| this.pathExactMatch = t.pathExactMatch; |
| this.pathExactMatchPattern = t.pathExactMatchPattern; |
| this.prefixPathMatch = t.prefixPathMatch; |
| this.prefixPathMatchPattern = t.prefixPathMatchPattern; |
| |
| } |
| |
| /** |
| * Returns a regex that matches the entire URL. A subject string matches the |
| * URL pattern iff it matches this regex. |
| * |
| * @return a regex that matches the entire URL |
| */ |
| public String getUrlRegex() { |
| return urlRegex; |
| } |
| |
| /** |
| * Returns {@code true} if the parsed pattern is a "host-path" pattern. A |
| * "host-path" pattern is a pattern that can be parsed into two regexes, a |
| * host regex and a path regex, such that a subject url matches the pattern |
| * iff the host portion matches the host regex and the path portion matches |
| * the path regex. |
| * <p> |
| * For example, the pattern {@code example.com/foo} might be parsed into two |
| * regexes, host regex: {@code example.com/$} and path regex: {@code ^/foo}. |
| */ |
| public boolean isHostPathType() { |
| return hostPathType; |
| } |
| |
| /** |
| * Returns a regex that matches the host (protocol and authority) portion of |
| * the URL. If this is a host-path regex then a subject string matches the url |
| * pattern iff the host portion matches this regex and the the path portion |
| * matches the corresponding path regex (obtained by {@link #getPathRegex()}). |
| * <p> |
| * This should be used against URLs that have been parsed using the |
| * {@link AnalyzedUrl} class. |
| * <p> |
| * Note: this should only be used if {@code isHostPathType()} is true; if not, |
| * then this method throws an {@code IllegalStateException}. |
| * |
| * @return a regex that matches the host (protocol and authority) portion of |
| * the URL |
| * @throws IllegalStateException if {@code isHostPathType()} is false |
| */ |
| public String getHostRegex() { |
| Preconditions.checkState(isHostPathType()); |
| return hostRegex; |
| } |
| |
| /** |
| * Returns a regex that matches the path (hierarchical) portion of the URL. |
| * <p> |
| * This should be used against URLs that have been parsed using the |
| * {@link AnalyzedUrl} class. |
| * <p> |
| * Note: this should only be used if {@link #isHostPathType()} is true; if |
| * not, then this method throws an {@code IllegalStateException}. |
| * |
| * @return a regex that matches the path (hierarchical) portion of the URL |
| * @throws IllegalStateException if {@code isHostPathType()} is false |
| */ |
| public String getPathRegex() { |
| Preconditions.checkState(isHostPathType()); |
| return pathRegex; |
| } |
| |
| /** |
| * Indicates whether the parsed pattern gives a prefix match pattern. If this |
| * is true, then this pattern can be obtained using |
| * {@link #getPathPrefixString()}. |
| * |
| * @return {@code true} if the parsed pattern gives an prefix match pattern. |
| */ |
| public boolean isPathPrefixMatch() { |
| return prefixPathMatch; |
| } |
| |
| /** |
| * If {@link #isPathPrefixMatch()} is true, then this returns a simple string |
| * that can be matched against the path portion of a subject string using |
| * {@link String#startsWith(String)}. |
| * <p> |
| * Note: this should only be used if {@code isPrefixPathMatch()} is true; if |
| * not, then this method throws an {@code IllegalStateException}. |
| * |
| * @return a string that matches a prefix of the path portion of the URL |
| * @throws IllegalStateException if {@code isPathPrefixMatch()} is false |
| */ |
| public String getPathPrefixString() { |
| Preconditions.checkState(isPathPrefixMatch()); |
| return prefixPathMatchPattern; |
| } |
| |
| /** |
| * Returns whether the parsed pattern gives an exact match pattern. If this is |
| * true, then this pattern can be obtained using {@link #getPathExactString()}. |
| * |
| * @return {@code true} if the parsed pattern gives an exact match pattern. |
| */ |
| public boolean isPathExactMatch() { |
| return pathExactMatch; |
| } |
| |
| /** |
| * If {@link #isPathExactMatch()} is true, then this returns a simple string |
| * that can be matched against the path portion of a subject string using |
| * {@link String#equals(Object)}. Note: this should only be used if |
| * {@code isPathExactMatch()} is true; if not, then this method throws an |
| * {@code IllegalStateException}. |
| * |
| * @return a string that matches the entire path |
| * @throws IllegalStateException if {@code isPathExactMatch()} is false |
| */ |
| public String getPathExactString() { |
| Preconditions.checkState(isPathExactMatch()); |
| return pathExactMatchPattern; |
| } |
| |
| /** |
| * Returns the original URL pattern. |
| * |
| * @return the original URL pattern. |
| */ |
| public String getUrlPattern() { |
| return urlPattern; |
| } |
| |
| // This is the master meta-regex. This is used both for parsing URL patterns |
| // and for parsing URLs |
| private static final String URL_METAPATTERN_STRING = |
| "\\A(\\^)?((?:([^/:$<]*)((?:(?::|(?::/))?\\Z)|(?:://)))?" + |
| // ___1_____2a__3_________4b__c____d____________e |
| "(?:([^/:@]*)@)?([^/:<]*)?(?::([^/<]*))?)(/|(?:</>))?(?:(.*?)(\\Z|\\$)?)?\\Z" |
| // f__5___________6_________g___7__________8__h________i__9____0 |
| ; |
| |
| // Groups: (capturing groups are numbered, non-capturing are lettered) |
| // 1 anchor (^) |
| // 2 protocol + authority (not including /) |
| // a protocol + ((nothing or : or :/ followed by end of pattern) or ::/) |
| // 3 protocol |
| // 4 protocol separator ((nothing or : or :/ followed by end of pattern) or |
| // ::/) |
| // b nothing or : or :/ followed by end of pattern |
| // c : or :/ |
| // d :/ |
| // e ::/ |
| // f userinfo + @ |
| // 5 userinfo |
| // 6 host |
| // g : + port |
| // 7 port |
| // 8 slash (after authority) (could be a slash or "</>") |
| // h </> |
| // i file + anchor |
| // 9 file |
| // 10 anchor ($) |
| |
| // This Pattern is package visible so it can be used by AnalyzedUrl |
| static final Pattern URL_METAPATTERN = Pattern.compile(URL_METAPATTERN_STRING); |
| |
| // As above, the enum is package visible so it can be used by AnalyzedUrl |
| // Note: if you change the master regex, you should change this enum to match |
| static enum MetaRegexGroup { |
| LEFT_ANCHOR(1), PROTOCOL_AUTHORITY(2), PROTOCOL(3), PROTOCOL_SEPARATOR(4), USERINFO(5), |
| HOST(6), PORT(7), SLASH_AFTER_AUTHORITY(8), FILE(9), RIGHT_ANCHOR(10); |
| private int n; |
| |
| MetaRegexGroup(int n) { |
| this.n = n; |
| } |
| |
| int intValue() { |
| return n; |
| } |
| } |
| |
| // This static helper is also shared with the AnalyzedUrl |
| static String getGroup(Matcher m, MetaRegexGroup g) { |
| String s = m.group(g.intValue()); |
| return (s == null) ? "" : s; |
| } |
| |
| private static class ParsedUrlPatternBuilder { |
| |
| public String urlPattern; |
| public String urlRegex; |
| |
| public boolean hostPathType; |
| public String hostRegex; |
| |
| public String pathRegex; |
| public boolean pathExactMatch; |
| public String pathExactMatchPattern; |
| |
| public boolean prefixPathMatch; |
| public String prefixPathMatchPattern; |
| |
| ParsedUrlPatternBuilder(String urlPattern) { |
| checkPatternValidity(urlPattern); |
| this.urlPattern = urlPattern; |
| analyze(); |
| } |
| |
| private void analyze() { |
| if (urlPattern.startsWith(CONTAINS_PATTERNS_METAPATTERN_PREFIX)) { |
| urlRegex = |
| Pattern.quote(urlPattern.substring(CONTAINS_PATTERNS_METAPATTERN_PREFIX.length())); |
| initNonHostPathPattern(); |
| return; |
| } |
| |
| if (urlPattern.startsWith(REGEXP_PATTERNS_METAPATTERN_PREFIX)) { |
| urlRegex = urlPattern.substring(REGEXP_PATTERNS_METAPATTERN_PREFIX.length()); |
| initNonHostPathPattern(); |
| return; |
| } |
| |
| if (urlPattern.startsWith(REGEXPCASE_PATTERNS_METAPATTERN_PREFIX)) { |
| urlRegex = urlPattern.substring(REGEXPCASE_PATTERNS_METAPATTERN_PREFIX.length()); |
| initNonHostPathPattern(); |
| return; |
| } |
| |
| if (urlPattern.startsWith(REGEXPIGNORECASE_PATTERNS_METAPATTERN_PREFIX)) { |
| urlRegex = |
| "(?i:" + urlPattern.substring(REGEXPIGNORECASE_PATTERNS_METAPATTERN_PREFIX.length()) |
| + ")"; |
| initNonHostPathPattern(); |
| return; |
| } |
| |
| initHostPathPattern(); |
| |
| if (isNullOrEmpty(urlPattern)) { |
| prefixPathMatch = true; |
| return; |
| } |
| if (testForAndHandleNoSlashSuffixPattern()) { |
| return; |
| } |
| Matcher m = URL_METAPATTERN.matcher(urlPattern); |
| Preconditions.checkArgument(m.find(), "problem parsing urlpattern: " + urlPattern); |
| urlRegex = buildUrlRegex(m); |
| pathRegex = buildPathRegex(m); |
| hostRegex = buildHostRegex(m); |
| } |
| |
| private void initNonHostPathPattern() { |
| hostPathType = false; |
| pathRegex = null; |
| hostRegex = null; |
| pathExactMatch = false; |
| pathExactMatchPattern = null; |
| prefixPathMatch = false; |
| prefixPathMatchPattern = null; |
| } |
| |
| private void initHostPathPattern() { |
| hostPathType = true; |
| urlRegex = ""; |
| pathRegex = ""; |
| hostRegex = ""; |
| pathExactMatch = false; |
| pathExactMatchPattern = null; |
| prefixPathMatch = false; |
| prefixPathMatchPattern = "/"; |
| } |
| |
| // A suffix pattern (ends in $) that has no slash just doesn't parse well |
| // with |
| // the metapattern. So we use a special pattern for this case. |
| private boolean testForAndHandleNoSlashSuffixPattern() { |
| Matcher m = NO_SLASH_SUFFIX_PATTERN.matcher(urlPattern); |
| if (!m.find()) { |
| return false; |
| } |
| urlRegex = Pattern.quote(m.group(1)) + OUTPUT_RIGHT_ANCHOR_PATTERN_STRING; |
| pathRegex = urlRegex; |
| hostRegex = ""; |
| pathExactMatch = false; |
| pathExactMatchPattern = null; |
| prefixPathMatch = false; |
| prefixPathMatchPattern = null; |
| return true; |
| } |
| |
| // suffix patterns that contain no slash jam up my master meta-regex: the |
| // string before the $ gets put in the wrong capturing group. I fought with |
| // it |
| // a while but then bailed and just made a special meta-regex for them |
| private static final String NO_SLASH_SUFFIX_PATTERN_STRING = "\\A([^/]*)\\$\\Z"; |
| private static final Pattern NO_SLASH_SUFFIX_PATTERN = |
| Pattern.compile(NO_SLASH_SUFFIX_PATTERN_STRING); |
| |
| private static final String CONTAINS_PATTERNS_METAPATTERN_PREFIX = "contains:"; |
| |
| private static final String REGEXP_PATTERNS_METAPATTERN_PREFIX = "regexp:"; |
| |
| private static final String REGEXPCASE_PATTERNS_METAPATTERN_PREFIX = "regexpCase:"; |
| |
| private static final String REGEXPIGNORECASE_PATTERNS_METAPATTERN_PREFIX = "regexpIgnoreCase:"; |
| |
| private static final String UNSUPPORTED_PATTERNS_METAPATTERN_STRING = "\\A(?:(www\\?:)|(-))"; |
| private static final Pattern UNSUPPORTED_PATTERNS_METAPATTERN = |
| Pattern.compile(UNSUPPORTED_PATTERNS_METAPATTERN_STRING); |
| |
| private static final String OUTPUT_RIGHT_ANCHOR_PATTERN_STRING = "\\Z"; |
| private static final String OUTPUT_LEFT_ANCHOR_PATTERN_STRING = "\\A"; |
| |
| private static final String OUTPUT_SLASH = "/"; |
| private static final String OUTPUT_ANY_OR_NO_PORT_PATTERN = "(\\:[^/]*)?"; |
| private static final String OUTPUT_ANY_PORT_PATTERN = "\\:[^/]*"; |
| |
| private static boolean isNullOrEmpty(String s) { |
| return (s == null || s.length() < 1); |
| } |
| |
| // These helper functions whose names match buildSOMETHINGPattern build a |
| // regex to match the SOMETHING in their names. They should be usable, |
| // appropriately quoted regexes |
| private static String buildProtocolUserinfoHostPattern(Matcher m) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append(getGroup(m, MetaRegexGroup.PROTOCOL)); |
| sb.append(getGroup(m, MetaRegexGroup.PROTOCOL_SEPARATOR)); |
| String userInfo = getGroup(m, MetaRegexGroup.USERINFO); |
| if (!isNullOrEmpty(userInfo)) { |
| sb.append(userInfo); |
| sb.append("@"); |
| } |
| sb.append(getGroup(m, MetaRegexGroup.HOST)); |
| String unquotedPattern = sb.toString(); |
| return isNullOrEmpty(unquotedPattern) ? "" : Pattern.quote(unquotedPattern); |
| } |
| |
| // port is tricky because the absence of a port in a pattern should match |
| // any |
| // specific port in a target |
| private static String buildPortPattern(Matcher m) { |
| StringBuilder sb = new StringBuilder(); |
| String port = getGroup(m, MetaRegexGroup.PORT); |
| if (isNullOrEmpty(port)) { |
| // port was empty - match any port - default or explicit |
| sb.append(OUTPUT_ANY_OR_NO_PORT_PATTERN); |
| } else { |
| if (port.equals("*")) { |
| // port was explicitly "*" - match any explicitly specified port |
| sb.append(OUTPUT_ANY_PORT_PATTERN); |
| } else { |
| // port was explicit and not "*" - match only that port |
| sb.append("\\:"); |
| sb.append(Pattern.quote(port)); |
| } |
| } |
| return sb.toString(); |
| } |
| |
| private static String buildUnquotedFilePattern(Matcher m) { |
| return getGroup(m, MetaRegexGroup.FILE); |
| } |
| |
| private static String buildQuotedFilePattern(Matcher m) { |
| String unquotedPattern = buildUnquotedFilePattern(m); |
| return isNullOrEmpty(unquotedPattern) ? "" : Pattern.quote(unquotedPattern); |
| } |
| |
| // the helper functions whose names match buildSOMETHINGRegex each build one |
| // of the three public regexes: the urlRegex, the protocolAuthorityRegex and |
| // the fileRegex. |
| |
| // The main reason that the urlRegex is not simply the concatenation of the |
| // protocolAuthorityRegex and the fileRegex is the anchors. Both for |
| // correctness and efficiency, we want to use anchors only where |
| // appropriate: |
| // using ^A.*foo is considerably slower than just using foo. |
| private String buildUrlRegex(Matcher m) { |
| StringBuilder sb = new StringBuilder(); |
| String leftAnchor = getGroup(m, MetaRegexGroup.LEFT_ANCHOR); |
| String protocolUserinfoHostPattern = buildProtocolUserinfoHostPattern(m); |
| String portPattern = buildPortPattern(m); |
| String slashAfterAuthority = getGroup(m, MetaRegexGroup.SLASH_AFTER_AUTHORITY); |
| String filePattern = buildQuotedFilePattern(m); |
| String rightAnchor = getGroup(m, MetaRegexGroup.RIGHT_ANCHOR); |
| // prefix patterns need to be handled specially |
| if (!isNullOrEmpty(leftAnchor)) { |
| sb.append(OUTPUT_LEFT_ANCHOR_PATTERN_STRING); |
| } |
| if (!isNullOrEmpty(protocolUserinfoHostPattern)) { |
| sb.append(protocolUserinfoHostPattern); |
| } |
| if (!isNullOrEmpty(portPattern)) { |
| if (sb.length() > 0) { |
| sb.append(portPattern); |
| } |
| } |
| if (!isNullOrEmpty(slashAfterAuthority)) { |
| if ("</>".equals(slashAfterAuthority)) { |
| if (sb.length() < 1) { |
| sb.append(OUTPUT_LEFT_ANCHOR_PATTERN_STRING); |
| sb.append("[^/]*//[^/]*"); |
| } |
| } |
| sb.append(OUTPUT_SLASH); |
| } |
| if (!isNullOrEmpty(filePattern)) { |
| sb.append(filePattern); |
| } |
| if (!isNullOrEmpty(rightAnchor)) { |
| sb.append(rightAnchor); |
| } |
| return sb.toString(); |
| } |
| |
| private String buildHostRegex(Matcher m) { |
| StringBuilder sb = new StringBuilder(); |
| String leftAnchor = getGroup(m, MetaRegexGroup.LEFT_ANCHOR); |
| String protocolUserinfoHostPattern = buildProtocolUserinfoHostPattern(m); |
| String portPattern = buildPortPattern(m); |
| String slashAfterAuthority = getGroup(m, MetaRegexGroup.SLASH_AFTER_AUTHORITY); |
| // prefix patterns need to be handled specially |
| if (!isNullOrEmpty(leftAnchor)) { |
| sb.append(OUTPUT_LEFT_ANCHOR_PATTERN_STRING); |
| } |
| if (!isNullOrEmpty(protocolUserinfoHostPattern)) { |
| sb.append(protocolUserinfoHostPattern); |
| } |
| if (!isNullOrEmpty(portPattern)) { |
| sb.append(portPattern); |
| } |
| if (!isNullOrEmpty(slashAfterAuthority)) { |
| sb.append(OUTPUT_SLASH); |
| } |
| return sb.toString(); |
| } |
| |
| // We expect that, in practice, the fileRegex will be used much more often |
| // than the protocolAuthority regex (there will probably be a hashtable for |
| // the protocol-authority portion), so we really want to makes sure that the |
| // fileRegexes are simple prefix matches, as often as possible. |
| private String buildPathRegex(Matcher m) { |
| boolean hasLeftAnchor = false; |
| boolean hasRightAnchor = false; |
| StringBuilder sb = new StringBuilder(); |
| String protocolAuthority = getGroup(m, MetaRegexGroup.PROTOCOL_AUTHORITY); |
| String slashAfterAuthority = getGroup(m, MetaRegexGroup.SLASH_AFTER_AUTHORITY); |
| String unquotedFilePattern = buildUnquotedFilePattern(m); |
| String rightAnchor = getGroup(m, MetaRegexGroup.RIGHT_ANCHOR); |
| // two conditions for this being an prefix pattern: |
| // either there was a protocolAuthority OR there was a </> |
| // slashAfterAuthority |
| if (!isNullOrEmpty(protocolAuthority) || "</>".equals(slashAfterAuthority)) { |
| hasLeftAnchor = true; |
| sb.append(OUTPUT_LEFT_ANCHOR_PATTERN_STRING); |
| } |
| if (!isNullOrEmpty(slashAfterAuthority)) { |
| sb.append(OUTPUT_SLASH); |
| } |
| sb.append(Pattern.quote(unquotedFilePattern)); |
| if (!isNullOrEmpty(rightAnchor)) { |
| hasRightAnchor = true; |
| sb.append(OUTPUT_RIGHT_ANCHOR_PATTERN_STRING); |
| } |
| if (hasLeftAnchor) { |
| if (hasRightAnchor) { |
| this.pathExactMatch = true; |
| this.pathExactMatchPattern = "/" + unquotedFilePattern; |
| this.prefixPathMatch = false; |
| this.prefixPathMatchPattern = null; |
| } else { |
| this.pathExactMatch = false; |
| this.pathExactMatchPattern = null; |
| this.prefixPathMatch = true; |
| this.prefixPathMatchPattern = "/" + unquotedFilePattern; |
| } |
| } |
| return sb.toString(); |
| } |
| |
| private static void checkPatternValidity(String s) { |
| Preconditions.checkNotNull(s); |
| Matcher m = UNSUPPORTED_PATTERNS_METAPATTERN.matcher(s); |
| Preconditions.checkArgument(!m.find(), "unsupported urlpattern: " + s); |
| } |
| } |
| } |