python/re2_test.py - re2 - Git at Google

 # Copyright 2019 The RE2 Authors.  All Rights Reserved.
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
 """Tests for google3.third_party.re2.python.re2."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import collections
 import pickle
 import re
 import six

 from absl.testing import absltest
 from absl.testing import parameterized
 import re2


 class OptionsTest(parameterized.TestCase):

   @parameterized.parameters(*re2.Options.NAMES)
   def test_option(self, name):
     options = re2.Options()
     value = getattr(options, name)
     if isinstance(value, re2.Options.Encoding):
       value = next(v for v in type(value).__members__.values() if v != value)
     elif isinstance(value, bool):
       value = not value
     elif isinstance(value, int):
       value = value + 1
     else:
       raise TypeError('option {!r}: {!r} {!r}'.format(name, type(value), value))
     setattr(options, name, value)
     self.assertEqual(value, getattr(options, name))


 class Re2CompileTest(parameterized.TestCase):
   """Contains tests that apply to the re2 module only.

   We disagree with Python on the string types of group names,
   so there is no point attempting to verify consistency.
   """

   @parameterized.parameters(
       (u'(foo*)(?P<bar>qux+)', 2, [(u'bar', 2)]),
       (b'(foo*)(?P<bar>qux+)', 2, [(b'bar', 2)]),
   )
   def test_compile(self, pattern, expected_groups, expected_groupindex):
     regexp = re2.compile(pattern)
     if re2._HAVE_LRU_CACHE:
       self.assertIs(regexp, re2.compile(pattern))  # cached
     self.assertIsNotNone(regexp.options)
     self.assertEqual(expected_groups, regexp.groups)
     self.assertDictEqual(dict(expected_groupindex), regexp.groupindex)

   def test_compile_with_options(self):
     options = re2.Options()
     options.max_mem = 100
     with self.assertRaisesRegex(re2.error, 'pattern too large'):
       re2.compile('.{1000}', options=options)

   def test_programsize_reverseprogramsize(self):
     regexp = re2.compile('a+b')
     self.assertEqual(7, regexp.programsize)
     self.assertEqual(7, regexp.reverseprogramsize)

   def test_programfanout_reverseprogramfanout(self):
     regexp = re2.compile('a+b')
     self.assertListEqual([1, 1], regexp.programfanout)
     self.assertListEqual([3], regexp.reverseprogramfanout)


 Params = collections.namedtuple(
     'Params', ('pattern', 'text', 'spans', 'search', 'match', 'fullmatch'))

 PARAMS = [
     Params(u'\\d+', u'Hello, world.', None, False, False, False),
     Params(b'\\d+', b'Hello, world.', None, False, False, False),
     Params(u'\\s+', u'Hello, world.', [(6, 7)], True, False, False),
     Params(b'\\s+', b'Hello, world.', [(6, 7)], True, False, False),
     Params(u'\\w+', u'Hello, world.', [(0, 5)], True, True, False),
     Params(b'\\w+', b'Hello, world.', [(0, 5)], True, True, False),
     Params(u'(\\d+)?', u'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
     Params(b'(\\d+)?', b'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
     Params(u'youtube(_device|_md|_gaia|_multiday|_multiday_gaia)?',
            u'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
     Params(b'youtube(_device|_md|_gaia|_multiday|_multiday_gaia)?',
            b'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
 ]


 def upper(match):
   return match.group().upper()


 class ReRegexpTest(parameterized.TestCase):
   """Contains tests that apply to the re and re2 modules."""

   MODULE = re

   @parameterized.parameters((p.pattern,) for p in PARAMS)
   def test_pickle(self, pattern):
     regexp = self.MODULE.compile(pattern)
     rick = pickle.loads(pickle.dumps(regexp))
     self.assertEqual(regexp.pattern, rick.pattern)

   @parameterized.parameters(
       (p.pattern, p.text, (p.spans if p.search else None)) for p in PARAMS)
   def test_search(self, pattern, text, expected_spans):
     match = self.MODULE.search(pattern, text)
     if expected_spans is None:
       self.assertIsNone(match)
     else:
       spans = [match.span(group) for group in range(match.re.groups + 1)]
       self.assertListEqual(expected_spans, spans)

   def test_search_with_pos_and_endpos(self):
     regexp = self.MODULE.compile(u'.+')  # empty string NOT allowed
     text = u'I \u2665 RE2!'
     # Note that len(text) is the position of the empty string at the end of
     # text, so range() stops at len(text) + 1 in order to include len(text).
     for pos in range(len(text) + 1):
       for endpos in range(pos, len(text) + 1):
         match = regexp.search(text, pos=pos, endpos=endpos)
         if pos == endpos:
           self.assertIsNone(match)
         else:
           self.assertEqual(pos, match.pos)
           self.assertEqual(endpos, match.endpos)
           self.assertEqual(pos, match.start())
           self.assertEqual(endpos, match.end())
           self.assertTupleEqual((pos, endpos), match.span())

   def test_search_with_bogus_pos_and_endpos(self):
     regexp = self.MODULE.compile(u'.*')  # empty string allowed
     text = u'I \u2665 RE2!'

     match = regexp.search(text, pos=-100)
     self.assertEqual(0, match.pos)
     match = regexp.search(text, pos=100)
     self.assertEqual(8, match.pos)

     match = regexp.search(text, endpos=-100)
     self.assertEqual(0, match.endpos)
     match = regexp.search(text, endpos=100)
     self.assertEqual(8, match.endpos)

     match = regexp.search(text, pos=100, endpos=-100)
     self.assertIsNone(match)

   @parameterized.parameters(
       (p.pattern, p.text, (p.spans if p.match else None)) for p in PARAMS)
   def test_match(self, pattern, text, expected_spans):
     match = self.MODULE.match(pattern, text)
     if expected_spans is None:
       self.assertIsNone(match)
     else:
       spans = [match.span(group) for group in range(match.re.groups + 1)]
       self.assertListEqual(expected_spans, spans)

   @parameterized.parameters(
       (p.pattern, p.text, (p.spans if p.fullmatch else None)) for p in PARAMS)
   def test_fullmatch(self, pattern, text, expected_spans):
     # NOT supported by the Python 2 re module!
     if not (six.PY2 and self.MODULE is re):
       match = self.MODULE.fullmatch(pattern, text)
       if expected_spans is None:
         self.assertIsNone(match)
       else:
         spans = [match.span(group) for group in range(match.re.groups + 1)]
         self.assertListEqual(expected_spans, spans)

   @parameterized.parameters(
       (u'', u'', [(0, 0)]),
       (b'', b'', [(0, 0)]),
       (u'', u'x', [(0, 0), (1, 1)]),
       (b'', b'x', [(0, 0), (1, 1)]),
       (u'', u'xy', [(0, 0), (1, 1), (2, 2)]),
       (b'', b'xy', [(0, 0), (1, 1), (2, 2)]),
       (u'.', u'xy', [(0, 1), (1, 2)]),
       (b'.', b'xy', [(0, 1), (1, 2)]),
       (u'x', u'xy', [(0, 1)]),
       (b'x', b'xy', [(0, 1)]),
       (u'y', u'xy', [(1, 2)]),
       (b'y', b'xy', [(1, 2)]),
       (u'z', u'xy', []),
       (b'z', b'xy', []),
       (u'\\w*', u'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
                                    (13, 13)]),
       (b'\\w*', b'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
                                    (13, 13)]),
   )
   def test_finditer(self, pattern, text, expected_matches):
     matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
     self.assertListEqual(expected_matches, matches)

   @parameterized.parameters(
       (u'\\w\\w+', u'Hello, world.', [u'Hello', u'world']),
       (b'\\w\\w+', b'Hello, world.', [b'Hello', b'world']),
       (u'(\\w)\\w+', u'Hello, world.', [u'H', u'w']),
       (b'(\\w)\\w+', b'Hello, world.', [b'H', b'w']),
       (u'(\\w)(\\w+)', u'Hello, world.', [(u'H', u'ello'), (u'w', u'orld')]),
       (b'(\\w)(\\w+)', b'Hello, world.', [(b'H', b'ello'), (b'w', b'orld')]),
       (u'(\\w)(\\w+)?', u'Hello, w.', [(u'H', u'ello'), (u'w', u'')]),
       (b'(\\w)(\\w+)?', b'Hello, w.', [(b'H', b'ello'), (b'w', b'')]),
   )
   def test_findall(self, pattern, text, expected_matches):
     matches = self.MODULE.findall(pattern, text)
     self.assertListEqual(expected_matches, matches)

   @parameterized.parameters(
       (u'\\W+', u'Hello, world.', -1, [u'Hello, world.']),
       (b'\\W+', b'Hello, world.', -1, [b'Hello, world.']),
       (u'\\W+', u'Hello, world.', 0, [u'Hello', u'world', u'']),
       (b'\\W+', b'Hello, world.', 0, [b'Hello', b'world', b'']),
       (u'\\W+', u'Hello, world.', 1, [u'Hello', u'world.']),
       (b'\\W+', b'Hello, world.', 1, [b'Hello', b'world.']),
       (u'(\\W+)', u'Hello, world.', -1, [u'Hello, world.']),
       (b'(\\W+)', b'Hello, world.', -1, [b'Hello, world.']),
       (u'(\\W+)', u'Hello, world.', 0, [u'Hello', u', ', u'world', u'.', u'']),
       (b'(\\W+)', b'Hello, world.', 0, [b'Hello', b', ', b'world', b'.', b'']),
       (u'(\\W+)', u'Hello, world.', 1, [u'Hello', u', ', u'world.']),
       (b'(\\W+)', b'Hello, world.', 1, [b'Hello', b', ', b'world.']),
   )
   def test_split(self, pattern, text, maxsplit, expected_pieces):
     pieces = self.MODULE.split(pattern, text, maxsplit)
     self.assertListEqual(expected_pieces, pieces)

   @parameterized.parameters(
       (u'\\w+', upper, u'Hello, world.', -1, u'Hello, world.', 0),
       (b'\\w+', upper, b'Hello, world.', -1, b'Hello, world.', 0),
       (u'\\w+', upper, u'Hello, world.', 0, u'HELLO, WORLD.', 2),
       (b'\\w+', upper, b'Hello, world.', 0, b'HELLO, WORLD.', 2),
       (u'\\w+', upper, u'Hello, world.', 1, u'HELLO, world.', 1),
       (b'\\w+', upper, b'Hello, world.', 1, b'HELLO, world.', 1),
       (u'\\w+', u'MEEP', u'Hello, world.', -1, u'Hello, world.', 0),
       (b'\\w+', b'MEEP', b'Hello, world.', -1, b'Hello, world.', 0),
       (u'\\w+', u'MEEP', u'Hello, world.', 0, u'MEEP, MEEP.', 2),
       (b'\\w+', b'MEEP', b'Hello, world.', 0, b'MEEP, MEEP.', 2),
       (u'\\w+', u'MEEP', u'Hello, world.', 1, u'MEEP, world.', 1),
       (b'\\w+', b'MEEP', b'Hello, world.', 1, b'MEEP, world.', 1),
       (u'\\\\', u'\\\\\\\\', u'Hello,\\world.', 0, u'Hello,\\\\world.', 1),
       (b'\\\\', b'\\\\\\\\', b'Hello,\\world.', 0, b'Hello,\\\\world.', 1),
   )
   def test_subn_sub(self, pattern, repl, text, count, expected_joined_pieces,
                     expected_numsplit):
     joined_pieces, numsplit = self.MODULE.subn(pattern, repl, text, count)
     self.assertEqual(expected_joined_pieces, joined_pieces)
     self.assertEqual(expected_numsplit, numsplit)

     joined_pieces = self.MODULE.sub(pattern, repl, text, count)
     self.assertEqual(expected_joined_pieces, joined_pieces)


 class Re2RegexpTest(ReRegexpTest):
   """Contains tests that apply to the re2 module only."""

   MODULE = re2

   def test_compile_with_latin1_encoding(self):
     options = re2.Options()
     options.encoding = re2.Options.Encoding.LATIN1
     with self.assertRaisesRegex(re2.error,
                                 ('string type of pattern is Text .*, but '
                                  'encoding specified in options is LATIN1')):
       re2.compile(u'.?', options=options)

     # ... whereas this is fine, of course.
     re2.compile(b'.?', options=options)

   @parameterized.parameters(
       (u'\\p{Lo}', u'\u0ca0_\u0ca0', [(0, 1), (2, 3)]),
       (b'\\p{Lo}', b'\xe0\xb2\xa0_\xe0\xb2\xa0', [(0, 3), (4, 7)]),
   )
   def test_finditer_with_utf8(self, pattern, text, expected_matches):
     matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
     self.assertListEqual(expected_matches, matches)

   def test_purge(self):
     if re2._HAVE_LRU_CACHE:
       re2.compile('Goodbye, world.')
       self.assertGreater(re2._Regexp._make.cache_info().currsize, 0)
       re2.purge()
       self.assertEqual(re2._Regexp._make.cache_info().currsize, 0)


 class Re2EscapeTest(parameterized.TestCase):
   """Contains tests that apply to the re2 module only.

   We disagree with Python on the escaping of some characters,
   so there is no point attempting to verify consistency.
   """

   @parameterized.parameters(
       (u'a*b+c?', u'a\\*b\\+c\\?'),
       (b'a*b+c?', b'a\\*b\\+c\\?'),
   )
   def test_escape(self, pattern, expected_escaped):
     escaped = re2.escape(pattern)
     self.assertEqual(expected_escaped, escaped)


 class ReMatchTest(parameterized.TestCase):
   """Contains tests that apply to the re and re2 modules."""

   MODULE = re

   def test_expand(self):
     pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
     text = u'I \u2665 RE2!\n'
     match = self.MODULE.search(pattern, text)

     self.assertEqual(u'\u2665\n!', match.expand(u'\\1\\n\\2'))
     self.assertEqual(u'\u2665\n!', match.expand(u'\\g<1>\\n\\g<2>'))
     self.assertEqual(u'\u2665\n!', match.expand(u'\\g<S>\\n\\g<P>'))
     self.assertEqual(u'\\1\\2\n\u2665!', match.expand(u'\\\\1\\\\2\\n\\1\\2'))

   def test_expand_with_octal(self):
     pattern = u'()()()()()()()()()(\\w+)'
     text = u'Hello, world.'
     match = self.MODULE.search(pattern, text)

     self.assertEqual(u'Hello\n', match.expand(u'\\g<0>\\n'))
     self.assertEqual(u'Hello\n', match.expand(u'\\g<10>\\n'))

     self.assertEqual(u'\x00\n', match.expand(u'\\0\\n'))
     self.assertEqual(u'\x00\n', match.expand(u'\\00\\n'))
     self.assertEqual(u'\x00\n', match.expand(u'\\000\\n'))
     self.assertEqual(u'\x000\n', match.expand(u'\\0000\\n'))

     self.assertEqual(u'\n', match.expand(u'\\1\\n'))
     self.assertEqual(u'Hello\n', match.expand(u'\\10\\n'))
     self.assertEqual(u'@\n', match.expand(u'\\100\\n'))
     self.assertEqual(u'@0\n', match.expand(u'\\1000\\n'))

   def test_getitem_group_groups_groupdict(self):
     pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
     text = u'Hello, world.\nI \u2665 RE2!\nGoodbye, world.\n'
     match = self.MODULE.search(pattern, text)

     # NOT supported by the Python 2 re module!
     if not (six.PY2 and self.MODULE is re):
       self.assertEqual(u'\u2665 RE2!', match[0])
       self.assertEqual(u'\u2665', match[1])
       self.assertEqual(u'!', match[2])
       self.assertEqual(u'\u2665', match[u'S'])
       self.assertEqual(u'!', match[u'P'])

     self.assertEqual(u'\u2665 RE2!', match.group())
     self.assertEqual(u'\u2665 RE2!', match.group(0))
     self.assertEqual(u'\u2665', match.group(1))
     self.assertEqual(u'!', match.group(2))
     self.assertEqual(u'\u2665', match.group(u'S'))
     self.assertEqual(u'!', match.group(u'P'))

     self.assertTupleEqual((u'\u2665', u'!'), match.group(1, 2))
     self.assertTupleEqual((u'\u2665', u'!'), match.group(u'S', u'P'))
     self.assertTupleEqual((u'\u2665', u'!'), match.groups())
     self.assertDictEqual({u'S': u'\u2665', u'P': u'!'}, match.groupdict())

   def test_bogus_group_start_end_and_span(self):
     pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
     text = u'I \u2665 RE2!\n'
     match = self.MODULE.search(pattern, text)

     self.assertRaises(IndexError, match.group, -1)
     self.assertRaises(IndexError, match.group, 3)
     self.assertRaises(IndexError, match.group, 'X')

     self.assertRaises(IndexError, match.start, -1)
     self.assertRaises(IndexError, match.start, 3)

     self.assertRaises(IndexError, match.end, -1)
     self.assertRaises(IndexError, match.end, 3)

     self.assertRaises(IndexError, match.span, -1)
     self.assertRaises(IndexError, match.span, 3)

   @parameterized.parameters(
       (u'((a)(b))((c)(d))', u'foo bar qux', None, None),
       (u'(?P<one>(a)(b))((c)(d))', u'foo abcd qux', 4, None),
       (u'(?P<one>(a)(b))(?P<four>(c)(d))', u'foo abcd qux', 4, 'four'),
   )
   def test_lastindex_lastgroup(self, pattern, text, expected_lastindex,
                                expected_lastgroup):
     match = self.MODULE.search(pattern, text)
     if expected_lastindex is None:
       self.assertIsNone(match)
     else:
       self.assertEqual(expected_lastindex, match.lastindex)
       self.assertEqual(expected_lastgroup, match.lastgroup)


 class Re2MatchTest(ReMatchTest):
   """Contains tests that apply to the re2 module only."""

   MODULE = re2


 class SetTest(absltest.TestCase):

   def test_search(self):
     s = re2.Set.SearchSet()
     self.assertEqual(0, s.Add('\\d+'))
     self.assertEqual(1, s.Add('\\s+'))
     self.assertEqual(2, s.Add('\\w+'))
     self.assertRaises(re2.error, s.Add, '(MEEP')
     s.Compile()
     self.assertItemsEqual([1, 2], s.Match('Hello, world.'))

   def test_match(self):
     s = re2.Set.MatchSet()
     self.assertEqual(0, s.Add('\\d+'))
     self.assertEqual(1, s.Add('\\s+'))
     self.assertEqual(2, s.Add('\\w+'))
     self.assertRaises(re2.error, s.Add, '(MEEP')
     s.Compile()
     self.assertItemsEqual([2], s.Match('Hello, world.'))

   def test_fullmatch(self):
     s = re2.Set.FullMatchSet()
     self.assertEqual(0, s.Add('\\d+'))
     self.assertEqual(1, s.Add('\\s+'))
     self.assertEqual(2, s.Add('\\w+'))
     self.assertRaises(re2.error, s.Add, '(MEEP')
     s.Compile()
     self.assertIsNone(s.Match('Hello, world.'))


 class FilterTest(absltest.TestCase):

   def test_match(self):
     f = re2.Filter()
     self.assertEqual(0, f.Add('Hello, \\w+\\.'))
     self.assertEqual(1, f.Add('\\w+, world\\.'))
     self.assertEqual(2, f.Add('Goodbye, \\w+\\.'))
     self.assertRaises(re2.error, f.Add, '(MEEP')
     f.Compile()
     self.assertItemsEqual([0, 1], f.Match('Hello, world.', potential=True))
     self.assertItemsEqual([0, 1], f.Match('HELLO, WORLD.', potential=True))
     self.assertItemsEqual([0, 1], f.Match('Hello, world.'))
     self.assertIsNone(f.Match('HELLO, WORLD.'))


 if __name__ == '__main__':
   absltest.main()
	# Copyright 2019 The RE2 Authors. All Rights Reserved.
	# Use of this source code is governed by a BSD-style
	# license that can be found in the LICENSE file.
	"""Tests for google3.third_party.re2.python.re2."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import collections
	import pickle
	import re
	import six

	from absl.testing import absltest
	from absl.testing import parameterized
	import re2


	class OptionsTest(parameterized.TestCase):

	@parameterized.parameters(*re2.Options.NAMES)
	def test_option(self, name):
	options = re2.Options()
	value = getattr(options, name)
	if isinstance(value, re2.Options.Encoding):
	value = next(v for v in type(value).__members__.values() if v != value)
	elif isinstance(value, bool):
	value = not value
	elif isinstance(value, int):
	value = value + 1
	else:
	raise TypeError('option {!r}: {!r} {!r}'.format(name, type(value), value))
	setattr(options, name, value)
	self.assertEqual(value, getattr(options, name))


	class Re2CompileTest(parameterized.TestCase):
	"""Contains tests that apply to the re2 module only.

	We disagree with Python on the string types of group names,
	so there is no point attempting to verify consistency.
	"""

	@parameterized.parameters(
	(u'(foo*)(?P<bar>qux+)', 2, [(u'bar', 2)]),
	(b'(foo*)(?P<bar>qux+)', 2, [(b'bar', 2)]),
	)
	def test_compile(self, pattern, expected_groups, expected_groupindex):
	regexp = re2.compile(pattern)
	if re2._HAVE_LRU_CACHE:
	self.assertIs(regexp, re2.compile(pattern)) # cached
	self.assertIsNotNone(regexp.options)
	self.assertEqual(expected_groups, regexp.groups)
	self.assertDictEqual(dict(expected_groupindex), regexp.groupindex)

	def test_compile_with_options(self):
	options = re2.Options()
	options.max_mem = 100
	with self.assertRaisesRegex(re2.error, 'pattern too large'):
	re2.compile('.{1000}', options=options)

	def test_programsize_reverseprogramsize(self):
	regexp = re2.compile('a+b')
	self.assertEqual(7, regexp.programsize)
	self.assertEqual(7, regexp.reverseprogramsize)

	def test_programfanout_reverseprogramfanout(self):
	regexp = re2.compile('a+b')
	self.assertListEqual([1, 1], regexp.programfanout)
	self.assertListEqual([3], regexp.reverseprogramfanout)


	Params = collections.namedtuple(
	'Params', ('pattern', 'text', 'spans', 'search', 'match', 'fullmatch'))

	PARAMS = [
	Params(u'\\d+', u'Hello, world.', None, False, False, False),
	Params(b'\\d+', b'Hello, world.', None, False, False, False),
	Params(u'\\s+', u'Hello, world.', [(6, 7)], True, False, False),
	Params(b'\\s+', b'Hello, world.', [(6, 7)], True, False, False),
	Params(u'\\w+', u'Hello, world.', [(0, 5)], True, True, False),
	Params(b'\\w+', b'Hello, world.', [(0, 5)], True, True, False),
	Params(u'(\\d+)?', u'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
	Params(b'(\\d+)?', b'Hello, world.', [(0, 0), (-1, -1)], True, True, False),
	Params(u'youtube(_device\|_md\|_gaia\|_multiday\|_multiday_gaia)?',
	u'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
	Params(b'youtube(_device\|_md\|_gaia\|_multiday\|_multiday_gaia)?',
	b'youtube_ads', [(0, 7), (-1, -1)], True, True, False),
	]


	def upper(match):
	return match.group().upper()


	class ReRegexpTest(parameterized.TestCase):
	"""Contains tests that apply to the re and re2 modules."""

	MODULE = re

	@parameterized.parameters((p.pattern,) for p in PARAMS)
	def test_pickle(self, pattern):
	regexp = self.MODULE.compile(pattern)
	rick = pickle.loads(pickle.dumps(regexp))
	self.assertEqual(regexp.pattern, rick.pattern)

	@parameterized.parameters(
	(p.pattern, p.text, (p.spans if p.search else None)) for p in PARAMS)
	def test_search(self, pattern, text, expected_spans):
	match = self.MODULE.search(pattern, text)
	if expected_spans is None:
	self.assertIsNone(match)
	else:
	spans = [match.span(group) for group in range(match.re.groups + 1)]
	self.assertListEqual(expected_spans, spans)

	def test_search_with_pos_and_endpos(self):
	regexp = self.MODULE.compile(u'.+') # empty string NOT allowed
	text = u'I \u2665 RE2!'
	# Note that len(text) is the position of the empty string at the end of
	# text, so range() stops at len(text) + 1 in order to include len(text).
	for pos in range(len(text) + 1):
	for endpos in range(pos, len(text) + 1):
	match = regexp.search(text, pos=pos, endpos=endpos)
	if pos == endpos:
	self.assertIsNone(match)
	else:
	self.assertEqual(pos, match.pos)
	self.assertEqual(endpos, match.endpos)
	self.assertEqual(pos, match.start())
	self.assertEqual(endpos, match.end())
	self.assertTupleEqual((pos, endpos), match.span())

	def test_search_with_bogus_pos_and_endpos(self):
	regexp = self.MODULE.compile(u'.*') # empty string allowed
	text = u'I \u2665 RE2!'

	match = regexp.search(text, pos=-100)
	self.assertEqual(0, match.pos)
	match = regexp.search(text, pos=100)
	self.assertEqual(8, match.pos)

	match = regexp.search(text, endpos=-100)
	self.assertEqual(0, match.endpos)
	match = regexp.search(text, endpos=100)
	self.assertEqual(8, match.endpos)

	match = regexp.search(text, pos=100, endpos=-100)
	self.assertIsNone(match)

	@parameterized.parameters(
	(p.pattern, p.text, (p.spans if p.match else None)) for p in PARAMS)
	def test_match(self, pattern, text, expected_spans):
	match = self.MODULE.match(pattern, text)
	if expected_spans is None:
	self.assertIsNone(match)
	else:
	spans = [match.span(group) for group in range(match.re.groups + 1)]
	self.assertListEqual(expected_spans, spans)

	@parameterized.parameters(
	(p.pattern, p.text, (p.spans if p.fullmatch else None)) for p in PARAMS)
	def test_fullmatch(self, pattern, text, expected_spans):
	# NOT supported by the Python 2 re module!
	if not (six.PY2 and self.MODULE is re):
	match = self.MODULE.fullmatch(pattern, text)
	if expected_spans is None:
	self.assertIsNone(match)
	else:
	spans = [match.span(group) for group in range(match.re.groups + 1)]
	self.assertListEqual(expected_spans, spans)

	@parameterized.parameters(
	(u'', u'', [(0, 0)]),
	(b'', b'', [(0, 0)]),
	(u'', u'x', [(0, 0), (1, 1)]),
	(b'', b'x', [(0, 0), (1, 1)]),
	(u'', u'xy', [(0, 0), (1, 1), (2, 2)]),
	(b'', b'xy', [(0, 0), (1, 1), (2, 2)]),
	(u'.', u'xy', [(0, 1), (1, 2)]),
	(b'.', b'xy', [(0, 1), (1, 2)]),
	(u'x', u'xy', [(0, 1)]),
	(b'x', b'xy', [(0, 1)]),
	(u'y', u'xy', [(1, 2)]),
	(b'y', b'xy', [(1, 2)]),
	(u'z', u'xy', []),
	(b'z', b'xy', []),
	(u'\\w*', u'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
	(13, 13)]),
	(b'\\w*', b'Hello, world.', [(0, 5), (5, 5), (6, 6), (7, 12), (12, 12),
	(13, 13)]),
	)
	def test_finditer(self, pattern, text, expected_matches):
	matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
	self.assertListEqual(expected_matches, matches)

	@parameterized.parameters(
	(u'\\w\\w+', u'Hello, world.', [u'Hello', u'world']),
	(b'\\w\\w+', b'Hello, world.', [b'Hello', b'world']),
	(u'(\\w)\\w+', u'Hello, world.', [u'H', u'w']),
	(b'(\\w)\\w+', b'Hello, world.', [b'H', b'w']),
	(u'(\\w)(\\w+)', u'Hello, world.', [(u'H', u'ello'), (u'w', u'orld')]),
	(b'(\\w)(\\w+)', b'Hello, world.', [(b'H', b'ello'), (b'w', b'orld')]),
	(u'(\\w)(\\w+)?', u'Hello, w.', [(u'H', u'ello'), (u'w', u'')]),
	(b'(\\w)(\\w+)?', b'Hello, w.', [(b'H', b'ello'), (b'w', b'')]),
	)
	def test_findall(self, pattern, text, expected_matches):
	matches = self.MODULE.findall(pattern, text)
	self.assertListEqual(expected_matches, matches)

	@parameterized.parameters(
	(u'\\W+', u'Hello, world.', -1, [u'Hello, world.']),
	(b'\\W+', b'Hello, world.', -1, [b'Hello, world.']),
	(u'\\W+', u'Hello, world.', 0, [u'Hello', u'world', u'']),
	(b'\\W+', b'Hello, world.', 0, [b'Hello', b'world', b'']),
	(u'\\W+', u'Hello, world.', 1, [u'Hello', u'world.']),
	(b'\\W+', b'Hello, world.', 1, [b'Hello', b'world.']),
	(u'(\\W+)', u'Hello, world.', -1, [u'Hello, world.']),
	(b'(\\W+)', b'Hello, world.', -1, [b'Hello, world.']),
	(u'(\\W+)', u'Hello, world.', 0, [u'Hello', u', ', u'world', u'.', u'']),
	(b'(\\W+)', b'Hello, world.', 0, [b'Hello', b', ', b'world', b'.', b'']),
	(u'(\\W+)', u'Hello, world.', 1, [u'Hello', u', ', u'world.']),
	(b'(\\W+)', b'Hello, world.', 1, [b'Hello', b', ', b'world.']),
	)
	def test_split(self, pattern, text, maxsplit, expected_pieces):
	pieces = self.MODULE.split(pattern, text, maxsplit)
	self.assertListEqual(expected_pieces, pieces)

	@parameterized.parameters(
	(u'\\w+', upper, u'Hello, world.', -1, u'Hello, world.', 0),
	(b'\\w+', upper, b'Hello, world.', -1, b'Hello, world.', 0),
	(u'\\w+', upper, u'Hello, world.', 0, u'HELLO, WORLD.', 2),
	(b'\\w+', upper, b'Hello, world.', 0, b'HELLO, WORLD.', 2),
	(u'\\w+', upper, u'Hello, world.', 1, u'HELLO, world.', 1),
	(b'\\w+', upper, b'Hello, world.', 1, b'HELLO, world.', 1),
	(u'\\w+', u'MEEP', u'Hello, world.', -1, u'Hello, world.', 0),
	(b'\\w+', b'MEEP', b'Hello, world.', -1, b'Hello, world.', 0),
	(u'\\w+', u'MEEP', u'Hello, world.', 0, u'MEEP, MEEP.', 2),
	(b'\\w+', b'MEEP', b'Hello, world.', 0, b'MEEP, MEEP.', 2),
	(u'\\w+', u'MEEP', u'Hello, world.', 1, u'MEEP, world.', 1),
	(b'\\w+', b'MEEP', b'Hello, world.', 1, b'MEEP, world.', 1),
	(u'\\\\', u'\\\\\\\\', u'Hello,\\world.', 0, u'Hello,\\\\world.', 1),
	(b'\\\\', b'\\\\\\\\', b'Hello,\\world.', 0, b'Hello,\\\\world.', 1),
	)
	def test_subn_sub(self, pattern, repl, text, count, expected_joined_pieces,
	expected_numsplit):
	joined_pieces, numsplit = self.MODULE.subn(pattern, repl, text, count)
	self.assertEqual(expected_joined_pieces, joined_pieces)
	self.assertEqual(expected_numsplit, numsplit)

	joined_pieces = self.MODULE.sub(pattern, repl, text, count)
	self.assertEqual(expected_joined_pieces, joined_pieces)


	class Re2RegexpTest(ReRegexpTest):
	"""Contains tests that apply to the re2 module only."""

	MODULE = re2

	def test_compile_with_latin1_encoding(self):
	options = re2.Options()
	options.encoding = re2.Options.Encoding.LATIN1
	with self.assertRaisesRegex(re2.error,
	('string type of pattern is Text .*, but '
	'encoding specified in options is LATIN1')):
	re2.compile(u'.?', options=options)

	# ... whereas this is fine, of course.
	re2.compile(b'.?', options=options)

	@parameterized.parameters(
	(u'\\p{Lo}', u'\u0ca0_\u0ca0', [(0, 1), (2, 3)]),
	(b'\\p{Lo}', b'\xe0\xb2\xa0_\xe0\xb2\xa0', [(0, 3), (4, 7)]),
	)
	def test_finditer_with_utf8(self, pattern, text, expected_matches):
	matches = [match.span() for match in self.MODULE.finditer(pattern, text)]
	self.assertListEqual(expected_matches, matches)

	def test_purge(self):
	if re2._HAVE_LRU_CACHE:
	re2.compile('Goodbye, world.')
	self.assertGreater(re2._Regexp._make.cache_info().currsize, 0)
	re2.purge()
	self.assertEqual(re2._Regexp._make.cache_info().currsize, 0)


	class Re2EscapeTest(parameterized.TestCase):
	"""Contains tests that apply to the re2 module only.

	We disagree with Python on the escaping of some characters,
	so there is no point attempting to verify consistency.
	"""

	@parameterized.parameters(
	(u'ab+c?', u'a\\b\\+c\\?'),
	(b'ab+c?', b'a\\b\\+c\\?'),
	)
	def test_escape(self, pattern, expected_escaped):
	escaped = re2.escape(pattern)
	self.assertEqual(expected_escaped, escaped)


	class ReMatchTest(parameterized.TestCase):
	"""Contains tests that apply to the re and re2 modules."""

	MODULE = re

	def test_expand(self):
	pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
	text = u'I \u2665 RE2!\n'
	match = self.MODULE.search(pattern, text)

	self.assertEqual(u'\u2665\n!', match.expand(u'\\1\\n\\2'))
	self.assertEqual(u'\u2665\n!', match.expand(u'\\g<1>\\n\\g<2>'))
	self.assertEqual(u'\u2665\n!', match.expand(u'\\g<S>\\n\\g<P>'))
	self.assertEqual(u'\\1\\2\n\u2665!', match.expand(u'\\\\1\\\\2\\n\\1\\2'))

	def test_expand_with_octal(self):
	pattern = u'()()()()()()()()()(\\w+)'
	text = u'Hello, world.'
	match = self.MODULE.search(pattern, text)

	self.assertEqual(u'Hello\n', match.expand(u'\\g<0>\\n'))
	self.assertEqual(u'Hello\n', match.expand(u'\\g<10>\\n'))

	self.assertEqual(u'\x00\n', match.expand(u'\\0\\n'))
	self.assertEqual(u'\x00\n', match.expand(u'\\00\\n'))
	self.assertEqual(u'\x00\n', match.expand(u'\\000\\n'))
	self.assertEqual(u'\x000\n', match.expand(u'\\0000\\n'))

	self.assertEqual(u'\n', match.expand(u'\\1\\n'))
	self.assertEqual(u'Hello\n', match.expand(u'\\10\\n'))
	self.assertEqual(u'@\n', match.expand(u'\\100\\n'))
	self.assertEqual(u'@0\n', match.expand(u'\\1000\\n'))

	def test_getitem_group_groups_groupdict(self):
	pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
	text = u'Hello, world.\nI \u2665 RE2!\nGoodbye, world.\n'
	match = self.MODULE.search(pattern, text)

	# NOT supported by the Python 2 re module!
	if not (six.PY2 and self.MODULE is re):
	self.assertEqual(u'\u2665 RE2!', match[0])
	self.assertEqual(u'\u2665', match[1])
	self.assertEqual(u'!', match[2])
	self.assertEqual(u'\u2665', match[u'S'])
	self.assertEqual(u'!', match[u'P'])

	self.assertEqual(u'\u2665 RE2!', match.group())
	self.assertEqual(u'\u2665 RE2!', match.group(0))
	self.assertEqual(u'\u2665', match.group(1))
	self.assertEqual(u'!', match.group(2))
	self.assertEqual(u'\u2665', match.group(u'S'))
	self.assertEqual(u'!', match.group(u'P'))

	self.assertTupleEqual((u'\u2665', u'!'), match.group(1, 2))
	self.assertTupleEqual((u'\u2665', u'!'), match.group(u'S', u'P'))
	self.assertTupleEqual((u'\u2665', u'!'), match.groups())
	self.assertDictEqual({u'S': u'\u2665', u'P': u'!'}, match.groupdict())

	def test_bogus_group_start_end_and_span(self):
	pattern = u'(?P<S>[\u2600-\u26ff]+).*?(?P<P>[^\\s\\w]+)'
	text = u'I \u2665 RE2!\n'
	match = self.MODULE.search(pattern, text)

	self.assertRaises(IndexError, match.group, -1)
	self.assertRaises(IndexError, match.group, 3)
	self.assertRaises(IndexError, match.group, 'X')

	self.assertRaises(IndexError, match.start, -1)
	self.assertRaises(IndexError, match.start, 3)

	self.assertRaises(IndexError, match.end, -1)
	self.assertRaises(IndexError, match.end, 3)

	self.assertRaises(IndexError, match.span, -1)
	self.assertRaises(IndexError, match.span, 3)

	@parameterized.parameters(
	(u'((a)(b))((c)(d))', u'foo bar qux', None, None),
	(u'(?P<one>(a)(b))((c)(d))', u'foo abcd qux', 4, None),
	(u'(?P<one>(a)(b))(?P<four>(c)(d))', u'foo abcd qux', 4, 'four'),
	)
	def test_lastindex_lastgroup(self, pattern, text, expected_lastindex,
	expected_lastgroup):
	match = self.MODULE.search(pattern, text)
	if expected_lastindex is None:
	self.assertIsNone(match)
	else:
	self.assertEqual(expected_lastindex, match.lastindex)
	self.assertEqual(expected_lastgroup, match.lastgroup)


	class Re2MatchTest(ReMatchTest):
	"""Contains tests that apply to the re2 module only."""

	MODULE = re2


	class SetTest(absltest.TestCase):

	def test_search(self):
	s = re2.Set.SearchSet()
	self.assertEqual(0, s.Add('\\d+'))
	self.assertEqual(1, s.Add('\\s+'))
	self.assertEqual(2, s.Add('\\w+'))
	self.assertRaises(re2.error, s.Add, '(MEEP')
	s.Compile()
	self.assertItemsEqual([1, 2], s.Match('Hello, world.'))

	def test_match(self):
	s = re2.Set.MatchSet()
	self.assertEqual(0, s.Add('\\d+'))
	self.assertEqual(1, s.Add('\\s+'))
	self.assertEqual(2, s.Add('\\w+'))
	self.assertRaises(re2.error, s.Add, '(MEEP')
	s.Compile()
	self.assertItemsEqual([2], s.Match('Hello, world.'))

	def test_fullmatch(self):
	s = re2.Set.FullMatchSet()
	self.assertEqual(0, s.Add('\\d+'))
	self.assertEqual(1, s.Add('\\s+'))
	self.assertEqual(2, s.Add('\\w+'))
	self.assertRaises(re2.error, s.Add, '(MEEP')
	s.Compile()
	self.assertIsNone(s.Match('Hello, world.'))


	class FilterTest(absltest.TestCase):

	def test_match(self):
	f = re2.Filter()
	self.assertEqual(0, f.Add('Hello, \\w+\\.'))
	self.assertEqual(1, f.Add('\\w+, world\\.'))
	self.assertEqual(2, f.Add('Goodbye, \\w+\\.'))
	self.assertRaises(re2.error, f.Add, '(MEEP')
	f.Compile()
	self.assertItemsEqual([0, 1], f.Match('Hello, world.', potential=True))
	self.assertItemsEqual([0, 1], f.Match('HELLO, WORLD.', potential=True))
	self.assertItemsEqual([0, 1], f.Match('Hello, world.'))
	self.assertIsNone(f.Match('HELLO, WORLD.'))


	if __name__ == '__main__':
	absltest.main()