Skip to content

gh-74598: add fnmatch.filterfalse for excluding names #121185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Doc/library/fnmatch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`.
but implemented more efficiently.


.. function:: filterfalse(names, pat)

Construct a list from those elements of the :term:`iterable` of filename
strings *names* that do not match the pattern string *pat*.
It is the same as ``[n for n in names if not fnmatch(n, pat)]``,
but implemented more efficiently.

.. versionadded:: next


.. function:: translate(pat)

Return the shell-style pattern *pat* converted to a regular expression for
Expand Down
7 changes: 7 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,13 @@ errno
(Contributed by James Roy in :gh:`126585`.)


fnmatch
-------

* Added :func:`fnmatch.filterfalse` for excluding names matching a pattern.
(Contributed by Bénédikt Tran in :gh:`74598`.)


fractions
---------

Expand Down
27 changes: 25 additions & 2 deletions Lib/fnmatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
The function translate(PATTERN) returns a regular expression
corresponding to PATTERN. (It does not compile it.)
"""

import functools
import itertools
import os
import posixpath
import re
import functools

__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"]


def fnmatch(name, pat):
"""Test whether FILENAME matches PATTERN.
Expand All @@ -35,6 +38,7 @@ def fnmatch(name, pat):
pat = os.path.normcase(pat)
return fnmatchcase(name, pat)


@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
if isinstance(pat, bytes):
Expand All @@ -45,6 +49,7 @@ def _compile_pattern(pat):
res = translate(pat)
return re.compile(res).match


def filter(names, pat):
"""Construct a list from those elements of the iterable NAMES that match PAT."""
result = []
Expand All @@ -61,6 +66,22 @@ def filter(names, pat):
result.append(name)
return result


def filterfalse(names, pat):
    """Return a list of the items of the iterable NAMES that do not match PAT.

    PAT is passed through os.path.normcase() before being compiled.  Each
    name is normalized the same way for the comparison only; the returned
    list holds the names exactly as they were supplied, in their original
    order.
    """
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is not posixpath:
        # Names must be case-normalized before they are tested, but the
        # caller gets back the un-normalized originals.
        return [
            name for name in names
            if matcher(os.path.normcase(name)) is None
        ]
    # normcase on posix is a NOP, so the names can be handed straight to
    # the compiled matcher without a Python-level loop.
    return list(itertools.filterfalse(matcher, names))


def fnmatchcase(name, pat):
"""Test whether FILENAME matches PATTERN, including case.

Expand All @@ -80,9 +101,11 @@ def translate(pat):
parts, star_indices = _translate(pat, '*', '.')
return _join_translated_parts(parts, star_indices)


_re_setops_sub = re.compile(r'([&~|])').sub
_re_escape = functools.lru_cache(maxsize=512)(re.escape)


def _translate(pat, star, question_mark):
res = []
add = res.append
Expand Down
96 changes: 58 additions & 38 deletions Lib/test/test_fnmatch.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""Test cases for the fnmatch module."""

import unittest
import os
import string
import unittest
import warnings
from fnmatch import fnmatch, fnmatchcase, translate, filter, filterfalse


IGNORECASE = os.path.normcase('P') == os.path.normcase('p')
NORMSEP = os.path.normcase('\\') == os.path.normcase('/')

from fnmatch import fnmatch, fnmatchcase, translate, filter

class FnmatchTestCase(unittest.TestCase):

Expand Down Expand Up @@ -77,35 +81,32 @@ def test_bytes(self):
self.check_match(b'foo\nbar', b'foo*')

def test_case(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match
check('abc', 'abc')
check('AbC', 'abc', ignorecase)
check('abc', 'AbC', ignorecase)
check('AbC', 'abc', IGNORECASE)
check('abc', 'AbC', IGNORECASE)
check('AbC', 'AbC')

def test_sep(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('usr/bin', 'usr/bin')
check('usr\\bin', 'usr/bin', normsep)
check('usr/bin', 'usr\\bin', normsep)
check('usr\\bin', 'usr/bin', NORMSEP)
check('usr/bin', 'usr\\bin', NORMSEP)
check('usr\\bin', 'usr\\bin')

def test_char_set(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
check(c, '[az]', c in 'az')
check(c, '[!az]', c not in 'az')
# Case insensitive.
for c in tescases:
check(c, '[AZ]', (c in 'az') and ignorecase)
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
check(c, '[AZ]', (c in 'az') and IGNORECASE)
check(c, '[!AZ]', (c not in 'az') or not IGNORECASE)
for c in string.ascii_uppercase:
check(c, '[az]', (c in 'AZ') and ignorecase)
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
check(c, '[az]', (c in 'AZ') and IGNORECASE)
check(c, '[!az]', (c not in 'AZ') or not IGNORECASE)
# Repeated same character.
for c in tescases:
check(c, '[aa]', c == 'a')
Expand All @@ -120,8 +121,6 @@ def test_char_set(self):
check('[!]', '[!]')

def test_range(self):
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
tescases = string.ascii_lowercase + string.digits + string.punctuation
for c in tescases:
Expand All @@ -131,11 +130,11 @@ def test_range(self):
check(c, '[!b-dx-z]', c not in 'bcdxyz')
# Case insensitive.
for c in tescases:
check(c, '[B-D]', (c in 'bcd') and ignorecase)
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
check(c, '[B-D]', (c in 'bcd') and IGNORECASE)
check(c, '[!B-D]', (c not in 'bcd') or not IGNORECASE)
for c in string.ascii_uppercase:
check(c, '[b-d]', (c in 'BCD') and ignorecase)
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
check(c, '[b-d]', (c in 'BCD') and IGNORECASE)
check(c, '[!b-d]', (c not in 'BCD') or not IGNORECASE)
# Upper bound == lower bound.
for c in tescases:
check(c, '[b-b]', c == 'b')
Expand All @@ -144,7 +143,7 @@ def test_range(self):
check(c, '[!-#]', c not in '-#')
check(c, '[!--.]', c not in '-.')
check(c, '[^-`]', c in '^_`')
if not (normsep and c == '/'):
if not (NORMSEP and c == '/'):
check(c, '[[-^]', c in r'[\]^')
check(c, r'[\-^]', c in r'\]^')
check(c, '[b-]', c in '-b')
Expand All @@ -160,47 +159,45 @@ def test_range(self):
check(c, '[d-bx-z]', c in 'xyz')
check(c, '[!d-bx-z]', c not in 'xyz')
check(c, '[d-b^-`]', c in '^_`')
if not (normsep and c == '/'):
if not (NORMSEP and c == '/'):
check(c, '[d-b[-^]', c in r'[\]^')

def test_sep_in_char_set(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('/', r'[/]')
check('\\', r'[\]')
check('/', r'[\]', normsep)
check('\\', r'[/]', normsep)
check('/', r'[\]', NORMSEP)
check('\\', r'[/]', NORMSEP)
check('[/]', r'[/]', False)
check(r'[\\]', r'[/]', False)
check('\\', r'[\t]')
check('/', r'[\t]', normsep)
check('/', r'[\t]', NORMSEP)
check('t', r'[\t]')
check('\t', r'[\t]', False)

def test_sep_in_range(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
check = self.check_match
check('a/b', 'a[.-0]b', not normsep)
check('a/b', 'a[.-0]b', not NORMSEP)
check('a\\b', 'a[.-0]b', False)
check('a\\b', 'a[Z-^]b', not normsep)
check('a\\b', 'a[Z-^]b', not NORMSEP)
check('a/b', 'a[Z-^]b', False)

check('a/b', 'a[/-0]b', not normsep)
check('a/b', 'a[/-0]b', not NORMSEP)
check(r'a\b', 'a[/-0]b', False)
check('a[/-0]b', 'a[/-0]b', False)
check(r'a[\-0]b', 'a[/-0]b', False)

check('a/b', 'a[.-/]b')
check(r'a\b', 'a[.-/]b', normsep)
check(r'a\b', 'a[.-/]b', NORMSEP)
check('a[.-/]b', 'a[.-/]b', False)
check(r'a[.-\]b', 'a[.-/]b', False)

check(r'a\b', r'a[\-^]b')
check('a/b', r'a[\-^]b', normsep)
check('a/b', r'a[\-^]b', NORMSEP)
check(r'a[\-^]b', r'a[\-^]b', False)
check('a[/-^]b', r'a[\-^]b', False)

check(r'a\b', r'a[Z-\]b', not normsep)
check(r'a\b', r'a[Z-\]b', not NORMSEP)
check('a/b', r'a[Z-\]b', False)
check(r'a[Z-\]b', r'a[Z-\]b', False)
check('a[Z-/]b', r'a[Z-\]b', False)
Expand Down Expand Up @@ -332,18 +329,41 @@ def test_mix_bytes_str(self):
self.assertRaises(TypeError, filter, [b'test'], '*')

def test_case(self):
ignorecase = os.path.normcase('P') == os.path.normcase('p')
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.p*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.py'])
['Test.py', 'Test.PL'] if IGNORECASE else ['Test.py'])
self.assertEqual(filter(['Test.py', 'Test.rb', 'Test.PL'], '*.P*'),
['Test.py', 'Test.PL'] if ignorecase else ['Test.PL'])
['Test.py', 'Test.PL'] if IGNORECASE else ['Test.PL'])

def test_sep(self):
normsep = os.path.normcase('\\') == os.path.normcase('/')
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr/*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr/bin'])
['usr/bin', 'usr\\lib'] if NORMSEP else ['usr/bin'])
self.assertEqual(filter(['usr/bin', 'usr', 'usr\\lib'], 'usr\\*'),
['usr/bin', 'usr\\lib'] if normsep else ['usr\\lib'])
['usr/bin', 'usr\\lib'] if NORMSEP else ['usr\\lib'])


class FilterFalseTestCase(unittest.TestCase):
    # Tests for filterfalse(), which keeps the names that do NOT match.

    def test_filterfalse(self):
        # The same selection is expected for str and for bytes input.
        str_names = ['Python', 'Ruby', 'Perl', 'Tcl']
        self.assertListEqual(filterfalse(str_names, 'P*'), ['Ruby', 'Tcl'])
        bytes_names = [b'Python', b'Ruby', b'Perl', b'Tcl']
        self.assertListEqual(filterfalse(bytes_names, b'P*'), [b'Ruby', b'Tcl'])

    def test_mix_bytes_str(self):
        # Mixing str names with a bytes pattern (or vice versa) is an error.
        with self.assertRaises(TypeError):
            filterfalse(['test'], b'*')
        with self.assertRaises(TypeError):
            filterfalse([b'test'], '*')

    def test_case(self):
        # The expected result depends on whether normcase() folds case.
        names = ['Test.py', 'Test.rb', 'Test.PL']
        if IGNORECASE:
            expected_lower = ['Test.rb']
            expected_upper = ['Test.rb']
        else:
            expected_lower = ['Test.rb', 'Test.PL']
            expected_upper = ['Test.py', 'Test.rb']
        self.assertEqual(filterfalse(names, '*.p*'), expected_lower)
        self.assertEqual(filterfalse(names, '*.P*'), expected_upper)

    def test_sep(self):
        # The expected result depends on whether normcase() unifies
        # the '/' and '\\' separators.
        names = ['usr/bin', 'usr', 'usr\\lib']
        if NORMSEP:
            expected_slash = ['usr']
            expected_backslash = ['usr']
        else:
            expected_slash = ['usr', 'usr\\lib']
            expected_backslash = ['usr/bin', 'usr']
        self.assertEqual(filterfalse(names, 'usr/*'), expected_slash)
        self.assertEqual(filterfalse(names, 'usr\\*'), expected_backslash)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add :func:`fnmatch.filterfalse` for excluding names matching a pattern.
Patch by Bénédikt Tran.
Loading