import re
# --- Literal characters and digit classes -----------------------------------
# Patterns are written as raw strings so that backslash sequences such as \d
# reach the regex engine untouched instead of being parsed as (invalid)
# Python string escapes.

# A plain literal matches every non-overlapping occurrence of itself.
text = 'abcdef abcde abc Hồ Chí Minh'
print(re.findall(r'abc', text))
# -> ['abc', 'abc', 'abc']

text = 'abc123xyz define "123" var g = 123;'

# A single literal character matches only that character.
print(re.findall(r'1', text))
# -> ['1', '1', '1']

# \d matches any single digit.
print(re.findall(r'\d', text))
# -> ['1', '2', '3', '1', '2', '3', '1', '2', '3']

# \D is the complement of \d: any single character that is not a digit.
print(re.findall(r'\D', text))
# -> ['a', 'b', 'c', 'x', 'y', 'z', ' ', 'd', 'e', 'f', 'i', 'n', 'e', ' ',
#     '"', '"', ' ', 'v', 'a', 'r', ' ', 'g', ' ', '=', ' ', ';']
# --- Wildcard, character sets and word classes ------------------------------

# '.' matches any character (except a newline); '\.' matches a literal dot.
# Here: any three characters followed by a real period.
text = 'cat. 896. ?=+. abc1'
print(re.findall(r'...\.', text))
# -> ['cat.', '896.', '?=+.']

# [cmf] matches exactly one of the listed characters.
text = 'can man fan dan ran pan'
print(re.findall(r'[cmf]an', text))
# -> ['can', 'man', 'fan']

# [^b] matches any single character except 'b'.
text = 'hog dog bog'
print(re.findall(r'[^b]og', text))
# -> ['hog', 'dog']

# A negated set excludes individual characters, not the word "some":
# every 's', 'o', 'm' and 'e' is dropped wherever it appears.
text = 'something or someone your mind'
print(re.findall(r'[^some]', text))
# -> ['t', 'h', 'i', 'n', 'g', ' ', 'r', ' ', 'n', ' ', 'y', 'u', 'r', ' ',
#     'i', 'n', 'd']

# [A-Z] is a character range; \w matches a single word character.
text = 'Ana Bob Cpc aax bby ccz'
print(re.findall(r'[A-Z]\w\w', text))
# -> ['Ana', 'Bob', 'Cpc']

# \W is the complement of \w; in this text only the spaces qualify.
print(re.findall(r'\W', text))
# -> [' ', ' ', ' ', ' ', ' ']
# --- Quantifiers -------------------------------------------------------------

text = 'wazzzzzup wazzzup wazup'

# {3} means exactly three repetitions of the preceding item.
print(re.findall(r'z{3}', text))
# -> ['zzz', 'zzz']

# {2,4} means between two and four repetitions, as many as possible (greedy).
print(re.findall(r'z{2,4}', text))
# -> ['zzzz', 'zzz']

text = 'aaaabcc aabbbbc aacc a'

# '+' means one or more: at least one 'b' followed by at least one 'c'.
print(re.findall(r'b+c+', text))
# -> ['bcc', 'bbbbc']

# '*' means zero or more, so the 'b' run may be absent entirely.
print(re.findall(r'b*c+', text))
# -> ['bcc', 'bbbbc', 'cc']

# '\?' is a literal question mark; '.' never crosses a newline, so each
# line of the text is matched on its own.
text = '''1 file found?
2 file found?
24 files found?
No files found.'''
print(re.findall(r'.+\?', text))
# -> ['1 file found?', '2 file found?', '24 files found?']

# '?' after an item makes it optional. The trailing [^ ] needs one more
# non-space character, so the lone final 'a' cannot match.
text = 'aaaabcc aabbbbc aacc a'
print(re.findall(r'd?\w+[^ ]', text))
# -> ['aaaabcc', 'aabbbbc', 'aacc']
# --- Whitespace ---------------------------------------------------------------

# .strip() removes the newlines that the triple-quoted literal adds at both
# ends, leaving exactly four lines.
text = '''
1. abc
2. abc
3. abc
4.abc
'''.strip()

# \s+ requires at least one whitespace character after the dot, so the
# last line ('4.abc') is skipped.
print(re.findall(r'[0-9]\.\s+.*', text))
# -> ['1. abc', '2. abc', '3. abc']
# --- Start-of-string anchor ---------------------------------------------------

texts = [
    'Mission',
    'Mission: successful',
    'Last Mission: unsuccessful',
    'Next Mission: successful upon capture of target',
]

# '^' anchors the match to the start of the string. No entry starts with
# 'success', so nothing matches.
for text in texts:
    print(re.findall(r'^success.+', text))
# -> []
# -> []
# -> []
# -> []

# '.+' needs at least one character after 'Mission', so the bare 'Mission'
# entry does not match; entries with text before 'Mission' fail the anchor.
for text in texts:
    print(re.findall(r'^Mission.+', text))
# -> []
# -> ['Mission: successful']
# -> []
# -> []

# '.*' also accepts zero trailing characters, so bare 'Mission' now matches.
for text in texts:
    print(re.findall(r'^Mission.*', text))
# -> ['Mission']
# -> ['Mission: successful']
# -> []
# -> []
# --- Capture groups and the end-of-string anchor ------------------------------

texts = [
    'file_record_transcript.pdf',
    'file_07241999.pdf',
    'testfile_fake.pdf.tmp',
]

# With a capture group, findall returns only the group's text (the file
# stem). The dot before 'pdf' is escaped so it matches a literal '.'
# rather than any character.
for text in texts:
    print(re.findall(r'(.+)\.pdf', text))
# -> ['file_record_transcript']
# -> ['file_07241999']
# -> ['testfile_fake']

# '$' anchors the match to the end of the string, which rejects the
# '.pdf.tmp' file.
for text in texts:
    print(re.findall(r'(.+)\.pdf$', text))
# -> ['file_record_transcript']
# -> ['file_07241999']
# -> []
# --- Nested capture groups ----------------------------------------------------

texts = [
    'day Jan 1987',
    'May 1969',
    'Aug 2011',
]

# With more than one group, findall returns a tuple per match: the outer
# group (month and year) followed by the inner group (year only).
for text in texts:
    print(re.findall(r'([JFMASOND][a-z]{2} (\d{4}))', text))
# -> [('Jan 1987', '1987')]
# -> [('May 1969', '1969')]
# -> [('Aug 2011', '2011')]

# Anchoring with '^' rejects the first entry, where the date is preceded
# by 'day '.
for text in texts:
    print(re.findall(r'^([JFMASOND][a-z]{2} (\d{4}))', text))
# -> []
# -> [('May 1969', '1969')]
# -> [('Aug 2011', '2011')]
# --- Multiple groups and optional groups --------------------------------------

texts = [
    '1280x720',
    '1920x1600',
    '1024x768',
    '3450a1922',
    '64x64',
    'x256',
]

# Two sibling groups capture width and height; any digit count is accepted.
for text in texts:
    print(re.findall(r'(\d+)x(\d+)', text))
# -> [('1280', '720')]
# -> [('1920', '1600')]
# -> [('1024', '768')]
# -> []
# -> [('64', '64')]
# -> []

# Making the first group optional lets 'x256' match; a group that did not
# participate in the match is reported as an empty string. Requiring 3-4
# digits rejects '64x64'.
for text in texts:
    print(re.findall(r'(\d{3,4})?x(\d{3,4})', text))
# -> [('1280', '720')]
# -> [('1920', '1600')]
# -> [('1024', '768')]
# -> []
# -> []
# -> [('', '256')]

# With both groups mandatory, only full 3-4 digit resolutions match.
for text in texts:
    print(re.findall(r'(\d{3,4})x(\d{3,4})', text))
# -> [('1280', '720')]
# -> [('1920', '1600')]
# -> [('1024', '768')]
# -> []
# -> []
# -> []
# --- Alternation ----------------------------------------------------------------

texts = [
    'I love cats',
    'I love dogs',
    'I love logs',
    'I love cogs',
]

# (cats|dogs) matches either alternative; the outer group captures the
# whole phrase, the inner one the chosen word.
for text in texts:
    print(re.findall(r'(I love (cats|dogs))', text))
# -> [('I love cats', 'cats')]
# -> [('I love dogs', 'dogs')]
# -> []
# -> []

# re.match only tries to match at the start of the string and returns a
# Match object, or None when the pattern does not match there.
for text in texts:
    print(re.match(r'I love (cats|dogs)', text))
# -> <re.Match object; span=(0, 11), match='I love cats'>
# -> <re.Match object; span=(0, 11), match='I love dogs'>
# -> None
# -> None

# To get whole matches from findall without per-group tuples, use a
# non-capturing group (?:...). Square brackets would NOT work here:
# [(cats|dogs)]+ is a character set that matches any run of those letters
# (e.g. 'cogs'), not the two words.
print(re.findall(r'I love (?:cats|dogs)', 'I love cats I love dogs I love '))
# -> ['I love cats', 'I love dogs']