# Walk-through of the basic regular-expression workflow:
#   1. Import the module that provides support for regular expressions.
#   2. Define a regular expression.
#   3. Mark the regular expression as a raw string (r'...') so that
#      backslashes reach the regex engine untouched.
#   4. Create a regular expression object ("regex" for short) with
#      re.compile(r'.....').
#   5. Use the regex object to search for / find patterns in a string.

import re  # module for regular expressions

# Define the pattern as a raw string and compile it into a regex object.
# Grouping is determined by (..); this pattern has two groups:
#   group 1 is ([1-9])   -- the leading digit, which cannot be 0
#   group 2 is ([0-9]*)  -- the remaining digits (may be empty)
number_finder = re.compile(r'([1-9])([0-9]*)')

print(number_finder)  # show the repr of the compiled pattern

# The string that the regex object will be applied to.
text = 'one two 1234 four 56 78ninety 1111a'

# search() returns a match object for the first match in the string,
# or None when nothing in the string matches.
matched = number_finder.search(text)
print('first matched is', matched)

if matched is None:
    # Guard: calling start()/end()/group() on None would raise
    # AttributeError.
    print('no match found')
else:
    # start() is the index of the first character of the match.
    print('start() =', matched.start())

    # end() is the index just past the last character of the match
    # (exclusive), so text[start():end()] is exactly the matched text.
    print('end() =', matched.end())

    # group() with no argument returns the whole matched substring;
    # it is the same as group(0).
    print('group() =', matched.group())

    # groups() returns all subgroups in a tuple; the tuple is empty when
    # the pattern defines no groups.
    print('groups() =', matched.groups())
    print('number of groups in matched =', len(matched.groups()))

    print('matched.group() =', matched.group(),
          '= matched.group(0) =', matched.group(0))
    print(' matched.group(1) =', matched.group(1))  # subgroup 1
    print(' matched.group(2) =', matched.group(2))  # subgroup 2

    print('matched from index', matched.start(), 'to', matched.end(),
          'of text is', text[matched.start():matched.end()])

# finditer() yields one match object per non-overlapping match, scanning
# the string from left to right.
print('finditer(text) -->')
for m in number_finder.finditer(text):
    print(' ', m.group(), '--> subgroup =', m.group(1), ':', m.group(2))