"""Demonstrate the main ``re`` pattern methods on a phone-book text file.

Each helper reads a text file line by line, applies a compiled regular
expression with one specific method (``search``, ``match``, ``findall``,
``finditer``, or ``search`` plus groups) and prints what that method found.
"""
import re


def search_phone_numbers(filename: str, reg_ex: "re.Pattern[str]") -> None:
    """Print the first match of *reg_ex* found anywhere on each line.

    ``search()`` scans the whole line and returns the first match object,
    or ``None`` when nothing matches; lines without a match print nothing.

    Args:
        filename: Path of the text file to scan.
        reg_ex: Compiled pattern to look for.

    Raises:
        OSError: If *filename* cannot be opened for reading.
    """
    # ``with`` closes the file even if printing or matching raises.
    with open(filename, "r") as infile:
        for line in infile:
            # Use the pattern passed by the caller, not the module global.
            obj = reg_ex.search(line)
            if obj is not None:
                print("use search :", obj, obj.group(), obj.start(), obj.end())


def match_phone_numbers(filename: str, reg_ex: "re.Pattern[str]") -> None:
    """Print the match of *reg_ex* anchored at the start of each line.

    ``match()`` only succeeds when the pattern matches at the very beginning
    of the line; it returns ``None`` otherwise, and such lines print nothing.

    Args:
        filename: Path of the text file to scan.
        reg_ex: Compiled pattern to look for.

    Raises:
        OSError: If *filename* cannot be opened for reading.
    """
    with open(filename, "r") as infile:
        for line in infile:
            obj = reg_ex.match(line)
            if obj is not None:
                print("use match :", obj, obj.group(), obj.start(), obj.end())


def findall_phone_numbers(filename: str, reg_ex: "re.Pattern[str]") -> None:
    """Print every non-overlapping match of *reg_ex* on each line.

    ``findall()`` returns a list of matched strings; when the pattern has
    capture groups it returns the group text instead (a tuple per match if
    there is more than one group).

    Args:
        filename: Path of the text file to scan.
        reg_ex: Compiled pattern to look for.

    Raises:
        OSError: If *filename* cannot be opened for reading.
    """
    with open(filename, "r") as infile:
        for line in infile:
            for found in reg_ex.findall(line):
                print("use findall :", found)


def finditer_phone_numbers(filename: str, reg_ex: "re.Pattern[str]") -> None:
    """Print every match object of *reg_ex* on each line.

    ``finditer()`` returns an iterator (not a list) that yields one match
    object per non-overlapping match.

    Args:
        filename: Path of the text file to scan.
        reg_ex: Compiled pattern to look for.

    Raises:
        OSError: If *filename* cannot be opened for reading.
    """
    with open(filename, "r") as infile:
        for line in infile:
            for obj in reg_ex.finditer(line):
                print("use finditer :", obj, obj.group(), obj.start(), obj.end())


def search_phone_numbers_group(filename: str, reg_ex: "re.Pattern[str]") -> None:
    """Print capture groups 1 and 2 of the first match on each line.

    ``group()`` with no argument is the same as ``group(0)`` (the whole
    match); ``group(1)`` and ``group(2)`` are the first two capture groups.

    Args:
        filename: Path of the text file to scan.
        reg_ex: Compiled pattern; it must define at least two groups.

    Raises:
        OSError: If *filename* cannot be opened for reading.
        IndexError: If a line matches but *reg_ex* has fewer than two groups.
    """
    with open(filename, "r") as infile:
        for line in infile:
            obj = reg_ex.search(line)
            if obj is not None:
                print("use group :", obj.group(1), obj.group(2))


# Simple regular expressions describing a sequence of characters.
# Equivalent or alternative patterns that can be swapped in for the demo:
#   re.compile(r"[0-9]?[0-9][0-9][0-9]-[0-9][0-9][0-9][0-9]")  # spelled out
#   re.compile(r"[0-9]?[0-9]{3}-[0-9]{4}")                     # with counts
#   re.compile(r"[0-9]{3,4}-[0-9]{4}")                         # with a range
#   re.compile(r"J.*Neu")  # first name starts with "J", last name is "Neu"
#   re.compile(r"Neu")     # last name is "Neu"
# Active pattern: 3-4 digits, a dash, 4 digits, each part in its own group
# (the two groups are what search_phone_numbers_group prints).
regex = re.compile(r"(\d{3,4})-(\d{4})")


def main() -> None:
    """Run every demo against the sample phone book with the active pattern."""
    phone_book = "simpsons_phone_book.txt"
    search_phone_numbers(phone_book, regex)
    match_phone_numbers(phone_book, regex)
    findall_phone_numbers(phone_book, regex)
    finditer_phone_numbers(phone_book, regex)
    search_phone_numbers_group(phone_book, regex)


# Only run the demo when executed as a script, so the helpers can be
# imported without touching the file system.
if __name__ == "__main__":
    main()