String
String
A string is an immutable sequence of characters.
#!/usr/bin/python
# Basic operations on str: indexing, slicing, concatenation, repetition,
# membership tests, and raw strings.
s = 'Hello World!'

# Access a single element by index.
print(s[0])  # H

# Slicing with the [start:stop] shorthand.
print(s[:2])  # He

# A slice object bundles start, stop and step and can be used as an index.
sl = slice(0, 10, 2)
print(s[sl])  # HloWr

# Concatenation builds a new string.
print(s + ' ...')  # Hello World! ...

# Repetition.
print(s * 2)  # Hello World!Hello World!

# Membership test with `in`.
if 'H' in s:
    print('Contain H')

# A raw string literal keeps the backslash and the 'n' as two literal
# characters instead of turning them into a newline.
print(r'raw string\n')  # raw string\n

# %r formats the value with repr(): the newline is shown as the escape
# sequence \n and the output includes the surrounding quotes.
s = 'raw string\n'
print('%r' % s)  # 'raw string\n'
#!/usr/bin/python
# Tour of commonly used str methods, plus re.split for multiple delimiters.
import re

s = 'Hello World!'

# capitalize: upper-cases the first character and lower-cases the rest.
print(s.capitalize())  # Hello world!

# center: pads both sides with the fill character up to the given width.
print(s.center(20, '*'))  # ****Hello World!****

# count: number of non-overlapping occurrences of the substring.
print(s.count('l'))  # 3

# endswith
if s.endswith('!'):
    print('end with ! ...')

# find: lowest index of the substring, or -1 if it is not present.
print(s.find('or'))  # 7

# index: like find, but raises ValueError if the substring is not present.
print(s.index('or'))  # 7

# join: the string the method is called on is the separator.
c = '-'
print(c.join(['a', 'b', 'c']))  # a-b-c

# lower
print(s.lower())  # hello world!

# replace: the third argument limits how many occurrences are replaced.
print(s.replace('l', '-', 2))  # He--o World!

# split: with no argument, splits on runs of whitespace.
# The sample is named `text` rather than `str` so the builtin str type is
# not shadowed for the rest of the script.
text = "Line1-abcdef, \nLine2-abc, \nLine4-abcd"
print(text.split())  # ['Line1-abcdef,', 'Line2-abc,', 'Line4-abcd']

# re.split accepts several alternative delimiters. In this sample every
# ", " is immediately followed by "\n", so the two adjacent delimiters
# leave an empty string between them in the result.
print(re.split(r'\n|, ', text))  # ['Line1-abcdef', '', 'Line2-abc', '', 'Line4-abcd']

# strip: removes leading and trailing whitespace.
print(' Hello ... '.strip())  # Hello ...

# upper
print(s.upper())  # HELLO WORLD!
Regular Expression
Pattern
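- ^, matches the beginning of the string (and of each line when re.M is set)
- $, matches the end of the string (and of each line when re.M is set)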
- ., matches any single character except newline
- [...], matches any single character in brackets
- [^...], matches any single character not in brackets
- *, matches 0 or more occurrences of preceding expression
- +, matches 1 or more occurrences of preceding expression
- ?, matches 0 or 1 occurrence of preceding expression
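- re{n}, matches exactly n occurrences of preceding expression
- re{n,m}, matches at least n and at most m occurrences of preceding expression (no space after the comma, otherwise the braces are matched literally)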
- a|b, matches either a or b
- \w, matches word characters
- \W, matches non-word characters
- <.*>, greedy repetition, matches the largest number of repetitions
- <.*?>, non-greedy repetition, matches the smallest number of repetitions
Flags
- re.I, performs case-insensitive matching
- re.M, makes $ match the end of a line (not just the end of the string) and makes ^ match the start of any line
#!/usr/bin/python
# Demonstrates re.match, re.search, re.findall and re.sub on one sample string.
import re

phone = "2004-959-559 # This is Phone Number"

# match: only succeeds if the pattern matches at the start of the string.
m = re.match(r'(\d+)-(\d+)-\d+.*', phone)
if m:
    print(type(m))
    print(m.group())  # 2004-959-559 # This is Phone Number (the whole match)
    print(m.group(1))  # 2004 (first parenthesised group)
    print(m.groups())  # ('2004', '959') (all groups as a tuple)

# search: scans the string and returns the first match found.
s = re.search(r'\d+', phone)
if s:
    print(type(s))
    print(s.group())  # 2004

# findall: returns every non-overlapping match as a list of strings.
a = re.findall(r'\d+', phone)
print(a)  # ['2004', '959', '559']

# sub: replaces every match; here each single digit becomes '*'.
r = re.sub(r'\d', '*', phone)
print(r)  # ****-***-*** # This is Phone Number
Unicode
a sequence of code points, immutable
Python keeps characters as Unicode in memory
In Python 3, type 'str' represents Unicode text and type 'bytes' represents a byte string
#!/usr/bin/python
# str vs. bytes: a str holds code points; encode() turns it into bytes.
s = 'Café'
print(type(s), len(s))  # <class 'str'> 4

# The u prefix is accepted in Python 3 and produces the same str type.
s = u'Café'
print(type(s), len(s))  # <class 'str'> 4

# Encoding to UTF-8 yields a bytes object.
encoded = s.encode('utf-8')
print(type(encoded))  # <class 'bytes'>
# Read the first line of a file as bytes, decode it and print it.
# The with-statement closes the file even if reading raises.
with open('temp.txt', 'rb') as f:
    raw_line = next(f)  # binary mode yields bytes; raises StopIteration on an empty file
print(type(raw_line))  # <class 'bytes'>

line = raw_line.decode('utf-8')  # bytes -> str
print(type(line))  # <class 'str'>
print(line)  # e.g. 陈, if that is the first line of temp.txt
# ord() gives the Unicode code point of a single character.
c = ord('陈')  # 38472
# chr() is the inverse: it turns the code point back into a one-character str.
print(chr(c))  # 陈
read unicode, output unicode
#!/usr/bin/python
# Copy the first line of temp.txt to output.txt using text mode on both sides.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale encoding; the with-statements close both files
# even if an error occurs.
with open('temp.txt', 'r', encoding='utf-8') as f:
    line = next(f)  # text mode decodes while reading, so this is already a str
print(type(line))  # <class 'str'>

with open('output.txt', 'w', encoding='utf-8') as o:
    o.write(line)  # text mode encodes the str to bytes while writing
read byte string, output unicode
#!/usr/bin/python
# Read the first line of temp.txt as bytes, then write it to output.txt as text.
# The with-statements close both files even if an error occurs.
with open('temp.txt', 'rb') as f:
    raw_line = next(f)  # binary mode yields bytes
print(type(raw_line))  # <class 'bytes'>

# A text-mode file only accepts str, so the bytes are decoded first. The
# output encoding is explicit so that non-ASCII text does not fail on
# platforms whose default encoding cannot represent it.
with open('output.txt', 'w', encoding='utf-8') as o:
    o.write(raw_line.decode('utf-8'))
read byte string, output byte string
#!/usr/bin/python
# Copy the first line of temp.txt to output.txt as raw bytes; nothing is
# decoded or encoded along the way.
# The with-statements close both files even if an error occurs.
with open('temp.txt', 'rb') as f:
    raw_line = next(f)  # binary mode yields bytes
print(type(raw_line))  # <class 'bytes'>

with open('output.txt', 'wb') as o:  # binary mode accepts bytes directly
    o.write(raw_line)
Reference