String
String
  • Strings of characters, immutable
  • #!/usr/bin/python
     
    # A str is an immutable sequence of characters.
    s = 'Hello World!'

    # Indexing returns the single character at that position.
    print(s[0])  # H

    # Slicing with an explicit start; the end index is exclusive.
    print(s[0:2])  # He

    # A slice object can be built once and then used as a subscript.
    sl = slice(0, 10, 2)
    print(s[sl])  # HloWr

    # "+" concatenates two strings into a new one.
    print(s + ' ...')  # Hello World! ...

    # "*" repeats the string.
    print(s * 2)  # Hello World!Hello World!

    # "in" tests for a substring.
    if 'H' in s:
        print('Contain H')

    # A raw literal keeps the backslash and the "n" as two separate characters.
    print(r'raw string\n')  # raw string\n
    s = 'raw string\n'
    # repr() is what the %r conversion applies: quotes plus visible escapes.
    print(repr(s))  # 'raw string\n'
    		
    #!/usr/bin/python
     
    import re

    # Walk through the most common str methods, printing each result.
    s = 'Hello World!'

    # capitalize: upper-case the first character and lower-case the rest
    print(s.capitalize())  # Hello world!

    # center: pad on both sides with the fill character up to the given width
    print(s.center(20, '*'))  # ****Hello World!****

    # count: number of non-overlapping occurrences
    print(s.count('l'))  # 3

    # endswith
    if s.endswith('!'):
        print('end with ! ...')

    # find: lowest index of the substring, or -1 when it is absent
    print(s.find('or'))  # 7

    # index: like find, but raises ValueError when the substring is absent
    print(s.index('or'))  # 7

    # join: the string it is called on is the separator
    c = '-'
    print(c.join(['a', 'b', 'c']))  # a-b-c

    # lower
    print(s.lower())  # hello world!

    # replace: the third argument caps the number of replacements
    print(s.replace('l', '-', 2))  # He--o World!

    # split
    # Named "text" rather than "str" so the built-in str type is not shadowed.
    text = "Line1-abcdef, \nLine2-abc, \nLine4-abcd"
    # With no argument, split() breaks on any run of whitespace.
    print(text.split())  # ['Line1-abcdef,', 'Line2-abc,', 'Line4-abcd']
    # re.split takes an alternation, so several delimiters work in one call.
    # Adjacent delimiters (", " directly followed by "\n") yield empty strings.
    print(re.split('\n|, ', text))  # ['Line1-abcdef', '', 'Line2-abc', '', 'Line4-abcd']

    # strip: remove leading and trailing whitespace
    print('  Hello ... '.strip())  # Hello ...

    # upper
    print(s.upper())  # HELLO WORLD!
    		
    Regular Expression
  • Pattern
  • Flags
  • #!/usr/bin/python
     
    import re

    phone = "2004-959-559 # This is Phone Number"

    # re.match only succeeds when the pattern matches at the start of the string.
    m = re.match(r'(\d+)-(\d+)-\d+.*', phone)
    if m is not None:
        print(type(m))
        print(m.group())   # whole match: 2004-959-559 # This is Phone Number
        print(m.group(1))  # first captured group: 2004
        print(m.groups())  # every captured group: ('2004', '959')

    # re.search scans the string and stops at the first match.
    s = re.search(r'\d+', phone)
    if s is not None:
        print(type(s))
        print(s.group())  # 2004

    # re.findall returns all non-overlapping matches as a list of strings.
    a = re.findall(r'\d+', phone)
    print(a)  # ['2004', '959', '559']

    # re.sub replaces each match; here every single digit becomes "*".
    r = re.sub(r'\d', '*', phone)
    print(r)  # ****-***-*** # This is Phone Number
    		
    Unicode
  • a sequence of code points, immutable
  • Python keeps characters as Unicode in memory
  • In Python 3, type 'str' represents Unicode text and type 'bytes' represents a byte string
  • #!/usr/bin/python
    
    # str (Unicode text) versus bytes
    # A plain literal is already Unicode text: its type is str in Python 3.
    s = 'Café'
    print(type(s), len(s))  # <class 'str'> 4
    # The u prefix is still accepted and produces the very same type.
    s = u'Café'
    print(type(s), len(s))  # <class 'str'> 4
    # Encoding a str yields a bytes object.
    encoded = s.encode('utf-8')
    print(type(encoded))  # <class 'bytes'>
    
    # Read the first line of temp.txt as bytes, decode it, and print it.
    # "with" closes the file even if reading raises.
    with open('temp.txt', 'rb') as f:
        # Binary mode yields bytes; next() raises StopIteration on an empty file.
        raw_line = next(f)
    print(type(raw_line))  # <class 'bytes'>
    line = raw_line.decode('utf-8')  # bytes -> str
    print(type(line))  # <class 'str'>
    # Prints the decoded text (e.g. 陈 if that is what temp.txt holds);
    # print encodes the str again for the output stream.
    print(line)
    
    # ord() maps a one-character string to its integer code point.
    c = ord('陈')  # 38472
    # chr() is the inverse: the character itself is printed, not the number.
    print(chr(c))  # 陈
    		
  • read unicode, output unicode
    #!/usr/bin/python
    
    # Copy the first line of temp.txt to output.txt using text I/O on both sides.
    # The explicit utf-8 encoding matches the other snippets and keeps the result
    # independent of the platform default; "with" closes each file even on error.
    with open('temp.txt', 'r', encoding='utf-8') as f:
        # Text mode decodes while reading, so this is already a str (e.g. 陈).
        # next() raises StopIteration if the file is empty.
        line = next(f)
    print(type(line))  # <class 'str'>

    with open('output.txt', 'w', encoding='utf-8') as o:
        o.write(line)  # text mode encodes the str on the way out
    		
  • read byte string, output unicode
    #!/usr/bin/python
    
    # Read the first line of temp.txt as bytes and write it to output.txt as text.
    # "with" closes each file even if an error occurs.
    with open('temp.txt', 'rb') as f:
        # Binary mode yields undecoded bytes; StopIteration if the file is empty.
        raw_line = next(f)
    print(type(raw_line))  # <class 'bytes'>

    # Text I/O needs a str, so decode first; the explicit encoding makes the
    # written bytes utf-8 regardless of the platform default.
    with open('output.txt', 'w', encoding='utf-8') as o:
        o.write(raw_line.decode('utf-8'))
    		
  • read byte string, output byte string
    #!/usr/bin/python
    
    # Copy the first line of temp.txt to output.txt without decoding it.
    # "with" closes each file even if an error occurs.
    with open('temp.txt', 'rb') as f:
        # Binary mode yields bytes; StopIteration if the file is empty.
        raw_line = next(f)
    print(type(raw_line))  # <class 'bytes'>

    # Binary mode on the output side accepts the bytes unchanged.
    with open('output.txt', 'wb') as o:
        o.write(raw_line)
    		
  • Reference
  • Tutorialspoint String
  • Regular Expression
  • Unicode cheat sheet