BITE A CODE: Python String

Writing Python Scripts in Unicode

# -*- coding: utf-8 -*-

String Literals

# Normal string: single and double quotes are interchangeable.  The two
# literals below are bare expression statements, so their values are simply
# discarded when the script runs.
"Either single or double quotes can be used to quote strings."
'Either single or double quotes can be used to quote strings.'

# Raw string: the r prefix keeps every backslash exactly as written instead
# of letting it start an escape sequence -- convenient for Windows paths.
r"C:\Python26\README.txt"

# Multiline string: a triple-quoted literal may span several lines.  The
# newline right after the opening quotes and the one before the closing
# quotes are part of the value.
txt = """
This is a multiline string literal
enclosed in triple double quotes.
"""

# Triple single quotes behave the same way; this rebinds txt.
txt = '''
And this is a multiline string literal
enclosed in triple single quotes.
'''

# String formatting expression: the % operator fills each %d placeholder,
# in order, from the tuple on its right.
width = 80
height = 40
msg = "the size is %d x %d" % (width, height)  #>>> 'the size is 80 x 40'

Basic String Operations

# Sample value for the operations below.  It is called `path` rather than
# `str` so the built-in str type stays usable in the rest of the script.
path = r"C:\Python26\README.txt"

# remove trailing whitespace (rstrip() trims the right end only;
# use strip() to trim both ends)
path = path.rstrip()

# string length
len(path) #>>> 22

# fetch the last 4 characters
path[-4:] #>>> '.txt'

# end test
path.endswith(".txt") #>>> True

# split the string at the last occurrence of sep
path.rpartition("\\") #>>> ('C:\\Python26', '\\', 'README.txt')

Parsing a Text File

import re  # imported here because this snippet runs before the file's later `import re`

# Report each search term that appears as a whole whitespace-delimited word,
# together with its 1-based line and column.
with open(r"C:\Python26\README.txt", "rt") as fh:  # handle is closed even if reading fails
    txtfile = fh.readlines()

searchterm = ['python', 'documentation']
for ln, line in enumerate(txtfile):
    lline = line.lower()  # compare case-insensitively
    for term in searchterm:
        # Require whitespace (or a line edge) on both sides so the reported
        # column is that of the matching word itself, not of an earlier
        # longer word that merely contains the term.
        found = re.search(r"(?<!\S)" + re.escape(term) + r"(?!\S)", lline)
        if found:
            # Single-argument print(...) is valid on both Python 2 and 3.
            print("Found '%s' in %d:%d %s"
                  % (term, ln + 1, found.start() + 1, line.rstrip("\n")))

Regular Expression

import re

# Read the whole file; the with-block closes the handle when done.
with open(r"C:\Python26\README.txt", "rt") as fh:
    txt = fh.read()

# E-mail addresses.  Inside a character class a literal "-" must be placed
# last (or escaped): written as "_-." it forms a reversed range that re
# rejects with "bad character range", and ".-0-9" does not cover the digits
# 1-8.  The dot before the top-level domain is escaped so it matches only a
# real ".".
emails = re.findall(r'[a-zA-Z0-9+_.-]+@[0-9a-zA-Z][0-9a-zA-Z.-]*\.[a-zA-Z]+', txt)

# http/https URLs: letters, digits, common punctuation and %XX escapes.
urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*(),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', txt)

# Single-argument print(...) is valid on both Python 2 and 3.
print("emails= %s \n" % (emails,))
print("urls= %s \n" % (urls,))

Extract the title of an HTML page

import urllib, re

# Download the page (Python 2 urllib API) and collect the text found between
# <title> and </title>.
page = urllib.urlopen('http://www.google.com').read()

# A bare '(.*)' would return every line of the page, so the pattern is
# anchored on the title tags.  The group is non-greedy to stop at the first
# closing tag; IGNORECASE accepts <TITLE>, DOTALL lets a title span lines.
title = re.findall(r'<title>(.*?)</title>', page, re.IGNORECASE | re.DOTALL)
print(title)

glob Module

import glob

# For every .txt file matched by the glob pattern, report how many times each
# search term occurs on each line (case-insensitive).
searchterm = ['python', 'documentation']
files = glob.glob(r"C:\Python26\*.txt")

for filename in files:
    # The with-block closes the file even if an error occurs while scanning.
    with open(filename, "rt") as fh:
        # Iterate the file directly instead of loading every line up front.
        for ln, line in enumerate(fh):
            lline = line.lower()
            for term in searchterm:
                # count() tallies substring occurrences, not whole words.
                num_matches = lline.count(term)
                if num_matches:
                    # Single-argument print(...) is valid on Python 2 and 3.
                    print("found '%s' %d times in %s on line %d."
                          % (term, num_matches, filename, ln + 1))

Read/Write a Unicode Text File

import codecs
# Open test.txt for reading through the codecs wrapper, with "utf-8" given as
# the encoding.
# NOTE(review): the handle is neither read nor closed in this snippet --
# follow-on code should call f.close() (or use a with-block) when done.
f = codecs.open("test.txt", "r", "utf-8")

Reference

Kodos : The Python Regular Expression Debugger

BITE A CODE

Python String

Writing Python Scripts in Unicode

String Literals

Basic String Operations

Parsing a Text File

Regular Expression

glob Module

Read/Write a Unicode Text File

Reference

0 意見 :: Python String

張貼留言

Labels

Blog Archive

關於我自己