执行已编译的代码
execfile("hello.py")
def EXECFILE(filename, locals=None, globals=None):
    """Compile and run the Python source stored in *filename*.

    A hand-written equivalent of ``execfile``.

    NOTE: the parameter names are historical and reversed relative to how
    they are used: the first namespace (``locals``) is handed to ``exec``
    as the global namespace and the second (``globals``) as the local one.
    The order is kept so that existing positional callers behave as before.
    """
    # Read the source inside a context manager so the file handle is closed
    # even when reading or compiling fails.
    with open(filename) as source:
        code = compile(source.read(), filename, "exec")
    # The call form of exec is accepted by both Python 2 and Python 3.
    exec(code, locals, globals)
EXECFILE("hello.py")
hello again, and welcome to the show
hello again, and welcome to the show
|
显式地访问 _ _builtin_ _ 模块中的函数
def open(filename, mode="rb"):
    """Open *filename* like the built-in ``open``, but accept GIF files only.

    Returns the file object rewound to offset 0.
    Raises IOError("not a GIF file") when the first five bytes are not a
    GIF signature ("GIF87" or "GIF89").
    """
    # This function shadows the built-in open(), so reach the real one
    # explicitly through the builtins module (__builtin__ on Python 2).
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins
    # Binary reads return bytes on Python 3 and str on Python 2; list both
    # spellings so the comparison works on either.
    signatures = ("GIF87", "GIF89", b"GIF87", b"GIF89")
    fp = builtins.open(filename, mode)
    try:
        if fp.read(5) not in signatures:
            raise IOError("not a GIF file")
        fp.seek(0)
    except Exception:
        # do not leak the handle of a file we refuse to hand out
        fp.close()
        raise
    return fp
fp = open("samples/sample.gif")
print len(fp.read()), "bytes"
fp = open("samples/sample.jpg")
print len(fp.read()), "bytes"
3565 bytes
Traceback (innermost last):
File "builtin-open-example-1.py", line
12, in ?
File "builtin-open-example-1.py", line
5, in open
IOError: not a GIF file |
Python 还提供了 execfile 函数, 一个从文件加载代码,
编译代码, 执行代码的快捷方式.
使用 execfile 函数
# python imports this module by itself, so the following # line isn't really needed # python 会自动导入该模块, 所以以下这行是不必要的 # import exceptions class HTTPError(Exception): # indicates an HTTP protocol error def _ _init_ _(self, url, errcode, errmsg): self.url = url self.errcode = errcode self.errmsg = errmsg def _ _str_ _(self): return ( "<HTTPError for %s: %s %s>" % (self.url, self.errcode, self.errmsg) ) try: raise HTTPError("http://www.python.org/foo", 200, "Not Found") except HTTPError, error: print "url", "=>", error.url print "errcode", "=>", error.errcode print "errmsg", "=>", error.errmsg raise # reraise exception url => http://www.python.org/foo errcode => 200 errmsg => Not Found Traceback (innermost last): File "exceptions-example-1", line 16, in ? HTTPError: <HTTPError for http://www.python.org/foo: 200 Not Found>
|
显式地访问 _ _builtin_ _ 模块中的函数
def open(filename, mode="rb"):
    """Open *filename* like the built-in ``open``, but accept GIF files only.

    Returns the file object rewound to offset 0.
    Raises IOError("not a GIF file") when the first five bytes are not a
    GIF signature ("GIF87" or "GIF89").
    """
    # This function shadows the built-in open(), so reach the real one
    # explicitly through the builtins module (__builtin__ on Python 2).
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins
    # Binary reads return bytes on Python 3 and str on Python 2; list both
    # spellings so the comparison works on either.
    signatures = ("GIF87", "GIF89", b"GIF87", b"GIF89")
    fp = builtins.open(filename, mode)
    try:
        if fp.read(5) not in signatures:
            raise IOError("not a GIF file")
        fp.seek(0)
    except Exception:
        # do not leak the handle of a file we refuse to hand out
        fp.close()
        raise
    return fp
fp = open("samples/sample.gif")
print len(fp.read()), "bytes"
fp = open("samples/sample.jpg")
print len(fp.read()), "bytes"
3565 bytes
Traceback (innermost last):
File "builtin-open-example-1.py", line
12, in ?
File "builtin-open-example-1.py", line
5, in open
IOError: not a GIF file
|
使用 exceptions 模块
# python imports this module by itself, so the
following
# line isn't really needed
# python 会自动导入该模块, 所以以下这行是不必要的
# import exceptions
class HTTPError(Exception):
    """Indicates an HTTP protocol error.

    The request URL, the status code and the reason phrase passed to the
    constructor are kept as the ``url``, ``errcode`` and ``errmsg``
    attributes.
    """

    def __init__(self, url, errcode, errmsg):
        self.url = url
        self.errcode = errcode
        self.errmsg = errmsg

    def __str__(self):
        return (
            "<HTTPError for %s: %s %s>" %
            (self.url, self.errcode, self.errmsg)
        )
try:
raise HTTPError("http://www.python.org/foo",
200, "Not Found")
except HTTPError, error:
print "url", "=>", error.url
print "errcode", "=>",
error.errcode
print "errmsg", "=>", error.errmsg
raise # reraise exception
url => http://www.python.org/foo
errcode => 200
errmsg => Not Found
Traceback (innermost last):
File "exceptions-example-1", line 16,
in ?
HTTPError: <HTTPError for http://www.python.org/foo:
200 Not Found>
|
使用 os 模块重命名和删除文件
import os import string def replace(file, search_for, replace_with): # replace strings in a text file back = os.path.splitext(file)[0] + ".bak" temp = os.path.splitext(file)[0] + ".tmp" try: # remove old temp file, if any os.remove(temp) except os.error: pass fi = open(file) fo = open(temp, "w") for s in fi.readlines(): fo.write(string.replace(s, search_for, replace_with)) fi.close() fo.close() try: # remove old backup file, if any os.remove(back) except os.error: pass # rename original to backup... os.rename(file, back) # ...and temporary to original os.rename(temp, file) # # try it out! file = "samples/sample.txt" replace(file, "hello", "tjena") replace(file, "tjena", "hello")
|
使用 os 列出目录下的文件
import os for file in os.listdir("samples"): print file sample.au sample.jpg sample.wav ...
|
getcwd 和 chdir 函数分别用于获得和改变当前工作目录
使用 os 模块改变当前工作目录
import os # where are we? cwd = os.getcwd() print "1", cwd # go down os.chdir("samples") print "2", os.getcwd() # go back up os.chdir(os.pardir) print "3", os.getcwd() 1 /ematter/librarybook 2 /ematter/librarybook/samples 3 /ematter/librarybook
|
makedirs 和 removedirs 函数用于创建或删除目录层
使用 os 模块创建/删除多个目录级
import os os.makedirs("test/multiple/levels") fp = open("test/multiple/levels/file", "w") fp.write("inspector praline") fp.close() # remove the file os.remove("test/multiple/levels/file") # and all empty directories above it os.removedirs("test/multiple/levels")
|
removedirs 函数会从所给路径的最后一个目录开始, 依次向上删除路径中所有的空目录. 而 mkdir 和 rmdir
函数只能处理单个目录级
使用 os 模块创建/删除目录
import os
os.mkdir("test")
os.rmdir("test")
os.rmdir("samples") # this will fail
Traceback (innermost last):
File "os-example-7", line 6, in ?
OSError: [Errno 41] Directory not empty: 'samples'
|
如果需要删除非空目录, 你可以使用 shutil 模块中的 rmtree 函数
>>> import shutil shutil.rmtree("d:\\a")
|
复制文件目录(包括内部文件)
>>> shutil.copytree("d:\\new","d:\\a")
|
复制文件操作:
shutil.copyfile("d:\\new\\a.txt","d:\\a.txt") |
目录或文件的移动操作
shutil.move("d:\\new\\a.txt","d:\\")
|
使用 os 模块获取文件属性
import os import time file = "samples/sample.jpg" def dump(st): mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime = st print "- size:", size, "bytes" print "- owner:", uid, gid print "- created:", time.ctime(ctime) print "- last accessed:", time.ctime(atime) print "- last modified:", time.ctime(mtime) print "- mode:", oct(mode) print "- inode/dev:", ino, dev # # get stats for a filename st = os.stat(file) print "stat", file dump(st) print # # get stats for an open file fp = open(file) st = os.fstat(fp.fileno()) print "fstat", file dump(st) stat samples/sample.jpg - size: 4762 bytes - owner: 0 0 - created: Tue Sep 07 22:45:58 1999 - last accessed: Sun Sep 19 00:00:00 1999 - last modified: Sun May 19 01:42:16 1996 - mode: 0100666 - inode/dev: 0 2 fstat samples/sample.jpg - size: 4762 bytes - owner: 0 0 - created: Tue Sep 07 22:45:58 1999 - last accessed: Sun Sep 19 00:00:00 1999 - last modified: Sun May 19 01:42:16 1996 - mode: 0100666 - inode/dev: 0 0
|
可以使用 chmod 和 utime 函数修改文件的权限模式和时间属性
使用 os 模块修改文件的权限和时间戳
import os import stat, time infile = "samples/sample.jpg" outfile = "out.jpg" # copy contents fi = open(infile, "rb") fo = open(outfile, "wb") while 1: s = fi.read(10000) if not s: break fo.write(s) fi.close() fo.close() # copy mode and timestamp st = os.stat(infile) os.chmod(outfile, stat.S_IMODE(st[stat.ST_MODE])) os.utime(outfile, (st[stat.ST_ATIME], st[stat.ST_MTIME])) print "original", "=>" print "mode", oct(stat.S_IMODE(st[stat.ST_MODE])) print "atime", time.ctime(st[stat.ST_ATIME]) print "mtime", time.ctime(st[stat.ST_MTIME]) print "copy", "=>" st = os.stat(outfile) print "mode", oct(stat.S_IMODE(st[stat.ST_MODE])) print "atime", time.ctime(st[stat.ST_ATIME]) print "mtime", time.ctime(st[stat.ST_MTIME]) original => mode 0666 atime Thu Oct 14 15:15:50 1999 mtime Mon Nov 13 15:42:36 1995 copy => mode 0666 atime Thu Oct 14 15:15:50 1999 mtime Mon Nov 13 15:42:36 1995
|
system 函数在当前进程下执行一个新命令, 并等待它完成
使用 os 执行操作系统命令
import os if os.name == "nt": command = "dir" else: command = "ls -l" os.system(command) -rwxrw-r-- 1 effbot effbot 76 Oct 9 14:17 README -rwxrw-r-- 1 effbot effbot 1727 Oct 7 19:00 SimpleAsyncHTTP.py -rwxrw-r-- 1 effbot effbot 314 Oct 7 20:29 aifc-example-1.py -rwxrw-r-- 1 effbot effbot 259 Oct 7 20:38 anydbm-example-1.py ...
|
命令通过操作系统的标准 shell 执行, 并返回 shell 的退出状态. 需要注意的是在 Windows
下, shell 通常是 command.com , 它的退出状态总是 0.
exec 函数会使用新进程替换当前进程(或者说是"转到进程").
使用 os 模块启动新进程
import os import sys program = "python" arguments = ["hello.py"] print os.execvp(program, (program,) + tuple(arguments)) print "goodbye" hello again, and welcome to the show
|
execvp 函数, 它会从标准路径搜索执行程序, 把第二个参数(元组)作为单独的参数传递给程序,
并使用当前的环境变量来运行程序. 其他七个同类型函数请参阅 Python Library Reference
.
在 Unix 环境下, 你可以通过组合使用 exec , fork 以及 wait 函数来从当前程序调用另一个程序,fork
函数复制当前进程, wait 函数会等待一个子进程执行结束.
使用 os 模块调用其他程序 (Unix)
import os
import sys
def run(program, *args):
    """Run *program* with *args* in a child process and wait for it.

    The program is looked up along the search path by ``os.execvp``.
    Returns the process id of the child that was waited for.
    Unix only: relies on ``os.fork``.
    """
    pid = os.fork()
    if not pid:
        # Child process: replace ourselves with the requested program.
        try:
            os.execvp(program, (program,) + args)
        finally:
            # execvp only comes back by raising (e.g. program not found).
            # Leave at once so the child never goes on to run the parent's
            # code; 127 is the shell's "command not found" status.
            os._exit(127)
    # Parent: wait for this particular child rather than for any child.
    return os.waitpid(pid, 0)[0]
run("python", "hello.py")
print "goodbye"
hello again, and welcome to the show
goodbye
|
fork 函数在子进程中返回 0 (从 fork 返回是子进程中发生的第一件事), 在父进程中返回一个非 0
的进程标识符(子进程的 PID ). 也就是说, 只有当我们处于子进程的时候 "not pid"
才为真.
fork 和 wait 函数在 Windows 上是不可用的, 但是你可以使用 spawn 函数. 不过,
spawn 不会沿着路径搜索可执行文件, 你必须自己处理好这些.
获取系统当前的“PATH”变量的值
>>> import string >>>import os >>> for path in string.split(os.environ["PATH"],os.pathsep): print path C:\Program Files\NVIDIA Corporation\PhysX\Common d:\program files\Python27\Lib\site-packages\PyQt4 C:\windows\system32 C:\windows C:\windows\System32\Wbem C:\windows\System32\WindowsPowerShell\v1.0\ C:\Program Files\Common Files\Thunder Network\KanKan\Codecs D:\Program Files\python D:\Program Files\Java\jdk1.6.0_23/bin D:\Program Files\Java\jdk1.6.0_23/jre/bin C:\Program Files\Microsoft SQL Server\90\Tools\binn\ D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\bin C:\Program Files\Intel\WiFi\bin\ C:\Program Files\Common Files\Intel\WirelessCommon\ C:\Program Files\Lenovo\Bluetooth Software\ D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\bin D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\lib D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\include D:\Qt\4.7.4\bin >>> |
使用 os 模块调用其他程序 (Windows)
>>> import string >>>import os >>> for path in string.split(os.environ["PATH"],os.pathsep): print path C:\Program Files\NVIDIA Corporation\PhysX\Common d:\program files\Python27\Lib\site-packages\PyQt4 C:\windows\system32 C:\windows C:\windows\System32\Wbem C:\windows\System32\WindowsPowerShell\v1.0\ C:\Program Files\Common Files\Thunder Network\KanKan\Codecs D:\Program Files\python D:\Program Files\Java\jdk1.6.0_23/bin D:\Program Files\Java\jdk1.6.0_23/jre/bin C:\Program Files\Microsoft SQL Server\90\Tools\binn\ D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\bin C:\Program Files\Intel\WiFi\bin\ C:\Program Files\Common Files\Intel\WirelessCommon\ C:\Program Files\Lenovo\Bluetooth Software\ D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\bin D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\lib D:\vs2010-qt-src-4.7.4\qt-src-4.7.4\include D:\Qt\4.7.4\bin >>> |
spawn 函数还可用于在后台运行一个程序.下面这个例子给 run 函数添加了一个可选的 mode
参数; 当设置为 os.P_NOWAIT 时, 这个脚本不会等待子程序结束, 默认值 os.P_WAIT
时 spawn 会等待子进程结束.
其它的标志常量还有 os.P_OVERLAY ,它使得 spawn 的行为和 exec 类似, 以及
os.P_DETACH , 它在后台运行子进程, 与当前控制台和键盘焦点隔离.
import os import string def run(program, *args, **kw): # find executable mode = kw.get("mode", os.P_WAIT) for path in string.split(os.environ["PATH"], os.pathsep): file = os.path.join(path, program) + ".exe" try: return os.spawnv(mode, file, (file,) + args) except os.error: pass raise os.error, "cannot find executable" run("python", "hello.py", mode=os.P_NOWAIT) print "goodbye" goodbye hello again, and welcome to the show
|
下面这个例子提供了一个在 Unix 和 Windows 平台上通用的 spawn 方法
使用 spawn 或 fork/exec 调用其他程序
import os
import string

if os.name in ("nt", "dos"):
    exefile = ".exe"
else:
    exefile = ""


def spawn(program, *args):
    """Run *program* with *args* and wait until it has finished.

    Uses ``os.spawnvp`` where available, otherwise ``os.spawnv`` with a
    manual search along PATH, and falls back to fork/exec when neither
    exists. Raises IOError when the PATH search finds no executable.
    """
    argv = (program,) + args
    try:
        spawnvp = os.spawnvp
    except AttributeError:
        pass
    else:
        # possible 2.0 shortcut! (the mode argument is required)
        return spawnvp(os.P_WAIT, program, argv)
    try:
        spawnv = os.spawnv
    except AttributeError:
        # assume it's unix
        pid = os.fork()
        if not pid:
            try:
                os.execvp(program, argv)
            finally:
                # exec failed: never let the child run the parent's code
                os._exit(127)
        # NOTE(review): this legacy path returns the child's pid, whereas
        # the spawn paths return its exit status; kept as in the original.
        return os.waitpid(pid, 0)[0]
    # got spawnv but no spawnp: go look for an executable
    for path in os.environ["PATH"].split(os.pathsep):
        file = os.path.join(path, program) + exefile
        try:
            return spawnv(os.P_WAIT, file, (file,) + args)
        except os.error:
            pass
    raise IOError("cannot find executable")


# try it out!
spawn("python", "hello.py")
print("goodbye")

# hello again, and welcome to the show
# goodbye
处理守护进程
Unix 系统中, 你可以使用 fork 函数把当前进程转入后台(一个"守护者/daemon").
一般来说, 你需要派生(fork off)一个当前进程的副本, 然后终止原进程
使用 os 模块使脚本作为守护执行 (Unix)
import os import time pid = os.fork() if pid: os._exit(0) # kill original print "daemon started" time.sleep(10) print "daemon terminated" |
使用 os 模块终止当前进程
import os
import sys
try:
sys.exit(1)
except SystemExit, value:
print "caught exit(%s)" % value
try:
os._exit(2)
except SystemExit, value:
print "caught exit(%s)" % value
print "bye!"
caught exit(1) |
使用 os.path 模块处理文件名
import os filename = "my/little/pony" print "using", os.name, "..." print "split", "=>", os.path.split(filename) print "splitext", "=>", os.path.splitext(filename) print "dirname", "=>", os.path.dirname(filename) print "basename", "=>", os.path.basename(filename) print "join", "=>", os.path.join(os.path.dirname(filename), os.path.basename(filename)) using nt ... split => ('my/little', 'pony') splitext => ('my/little/pony', '') dirname => my/little basename => pony join => my/little\pony
|
当前目录和上一级目录
>>> os.pardir >>> os.curdir |
使用 os.path 模块检查文件名的特征
import os FILES = ( os.curdir, "/", "file", "/file", "samples", "samples/sample.jpg", "directory/file", "../directory/file", "/directory/file" ) for file in FILES: print file, "=>", if os.path.exists(file): print "EXISTS", if os.path.isabs(file): print "ISABS", if os.path.isdir(file): print "ISDIR", if os.path.isfile(file): print "ISFILE", if os.path.islink(file): print "ISLINK", if os.path.ismount(file): print "ISMOUNT", print . => EXISTS ISDIR / => EXISTS ISABS ISDIR ISMOUNT file => /file => ISABS samples => EXISTS ISDIR samples/sample.jpg => EXISTS ISFILE directory/file => ../directory/file => /directory/file => ISABS
|
expanduser 函数以与大部分Unix shell相同的方式处理用户名快捷符号(~, 不过在
Windows 下工作不正常),
使用 os.path 模块将用户名插入到文件名
import os print os.path.expanduser("~/.pythonrc") # /home/effbot/.pythonrc
|
expandvars 函数将文件名中的环境变量替换为对应值
使用 os.path 替换文件名中的环境变量
import os os.environ["USER"] = "user" print os.path.expandvars("/home/$USER/config") print os.path.expandvars("$USER/folders") /home/user/config user/folders
|
列出目录下所有的文件和目录
>>> a=[file for file in os.listdir("d:\\new")] >>> for i in a: print i
|
walk 函数会帮你找出一个目录树下的所有文件. 它的参数依次是目录名, 回调函数, 以及传递给回调函数的数据对象.
使用 os.path 搜索文件系统
import os def callback(arg, directory, files): for file in files: print os.path.join(directory, file), repr(arg) os.path.walk(".", callback, "secret message") ./aifc-example-1.py 'secret message' ./anydbm-example-1.py 'secret message' ./array-example-1.py 'secret message' ... ./samples 'secret message' ./samples/sample.jpg 'secret message' ./samples/sample.txt 'secret message' ./samples/sample.zip 'secret message' ./samples/articles 'secret message' ./samples/articles/article-1.txt 'secret message' ./samples/articles/article-2.txt 'secret message' ...
|
index 函数会返回一个文件名列表, 你可以直接使用for-in 循环处理文件.
使用 os.listdir 搜索文件系统
import os def index(directory): # like os.listdir, but traverses directory trees stack = [directory] files = [] while stack: directory = stack.pop() for file in os.listdir(directory): fullname = os.path.join(directory, file) files.append(fullname) if os.path.isdir(fullname) and not os.path.islink(fullname): stack.append(fullname) return files for file in index("."): print file .\aifc-example-1.py .\anydbm-example-1.py .\array-example-1.py ...
|
一次返回一个文件
import os class DirectoryWalker: # a forward iterator that traverses a directory tree def _ _init_ _(self, directory): self.stack = [directory] self.files = [] self.index = 0 def _ _getitem_ _(self, index): while 1: try: file = self.files[self.index] self.index = self.index + 1 except IndexError: # pop next directory from stack self.directory = self.stack.pop() self.files = os.listdir(self.directory) self.index = 0 else: # got a filename fullname = os.path.join(self.directory, file) if os.path.isdir(fullname) and not os.path.islink(fullname): self.stack.append(fullname) return fullname for file in DirectoryWalker("."): print file .\aifc-example-1.py .\anydbm-example-1.py .\array-example-1.py ...
|
注意 DirectoryWalker 类并不检查传递给 _ _getitem_ _ 方法的索引值.
这意味着如果你越界访问序列成员(索引数字过大)的话, 这个类将不能正常工作.
下面这个例子返回文件名和它的 os.stat 属性(一个元组). 这个版本在每个文件上都能节省一次或两次 stat
调用( os.path.isdir 和 os.path.islink 内部都使用了 stat ), 并且在一些平台上运行得更快.
使用 DirectoryStatWalker 搜索文件系统
import os, stat
class DirectoryStatWalker:
    """Forward iterator that traverses a directory tree.

    Iterating over an instance yields ``(fullname, st)`` pairs, where
    ``st`` is the ``os.stat`` result for ``fullname``. Directories are
    returned as entries too and queued for later traversal.
    """

    def __init__(self, directory):
        self.stack = [directory]  # directories still waiting to be listed
        self.files = []           # entries of the directory being walked
        self.index = 0            # position of the next entry in self.files

    def __getitem__(self, index):
        # The index argument is ignored; a for-in loop simply calls this
        # method until it raises IndexError.
        while 1:
            try:
                file = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # pop next directory from stack; popping from an empty
                # stack raises IndexError, which ends the iteration
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, file)
                st = os.stat(fullname)
                mode = st[stat.ST_MODE]
                # os.stat follows symbolic links, so S_ISLNK(mode) can never
                # be true here; ask islink() instead so that links pointing
                # at directories are not descended into (they may loop).
                if stat.S_ISDIR(mode) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                return fullname, st
for file, st in DirectoryStatWalker("."):
print file, st[stat.ST_SIZE]
.\aifc-example-1.py 336
.\anydbm-example-1.py 244
.\array-example-1.py 526
|
使用 stat 模块
import stat import os, time st = os.stat("samples/sample.txt") print "mode", "=>", oct(stat.S_IMODE(st[stat.ST_MODE])) print "type", "=>", if stat.S_ISDIR(st[stat.ST_MODE]): print "DIRECTORY", if stat.S_ISREG(st[stat.ST_MODE]): print "REGULAR", if stat.S_ISLNK(st[stat.ST_MODE]): print "LINK", print print "size", "=>", st[stat.ST_SIZE] print "last accessed", "=>", time.ctime(st[stat.ST_ATIME]) print "last modified", "=>", time.ctime(st[stat.ST_MTIME]) print "inode changed", "=>", time.ctime(st[stat.ST_CTIME]) mode => 0664 type => REGULAR size => 305 last accessed => Sun Oct 10 22:12:30 1999 last modified => Sun Oct 10 18:39:37 1999 inode changed => Sun Oct 10 15:26:38 1999
|
使用 string 模块
import string text = "Monty Python's Flying Circus" print "upper", "=>", string.upper(text) print "lower", "=>", string.lower(text) print "split", "=>", string.split(text) print "join", "=>", string.join(string.split(text), "+") print "replace", "=>", string.replace(text, "Python", "Java") print "find", "=>", string.find(text, "Python"), string.find(text, "Java") print "count", "=>", string.count(text, "n") upper => MONTY PYTHON'S FLYING CIRCUS lower => monty python's flying circus split => ['Monty', "Python's", 'Flying', 'Circus'] join => Monty+Python's+Flying+Circus replace => Monty Java's Flying Circus find => 6 -1 count => 3
|
使用字符串方法替代 string 模块函数
text = "Monty Python's Flying Circus" print "upper", "=>", text.upper() print "lower", "=>", text.lower() print "split", "=>", text.split() print "join", "=>", "+".join(text.split()) print "replace", "=>", text.replace("Python", "Perl") print "find", "=>", text.find("Python"), text.find("Perl") print "count", "=>", text.count("n") upper => MONTY PYTHON'S FLYING CIRCUS lower => monty python's flying circus split => ['Monty', "Python's", 'Flying', 'Circus'] join => Monty+Python's+Flying+Circus replace => Monty Perl's Flying Circus find => 6 -1 count => 3
|
使用 string 模块将字符串转为数字
import string print int("4711"), print string.atoi("4711"), print string.atoi("11147", 8), # octal 八进制 print string.atoi("1267", 16), # hexadecimal 十六进制 print string.atoi("3mv", 36) # whatever... print string.atoi("4711", 0), print string.atoi("04711", 0), print string.atoi("0x4711", 0) print float("4711"), print string.atof("1"), print string.atof("1.23e5") 4711 4711 4711 4711 4711 4711 2505 18193 4711.0 1.0 123000.0
|
operator 模块为 Python 提供了一个 "功能性" 的标准操作符接口.
当使用 map 以及 filter 一类的函数的时候, operator 模块中的函数可以替换一些lambda
函式. 而且这些函数在一些喜欢写晦涩代码的程序员中很流行.
使用 operator 模块
print "add", "=>", reduce(operator.add, sequence) print "sub", "=>", reduce(operator.sub, sequence) print "mul", "=>", reduce(operator.mul, sequence) print "concat", "=>", operator.concat("spam", "egg") print "repeat", "=>", operator.repeat("spam", 5) print "getitem", "=>", operator.getitem(sequence, 2) print "indexOf", "=>", operator.indexOf(sequence, 2) print "sequenceIncludes", "=>", operator.sequenceIncludes(sequence, 3) add => 7 sub => -5 mul => 8 concat => spamegg repeat => spamspamspamspamspam getitem => 4 indexOf => 1 sequenceIncludes => 0
|
使用 operator 模块检查类型
import operator import UserList def dump(data): print type(data), "=>", if operator.isCallable(data): print "CALLABLE", if operator.isMappingType(data): print "MAPPING", if operator.isNumberType(data): print "NUMBER", if operator.isSequenceType(data): print "SEQUENCE", print dump(0) dump("string") dump("string"[0]) dump([1, 2, 3]) dump((1, 2, 3)) dump({"a": 1}) dump(len) # function 函数 dump(UserList) # module 模块 dump(UserList.UserList) # class 类 dump(UserList.UserList()) # instance 实例 <type 'int'> => NUMBER <type 'string'> => SEQUENCE <type 'string'> => SEQUENCE <type 'list'> => SEQUENCE <type 'tuple'> => SEQUENCE <type 'dictionary'> => MAPPING <type 'builtin_function_or_method'> => CALLABLE <type 'module'> => <type 'class'> => CALLABLE <type 'instance'> => MAPPING NUMBER SEQUENCE
|
copy 模块包含两个函数, 用来拷贝对象
使用 copy 模块复制对象
import copy a = [[1],[2],[3]] b = copy.copy(a) print "before", "=>" print a print b # modify original a[0][0] = 0 a[1] = None print "after", "=>" print a print b before => [[1], [2], [3]] [[1], [2], [3]] after => [[0], None, [3]] [[0], [2], [3]]
|
使用 copy 模块复制集合(Collections)
import copy a = [[1],[2],[3]] b = copy.deepcopy(a) print "before", "=>" print a print b # modify original a[0][0] = 0 a[1] = None print "after", "=>" print a print b before => [[1], [2], [3]] [[1], [2], [3]] after => [[0], None, [3]] [[1], [2], [3]]
|
使用sys模块获得脚本的参数
import sys print "script name is", sys.argv[0] if len(sys.argv) > 1: print "there are", len(sys.argv)-1, "arguments:" for arg in sys.argv[1:]: print arg else: print "there are no arguments!" script name is sys-argv-example-1.py there are no arguments!
|
使用sys模块操作模块搜索路径
import sys
print "path has", len(sys.path), "members"
# add the sample directory to the path
sys.path.insert(0, "samples")
import sample
# nuke the path
sys.path = []
import random # oops!
path has 7 members
this is the sample module!
Traceback (innermost last):
File "sys-path-example-1.py", line 11,
in ?
import random # oops!
ImportError: No module named random
|
使用sys模块查找内建模块
import sys
def dump(module):
print module, "=>",
if module in sys.builtin_module_names:
print "<BUILTIN>"
else:
module = _ _import_ _(module)
print module._ _file_ _
dump("os")
dump("sys")
dump("string")
dump("strop")
dump("zlib")
os => C:\python\lib\os.pyc
sys => <BUILTIN>
string => C:\python\lib\string.pyc
strop => <BUILTIN>
zlib => C:\python\zlib.pyd
|
使用sys模块查找已导入的模块
modules 字典包含所有加载的模块. import 语句在从磁盘导入内容之前会先检查这个字典.
import sys print sys.modules.keys() ['os.path', 'os', 'exceptions', '_ _main_ _', 'ntpath', 'strop', 'nt', 'sys', '_ _builtin_ _', 'site', 'signal', 'UserDict', 'string', 'stat']
|
getrefcount 函数返回给定对象的引用计数 - 也就是这个对象被引用的次数. Python
会跟踪这个值, 当它减少为0的时候, 就销毁这个对象.
使用sys模块获得引用计数
import sys variable = 1234 print sys.getrefcount(0) print sys.getrefcount(variable) print sys.getrefcount(None) 50 3 192 |
注意这个值总是比实际的数量大, 因为该函数本身在确定这个值的时候依赖这个对象
使用sys模块获得当前平台
import sys # # emulate "import os.path" (sort of)... if sys.platform == "win32": import ntpath pathmodule = ntpath elif sys.platform == "mac": import macpath pathmodule = macpath else: # assume it's a posix platform import posixpath pathmodule = posixpath print pathmodule
|
setprofile 函数允许你配置一个分析函数(profiling function). 这个函数会在每次调用某个函数或方法时被调用(明确或隐含的),
或是遇到异常的时候被调用.
使用sys模块配置分析函数
import sys def test(n): j = 0 for i in range(n): j = j + i return n def profiler(frame, event, arg): print event, frame.f_code.co_name, frame.f_lineno, "->", arg # profiler is activated on the next call, return, or exception # 分析函数将在下次函数调用, 返回, 或异常时激活 sys.setprofile(profiler) # profile this function call # 分析这次函数调用 test(1) # disable profiler # 禁用分析函数 sys.setprofile(None) # don't profile this call # 不会分析这次函数调用 test(2) call test 3 -> None return test 7 -> 1
|
使用sys模块配置单步跟踪函数
import sys
def test(n):
j = 0
for i in range(n):
j = j + i
return n
def tracer(frame, event, arg):
print event, frame.f_code.co_name, frame.f_lineno,
"->", arg
return tracer
# tracer is activated on the next call, return,
or exception
# 跟踪器将在下次函数调用, 返回, 或异常时激活
sys.settrace(tracer)
# trace this function call
# 跟踪这次函数调用
test(1)
# disable tracing
# 禁用跟踪器
sys.settrace(None)
# don't trace this call
# 不会跟踪这次函数调用
test(2)
call test 3 -> None
line test 3 -> None
line test 4 -> None
line test 5 -> None
line test 5 -> None
line test 6 -> None
line test 5 -> None
line test 7 -> None
return test 7 -> 1
|
使用sys重定向输出
import sys
import string
class Redirect:
    """File-like wrapper that lower-cases everything written through it."""

    def __init__(self, stdout):
        # the underlying stream that receives the converted text
        self.stdout = stdout

    def write(self, s):
        # the string method replaces string.lower(), which exists on
        # Python 2 only
        self.stdout.write(s.lower())
# redirect standard output (including the print
statement)
# 重定向标准输出(包括print语句)
old_stdout = sys.stdout
sys.stdout = Redirect(sys.stdout)
print "HEJA SVERIGE",
print "FRISKT HUM\303\226R"
# restore standard output
# 恢复标准输出
sys.stdout = old_stdout
print "M\303\205\303\205\303\205\303\205L!"
heja sverige friskt hum\303\266r
M\303\205\303\205\303\205\303\205L!
|
使用sys模块退出程序
import sys print "hello" sys.exit(1) print "there" hello
|
注意 sys.exit 并不是立即退出. 而是引发一个 SystemExit 异常. 这意味着你可以在主程序中捕获对
sys.exit 的调用
捕获sys.exit调用
import sys print "hello" try: sys.exit(1) except SystemExit: pass print "there" hello there
|
如果准备在退出前自己清理一些东西(比如删除临时文件), 你可以配置一个 "退出处理函数"(exit
handler), 它将在程序退出的时候自动被调用
另一种捕获sys.exit调用的方法
import sys def exitfunc(): print "world" sys.exitfunc = exitfunc print "hello" sys.exit(1) print "there" # never printed # 不会被 print hello world
|
在 Python 2.0 以后, 你可以使用 atexit 模块来注册多个退出处理函数.
atexit 模块允许你注册一个或多个终止函数(暂且这么叫), 这些函数将在解释器终止前被自动调用.
调用 register 函数, 便可以将函数注册为终止函数,你也可以添加更多的参数, 这些将作为 exit
函数的参数传递.
使用 atexit 模块
import atexit def exit(*args): print "exit", args # register two exit handler atexit.register(exit) atexit.register(exit, 1) atexit.register(exit, "hello", "world") exit ('hello', 'world') exit (1,) exit ()
|
time 模块提供了一些处理日期和一天内时间的函数. 它是对 C 运行时库的简单封装.
给定的日期和时间可以被表示为浮点型(从参考时间, 通常是 1970.1.1 到现在经过的秒数. 即 Unix
格式), 或者一个表示时间的 struct (类元组).
使用 time 模块获取当前时间
import time now = time.time() print now, "seconds since", time.gmtime(0)[:6] print print "or in other words:" print "- local time:", time.localtime(now) print "- utc:", time.gmtime(now) 937758359.77 seconds since (1970, 1, 1, 0, 0, 0) or in other words: - local time: (1999, 9, 19, 18, 25, 59, 6, 262, 1) - utc: (1999, 9, 19, 16, 25, 59, 6, 262, 0)
|
使用 time 模块格式化时间输出
import time now = time.localtime(time.time()) print time.asctime(now) print time.strftime("%y/%m/%d %H:%M", now) print time.strftime("%a %b %d", now) print time.strftime("%c", now) print time.strftime("%I %p", now) print time.strftime("%Y-%m-%d %H:%M:%S %Z", now) # do it by hand... year, month, day, hour, minute, second, weekday, yearday, daylight = now print "%04d-%02d-%02d" % (year, month, day) print "%02d:%02d:%02d" % (hour, minute, second) print ("MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN")[weekday], yearday Sun Oct 10 21:39:24 1999 99/10/10 21:39 Sun Oct 10 Sun Oct 10 21:39:24 1999 09 PM 1999-10-10 21:39:24 CEST 1999-10-10 21:39:24 SUN 283
|
在一些平台上, time 模块包含了 strptime 函数, 它的作用与 strftime 相反.
给定一个字符串和模式, 它返回相应的时间对象
使用 time.strptime 函数解析时间
import time # make sure we have a strptime function! # 确认有函数 strptime try: strptime = time.strptime except AttributeError: from strptime import strptime print strptime("31 Nov 00", "%d %b %y") print strptime("1 Jan 70 1:30pm", "%d %b %y %I:%M%p")
|
strptime 不完全实现
import re
import string

MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

SPEC = {
    # map formatting code to a regular expression fragment
    "%a": r"(?P<weekday>[a-z]+)",
    "%A": r"(?P<weekday>[a-z]+)",
    "%b": r"(?P<month>[a-z]+)",
    "%B": r"(?P<month>[a-z]+)",
    "%C": r"(?P<century>\d\d?)",
    "%d": r"(?P<day>\d\d?)",
    "%D": r"(?P<month>\d\d?)/(?P<day>\d\d?)/(?P<year>\d\d)",
    "%e": r"(?P<day>\d\d?)",
    "%h": r"(?P<month>[a-z]+)",
    "%H": r"(?P<hour>\d\d?)",
    "%I": r"(?P<hour12>\d\d?)",
    "%j": r"(?P<yearday>\d\d?\d?)",
    "%m": r"(?P<month>\d\d?)",
    "%M": r"(?P<minute>\d\d?)",
    "%p": r"(?P<ampm12>am|pm)",
    "%R": r"(?P<hour>\d\d?):(?P<minute>\d\d?)",
    "%S": r"(?P<second>\d\d?)",
    "%T": r"(?P<hour>\d\d?):(?P<minute>\d\d?):(?P<second>\d\d?)",
    "%U": r"(?P<week>\d\d)",
    "%w": r"(?P<weekday>\d)",
    "%W": r"(?P<weekday>\d\d)",
    "%y": r"(?P<year>\d\d)",
    "%Y": r"(?P<year>\d\d\d\d)",
    "%%": r"%",
}


class TimeParser:
    """Parse time strings that follow a strptime-style format string."""

    def __init__(self, format):
        """Compile *format* into a case-insensitive regular expression.

        Raises ValueError for a formatting code that is not in SPEC.
        """
        # convert strptime format string to regular expression; runs of
        # whitespace (and %t / %n) are first collapsed to a single blank
        format = " ".join(re.split(r"(?:\s|%t|%n)+", format))
        pattern = []
        for spec in re.findall(r"%\w|%%|.", format):
            if spec[0] == "%":
                try:
                    fragment = SPEC[spec]
                except KeyError:
                    raise ValueError("unknown specifier: %s" % spec)
            elif spec == " ":
                # a blank matches any run of whitespace, so padded fields
                # such as the day in time.ctime() output are accepted
                fragment = r"\s+"
            else:
                # literal text must not be read as regex syntax
                fragment = re.escape(spec)
            pattern.append(fragment)
        self.pattern = re.compile("".join(pattern), re.IGNORECASE)

    def match(self, daytime):
        """Return a 9-item time tuple parsed from the string *daytime*.

        Fields that the format does not mention are left at 0; weekday and
        yearday are matched but not filled in.
        Raises ValueError when *daytime* does not match the format.
        """
        # match time string
        match = self.pattern.match(daytime)
        if not match:
            raise ValueError("format mismatch")
        get = match.groupdict().get
        tm = [0] * 9
        # extract date elements
        y = get("year")
        if y:
            y = int(y)
            # two-digit years: 00-67 mean 20xx, 68-99 mean 19xx
            if y < 68:
                y = 2000 + y
            elif y < 100:
                y = 1900 + y
            tm[0] = y
        m = get("month")
        if m:
            # names are matched case-insensitively and may be written in
            # full, so compare on the normalised first three letters
            name = m[:3].capitalize()
            if name in MONTHS:
                tm[1] = MONTHS.index(name) + 1
            else:
                tm[1] = int(m)
        d = get("day")
        if d:
            tm[2] = int(d)
        # extract time elements
        h = get("hour")
        if h:
            tm[3] = int(h)
        else:
            h = get("hour12")
            if h:
                h = int(h)
                ampm = (get("ampm12") or "").lower()
                if ampm:
                    # 12am is hour 0 and 12pm is hour 12
                    h = h % 12
                    if ampm == "pm":
                        h = h + 12
                tm[3] = h
        m = get("minute")
        if m:
            tm[4] = int(m)
        s = get("second")
        if s:
            tm[5] = int(s)
        # ignore weekday/yearday for now
        return tuple(tm)


def strptime(string, format="%a %b %d %H:%M:%S %Y"):
    """Parse *string* according to *format* and return a time tuple."""
    return TimeParser(format).match(string)


if __name__ == "__main__":
    # try it out
    import time
    print(strptime("2000-12-20 01:02:03", "%Y-%m-%d %H:%M:%S"))
    print(strptime(time.ctime(time.time())))

# (2000, 12, 20, 1, 2, 3, 0, 0, 0)
# (2000, 11, 15, 12, 30, 45, 0, 0, 0)
|
使用 time 模块将本地时间元组转换为时间值(整数)
import time t0 = time.time() tm = time.localtime(t0) print tm print t0 print time.mktime(tm) (1999, 9, 9, 0, 11, 8, 3, 252, 1) 936828668.16 936828668.0
|
将 UTC 时间元组转换为时间值(整数)
import time


def _d(y, m, d, days=(0,31,59,90,120,151,181,212,243,273,304,334,365)):
    """Map a date to the number of days from a reference point.

    *days* holds the cumulative day count before each month of a
    non-leap year.
    """
    # year y contributes its own leap day only once February has passed
    if m > 2 and y % 4 == 0 and (y % 100 != 0 or y % 400 == 0):
        leap_day = 1
    else:
        leap_day = 0
    # The 1461/4 term below treats every 4th year before y as a leap year;
    # take back the century years that are not (2100, 2200, 2300, ...).
    skipped = ((y - 1) // 100 - 19) - ((y - 1) // 400 - 4)
    # floor division keeps the result an integer under true division too
    return ((y - 1901) * 1461) // 4 + days[m - 1] + d + leap_day - skipped


def timegm(tm, epoch=_d(1970,1,1)):
    """Convert a UTC time tuple to seconds since 1970-01-01 00:00:00.

    Only the first six items of *tm* (year, month, day, hour, minute,
    second) are used. The year must be 1970 or later and the month must
    lie in 1..12; both are checked with assert statements.
    """
    year, month, day, h, m, s = tm[:6]
    assert year >= 1970
    assert 1 <= month <= 12
    return (_d(year, month, day) - epoch)*86400 + h*3600 + m*60 + s


t0 = time.time()
tm = time.gmtime(t0)

print(tm)

print(t0)
print(timegm(tm))

# (1999, 9, 8, 22, 12, 12, 2, 251, 0)
# 936828732.48
# 936828732
|
使用 time 模块评价算法
import time def procedure(): time.sleep(2.5) # measure process time t0 = time.clock() procedure() print time.clock() - t0, "seconds process time" # measure wall time t0 = time.time() procedure() print time.time() - t0, "seconds wall time" 0.0 seconds process time 2.50903499126 seconds wall time
|
使用 types 模块
import types def check(object): print object, if type(object) is types.IntType: print "INTEGER", if type(object) is types.FloatType: print "FLOAT", if type(object) is types.StringType: print "STRING", if type(object) is types.ClassType: print "CLASS", if type(object) is types.InstanceType: print "INSTANCE", print check(0) check(0.0) check("0") class A: pass class B: pass check(A) check(B) a = A() b = B() check(a) check(b) 0 INTEGER 0.0 FLOAT 0 STRING A CLASS B CLASS <A instance at 796960> INSTANCE <B instance at 796990> INSTANCE
|
使用 gc 模块收集循环引用垃圾
import gc # create a simple object that links to itself class Node: def _ _init_ _(self, name): self.name = name self.parent = None self.children = [] def addchild(self, node): node.parent = self self.children.append(node) def _ _repr_ _(self): return "<Node %s at %x>" % (repr(self.name), id(self)) # set up a self-referencing structure root = Node("monty") root.addchild(Node("eric")) root.addchild(Node("john")) root.addchild(Node("michael")) # remove our only reference del root print gc.collect(), "unreachable objects" print gc.collect(), "unreachable objects" 12 unreachable objects 0 unreachable objects
|
fileinput 模块允许你循环一个或多个文本文件的内容
使用 fileinput 模块循环一个文本文件
import fileinput import sys for line in fileinput.input("samples/sample.txt"): sys.stdout.write("-> ") sys.stdout.write(line) -> We will perhaps eventually be writing only small -> modules which are identified by name as they are -> used to build larger ones, so that devices like -> indentation, rather than delimiters, might become -> feasible for expressing local structure in the -> source language. -> -- Donald E. Knuth, December 1974
|
你也可以使用 fileinput 模块获得当前行的元信息 (meta information). 其中包括
isfirstline , filename , lineno
使用 fileinput 模块处理多个文本文件
# Loop over every samples/*.txt file, announcing each file on stderr and
# writing its lines to stdout, numbered and upper-cased.
import fileinput
import glob
import string, sys

for line in fileinput.input(glob.glob("samples/*.txt")):
    if fileinput.isfirstline():  # first in a file?
        sys.stderr.write("-- reading %s --\n" % fileinput.filename())
    sys.stdout.write(str(fileinput.lineno()) + " " + string.upper(line))

# Sample output:
# -- reading samples\sample.txt --
# 1 WE WILL PERHAPS EVENTUALLY BE WRITING ONLY SMALL
# 2 MODULES WHICH ARE IDENTIFIED BY NAME AS THEY ARE
# 3 USED TO BUILD LARGER ONES, SO THAT DEVICES LIKE
# 4 INDENTATION, RATHER THAN DELIMITERS, MIGHT BECOME
# 5 FEASIBLE FOR EXPRESSING LOCAL STRUCTURE IN THE
# 6 SOURCE LANGUAGE.
# 7    -- DONALD E. KNUTH, DECEMBER 1974
|
文本文件的替换操作很简单. 只需要把 inplace 关键字参数设置为 1 , 传递给 input 函数,
该模块会帮你做好一切.
使用 fileinput 模块将 CRLF 改为 LF
# Rewrite the input files in place, turning CRLF line endings into LF.
# With inplace=1, what is written to sys.stdout goes into the file being read.
import fileinput, sys

for line in fileinput.input(inplace=1):
    # convert Windows/DOS text files to Unix files
    if line[-2:] == "\r\n":
        line = line[:-2] + "\n"
    sys.stdout.write(line)
|
shutil 实用模块包含了一些用于复制文件和文件夹的函数.
使用 shutil 复制文件
import shutil import os for file in os.listdir("."): if os.path.splitext(file)[1] == ".py": print file shutil.copy(file, os.path.join("backup", file)) aifc-example-1.py anydbm-example-1.py array-example-1.py ...
|
copytree 函数用于复制整个目录树 (与 cp -r 相同), 而 rmtree 函数用于删除整个目录树
(与 rm -r 相同).
使用 shutil 模块复制/删除目录树
import shutil import os SOURCE = "samples" BACKUP = "samples-bak" # create a backup directory shutil.copytree(SOURCE, BACKUP) print os.listdir(BACKUP) # remove it shutil.rmtree(BACKUP) print os.listdir(BACKUP) ['sample.wav', 'sample.jpg', 'sample.au', 'sample.msg', 'sample.tgz', ... Traceback (most recent call last): File "shutil-example-2.py", line 17, in ? print os.listdir(BACKUP) os.error: No such file or directory
|
tempfile 模块允许你快速地创建名称唯一的临时文件供使用.
使用 tempfile 模块创建临时文件
import tempfile import os tempfile = tempfile.mktemp() print "tempfile", "=>", tempfile file = open(tempfile, "w+b") file.write("*" * 1000) file.seek(0) print len(file.read()), "bytes" file.close() try: # must remove file when done os.remove(tempfile) except OSError: pass tempfile => C:\TEMP\~160-1 1000 bytes
|
TemporaryFile 函数会自动挑选合适的文件名并打开文件, 而且它会确保该文件在关闭时被删除.
(在 Unix 下, 你可以删除一个已打开的文件, 这样当文件关闭时它会被自动删除. 在其他平台上, 这通过一个特殊的封装类实现.)
使用 tempfile 模块打开临时文件
# Write to an anonymous temporary file; closing it also deletes it.
import tempfile

file = tempfile.TemporaryFile()

for i in range(100):
    file.write("*" * 100)

file.close()  # removes the file!
|
StringIO 模块实现了一个工作在内存中的文件对象 (内存文件). 在大多数需要标准文件对象的地方都可以使用它来替换.
使用 StringIO 模块从内存文件读入内容
import StringIO MESSAGE = "That man is depriving a village somewhere of a computer scientist." file = StringIO.StringIO(MESSAGE) print file.read() That man is depriving a village somewhere of a computer scientist.
|
StringIO 类实现了内建文件对象的所有方法, 此外还有 getvalue 方法用来返回它内部的字符串值
使用 StringIO 模块向内存文件写入内容
import StringIO file = StringIO.StringIO() file.write("This man is no ordinary man. ") file.write("This is Mr. F. G. Superman.") print file.getvalue() This man is no ordinary man. This is Mr. F. G. Superman.
|
使用 StringIO 模块捕获输出
import StringIO import string, sys stdout = sys.stdout sys.stdout = file = StringIO.StringIO() print """ According to Gbaya folktales, trickery and guile are the best ways to defeat the python, king of snakes, which was hatched from a dragon at the world's start. -- National Geographic, May 1997 """ sys.stdout = stdout print string.upper(file.getvalue()) ACCORDING TO GBAYA FOLKTALES, TRICKERY AND GUILE ARE THE BEST WAYS TO DEFEAT THE PYTHON, KING OF SNAKES, WHICH WAS HATCHED FROM A DRAGON AT THE WORLD'S START. -- NATIONAL GEOGRAPHIC, MAY 1997
|
cStringIO 是一个可选的模块, 是 StringIO 的更快速实现. 它的工作方式和 StringIO
基本相同, 但是它不可以被继承
使用 cStringIO 模块
import cStringIO MESSAGE = "That man is depriving a village somewhere of a computer scientist." file = cStringIO.StringIO(MESSAGE) print file.read() That man is depriving a village somewhere of a computer scientist.
|
为了让你的代码尽可能快, 但同时保证兼容低版本的 Python ,你可以使用一个小技巧在
cStringIO 不可用时启用 StringIO 模块,
后退至 StringIO
try: import cStringIO StringIO = cStringIO except ImportError: import StringIO print StringIO <module 'StringIO' (built-in)>
|
mmap 模块提供了操作系统内存映射函数的接口, 映射区域的行为和字符串对象类似,
但数据是直接从文件读取的.
使用 mmap 模块
import mmap import os filename = "samples/sample.txt" file = open(filename, "r+") size = os.path.getsize(filename) data = mmap.mmap(file.fileno(), size) # basics print data print len(data), size # use slicing to read from the file # 使用切片操作读取文件 print repr(data[:10]), repr(data[:10]) # or use the standard file interface # 或使用标准的文件接口 print repr(data.read(10)), repr(data.read(10)) <mmap object at 008A2A10> 302 302 'We will pe' 'We will pe' 'We will pe' 'rhaps even'
|
在 Windows 下, 这个文件必须以既可读又可写的模式打开 (`r+`, `w+` 或 `a+`),
否则 mmap 调用会失败.
对映射区域使用字符串方法和正则表达式
mport mmap import os, string, re def mapfile(filename): file = open(filename, "r+") size = os.path.getsize(filename) return mmap.mmap(file.fileno(), size) data = mapfile("samples/sample.txt") # search index = data.find("small") print index, repr(data[index-5:index+15]) # regular expressions work too! m = re.search("small", data) print m.start(), m.group() 43 'only small\015\012modules ' 43 small
|
|