读完《Learning Ext JS》后写了个脚本,用来将我用Markdown格式记录的笔记按章拆分成多个文件,以便发布日志和日后阅读;同时它还提供自动生成索引文件、将各章节转换成HTML代码以及插入封面插图等功能。
PYTHON:
-
#!/usr/bin/python
-
# -*- coding: utf-8 -*-
-
import sys
-
import getopt
-
import os
-
import re
-
import types
-
import markdown2
-
-
# Version string of this script; GetUsage() interpolates it into the help text.
_version = '1.1'
-
# Date string of this script; GetUsage() interpolates it into the help text.
_date = '2009-06-03 15:35:44 Wednesday'
-
-
def SplitFileContent(filePath):
    ''' Split a markdown source file into sections.

    A line starting with '---' separates two sections. Blank lines directly
    after a separator (or at the top of the file) are skipped; the first
    non-blank line of a section is its title line. Every other line,
    blank or not, is appended to the section it belongs to.

    Parameters:
        filePath -- path of an existing regular file whose name ends with '.mkd'.

    Returns a 2-tuple:
        (True, sections) -- sections is a list of single-entry dictionaries,
                            one per section, mapping EscapeFileName(title line)
                            to the full text of the section (title line included).
        (False, message) -- the path is not a valid '.mkd' file, or the file
                            could not be read.
    '''
    if not (os.path.exists(filePath) and os.path.isfile(filePath) and filePath.endswith('.mkd')):
        return False, 'Invalid file: ' + filePath + ' !'

    try:
        # open() lives inside the try block so that a failure to open the
        # file is reported through the return value like any other read error.
        with open(filePath, 'rb') as fp:
            lines = fp.readlines()
        # Binary mode keeps '\r' handling identical on every platform. Where
        # that yields bytes objects rather than str, decode them so the text
        # operations below work; plain str lines are left untouched.
        lines = [raw if isinstance(raw, str) else raw.decode('utf-8') for raw in lines]
    except (IOError, UnicodeError):
        return False, 'Failed to read the file: ' + filePath + ' !'

    contents = []
    # Dictionary and key of the section being filled; None means "waiting
    # for the title line of the next section".
    section = None
    title = None
    for line in lines:
        line = line.replace('\r', '')
        if line.startswith('---'):
            # Start a fresh section. A new dictionary is created later rather
            # than clearing the old one, which is still referenced by contents.
            section = None
        elif section is None:
            if re.match(r'^\s*$', line):
                continue
            title = EscapeFileName(line)
            section = {title: line}
            contents.append(section)
        else:
            section[title] += line

    return True, contents
-
-
def MkDir(dirPath, makeSubDir=False, fileContents=None, force=False):
    ''' Create the output directory and return its path.

    Parameters:
        dirPath      -- base directory; None or a blank string means the
                        current directory and is normalised to ''.
        makeSubDir   -- when true and fileContents is a non-empty list whose
                        first element is a non-empty dictionary, the first key
                        of that dictionary is appended to dirPath as a
                        sub directory name.
        fileContents -- section list as produced by SplitFileContent().
        force        -- when the resulting path already exists, remove it
                        recursively with RemoveDir() instead of failing.

    Returns the final directory path ('' when nothing had to be created).

    Raises OSError when the path exists and force is false, or when removing
    or creating the directory fails.
    '''
    if dirPath is None or re.match(r'^\s*$', dirPath):
        dirPath = ''

    if (makeSubDir and isinstance(fileContents, list) and len(fileContents) > 0
            and isinstance(fileContents[0], dict) and len(fileContents[0]) > 0):
        # The first section holds the title of the whole document; its key is
        # already escaped for use as a file or directory name.
        dirPath = os.path.join(dirPath, next(iter(fileContents[0])))

    if os.path.exists(dirPath):
        if not force:
            raise OSError('Directory '+dirPath+' already exists, remove it first or use the -f option to force to do that .')
        # Errors from the removal propagate unchanged as OSError.
        RemoveDir(dirPath)

    if len(dirPath) > 0:
        os.makedirs(dirPath)

    return dirPath
-
-
def RemoveDir(dirPath):
    ''' Remove the given directory together with everything below it.

    Real sub directories are removed recursively. Every other entry,
    including a symbolic link that points at a directory, is deleted with
    os.remove(), so nothing outside dirPath is touched through a link.

    Raises OSError when an entry or the directory itself cannot be removed.
    '''
    for entry in os.listdir(dirPath):
        entryPath = os.path.join(dirPath, entry)
        # os.path.isdir() follows symbolic links; recursing into a linked
        # directory would wipe the link target and then fail on os.rmdir().
        if os.path.isdir(entryPath) and not os.path.islink(entryPath):
            RemoveDir(entryPath)
        else:
            os.remove(entryPath)
    os.rmdir(dirPath)
-
-
def ExportToFiles(list, ext='mkd', dirPath='', createIndex=True, coverHtml=''):
    ''' Write every section of the given list to its own file in dirPath.

    Parameters:
        list        -- list of single-entry dictionaries mapping a file name
                       (without extension) to the text of that section.
        ext         -- file extension of the created files. When it is 'html'
                       the text is converted with markdown2 before writing;
                       any other value writes the text unchanged. The mode
                       'all' is not handled here: call this function once
                       per wanted extension.
        dirPath     -- directory the files are written to.
        createIndex -- when true, an 'index.<ext>' file is written as well.
                       It consists of the text of the first section followed
                       by one link per remaining section; the links always
                       point at the '.html' files.
        coverHtml   -- optional HTML snippet; when not empty it is inserted
                       after the first line of every section file and after
                       the first section inside the index.

    Returns (True, '') on success, or (False, message) when the first
    argument is not a list or a file could not be written.
    '''
    # The parameter shadows the builtin name, hence type([]) for the check.
    if not isinstance(list, type([])):
        return False, 'List expected, '+str(type(list))+' given !'

    asHtml = ext == 'html'

    # Content of the index
    index = ''

    for item in list:
        title, content = next(iter(item.items()))

        # Generate the content of the index
        if index == '':
            index += content
            if len(coverHtml) > 0:
                index += coverHtml+'\n\n'
        else:
            index += '- ['+title.replace('_', ' ')+']('+title+'.html)\n'

        fileName = title+'.'+ext
        fileContent = content
        # If a cover is given, insert its HTML right below the first line
        if len(coverHtml) > 0:
            tmpLines = fileContent.splitlines(True)
            if len(tmpLines) > 0:
                tmpLines.insert(1, '\n')
                tmpLines.insert(2, coverHtml)
                tmpLines.insert(3, '\n')
                fileContent = ''.join(tmpLines)

        output = markdown2.markdown(fileContent) if asHtml else fileContent
        try:
            # The with statement closes the file only if it was opened, so a
            # failing open() is reported below instead of breaking cleanup.
            with open(os.path.join(dirPath, fileName), 'w') as fp:
                fp.write(output)
        except IOError as err:
            return False, 'Failed to create file: '+fileName+' !\r\n'+str(err)

    # If an index is needed, create it
    if createIndex:
        output = markdown2.markdown(index) if asHtml else index
        try:
            with open(os.path.join(dirPath, 'index.'+ext), 'w') as fp:
                fp.write(output)
        except IOError as err:
            return False, 'Failed to create index: index.'+ext+' !\r\n'+str(err)

    return True, ''
-
-
def EscapeFileName(name):
    ''' Turn a title line into a string usable as a file or directory name.

    The characters #, :, ?, the single quote, carriage return, line feed and
    both path separators (forward slash and backslash) are dropped, then
    every remaining space is replaced by an underscore. A heading such as
    '# Title' therefore becomes '_Title'.

    Path separators are removed because a title containing one would
    otherwise be interpreted as a path into a sub directory when the
    section file is created.
    '''
    for unwanted in ('#', ':', '?', '\'', '\r', '\n', '/', '\\'):
        name = name.replace(unwanted, '')
    return name.replace(' ', '_')
-
-
def GetCoverHTML():
    ''' Ask the user for the cover image and return the HTML that displays it.

    The answer read from standard input is interpreted as follows:
        - it contains '<'                  -> used verbatim as an HTML string
        - it starts with http:// or https:// -> used as the image URL
        - any other non-blank text         -> used as the image file name
        - blank                            -> the default 'cover.jpg' is used

    Returns the resulting markup wrapped in a right-floating <div>.
    '''
    # raw_input() only exists on Python 2; input() is its Python 3 equivalent.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    rawStr = readLine('Input a link to the image or the name of the image file or a HTML string:\r\n')
    answer = rawStr.strip()

    if rawStr.find('<') > -1:
        inner = rawStr
    elif re.match(r'^https?://', answer):
        inner = '<img src="'+answer+'" alt="Cover Image: '+answer+'">'
    else:
        # Honour the file name typed by the user; fall back to the
        # documented default only when nothing was entered.
        imageName = answer or 'cover.jpg'
        inner = '<img src="'+imageName+'" alt="'+imageName+'">'

    return '<div style="float:right;margin-left:10px;margin-bottom:10px;">'+inner+'</div>'
-
-
def GetUsage():
    ''' Build the help text of the application.

    The text lists every command line option and ends with the author,
    the version and the date; the last two are filled in from the
    module-level _version and _date values.
    '''
    template = '''
notesplitter.py

Split text files in markdown syntax and formatted according to my habit into seperate ones, meanwhile, offer the ability to convert markdown syntax into HTML.

Options

-h/--help
Print this message

-m <mode>/--mode=<mode>
Valid modes are 'all','html', and 'mkd', default is 'mkd'

-d <dir_path>/--directory=<dir_path>
In which directory should the files be placed in, if not specified, the current directory is used

-s/--sub
If specified, the title of the source file will be taken as the name of a sub directory under the dir_path, all files will be placed in the sub dir

-f/--force
When the files's directory already exists, this option will force to remove it recursively

-n/--noindex
By default, an index will be created after the source file is splitted, if this option is given, no index will be created

-c/--cover
If specified, the application will prompt to get the URL or HTML string displaying an image from the user; By default, the application supposes that an image named 'cover.jpg' exists in the same directory with all of the files

Samples:

python notesplitter.py -d notes -s -f --mode=all --noindex -c my_note.mkd

Author: Lenin Lee
Version: %s
Date: %s
'''

    return template % (_version, _date)
-
-
def _Main(argv):
    ''' Run the command line application and return the process exit status.

    Parameters:
        argv -- command line arguments without the program name.

    Returns 0 on success (or after printing the help text) and 1 when the
    options are invalid or any source file could not be processed.
    '''
    # Default mode is mkd
    mode = 'mkd'
    # By default, the directory won't be removed when it exists
    force = False
    # By default, put files in the current directory
    dirPath = None
    # By default, put files in the specified directory directly
    makeSubDir = False
    # By default, an index is created after the source file is split
    createIndex = True
    # Whether the user has to be asked for the cover image
    wantCover = False

    # Parse options and arguments
    try:
        opts, args = getopt.getopt(argv, 'hfsncm:d:', ['help','force','sub', 'noindex', 'cover', 'mode=','directory='])
        # Help wins over every other option
        if any(o in ('-h', '--help') for o, a in opts):
            print(GetUsage())
            return 0
        for o, a in opts:
            if o in ('-m', '--mode'):
                if a not in ('mkd', 'html', 'all'):
                    raise getopt.GetoptError('Option '+o+' only accepts "mkd","html" and "all" as its value !')
                mode = a
            elif o in ('-f', '--force'):
                force = True
            elif o in ('-d', '--directory'):
                dirPath = a
            elif o in ('-s', '--sub'):
                makeSubDir = True
            elif o in ('-n', '--noindex'):
                createIndex = False
            elif o in ('-c', '--cover'):
                wantCover = True
        if len(args) == 0:
            raise getopt.GetoptError('No source files given !')
    except getopt.GetoptError as err:
        print(err)
        print(GetUsage())
        return 1

    # Prompt for the cover only once the command line is known to be valid,
    # so the user is not asked for input that would be thrown away.
    coverHtml = GetCoverHTML() if wantCover else ''

    # Mode 'all' exports the markdown files first and the HTML files second
    extensions = ('mkd', 'html') if mode == 'all' else (mode,)

    # Offer the ability to manipulate multiple files
    for filePath in args:
        # Split the file into several parts, each stored in a dictionary
        succeeded, result = SplitFileContent(filePath)
        if not succeeded:
            print(result)
            return 1

        # Prepare the target directory. The result is kept in its own
        # variable: overwriting dirPath would nest the sub directory of each
        # further source file inside the one of the previous file.
        # NOTE: without -s every source file still shares the same directory,
        # so MkDir() rejects (or, with -f, removes) it for the second file.
        try:
            targetDir = MkDir(dirPath, makeSubDir, result, force)
        except OSError as err:
            print(err)
            return 1

        for ext in extensions:
            succeeded, message = ExportToFiles(result, ext, targetDir, createIndex, coverHtml)
            if not succeeded:
                print(message)
                return 1

        print('Operation completed !')

    return 0


if __name__ == '__main__':
    sys.exit(_Main(sys.argv[1:]))
由于针对的是我自己习惯的笔记格式,所以此脚本不具有通用性。写这个脚本的主要收获,是让我了解了Python程序处理命令行参数的方式,getopt模块的确很好用。
程序与笔记示例可以在这里下载:
使用如下命令查看帮助信息:
python notesplitter.py -h