2009年06月09日 Tuesday , 546 次点击

读完《Learning Ext JS》后写了个脚本，用来将我用Markdown格式记录的笔记按章拆分成多个文件，以便发布日志和日后阅读；同时它还具有自动生成索引文件、将各章节转换成HTML代码以及插入插图等功能。

PYTHON:
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys
  4. import getopt
  5. import os
  6. import re
  7. import types
  8. import markdown2
  9.  
  10. _version = '1.1'
  11. _date = '2009-06-03 15:35:44 Wednesday'
  12.  
  13. def SplitFileContent(filePath):
  14.     ''' Split the given source file in markdown syntax into several parts, then return a tuple when succeed.
  15.    The first element of the tuple is a boolean value, which identified that the operation is failed or successful,
  16.    and the second one is a list which contains several dictionaries, each dictionary contains one key-value pair,
  17.    the key is the title of that single section, and the value is the content.
  18.  
  19.    '---' is considered to be the seperator of sections.
  20.    The first line which is not empty of each section is considered to be the title of that sec.
  21.    '''
  22.     contents = []
  23.  
  24.     if os.path.exists(filePath) and os.path.isfile(filePath) and filePath.endswith('.mkd'):
  25.         fp = open(filePath, 'rb')
  26.         try:
  27.             lines = fp.readlines()
  28.  
  29.             item = {}
  30.             for line in lines:
  31.                 line = line.replace('\r', '')
  32.                 if re.match('^---', line):
  33.                     # According to the referencing characteristic of python, the clear() method will swap out the former contents
  34.                     #item.clear()
  35.                     # So the the right practice is assigning a new dictionary to the reference
  36.                     item = {}
  37.                 elif re.match('^\s*$', line):
  38.                     if item == {}:
  39.                         continue
  40.                     else:
  41.                         item[item.keys()[0]] += line
  42.                 else:
  43.                     if item == {}:
  44.                         item[EscapeFileName(line)] = line
  45.                         contents.append(item)
  46.                     else:
  47.                         item[item.keys()[0]] += line
  48.         except IOError:
  49.             return False,'Failed to read the file: '+filePath+' !'
  50.         finally:
  51.             fp.close()
  52.     else:
  53.         return False,'Invalid file: '+filePath+' !'
  54.  
  55.     return True,contents
  56.  
  57. def MkDir(dirPath, makeSubDir=False, fileContents=None, force=False):
  58.     ''' Make a directory according to the given path and some other parameters,
  59.    then return the final path when succeed
  60.    '''
  61.     if dirPath == None or re.match('^\s*$', dirPath):
  62.         dirPath = ''
  63.  
  64.     if makeSubDir and type(fileContents) == types.ListType and len(fileContents)> 0 and type(fileContents[0]) == types.DictionaryType:
  65.         dirPath = os.path.join(dirPath, fileContents[0].keys()[0])
  66.  
  67.     if os.path.exists(dirPath):
  68.         if force:
  69.             try:
  70.                 RemoveDir(dirPath)
  71.             except OSError,err:
  72.                 raise OSError,err
  73.         else:
  74.             raise OSError,'Directory '+dirPath+' already exists, remove it first or use the -f option to force to do that .'
  75.  
  76.     if len(dirPath)> 0:
  77.         try:
  78.             os.makedirs(dirPath)
  79.         except OSError,err:
  80.             raise OSError,err
  81.  
  82.     return dirPath
  83.  
  84. def RemoveDir(dirPath):
  85.     ''' Remove the specified directory and all sub entries under it recursively
  86.    '''
  87.     entries = os.listdir(dirPath)
  88.     for entry in entries:
  89.         if os.path.isdir(os.path.join(dirPath, entry)):
  90.             RemoveDir(os.path.join(dirPath, entry))
  91.         else:
  92.             try:
  93.                 os.remove(os.path.join(dirPath, entry))
  94.             except OSError,err:
  95.                 raise OSError,err
  96.     try:
  97.         os.rmdir(dirPath)
  98.     except OSError,err:
  99.         raise OSError,err
  100.  
  101. def ExportToFiles(list, ext='mkd', dirPath='', createIndex=True, coverHtml=''):
  102.     ''' Export every content stored in the given list into files, and store them in the given directory.
  103.    If parameter ext is 'mkd', all of the files is in markdown syntax, else if is 'html', in HTML syntax, if 'all' given, files in both syntax will be created.
  104.    By default, the parameter createIndex is True, which indicates that an index of all the files should be created at last.
  105.    '''
  106.     # Content of the index
  107.     index = ''
  108.  
  109.     if type(list) != types.ListType:
  110.         return False,'List expected, '+str(type(list))+' given !'
  111.  
  112.     for item in list:
  113.         # Generate the content of the index
  114.         if index == '':
  115.             index += item.values()[0]
  116.             if len(coverHtml)> 0:
  117.                 index += coverHtml+'\n\n'
  118.         else:
  119.             index += '- ['+item.keys()[0].replace('_', ' ')+']('+item.keys()[0]+'.html)\n'
  120.  
  121.         # the file name and the file content
  122.         fileName = item.keys()[0]+'.'+ext
  123.         fileContent = item.values()[0]
  124.         # if cover given, insert its html string into the content
  125.         if len(coverHtml)> 0:
  126.             tmpLines = fileContent.splitlines(True)
  127.             if len(tmpLines)> 0:
  128.                 tmpLines.insert(1, '\n')
  129.                 tmpLines.insert(2, coverHtml)
  130.                 tmpLines.insert(3, '\n')
  131.             fileContent = ''.join(tmpLines)
  132.  
  133.         # create the file
  134.         try:
  135.             fp = open(os.path.join(dirPath, fileName), 'w')
  136.             fp.write(markdown2.markdown(fileContent) if ext == 'html' else fileContent)
  137.         except IOError,err:
  138.             return False,'Failed to create file: '+fileName+' !\r\n'+str(err)
  139.         finally:
  140.             fp.close()
  141.  
  142.     # if index needed, create it
  143.     if createIndex:
  144.         try:
  145.             fp = open(os.path.join(dirPath, 'index.'+ext), 'w')
  146.             fp.write(markdown2.markdown(index) if ext == 'html' else index)
  147.         except IOError,err:
  148.             return False,'Failed to create index: index.'+ext+' !\r\n'+str(err)
  149.         finally:
  150.             fp.close()
  151.  
  152.     return True,''
  153.  
  154. def EscapeFileName(name):
  155.     ''' Replace special characters which can not present in file or directory names
  156.    '''
  157.     name = name.replace('#', '')
  158.     name = name.replace(':', '')
  159.     name = name.replace('?', '')
  160.     name = name.replace('\'', '')
  161.     name = name.replace('\r', '')
  162.     name = name.replace('\n', '')
  163.     name = name.replace(' ', '_')
  164.     return name
  165.  
  166. def GetCoverHTML():
  167.     ''' Generate a HTML string which displays the cover image of this book, according to the user's input
  168.     '''
  169.    coverHtml = '<div style="float:right;margin-left:10px;margin-bottom:10px;">'
  170.    rawStr = raw_input('Input a link to the image or the name of the image file or a HTML string:\r\n')
  171.  
  172.    if rawStr.find('<')> -1:
  173.        coverHtml += rawStr
  174.    elif re.match('^http://', rawStr):
  175.        coverHtml += '<img src="'+rawStr+'" alt="Cover Image: '+rawStr+'">'
  176.    else:
  177.        coverHtml += '<img src="cover.jpg" alt="cover.jpg">'
  178.  
  179.    coverHtml += '</div>'
  180.  
  181.    return coverHtml
  182.  
  183. def GetUsage():
  184.    ''' Return the usage information of the application
  185.     '''
  186.    usage = '''
  187. notesplitter.py
  188.  
  189. Split text files in markdown syntax and formatted according to my habit into seperate ones, meanwhile, offer the ability to convert markdown syntax into HTML.
  190.  
  191. Options
  192.  
  193. -h/--help
  194.     Print this message
  195.  
  196. -m <mode>/--mode=<mode>
  197.     Valid modes are 'all','html', and 'mkd', default is 'mkd'
  198.  
  199. -d <dir_path>/--directory=<dir_path>
  200.     In which directory should the files be placed in, if not specified, the current directory is used
  201.  
  202. -s/--sub
  203.     If specified, the title of the source file will be taken as the name of a sub directory under the dir_path, all files will be placed in the sub dir
  204.  
  205. -f/--force
  206.     When the files's directory already exists, this option will force to remove it recursively
  207.  
  208. -n/--noindex
  209.    By default, an index will be created after the source file is splitted, if this option is given, no index will be created
  210.  
  211. -c/--cover
  212.    If specified, the application will prompt to get the URL or HTML string displaying an image from the user; By default, the application supposes that an image named 'cover.jpg' exists in the same directory with all of the files
  213.  
  214. Samples:
  215.  
  216. python notesplitter.py -d notes -s -f --mode=all --noindex -c my_note.mkd
  217.  
  218. Author:     Lenin Lee
  219. Version:    %s
  220. Date:       %s
  221.    '''
  222.  
  223.     return usage % (_version, _date)
  224.  
  225. if __name__ == '__main__':
  226.     # Default mode is mkd
  227.     mode = 'mkd'
  228.     # By default, directory won't be removed when exists
  229.     force = False
  230.     # By default, put files in the current directory
  231.     dirPath = None
  232.     # By default, put files in the specified directory directly
  233.     makeSubDir = False
  234.     # By default, an index will be created after the source file is splitted
  235.     createIndex = True
  236.     # A HTML string displaying the cover image of this book
  237.     coverHtml = ''
  238.  
  239.     # Parse options and arguments
  240.     try:
  241.         opts,args = getopt.getopt(sys.argv[1:], 'hfsncm:d:', ['help','force','sub', 'noindex', 'cover', 'mode=','directory='])
  242.         # Parse options
  243.         if opts.count(('-h',''))> 0 or opts.count(('--help',''))> 0:
  244.             print GetUsage()
  245.             sys.exit(0)
  246.         for o,a in opts:
  247.             if o == '-m' or o == '--mode':
  248.                 if a == 'mkd' or a == 'html' or a == 'all':
  249.                     mode = a
  250.                 else:
  251.                     raise getopt.GetoptError,'Option '+o+' only accepts "mkd","html" and "all" as its value !'
  252.             elif o == '-f' or o == '--force':
  253.                 force = True
  254.             elif o == '-d' or o == '--directory':
  255.                 dirPath = a
  256.             elif o == '-s' or o == '--sub':
  257.                 makeSubDir = True
  258.             elif o == '-n' or o == '--noindex':
  259.                 createIndex = False
  260.             elif o == '-c' or o == '--cover':
  261.                 coverHtml = GetCoverHTML()
  262.         # Parse arguments
  263.         if len(args) == 0:
  264.             raise getopt.GetoptError,'No source files given !'
  265.     except getopt.GetoptError,err:
  266.         print err
  267.         print GetUsage()
  268.         sys.exit(1)
  269.  
  270.     # Offer the ability to manipulate multiple files
  271.     for filePath in args:
  272.         # Split the file into several parts, each stored in a dictionary
  273.         rawContents = SplitFileContent(filePath)
  274.         # If the former operation succeeds:
  275.         if rawContents[0]:
  276.             # Prepare the depositary directory
  277.             try:
  278.                 dirPath = MkDir(dirPath, makeSubDir, rawContents[1], force)
  279.             except OSError,err:
  280.                 print err
  281.                 sys.exit(1)
  282.  
  283.             # If the mode is 'all', split the source file into seperate ones and convert them into HTML files
  284.             if mode == 'all':
  285.                 status = ExportToFiles(rawContents[1], 'mkd', dirPath, createIndex, coverHtml)
  286.                 if not status[0]:
  287.                     print status[1]
  288.                     sys.exit(1)
  289.  
  290.                 status = ExportToFiles(rawContents[1], 'html', dirPath, createIndex, coverHtml)
  291.                 if not status[0]:
  292.                     print status[1]
  293.                     sys.exit(1)
  294.             else:
  295.                 status = ExportToFiles(rawContents[1], mode, dirPath, createIndex, coverHtml)
  296.                 if not status[0]:
  297.                     print status[1]
  298.                     sys.exit(1)
  299.  
  300.             print 'Operation completed !'
  301.         # If the split operation fails, print the error message and exit
  302.         else:
  303.             print rawContents[1]
  304.             sys.exit(1)

由于此脚本是针对我自己习惯的笔记格式编写的，所以不具有通用性。写这个脚本的主要收获，是让我了解了Python程序处理命令行参数的方式，getopt模块的确很好用。

程序与笔记示例可以在这里下载:

使用如下命令查看帮助信息:

python notesplitter.py -h

Tags :

随机日志

來留言吧!


Please copy the string dpLKQ4 to the field below:

*
To prove you're a person (not a spam script), type the security word shown in the picture. Click on the picture to hear an audio file of the word.
Click to hear an audio file of the anti-spam word

尚未有留言

尚未有留言

留言板RSS 引用 URI

來留言吧!

«
»