← Back to team overview

zim-wiki team mailing list archive

Re: howto strip metadata header

 

This will process a directory tree and remove the first 4 lines of
each .txt file.  I did a quick test on a copy of my tree, but please make
sure to back up your data first, and if it blows up, it's your fault.
:)
#! /usr/bin/env python

# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
# 
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 
# The name of the author may not be used to endorse or promote
# products derived from this software without specific prior written
# permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import logging as log
import argparse
import os
import sys

def process_directory(dir, write=None, header_lines=4):
    """Strip the leading header lines from every ``.txt`` file under *dir*.

    The tree rooted at *dir* is walked recursively.  Each regular file whose
    name ends in ``.txt`` has its first *header_lines* lines dropped; the
    remainder is either written back to the file or sent to standard output.

    The working directory is never changed.  Changing it while ``os.walk``
    is iterating over a relative *dir* makes the walk resolve the remaining
    subdirectories against the wrong location.

    Args:
        dir: Root directory to process (name kept for backward
            compatibility even though it shadows the builtin).
        write: True to overwrite the files in place, False to print the
            stripped content to stdout.  When None (the default) the value
            is taken from the module-level ``options.write`` set by the
            command-line entry point.
        header_lines: Number of leading lines to remove from each file.

    Returns:
        True if every directory could be listed and every matching file
        could be processed; False if any error was logged along the way.
    """
    if write is None:
        # Backward-compatible fallback for callers that pass only ``dir``:
        # the script entry point stores the parsed arguments in ``options``.
        write = options.write

    failures = []

    def note_failure(err):
        # Log and remember the error, then let processing continue.
        log.error(err)
        failures.append(err)

    for root, dirs, files in os.walk(dir, onerror=note_failure):
        log.info("Processing directory %s", root)

        if not files:
            log.info("No files in directory; skipping")
            continue

        for name in sorted(files):
            path = os.path.join(root, name)

            # os.walk reports every non-directory entry as a "file"
            # (broken symlinks, FIFOs, ...); only touch regular files.
            if not os.path.isfile(path):
                log.debug("Skipping non-file: %s", path)
                continue

            # Match on the extension, not on a substring, so names such as
            # "page.txt.bak" are left alone.
            if not name.endswith('.txt'):
                continue

            log.debug("Processing: %s", path)
            try:
                _strip_header(path, write, header_lines)
            except EnvironmentError as err:
                note_failure(err)

        log.debug("Finished processing directory %s", root)

    return not failures


def _strip_header(path, write, header_lines):
    """Drop the first *header_lines* lines of the file at *path*.

    The file is read completely before it is reopened for writing, so a
    failed read cannot leave a truncated file behind.  Binary mode keeps the
    remaining bytes (encoding and line endings) exactly as they were.
    """
    with open(path, 'rb') as src:
        body = src.readlines()[header_lines:]

    if write:
        with open(path, 'wb') as dst:
            dst.writelines(body)
    else:
        # Each line already carries its own newline, so write it verbatim.
        # Python 3 exposes the byte stream as ``sys.stdout.buffer``; on
        # Python 2 ``sys.stdout`` accepts byte strings directly.
        out = getattr(sys.stdout, 'buffer', sys.stdout)
        out.writelines(body)
        out.flush()


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, configure logging,
    # validate the directories and process each of them in turn.

    parser = argparse.ArgumentParser(description="Unzim")

    # nargs='+' makes argparse itself reject an empty directory list (usage
    # message, exit status 2), so no separate emptiness check is needed.
    parser.add_argument('directory', nargs='+')
    parser.add_argument("-w", "--write", dest="write", action="store_true", help="Actually overwrite the files (otherwise print to stdout)")
    # default=0: without it the count is None when -v is absent, and
    # comparing None with an integer raises TypeError on Python 3.
    parser.add_argument("-v", "--verbose", action="count", dest="verbose", default=0, help="Print more output (up to -vv)")

    # Kept as a module-level name: process_directory reads it.
    options = parser.parse_args()

    if options.verbose >= 2:
        LOG_LEVEL = log.DEBUG
    elif options.verbose == 1:
        LOG_LEVEL = log.INFO
    else:
        LOG_LEVEL = log.WARNING

    log.basicConfig(level=LOG_LEVEL, format="%(levelname)s: %(message)s")

    log.debug("Options: %s", options)

    # Verify arguments are directories; report every bad one before exiting.
    invalid = False
    for directory in options.directory:
        if not os.path.isdir(directory):
            log.critical("%s is not a directory", directory)
            invalid = True
    if invalid:
        sys.exit(2)

    # Iterate over directories.  The outcome is recorded in `consistent`;
    # the exit status does not depend on it.
    consistent = True
    for directory in options.directory:
        if not process_directory(directory):  # Falsy result means a problem was found
            consistent = False

Follow ups

References