zim-wiki team mailing list archive
-
zim-wiki team
-
Mailing list archive
-
Message #01639
Re: howto strip metadata header
This will process a directory tree and remove the first 4 lines of
each .txt file. By default it only prints the result to stdout; pass -w
to actually overwrite the files. I did a quick test on a copy of my tree,
but please make sure to back up your data first, and if it blows up, it's
your fault.
:)
#! /usr/bin/env python
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# The name of the author may not be used to endorse or promote
# products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import logging as log
import argparse
import os
import sys
def process_directory(dir, write=None, header_lines=4):
    """Strip the leading header lines from every ``.txt`` file under *dir*.

    The tree rooted at *dir* is walked recursively.  For each regular file
    whose name ends in ``.txt`` the first *header_lines* lines are dropped;
    the remaining lines are either written back to the file or printed to
    stdout.

    Args:
        dir: Root directory to process (absolute or relative path).
        write: When true, overwrite each file in place; when false, print
            the stripped content to stdout instead.  When ``None`` (the
            default) the module-level ``options.write`` set by the script
            entry point is used.
        header_lines: Number of leading lines to drop from each file
            (expected to be non-negative).  Files with fewer lines end up
            empty.

    Returns:
        ``True`` if the whole tree was walked without ``os.walk`` reporting
        an error, ``False`` otherwise.
    """
    if write is None:
        # Fall back to the parsed command-line options of the script.
        write = options.write

    walk_errors = []

    def on_walk_error(err):
        # Log the problem like before, but also remember it for the result.
        log.error(err)
        walk_errors.append(err)

    for root, dirs, files in os.walk(dir, onerror=on_walk_error):
        log.info("Processing directory %s" % root)
        if not files:
            log.info("No files in directory; skipping")
            continue
        # Iterate over existing files.  Paths are joined onto ``root`` rather
        # than chdir()-ing into it: os.walk must not have the working
        # directory changed under it when given a relative path.
        for name in sorted(files):
            path = os.path.join(root, name)
            # Skip non-files (e.g. dangling symlinks)
            if not os.path.isfile(path):
                log.debug("Skipping non-file: %s" % name)
                continue
            # Only real .txt pages; not backups such as "page.txt~"
            if not name.endswith('.txt'):
                continue
            log.debug("Processing: %s" % name)
            with open(path, 'r') as f:
                lines = f.readlines()
            body = lines[header_lines:]
            if write:
                with open(path, 'w') as f:
                    f.writelines(body)
            else:
                # Lines keep their own newlines, so emit them verbatim.
                sys.stdout.writelines(body)
        log.debug("Finished processing directory %s" % root)
    return not walk_errors
if __name__ == "__main__":
    # Command-line entry point: parse arguments, configure logging, validate
    # the given directories and process each of them in turn.
    parser = argparse.ArgumentParser(description="Unzim")
    parser.add_argument('directory', nargs='+')
    parser.add_argument("-w", "--write", dest="write", action="store_true",
                        help="Actually overwrite the files (otherwise print to stdout)")
    # default=0 so the comparisons below never see None when -v is absent.
    parser.add_argument("-v", "--verbose", action="count", dest="verbose", default=0,
                        help="Print more output (up to -vv)")
    # NOTE: process_directory() reads this module-level name.
    options = parser.parse_args()

    if options.verbose >= 2:
        LOG_LEVEL = log.DEBUG
    elif options.verbose == 1:
        LOG_LEVEL = log.INFO
    else:
        LOG_LEVEL = log.WARNING
    log.basicConfig(level=LOG_LEVEL, format="%(levelname)s: %(message)s")
    log.debug("Options: %s" % options)

    # nargs='+' already makes argparse reject an empty directory list, so no
    # separate "no directories given" check is needed here.

    # Verify arguments are directories; report all offenders before exiting.
    not_directories = [path for path in options.directory
                       if not os.path.isdir(path)]
    for path in not_directories:
        log.critical("%s is not a directory" % path)
    if not_directories:
        sys.exit(2)

    # Iterate over directories
    consistent = True
    for path in options.directory:
        # Only an explicit False signals a problem; a None result is not
        # treated as a failure.
        if process_directory(path) is False:
            consistent = False
    if not consistent:
        sys.exit(1)
Follow ups
References