zim-wiki team mailing list archive
-
zim-wiki team
-
Mailing list archive
-
Message #01338
NoteCase converter
-
To:
zim-wiki@xxxxxxxxxxxxxxxxxxx
-
From:
Jigho <jigho@xxxxxxx>
-
Date:
Sun, 28 Aug 2011 18:59:34 +0200
-
User-agent:
Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.20) Gecko/20110805 Lightning/1.0b2 Thunderbird/3.1.12
Hi all,
I wrote a converter for NoteCase files (http://www.virtual-sky.com/).
Not all features are supported, but it seems to work with the basic
features, which are the ones I use.
The Python script is my first one and was written mainly for my own needs
(yes, I'm joining the Zim fans!), but others may find it useful.
Script is enclosed in this e-mail, along with a sample NoteCase file
(.ncd).
I could post it to the Wiki
(http://zim-wiki.org/wiki/doku.php?id=tips_and_tricks) if needed.
Jigho
Attachment:
notecase.ncd
Description: application/notecase-plain
#!/usr/bin/python
# Simple script to convert NoteCase Document to a Zim notebook folder
#
# NoteCase reference:
# http://notecase.sourceforge.net/ (Free version, discontinued)
# http://www.virtual-sky.com/ (Pro version)
#
# Based on BeautifulSoup (you need to install it before running notecase2zim):
# http://www.crummy.com/software/BeautifulSoup/
#
# Adapted to my use of NoteCase and Zim => others may want to adapt it
# For instance:
# Color "red" in NoteCase => I use "italic" in Zim
# Background Color "grey" in NoteCase => Title 3 in Zim
#
# Usage :
# -------
# 1. Save NoteCase document to .ncd format (plain text, no compression)
# 2. This script assumes the name is "notecase.ncd". This can be changed below
# 3. Run: python notecase2zim.py
# 4. Get a Folder named "notecase.zim" with the main file "notebook.zim" inside
#
# v1.1
# Jigho 2011
# Contact: https://launchpad.net/~jigho
#
import os
import shutil
import sys
import re
import datetime
sys.path.append('./BeautifulSoup')
from BeautifulSoup import BeautifulSoup
notecasefile = 'notecase.ncd'
def create_file_zim(path='notebook.zim', name='Notes', endofline='dos'):
    """Write the Zim notebook configuration file.

    path      -- file to create, relative to the current working directory
    name      -- value stored in the "name=" entry of the [Notebook] section
    endofline -- value stored in the "endofline=" entry

    Called without arguments it writes exactly what this script has always
    written: "notebook.zim" with name "Notes" and endofline "dos".
    """
    content = '[Notebook]\nname=%s\nversion=0.4\nendofline=%s' % (
        name, endofline)
    # "with" guarantees the handle is closed even if the write fails.
    with open(path, 'w') as fileZim:
        fileZim.write(content)
def process_title(titre, date):
    """Create the Zim page file for one note and write its header.

    titre -- the <dt> element holding the note title
             (assumed to be a BeautifulSoup 3 Tag -- see main())
    date  -- value written after "Creation-Date: " (converted with str())

    The file name is the title with spaces turned into underscores and
    '/' and '"' removed, plus the ".txt" extension.  The file is created
    in the current working directory.

    Returns the file object, still open for writing; the caller writes the
    page body to it and is responsible for closing it.
    """
    # Some titles are plain, but some carry extra information that is not
    # used in Zim.  Every branch must yield a UTF-8 byte string: mixing in
    # a unicode object makes the str() call and the file writes below fail
    # on non-ASCII titles (implicit ASCII encoding in Python 2).
    if titre.span:
        # NOTE(review): assumes the title text is the second child of the
        # <span> -- confirm against real NoteCase output.
        titre2 = str(titre.span.contents[1])
    elif titre.string:
        titre2 = titre.string.encode('utf-8')
    else:
        titre2 = str(titre.contents[1])

    # Replace white space and drop / and " to get a usable file name
    filename = titre2 + '.txt'
    for old, new in ((' ', '_'), ('/', ''), ('"', '')):
        filename = filename.replace(old, new)
    output = unicode(filename, 'utf-8', errors='ignore')

    # Some verbosity, useful on large contents to show that the program is
    # still processing.  A single pre-formatted argument keeps the output
    # identical whichever print syntax is in effect.
    print('Creating file:  ' + output)

    fileOut = open(output, 'w')
    # Standard information at the start of any Zim file
    fileOut.write('Content-Type: text/x-zim-wiki\n')
    fileOut.write('Wiki-Format: zim 0.4\n')
    fileOut.write('Creation-Date: ' + str(date) + '\n')
    fileOut.write('\n====== ' + titre2 + ' ======\n')
    fileOut.write('\n')
    return fileOut
def create_subdir(repertoire):
    """Create the sub-page directory of a page file and change into it.

    repertoire -- the page's file object; only its ``name`` attribute is
                  used.

    The directory is the file name without its trailing ".txt".  Only the
    suffix is removed, so a title that itself contains ".txt" still gets a
    directory matching its page file.  An already existing directory is
    reused instead of raising.
    """
    suffix = '.txt'
    rep = repertoire.name
    if rep.endswith(suffix):
        rep = rep[:-len(suffix)]
    if not os.path.isdir(rep):
        os.mkdir(rep)
    os.chdir(rep)
def process_format(c, fichier, formatString):
    """Write the content of a formatting tag wrapped in a Zim marker.

    c            -- the formatting element (underline, bold, italic, ...)
    fichier      -- open page file the wiki text is written to
    formatString -- Zim marker written before and after the content,
                    e.g. '**'

    When the content ends with a <br> followed by blank text, both are
    removed from the tree and the line break is written after the closing
    marker, so the new line itself is not formatted.
    """
    newLine = False
    # Open Wiki format
    fichier.write(formatString)

    children = c.contents
    if len(children) > 1:
        before_last, last = children[-2], children[-1]
        ends_with_br = (before_last.__class__.__name__ == 'Tag'
                        and before_last.name == 'br')
        # Only drop the last child when it is blank text.  Removing it
        # unconditionally would lose real content such as "line2" in
        # <b>line1<br>line2</b>.
        last_is_blank = (last.__class__.__name__ != 'Tag'
                         and not last.strip())
        if ends_with_br and last_is_blank:
            before_last.extract()
            last.extract()
            newLine = True

    # Process content (recursively!)
    process_content(c, fichier, formatString)
    # Close Wiki format
    fichier.write(formatString)
    # Trailing newline moved outside of the formatting markers
    if newLine:
        fichier.write('\n')
def process_content(contenu, fichier, currentFormat):
    """Write a sequence of NoteCase nodes to a Zim page as wiki text.

    contenu       -- iterable of nodes (tags and text) to convert
    fichier       -- open page file the wiki text is written to
    currentFormat -- Zim marker currently open ('' if none).  It is closed
                     before each line break and reopened after it, so a
                     format spanning several lines is closed on every line.
                     This trick would need to be enhanced to support
                     several nested formats.

    A <dl> child is a sub-note: a directory named after the current page is
    created and the note is converted inside it by process_page().
    """
    # Tags that map directly to a symmetrical Zim marker
    simple_formats = {
        'u': '__',  # underline
        'b': '**',  # bold
        'i': '//',  # italic
        's': '~~',  # strike-through
    }
    # <span> styles rendered with a Zim marker (personal mapping: adapt it
    # to your own use of colors in NoteCase)
    span_formats = {
        'color:#ff0000': '//',  # red => italic
        'color:#0000ff': '**',  # blue => bold
        'color:#00ff00': '**',  # green => bold
    }
    grey_background = 'background-color:#bfbfbf'

    for c in contenu:
        if c.__class__.__name__ != 'Tag':
            ligne = c.string.encode("UTF-8")
            # Delete the newline at the start of the text: it follows a
            # <br>, which has already produced the line break.
            fichier.write(re.sub("^\n", '', ligne))
            continue

        if c.name == 'dl':
            # <dl> stands for a new note, i.e. a new Zim file
            create_subdir(fichier)
            process_page(c)
            os.chdir('..')
        elif c.name == 'br':
            # New line: close the current format, break, reopen it
            fichier.write(currentFormat)
            fichier.write('\n')
            fichier.write(currentFormat)
        elif c.name in simple_formats:
            process_format(c, fichier, simple_formats[c.name])
        elif c.name == 'span':
            # .get() returns None instead of raising KeyError when the
            # <span> has no style attribute; it is then handled as an
            # unknown span below.
            style = c.get('style')
            if style in span_formats:
                process_format(c, fichier, span_formats[style])
            elif style == grey_background:
                # Grey background => Zim heading.  The heading markup is
                # not symmetrical, so the currentFormat trick is not used;
                # Zim seems to auto-close the heading at the end of line.
                fichier.write('===== ')
                process_content(c, fichier, currentFormat)
                currentFormat = ''
            else:
                # Other <span> contents are treated as plain text.
                # Add more cases above according to your needs.
                print('WARNING : unknown SPAN type %s' % (c.attrs,))
                process_content(c, fichier, currentFormat)
        elif c.name == 'p':
            # <p> itself is ignored, only its content is converted
            process_content(c, fichier, currentFormat)
        elif c.name == 'a':
            # Link; an anchor without href gives an empty target instead
            # of a KeyError.
            href = c.get('href') or u''
            fichier.write('[[')
            fichier.write(href.encode('utf-8'))
            fichier.write('|')
            process_content(c, fichier, currentFormat)
            fichier.write(']]')
        else:
            # Tag not dealt with: report it and leave a marker in the
            # page.  Add a specific branch above if you need it.
            print('WARNING, unknown tag:  %s' % c.name)
            # .string is None when the tag does not hold a single string.
            texte = c.string
            if texte is None:
                texte = u''
            # Build the marker as unicode and encode once, so a non-ASCII
            # text cannot trigger an implicit ASCII conversion.
            fichier.write(
                (u'TAG ' + c.name + u' / ' + texte).encode("UTF-8"))
def process_page(page):
    """Convert one NoteCase <dl> element into Zim page files.

    page -- the <dl> element; its children are processed in order:
            <dt>  starts a new page file (see process_title),
            <dd>  is converted into the body of the current page,
            a "property:date_created" comment sets the creation date used
            for the pages created after it (today's date until then).

    Each page file is closed as soon as the next page starts or the
    element is finished, including when an error interrupts processing.
    """
    creation = datetime.date.today()
    fileOut = None
    try:
        for a in page.contents:
            kind = a.__class__.__name__
            if kind == 'Tag':
                if a.name == 'dt':
                    # The previous page is complete: release its handle
                    if fileOut is not None:
                        fileOut.close()
                    fileOut = process_title(a, creation)
                elif a.name == 'dd':
                    if fileOut is None:
                        # No title seen yet: there is no page to write to
                        print('WARNING : <dd> found before any <dt>, '
                              'content skipped')
                        continue
                    process_content(a.contents, fileOut, '')
            elif kind == 'Comment':
                m = re.match(
                    r"<!--property:date_created=(.*)-->$", str(a))
                if m:
                    # NOTE(review): the value is passed to
                    # date.fromtimestamp(), i.e. it is presumably a Unix
                    # timestamp -- confirm against the NoteCase format.
                    creation = datetime.date.fromtimestamp(
                        float(m.group(1)))
    finally:
        if fileOut is not None:
            fileOut.close()
def main(repertoire):
    """Convert the NoteCase document into a Zim notebook.

    repertoire -- existing directory that receives the notebook.

    The document named by the module-level ``notecasefile`` is read from
    the current working directory.  The process then changes into
    ``repertoire``, writes the notebook file and converts the first <dl>
    found under <html><body>.
    """
    # Read everything up front and close the input before changing
    # directory.
    with open(notecasefile, 'r') as source:
        xml = source.read()
    soup = BeautifulSoup(xml, convertEntities=BeautifulSoup.XML_ENTITIES)
    level0 = soup.html.body.dl
    os.chdir(repertoire)
    create_file_zim()
    process_page(level0)
if __name__ == '__main__':
    # Output folder: same name as the document with ".ncd" replaced by
    # ".zim".  The dot is escaped so that only a real ".ncd" extension
    # matches.
    zimdir = re.sub(r"\.ncd$", ".zim", notecasefile)
    if zimdir == notecasefile:
        # No ".ncd" extension: never reuse the input file name as the
        # output directory name.
        zimdir = notecasefile + '.zim'
    # Uncomment to delete the result of a previous run first:
    #shutil.rmtree(zimdir)
    os.mkdir(zimdir)
    main(zimdir)
Follow ups