zim-wiki team mailing list archive
-
zim-wiki team
-
Mailing list archive
-
Message #01062
Re: import softnotes data to zim
On Wed, Jan 19, 2011 at 2:25 PM, Pascal <pascal.legrand@xxxxxxxxxxxxxxx> wrote:
> Hello,
> I have been using an application named softnotes for a long time. I would
> very much like to use zim now, but I don't know how to do that.
> I'm not a programmer and don't see how to import data from softnotes into
> zim.
> The best would be a script which does the conversion, but I don't know how to
> do that.
> I made a screen capture of softnotes and the xml file associated
> http://plegrand1.free.fr/softnote.png
> http://plegrand1.free.fr/softnote.xml
>
> I think the first step is to convert each rtf note into zim format, and
> then to import all the notes into zim with the same tree structure.
>
> Could somebody help me by giving some pointers on how to write this
> conversion script?
Attached a quick hack that somewhat works. It uses pyth as the rtf
parser, see http://pypi.python.org/pypi/pyth/ (download, untar+unzip
and put it in the same folder as the script).
Be aware the conversion will not be one-to-one though. The biggest issue I see
is that it drops strike-through text; it looks like that is not supported
by the rtf parser.
Hope this helps,
Jaap
Attachment:
Screenshot.png
Description: PNG image
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2011 Jaap Karssenberg <pardus@xxxxxxxx>
# Simple script to convert softnote XML to a zim notebook folder
# Written as a quick hack, so quality of results may vary
# This script needs pyth, see http://pypi.python.org/pypi/pyth/
# TODO:
# * Looks like we lose strike formatting - blame pyth, is another parser available for rtf ?
# * Nested formatting not supported by zim, but we output it anyway
import os
import sys
sys.path.append('./pyth-0.5.6/')
from xml.etree import ElementTree
from StringIO import StringIO
from pyth.plugins.rtf15.reader import Rtf15Reader
from pyth.plugins.xhtml.writer import XHTMLWriter
from zim.fs import Dir, TmpFile
from zim.notebook import Notebook, Path
import zim.stores.xml
import zim.stores.files
def parse(data):
    '''Converts softnote xml to xml representing a zim notebook

    Every 'XRECORDDATA' record returned by C{findall()} on the input
    root becomes a 'page' element. Records whose 'XPARENT' is '0' are
    attached to a page named after their 'XCATALOG'; that category page
    is created directly below the root 'section' element the first time
    the category is seen. All other records are attached to the page of
    the record whose 'XID' equals their 'XPARENT'. The text of each page
    is the 'XBODY' of the record converted by C{convert_rtf()}.

    @param data: the softnote xml document as a string
    @returns: the serialized 'section' element as a string
    @raises AssertionError: if a sub-note is listed before its parent
    '''
    tree = ElementTree.fromstring(data)
    notebook = ElementTree.Element('section')
    categories = {} # top level folders by name
    pages = {} # pages by id
    for xrecord in tree.findall('XRECORDDATA'):
        #~ print 'FOUND:', map(xrecord.findtext, ('XCATALOG', 'XSUBJECT', 'XID', 'XPARENT'))
        parentid = xrecord.findtext('XPARENT')
        if parentid == '0':
            # we found a top node within category
            category = xrecord.findtext('XCATALOG')
            if category not in categories:
                # first time we see this category
                el = ElementTree.Element('page', {'name': category})
                el.tail = '\n'
                notebook.append(el)
                categories[category] = el
            parent = categories[category]
        else:
            # some sub-note
            assert parentid in pages, 'Found sub-note before parent :('
            parent = pages[parentid]

        title = xrecord.findtext('XSUBJECT')
        name = title.replace(':', ' ') # will confuse hierarchy
        # Clean up the colon-free name, not the raw title, otherwise
        # the replacement above has no effect on the resulting page name
        name = Notebook.cleanup_pathname(name, purge=True) # make a valid name
        el = ElementTree.Element('page', {'name': name})
        el.tail = '\n'
        parent.append(el)

        # Remember the page by id so later records can refer to it as parent
        pageid = xrecord.findtext('XID')
        pages[pageid] = el

        el.text = convert_rtf(xrecord.findtext('XBODY'))

    return ElementTree.tostring(notebook)
def convert_rtf(rtf):
    '''Converts rtf to zim wiki text

    The rtf string is read with the pyth rtf reader, written out again
    as XHTML and that XHTML is then handed to C{convert_html()}.

    @param rtf: the rtf source as a string
    @returns: the result of C{convert_html()} for the intermediate XHTML
    '''
    rtf_stream = StringIO(rtf)
    document = Rtf15Reader.read(rtf_stream)
    xhtml_stream = XHTMLWriter.write(document, pretty=True)
    return convert_html(xhtml_stream.read())
def convert_html(html):
    '''Converts html to zim wiki text

    @param html: a well-formed (X)HTML document as a string, it is
    parsed with C{ElementTree.fromstring()}
    @returns: the text produced by C{_serialize_html()} for the
    parsed document
    '''
    #~ print "GOT HTML:\n", html
    root = ElementTree.fromstring(html)
    wikitext = _serialize_html(root)
    #~ print "MADE TEXT:\n", wikitext
    return wikitext
def _serialize_html(tree):
text = tree.text or ''
for el in tree:
if el.tag == 'strong':
text += "**" + _serialize_html(el) + "**"
elif el.tag == 'em':
text += "//" + _serialize_html(el) + "//"
elif el.tag == 'u':
text += "__" + _serialize_html(el) + "__"
elif el.tag == 'strike':
text += "~~" + _serialize_html(el) + "~~"
else:
text += _serialize_html(el)
text += el.tail or ''
return text
def dump(xml, folder):
    '''Takes zim notebook in XML format and dump to file structure

    The xml is first written to the temporary file 'softnote2zim-tmp'
    which backs an xml store. Each page found by walking that store is
    then written to the matching page of a files store rooted at
    C{folder}.

    @param xml: the notebook xml as a string
    @param folder: path of the output folder
    @raises AssertionError: if the source of a target page already exists
    '''
    tmpfile = TmpFile('softnote2zim-tmp')
    tmpfile.write(xml)

    root = Path(':')
    xmlstore = zim.stores.xml.Store(FakeNotebook(), root, file=tmpfile)
    filestore = zim.stores.files.Store(FakeNotebook(), root, dir=Dir(folder))

    for page in xmlstore.walk():
        content = xmlstore.get_node(page).text
        #~ print 'PAGE:', page.name
        #~ print content
        outfile = filestore.get_page(page).source
        # Refuse to clobber anything that is already in the output folder
        assert not outfile.exists(), "Don't want to overwrite %s" % outfile.path
        print('Writing: %s' % outfile.path)
        outfile.write(content)
class FakeNotebook(object):
    '''Minimal stand-in passed to the store constructors in place of a
    notebook object; it only carries the C{endofline} attribute.
    '''

    # 'dos' when running on Windows (os.name is 'nt'), 'unix' otherwise
    endofline = 'dos' if os.name == 'nt' else 'unix'
# Command line entry point: softnote2zim.py SOFTNOTE_XML OUTPUT_FOLDER
if __name__ == '__main__':
    if len(sys.argv) == 3:
        xmlfile, folder = sys.argv[1], sys.argv[2]
        # Close the input file explicitly instead of leaving the handle
        # open; try/finally is used so no newer syntax is required
        fh = open(xmlfile)
        try:
            data = fh.read()
        finally:
            fh.close()
        xml = parse(data)
        #~ print xml
        dump(xml, folder)
    else:
        print('Usage: softnote2zim.py SOFTNOTE_XML OUTPUT_FOLDER')
        print('output folder should be a new empty folder')
References