zim-wiki team mailing list archive

Thread
Date

Re: import softnotes data to zim

To: Pascal <pascal.legrand@xxxxxxxxxxxxxxx>
From: Jaap Karssenberg <jaap.karssenberg@xxxxxxxxx>
Date: Wed, 19 Jan 2011 21:00:05 +0100
Cc: Zim <zim-wiki@xxxxxxxxxxxxxxxxxxx>
In-reply-to: <20110119132503.19691.69395.launchpad@potassium.ubuntu.com>

On Wed, Jan 19, 2011 at 2:25 PM, Pascal <pascal.legrand@xxxxxxxxxxxxxxx> wrote:
> Hello,
> i'm using for a long time an application named softnotes, i would like
> very much use zim now, but i dont know how to do that.
> I'm not a programmer and dont see how to import data from softnotes to
> zim.
> The best would be a script wich make conversion but i dont know how to
> do that.
> I made a screen capture of softnotes and the xml file associated
> http://plegrand1.free.fr/softnote.png
> http://plegrand1.free.fr/softnote.xml
>
> I think the first step is to convert each rtf note in a zim format, and
> after to import all the notes in zim with the same arborescence.
>
> Does somebody could help me to give some indication to realise this
> conversion script ?

Attached a quick hack that somewhat works. It uses pyth as the rtf
parser, see http://pypi.python.org/pypi/pyth/ (download, untar+unzip
and put it in the same folder as the script).

Be aware the conversion will not be one-to-one though. The biggest issue
I see is that it drops strike-through text; it looks like that is not
supported by the rtf parser.

Hope this helps,

Jaap

Attachment: Screenshot.png
Description: PNG image

#!/usr/bin/python

# -*- coding: utf-8 -*-

# Copyright 2011 Jaap Karssenberg <pardus@xxxxxxxx>

# Simple script to convert softnote XML to a zim notebook folder
# Written as a quick hack, so quality of results may vary

# This script needs pyth, see http://pypi.python.org/pypi/pyth/

# TODO:
# * Looks like we lose strike formatting - blame pyth; is another rtf parser available?
# * Nested formatting not supported by zim, but we output it anyway


import os
import sys
sys.path.append('./pyth-0.5.6/')

from xml.etree import ElementTree
from StringIO import StringIO

from pyth.plugins.rtf15.reader import Rtf15Reader
from pyth.plugins.xhtml.writer import XHTMLWriter

from zim.fs import Dir, TmpFile
from zim.notebook import Notebook, Path

import zim.stores.xml
import zim.stores.files


def parse(data):
	'''Converts softnote xml to xml representing a zim notebook

	Each 'XRECORDDATA' element in the input becomes a 'page' element.
	Records with 'XPARENT' equal to '0' are placed below a 'page'
	named after their 'XCATALOG' value (created the first time that
	category is seen); all other records are placed below the page
	that was created for the record whose 'XID' matches their
	'XPARENT'. The rtf in 'XBODY' is converted to wiki text and stored
	as the text of the page element.

	@param data: softnote xml as a string
	@returns: xml string with a 'section' root element containing
	nested 'page' elements
	@raises AssertionError: if a sub-note is found before its parent
	'''
	tree = ElementTree.fromstring(data)
	notebook = ElementTree.Element('section')

	categories = {} # top level folders by name
	pages = {} # pages by id

	for xrecord in tree.findall('XRECORDDATA'):
		#~ print 'FOUND:', map(xrecord.findtext, ('XCATALOG', 'XSUBJECT', 'XID', 'XPARENT'))

		parentid = xrecord.findtext('XPARENT')
		if parentid == '0':
			# we found a top node within category
			category = xrecord.findtext('XCATALOG')
			if category not in categories:
				# first time we see this category
				el = ElementTree.Element('page', {'name': category})
				el.tail = '\n'
				notebook.append(el)
				categories[category] = el
			parent = categories[category]
		else:
			# some sub-note - relies on parents preceding their
			# children in the input
			assert parentid in pages, 'Found sub-note before parent :('
			parent = pages[parentid]

		title = xrecord.findtext('XSUBJECT')
		name = title.replace(':', ' ') # will confuse hierarchy
		# Clean up the ':'-free name, not the raw title, otherwise the
		# replacement above is silently thrown away
		name = Notebook.cleanup_pathname(name, purge=True) # make a valid name
		el = ElementTree.Element('page', {'name': name})
		el.tail = '\n'
		parent.append(el)

		# remember the element so later records can attach below it
		noteid = xrecord.findtext('XID')
		pages[noteid] = el

		el.text = convert_rtf(xrecord.findtext('XBODY'))

	return ElementTree.tostring(notebook)

def convert_rtf(rtf):
	'''Converts rtf to zim wiki text

	The rtf is first turned into XHTML by pyth and that XHTML is then
	handed to L{convert_html()}.

	@param rtf: the rtf source as a string
	@returns: the wiki text as produced by L{convert_html()}
	'''
	stream = StringIO(rtf)
	document = Rtf15Reader.read(stream)
	xhtml = XHTMLWriter.write(document, pretty=True)
	return convert_html(xhtml.read())

def convert_html(html):
	'''Converts html to zim wiki text

	@param html: html source as a string, must be parseable as XML
	@returns: the text obtained by serializing the parsed tree with
	L{_serialize_html()}
	'''
	root = ElementTree.fromstring(html)
	return _serialize_html(root)

def _serialize_html(tree):
	text = tree.text or ''
	for el in tree:
		if el.tag == 'strong':
			text += "**" + _serialize_html(el) + "**"
		elif el.tag == 'em':
			text += "//" + _serialize_html(el) + "//"
		elif el.tag == 'u':
			text += "__" + _serialize_html(el) + "__"
		elif el.tag == 'strike':
			text += "~~" + _serialize_html(el) + "~~"
		else:
			text += _serialize_html(el)
		text += el.tail or ''
	return text


def dump(xml, folder):
	'''Takes zim notebook in XML format and dump to file structure'''
	sourcefile = TmpFile('softnote2zim-tmp')
	sourcefile.write(xml)
	source = zim.stores.xml.Store(FakeNotebook(), Path(':'), file=sourcefile)

	target = zim.stores.files.Store(FakeNotebook(), Path(':'), dir=Dir(folder))

	for s_page in source.walk():
		text = source.get_node(s_page).text
		#~ print 'PAGE:', s_page.name
		#~ print text

		t_page = target.get_page(s_page)
		assert not t_page.source.exists(), 'Don\'t want to overwrite %s' % t_page.source.path
		print 'Writing:', t_page.source.path
		t_page.source.write(text)


class FakeNotebook(object):
	'''Minimal stand-in that is passed to the store constructors in
	place of a notebook object. It only defines the C{endofline}
	attribute.
	'''

	# 'dos' on Windows (os.name == 'nt'), 'unix' everywhere else
	endofline = 'dos' if os.name == 'nt' else 'unix'



if __name__ == '__main__':
	if len(sys.argv) == 3:
		input = sys.argv[1]
		xml = parse(open(input).read())
		#~ print xml
		dump(xml, sys.argv[2])
	else:
		print 'Usage: softnote2zim.py SOFTNOTE_XML OUTPUT_FOLDER'
		print 'output folder should be a new empty folder'

References

import softnotes data to zim
From: Pascal, 2011-01-19