← Back to team overview

linuxdcpp-team team mailing list archive

[Branch ~dcplusplus-team/dcplusplus/trunk] Rev 3335: Added support for CDATA in XML parser

 

------------------------------------------------------------
revno: 3335
committer: Fredrik Ullner <ullner@xxxxxxxxx>
branch nick: dcplusplus
timestamp: Thu 2013-08-08 23:23:51 +0200
message:
   Added support for CDATA in XML parser
modified:
  dcpp/SimpleXMLReader.cpp
  dcpp/SimpleXMLReader.h
  test/testxml.cpp


--
lp:dcplusplus
https://code.launchpad.net/~dcplusplus-team/dcplusplus/trunk

Your team Dcplusplus-team is subscribed to branch lp:dcplusplus.
To unsubscribe from this branch go to https://code.launchpad.net/~dcplusplus-team/dcplusplus/trunk/+edit-subscription
=== modified file 'dcpp/SimpleXMLReader.cpp'
--- dcpp/SimpleXMLReader.cpp	2013-01-18 21:28:38 +0000
+++ dcpp/SimpleXMLReader.cpp	2013-08-08 21:23:51 +0000
@@ -395,6 +395,28 @@
 	return true;
 }
 
+bool SimpleXMLReader::cdata() { // Consumes buffered input for a CDATA section, appending it to `value` until the "]]>" terminator is seen.
+	while(bufSize() > 0) {
+		int c = charAt(0);

+		if(c == ']') { // a ']' may start the "]]>" terminator, so look ahead before consuming it
+			if(!needChars(3)) { // presumably fewer than 3 chars are buffered, so the terminator cannot be tested yet -- TODO confirm needChars semantics
+				return true; // leave the ']' unconsumed; returning true keeps the caller from reporting an error
+			}
+			if(charAt(1) == ']' && charAt(2) == '>') {
+				state = STATE_CONTENT; // terminator found: switch back to content parsing
+				advancePos(3); // skip "]]>" itself; it is not appended to value
+				return true;
+			}
+		}

+		append(value, MAX_VALUE_SIZE, c); // every other char, including a ']' that does not start "]]>", is kept verbatim
+		advancePos(1);
+	}

+	return true; // NOTE(review): every path returns true, so the caller's `|| error(...)` cannot be triggered by this return value
+}
+
 bool SimpleXMLReader::entref(string& d) {
 	if(d.size() + 1 >= MAX_VALUE_SIZE) {
 		error("Buffer overflow");
@@ -684,9 +706,14 @@
 			comment()
 			|| error("Error while parsing comment");
 			break;
+		case STATE_CDATA:
+			cdata()
+			|| error("Error while parsing CDATA");
+			break;
 		case STATE_CONTENT:
 			skipSpace(true)
-			|| literal(LITN("<!--"), false, STATE_COMMENT)
+			|| literal(LITN("<!--"), false, STATE_COMMENT)
+			|| literal(LITN("<![CDATA["), false, STATE_CDATA)
 			|| element()
 			|| literal(LITN("</"), false, STATE_ELEMENT_END)
 			|| content()

=== modified file 'dcpp/SimpleXMLReader.h'
--- dcpp/SimpleXMLReader.h	2013-01-18 21:28:38 +0000
+++ dcpp/SimpleXMLReader.h	2013-08-08 21:23:51 +0000
@@ -133,6 +133,8 @@
 
 		STATE_CONTENT,
 
+		STATE_CDATA,
+
 		STATE_END
 	};
 
@@ -177,6 +179,7 @@
 	bool elementAttrValue();
 
 	bool comment();
+	bool cdata();
 
 	bool content();
 

=== modified file 'test/testxml.cpp'
--- test/testxml.cpp	2012-11-02 22:23:18 +0000
+++ test/testxml.cpp	2013-08-08 21:23:51 +0000
@@ -92,6 +92,20 @@
     ASSERT_EQ(collector.endTags["root"], 1);
 }
 
+TEST(testxml, test_cdata) // Feeds a document containing a CDATA section to the reader one byte at a time.
+{
+	Collector collector;
+	SimpleXMLReader reader(&collector);

+    const char xml[] = "<root><![CDATA[Within this Character Data block I can use double dashes as much as I want (along with <, &, ', and \") ... however, I can't use the CEND sequence (if I need to use it I must escape one of the brackets or the greater-than sign).]]></root>";
+    for(size_t i = 0, iend = sizeof(xml); i < iend; ++i) { // NOTE(review): sizeof(xml) counts the terminating '\0', so that byte is also passed to parse() -- confirm this is intended
+    	reader.parse(xml + i, 1); // one-byte chunks, so "<![CDATA[" and "]]>" are always split across parse() calls
+    }

+    ASSERT_EQ(collector.startTags["root"], 1); // only the tag counts are checked; the CDATA text itself is not asserted
+    ASSERT_EQ(collector.endTags["root"], 1);
+}
+
 #include <dcpp/File.h>
 
 TEST(testxml, test_file)