zorba-coders team mailing list archive
-
zorba-coders team
-
Mailing list archive
-
Message #20551
[Merge] lp:~zorba-coders/zorba/bug1123835 into lp:zorba
Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1123835 into lp:zorba.
Commit message:
Fixes for FOTS errors in fn:unparsed-text* functions
Requested reviews:
Chris Hillery (ceejatec)
Related bugs:
Bug #1123835 in Zorba: "fn-unparsed-text* failures (at least 20 failures)"
https://bugs.launchpad.net/zorba/+bug/1123835
Bug #1167147 in Zorba: "fn-unparsed-text-036 throws wrong error"
https://bugs.launchpad.net/zorba/+bug/1167147
For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug1123835/+merge/158978
Fixes for FOTS errors in fn:unparsed-text* functions
--
https://code.launchpad.net/~zorba-coders/zorba/bug1123835/+merge/158978
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp 2013-04-11 23:37:12 +0000
+++ src/runtime/sequences/sequences_impl.cpp 2013-04-15 17:04:30 +0000
@@ -2025,7 +2025,7 @@
*/
static void readDocument(
zstring const& aUri,
- zstring const& aEncoding,
+ zstring& aEncoding,
static_context* aSctx,
PlanState& aPlanState,
QueryLoc const& loc,
@@ -2033,7 +2033,14 @@
{
//Normalize input to handle filesystem paths, etc.
zstring lNormUri;
- normalizeInputUri(aUri, aSctx, loc, &lNormUri);
+ try
+ {
+ normalizeInputUri(aUri, aSctx, loc, &lNormUri);
+ }
+ catch (...)
+ {
+ throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+ }
//Check for a fragment identifier
//Create a zorba::URI for validating if it contains a fragment
@@ -2042,6 +2049,15 @@
{
throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
}
+ lUri.reset(new zorba::URI(aSctx->get_base_uri()));
+ if (lUri->get_encoded_fragment() == "UNDEFINED")
+ {
+ throw XQUERY_EXCEPTION(err::XPST0001, ERROR_PARAMS("", aUri), ERROR_LOC(loc));
+ }
+ if (!transcode::is_supported(aEncoding.c_str()))
+ {
+ throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+ }
//Resolve URI to stream
zstring lErrorMessage;
@@ -2055,18 +2071,56 @@
{
throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
}
+
StreamReleaser lStreamReleaser = lStreamResource->getStreamReleaser();
std::unique_ptr<std::istream, StreamReleaser> lStream(lStreamResource->getStream(), lStreamReleaser);
+<<<<<<< TREE
lStreamResource->setStreamReleaser(nullptr);
+=======
+
+ lStreamResource->setStreamReleaser(nullptr);
+>>>>>>> MERGE-SOURCE
+ //Check for a UTF-8 BOM and, if present, remove it from the stream
+ char peek = lStream.get()->peek();
+ if (peek == '\xEF')
+ {
+ lStream.get()->get();
+ peek = lStream.get()->peek();
+ if ( peek == '\xBB' )
+ {
+ lStream.get()->get();
+ peek = lStream.get()->peek();
+ if ( peek == '\xBF' )
+ {
+ lStream.get()->get();
+ }
+ else
+ {
+ lStream.get()->unget();
+ }
+ }
+ else
+ {
+ lStream.get()->unget();
+ }
+ }
+ //check for bom of utf-16 and change encoding if no other encoding was specified
+ else if (peek == '�')
+ {
+ lStream.get()->get();
+ peek = lStream.get()->peek();
+ if ( peek == '�' )
+ {
+ aEncoding = "UTF-16";
+ }
+ lStream.get()->unget();
+ }
+
//check if encoding is needed
if (transcode::is_necessary(aEncoding.c_str()))
{
- if (!transcode::is_supported(aEncoding.c_str()))
- {
- throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
- }
transcode::attach(*lStream.get(), aEncoding.c_str());
}
//creates stream item
@@ -2075,7 +2129,7 @@
*lStream.release(),
lStream.get_deleter()
);
-
+
if (oResult.isNull())
{
throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
@@ -2105,6 +2159,7 @@
uriItem->getStringValue2(uriString);
readDocument(uriString, encodingString, theSctx, planState, loc, result);
+
STACK_PUSH(true, state);
STACK_END(state);
@@ -2145,6 +2200,8 @@
}
catch (XQueryException const& e)
{
+ if (e.diagnostic() == err::XPST0001)
+ throw e;
unparsedText = NULL;
}
@@ -2156,6 +2213,88 @@
/*******************************************************************************
14.8.6 fn:unparsed-text-lines
********************************************************************************/
+
+template<typename CharType,class TraitsType,class Rep>
+std::basic_istream<CharType,TraitsType>&
+getline_no_endlines( std::basic_istream<CharType,TraitsType> &is, rstring<Rep> &s) {
+ typedef std::basic_istream<CharType,TraitsType> istream_type;
+ typedef typename istream_type::int_type int_type;
+ typedef std::basic_streambuf<CharType,TraitsType> streambuf_type;
+ typedef rstring<Rep> string_type;
+ typedef typename string_type::size_type size_type;
+
+ std::ios_base::iostate err = std::ios_base::iostate( std::ios_base::goodbit );
+ size_type extracted = 0;
+ int_type const idelim1 = TraitsType::to_int_type( '\r' );
+ int_type const idelim2 = TraitsType::to_int_type( '\n' );
+ int_type const eof = TraitsType::eof();
+ std::string check ="";
+ s.clear();
+ try {
+ streambuf_type *const sb = is.rdbuf();
+ int_type c = sb->sgetc();
+
+ while ( !TraitsType::eq_int_type( c, eof ) &&
+ ( !TraitsType::eq_int_type( c, idelim1 ) &&
+ !TraitsType::eq_int_type( c, idelim2 ) ) ) {
+ s += TraitsType::to_char_type( c );
+ check += TraitsType::to_char_type( c );
+ ++extracted;
+ c = sb->snextc();
+ }
+ if ( TraitsType::eq_int_type( c, eof ) )
+ err |= std::ios_base::eofbit;
+ else if ( TraitsType::eq_int_type (c, idelim1) ) {
+ ++extracted;
+ sb->sbumpc();
+ c = sb->sgetc();
+ if (!c)
+ {
+ ++extracted;
+ sb->sbumpc();
+ c = sb->sgetc();
+ }
+ if ( TraitsType::eq_int_type( c, eof ))
+ {
+ err |= std::ios_base::eofbit;
+ }
+ if ( TraitsType::eq_int_type( c, idelim2 ) ) {
+ ++extracted;
+ sb->sbumpc();
+ c = sb->sgetc();
+ if (!c)
+ {
+ ++extracted;
+ sb->sbumpc();
+ c = sb->sgetc();
+ }
+ if ( TraitsType::eq_int_type( c, eof ))
+ {
+ err |= std::ios_base::eofbit;
+ }
+ }
+ }
+ else if ( TraitsType::eq_int_type( c, idelim2 ) ) {
+ ++extracted;
+ sb->sbumpc();
+ c = sb->sgetc();
+ if ( TraitsType::eq_int_type( c, eof ))
+ {
+ err |= std::ios_base::eofbit;
+ }
+ } else
+ err |= std::ios_base::failbit;
+ }
+ catch ( ... ) {
+ is.setstate( std::ios_base::badbit );
+ }
+ if ( !extracted )
+ err |= std::ios_base::failbit;
+ if ( err )
+ is.setstate( err );
+ return is;
+}
+
FnUnparsedTextLinesIteratorState::~FnUnparsedTextLinesIteratorState()
{
delete theStream;
@@ -2176,6 +2315,8 @@
std::auto_ptr<internal::Resource> lResource;
StreamReleaser lStreamReleaser;
std::auto_ptr<zorba::URI> lUri;
+ char peek;
+ bool isFixedEncoding = false;
FnUnparsedTextLinesIteratorState* state;
DEFAULT_STACK_INIT(FnUnparsedTextLinesIteratorState, state, planState);
@@ -2189,20 +2330,36 @@
{
consumeNext(encodingItem, theChildren[1].getp(), planState);
encodingItem->getStringValue2(encodingString);
+ isFixedEncoding = true;
}
//Normalize input to handle filesystem paths, etc.
uriItem->getStringValue2(uriString);
- normalizeInputUri(uriString, theSctx, loc, &lNormUri);
+
+ try
+ {
+ normalizeInputUri(uriString, theSctx, loc, &lNormUri);
+ }
+ catch (...)
+ {
+ throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(uriString), ERROR_LOC(loc));
+ }
//Check for a fragment identifier
//Create a zorba::URI for validating if it contains a fragment
lUri.reset(new zorba::URI(lNormUri));
+
if (lUri->get_encoded_fragment() != "")
{
throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(uriString), ERROR_LOC(loc));
}
+ lUri.reset(new zorba::URI(theSctx->get_base_uri()));
+ if (lUri->get_encoded_fragment() == "UNDEFINED")
+ {
+ throw XQUERY_EXCEPTION(err::XPST0001, ERROR_PARAMS("", uriString), ERROR_LOC(loc));
+ }
+
//Resolve URI to stream
lResource = theSctx->resolve_uri
(lNormUri, internal::EntityData::SOME_CONTENT, lErrorMessage);
@@ -2217,6 +2374,44 @@
state->theStream = new std::unique_ptr<std::istream, StreamReleaser> (state->theStreamResource->getStream(), lStreamReleaser);
state->theStreamResource->setStreamReleaser(nullptr);
+ //Check for bom utf-8 and remove the bom definition and
+ //change encoding to UTF-8 if no other encoding is specified
+ peek = state->theStream->get()->peek();
+ if (peek == '\xEF')
+ {
+ state->theStream->get()->get();
+ peek = state->theStream->get()->peek();
+ if ( peek == '\xBB' )
+ {
+ state->theStream->get()->get();
+ peek = state->theStream->get()->peek();
+ if ( peek == '\xBF' )
+ {
+ state->theStream->get()->get();
+ }
+ else
+ {
+ state->theStream->get()->unget();
+ }
+ }
+ else
+ {
+ state->theStream->get()->unget();
+ }
+ }
+ //check for bom of utf-16 and change encoding if no other encoding was specified
+ else if (peek == '�')
+ {
+ state->theStream->get()->get();
+ peek = state->theStream->get()->peek();
+ if ( peek == '�' )
+ {
+ if (!isFixedEncoding)
+ encodingString = "UTF-16";
+ }
+ state->theStream->get()->unget();
+ }
+
//check if encoding is needed
if (transcode::is_necessary(encodingString.c_str()))
{
@@ -2226,10 +2421,10 @@
}
transcode::attach(*state->theStream->get(), encodingString.c_str());
}
-
+
while (state->theStream->get()->good())
{
- getline(*state->theStream->get(), streamLine);
+ getline_no_endlines(*state->theStream->get(), streamLine);
STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state);
}
Follow ups