← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/bug1123835 into lp:zorba

 

Juan Zacarias has proposed merging lp:~zorba-coders/zorba/bug1123835 into lp:zorba.

Commit message:
Fixes for FOTS errors in fn:unparsed-text* functions

Requested reviews:
  Chris Hillery (ceejatec)
Related bugs:
  Bug #1123835 in Zorba: "fn-unparsed-text* failures (at least 20 failures)"
  https://bugs.launchpad.net/zorba/+bug/1123835
  Bug #1167147 in Zorba: "fn-unparsed-text-036 throws wrong error"
  https://bugs.launchpad.net/zorba/+bug/1167147

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug1123835/+merge/158978

Fixes for FOTS errors in fn:unparsed-text* functions
-- 
https://code.launchpad.net/~zorba-coders/zorba/bug1123835/+merge/158978
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'src/runtime/sequences/sequences_impl.cpp'
--- src/runtime/sequences/sequences_impl.cpp	2013-04-11 23:37:12 +0000
+++ src/runtime/sequences/sequences_impl.cpp	2013-04-15 17:04:30 +0000
@@ -2025,7 +2025,7 @@
   */
 static void readDocument(
   zstring const& aUri,
-  zstring const& aEncoding,
+  zstring& aEncoding,
   static_context* aSctx,
   PlanState& aPlanState,
   QueryLoc const& loc,
@@ -2033,7 +2033,14 @@
 {
   //Normalize input to handle filesystem paths, etc.
   zstring lNormUri;
-  normalizeInputUri(aUri, aSctx, loc, &lNormUri);
+  try
+  {
+    normalizeInputUri(aUri, aSctx, loc, &lNormUri);
+  }
+  catch (...)
+  {
+    throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc)); 
+  }
 
   //Check for a fragment identifier
   //Create a zorba::URI for validating if it contains a fragment  
@@ -2042,6 +2049,15 @@
   {
     throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));    
   }
+  lUri.reset(new zorba::URI(aSctx->get_base_uri()));
+  if (lUri->get_encoded_fragment() == "UNDEFINED")
+  {
+    throw XQUERY_EXCEPTION(err::XPST0001, ERROR_PARAMS("", aUri), ERROR_LOC(loc));
+  }
+  if (!transcode::is_supported(aEncoding.c_str()))
+  {
+    throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
+  }
 
   //Resolve URI to stream
   zstring lErrorMessage;
@@ -2055,18 +2071,56 @@
   {
     throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
   }
+
   StreamReleaser lStreamReleaser = lStreamResource->getStreamReleaser();
   std::unique_ptr<std::istream, StreamReleaser> lStream(lStreamResource->getStream(), lStreamReleaser);
+<<<<<<< TREE
 
   lStreamResource->setStreamReleaser(nullptr);
+=======
+  
+  lStreamResource->setStreamReleaser(nullptr);  
+>>>>>>> MERGE-SOURCE
 
+  //Check for a UTF-8 BOM and remove it from the stream
+  char peek = lStream.get()->peek();
+  if (peek == '\xEF')
+  {
+    lStream.get()->get();
+    peek = lStream.get()->peek();
+    if ( peek == '\xBB' )
+    {
+     lStream.get()->get();
+      peek = lStream.get()->peek();
+      if ( peek == '\xBF' )
+      {
+        lStream.get()->get();
+      }
+      else
+      {
+        lStream.get()->unget();
+      }
+    }
+    else
+    {
+      lStream.get()->unget();
+    }
+  }
+  //check for bom of utf-16 and change encoding if no other encoding was specified
+  else if (peek == '�')
+  {
+    lStream.get()->get();
+    peek = lStream.get()->peek();
+    if ( peek == '�' )
+    {
+        aEncoding = "UTF-16";
+    }
+    lStream.get()->unget();
+  }
+  
   //check if encoding is needed
   if (transcode::is_necessary(aEncoding.c_str()))
   {
-    if (!transcode::is_supported(aEncoding.c_str()))
-    {
-      throw XQUERY_EXCEPTION(err::FOUT1190, ERROR_PARAMS(aUri), ERROR_LOC(loc));
-    }
     transcode::attach(*lStream.get(), aEncoding.c_str());
   }
   //creates stream item
@@ -2075,7 +2129,7 @@
     *lStream.release(),
     lStream.get_deleter()
     );
-
+  
   if (oResult.isNull())
   {
     throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(aUri), ERROR_LOC(loc));
@@ -2105,6 +2159,7 @@
 
   uriItem->getStringValue2(uriString);
   readDocument(uriString, encodingString, theSctx, planState, loc, result);
+  
   STACK_PUSH(true, state);
 
   STACK_END(state);
@@ -2145,6 +2200,8 @@
   }
   catch (XQueryException const& e)
   {
+    if (e.diagnostic() == err::XPST0001)
+        throw e;
     unparsedText = NULL;
   }
 
@@ -2156,6 +2213,88 @@
 /*******************************************************************************
   14.8.6 fn:unparsed-text-lines
 ********************************************************************************/
+
+template<typename CharType,class TraitsType,class Rep>
+std::basic_istream<CharType,TraitsType>&
+getline_no_endlines( std::basic_istream<CharType,TraitsType> &is, rstring<Rep> &s) {
+  typedef std::basic_istream<CharType,TraitsType> istream_type;
+  typedef typename istream_type::int_type int_type;
+  typedef std::basic_streambuf<CharType,TraitsType> streambuf_type;
+  typedef rstring<Rep> string_type;
+  typedef typename string_type::size_type size_type;
+
+  std::ios_base::iostate err = std::ios_base::iostate( std::ios_base::goodbit );
+  size_type extracted = 0;
+  int_type const idelim1 = TraitsType::to_int_type( '\r' );
+  int_type const idelim2 = TraitsType::to_int_type( '\n' );
+  int_type const eof = TraitsType::eof();
+  std::string check ="";
+  s.clear();
+  try {
+    streambuf_type *const sb = is.rdbuf();
+    int_type c = sb->sgetc();
+
+    while ( !TraitsType::eq_int_type( c, eof ) &&
+            ( !TraitsType::eq_int_type( c, idelim1 ) &&
+              !TraitsType::eq_int_type( c, idelim2 ) ) ) {
+      s += TraitsType::to_char_type( c );
+      check += TraitsType::to_char_type( c );
+      ++extracted;
+      c = sb->snextc();
+    }
+    if ( TraitsType::eq_int_type( c, eof ) )
+      err |= std::ios_base::eofbit;
+    else if ( TraitsType::eq_int_type (c, idelim1) ) {
+      ++extracted;      
+      sb->sbumpc();
+      c = sb->sgetc();
+      if (!c)
+      {
+        ++extracted;      
+        sb->sbumpc();
+        c = sb->sgetc();
+      }
+      if ( TraitsType::eq_int_type( c, eof ))
+      {
+        err |= std::ios_base::eofbit;
+      }
+      if ( TraitsType::eq_int_type( c, idelim2 ) ) {
+        ++extracted;
+        sb->sbumpc();
+        c = sb->sgetc();
+        if (!c)
+        {
+          ++extracted;      
+          sb->sbumpc();
+          c = sb->sgetc();
+        }
+        if ( TraitsType::eq_int_type( c, eof ))
+        {
+          err |= std::ios_base::eofbit;
+        }
+      }
+    }
+    else if ( TraitsType::eq_int_type( c, idelim2 ) ) {
+      ++extracted;
+      sb->sbumpc();
+      c = sb->sgetc();
+      if ( TraitsType::eq_int_type( c, eof ))
+      {
+        err |= std::ios_base::eofbit;
+      }
+    } else
+      err |= std::ios_base::failbit;
+  }
+  catch ( ... ) {
+    is.setstate( std::ios_base::badbit );
+  }
+  if ( !extracted )
+    err |= std::ios_base::failbit;
+  if ( err )
+    is.setstate( err );
+  return is;
+}
+
 FnUnparsedTextLinesIteratorState::~FnUnparsedTextLinesIteratorState()
 {
   delete theStream;
@@ -2176,6 +2315,8 @@
   std::auto_ptr<internal::Resource> lResource;
   StreamReleaser lStreamReleaser;
   std::auto_ptr<zorba::URI> lUri;
+  char peek;
+  bool isFixedEncoding = false;
 
   FnUnparsedTextLinesIteratorState* state;
   DEFAULT_STACK_INIT(FnUnparsedTextLinesIteratorState, state, planState);
@@ -2189,20 +2330,36 @@
   {
     consumeNext(encodingItem, theChildren[1].getp(), planState);
     encodingItem->getStringValue2(encodingString);
+    isFixedEncoding = true;
   }
 
   //Normalize input to handle filesystem paths, etc.
   uriItem->getStringValue2(uriString);
-  normalizeInputUri(uriString, theSctx, loc, &lNormUri);
+
+  try
+  {
+    normalizeInputUri(uriString, theSctx, loc, &lNormUri);
+  }
+  catch (...)
+  {
+    throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(uriString), ERROR_LOC(loc)); 
+  }
 
   //Check for a fragment identifier
   //Create a zorba::URI for validating if it contains a fragment  
   lUri.reset(new zorba::URI(lNormUri));
+
   if (lUri->get_encoded_fragment() != "")
   {
     throw XQUERY_EXCEPTION(err::FOUT1170, ERROR_PARAMS(uriString), ERROR_LOC(loc));    
   }
 
+  lUri.reset(new zorba::URI(theSctx->get_base_uri()));
+  if (lUri->get_encoded_fragment() == "UNDEFINED")
+  {
+    throw XQUERY_EXCEPTION(err::XPST0001, ERROR_PARAMS("", uriString), ERROR_LOC(loc));
+  }
+
   //Resolve URI to stream
   lResource = theSctx->resolve_uri
     (lNormUri, internal::EntityData::SOME_CONTENT, lErrorMessage);
@@ -2217,6 +2374,44 @@
   state->theStream = new std::unique_ptr<std::istream, StreamReleaser> (state->theStreamResource->getStream(), lStreamReleaser);
   state->theStreamResource->setStreamReleaser(nullptr);
 
+  //Check for bom utf-8 and remove the bom definition and 
+  //change encoding to UTF-8 if no other encoding is specified
+  peek = state->theStream->get()->peek();
+  if (peek == '\xEF')
+  {
+    state->theStream->get()->get();
+    peek = state->theStream->get()->peek();
+    if ( peek == '\xBB' )
+    {
+      state->theStream->get()->get();
+      peek = state->theStream->get()->peek();
+      if ( peek == '\xBF' )
+      {
+        state->theStream->get()->get();
+      }
+      else
+      {
+        state->theStream->get()->unget();
+      }
+    }
+    else
+    {
+      state->theStream->get()->unget();
+    }
+  }
+  //check for bom of utf-16 and change encoding if no other encoding was specified
+  else if (peek == '�')
+  {
+    state->theStream->get()->get();
+    peek = state->theStream->get()->peek();
+    if ( peek == '�' )
+    {
+      if (!isFixedEncoding)
+        encodingString = "UTF-16";
+    }
+    state->theStream->get()->unget();
+  }
+
   //check if encoding is needed
   if (transcode::is_necessary(encodingString.c_str()))
   {
@@ -2226,10 +2421,10 @@
     }
     transcode::attach(*state->theStream->get(), encodingString.c_str());
   }
-
+  
   while (state->theStream->get()->good())
   {
-    getline(*state->theStream->get(), streamLine);
+    getline_no_endlines(*state->theStream->get(), streamLine); 
     STACK_PUSH(GENV_ITEMFACTORY->createString(result, streamLine), state);
   }
 


Follow ups