← Back to team overview

zorba-coders team mailing list archive

[Merge] lp:~zorba-coders/zorba/bug-921458 into lp:zorba

 

Matthias Brantner has proposed merging lp:~zorba-coders/zorba/bug-921458 into lp:zorba.

Requested reviews:
  William Candillon (wcandillon)
  Paul J. Lucas (paul-lucas)
Related bugs:
  Bug #921458 in Zorba: "file:read-text-lines() blocking"
  https://bugs.launchpad.net/zorba/+bug/921458

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/bug-921458/+merge/102058

Fix for bug #921458 (file:read-text-lines() blocking)

The problem was that the string:split function isn't suitable for tokenizing with newlines as separator.

I have provided a native implementation using C++'s getline.
-- 
https://code.launchpad.net/~zorba-coders/zorba/bug-921458/+merge/102058
Your team Zorba Coders is subscribed to branch lp:zorba.
=== modified file 'ChangeLog'
--- ChangeLog	2012-04-14 06:12:29 +0000
+++ ChangeLog	2012-04-16 07:08:21 +0000
@@ -21,6 +21,7 @@
   * Fixed bug #872234 (prevent a rewritting to take place in case of sequential expr)
   * Fixed bug #906494 (default compile with D_FILE_OFFSET_BITS=64)
   * Fixed bug #912586, #912593 and #912722 (assertion failures with lax validation)
+  * Fixed bug #921458 (file:read-text-lines() blocking)
   * Fixed bug #949910 (has-children may be invoked on all nodes). Internally, zorba::store::Item::getChildren() now returns NULL on node classes without offspring (instead of raising an error).
 
 

=== modified file 'modules/org/expath/ns/file.xq'
--- modules/org/expath/ns/file.xq	2012-04-14 01:36:33 +0000
+++ modules/org/expath/ns/file.xq	2012-04-16 07:08:21 +0000
@@ -422,11 +422,7 @@
 declare %an:nondeterministic function file:read-text-lines(
   $file as xs:string,
   $encoding as xs:string
-) as xs:string*
-{
-  let $content := file:read-text($file, $encoding)
-  return fn:tokenize($content, "\n")
-};
+) as xs:string* external;
 
 (:~
  : This is an internal function that copies an entire source directory to an

=== modified file 'modules/org/expath/ns/file.xq.src/file.cpp'
--- modules/org/expath/ns/file.xq.src/file.cpp	2012-03-30 19:03:09 +0000
+++ modules/org/expath/ns/file.xq.src/file.cpp	2012-04-16 07:08:21 +0000
@@ -223,6 +223,124 @@
 
 //*****************************************************************************
 
+ReadTextLinesFunction::ReadTextLinesFunction(const FileModule* aModule)
+  : FileFunction(aModule)
+{
+}
+
+ItemSequence_t
+ReadTextLinesFunction::evaluate(
+  const ExternalFunction::Arguments_t& aArgs,
+  const StaticContext*                          aSctxCtx,
+  const DynamicContext*                         aDynCtx) const
+{
+  String lFileStr = getFilePathString(aArgs, 0);
+  File_t lFile = File::createFile(lFileStr.c_str());
+  String lEncoding("UTF-8");
+
+  // preconditions
+  if (!lFile->exists()) {
+    raiseFileError("FOFL0001", "A file does not exist at this path", lFile->getFilePath());
+  }
+  if (lFile->isDirectory()) {
+    raiseFileError("FOFL0004", "The given path points to a directory", lFile->getFilePath());
+  }
+
+  lEncoding = getEncodingArg(aArgs, 1);
+
+  return ItemSequence_t(new LinesItemSequence(lFile, lEncoding, this));
+}
+
+ReadTextLinesFunction::LinesItemSequence::LinesItemSequence(
+    const File_t& aFile,
+    const String& aEncoding,
+    const ReadTextLinesFunction* aFunc)
+  : theFile(aFile),
+    theEncoding(aEncoding),
+    theFunc(aFunc)
+{
+}
+
+Iterator_t
+ReadTextLinesFunction::LinesItemSequence::getIterator()
+{
+  return new ReadTextLinesFunction::LinesItemSequence::LinesIterator(
+      theFile, theEncoding, theFunc
+    );
+}
+
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::LinesIterator(
+    const File_t& aFile,
+    const String& aEncoding,
+    const ReadTextLinesFunction* aFunc)
+  : theFile(aFile),
+    theEncoding(aEncoding),
+    theFunc(aFunc),
+    theStream(0)
+{
+}
+
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::~LinesIterator()
+{
+  delete theStream;
+}
+
+void
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::open()
+{
+  if ( transcode::is_necessary( theEncoding.c_str() ) )
+  {
+    try
+    {
+      theStream = new transcode::stream<std::ifstream>(theEncoding.c_str());
+    }
+    catch (std::invalid_argument const& e)
+    {
+      theFunc->raiseFileError("FOFL0006", "Unsupported encoding", theEncoding.c_str());
+    }
+  }
+  else
+  {
+    theStream = new std::ifstream();
+  }
+  theFile->openInputStream(*theStream, false, true);
+}
+
+bool
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::next(Item& aRes)
+{
+  if (!theStream || !theStream->good())
+    return false;
+
+  std::string lStr;
+  getline(*theStream, lStr);
+  
+  if (theStream->bad())
+  {
+    return false;
+  }
+  else
+  {
+    aRes = theFunc->theModule->getItemFactory()->createString(lStr);
+    return true;
+  }
+}
+
+void
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::close()
+{
+  delete theStream;
+  theStream = 0;
+}
+
+bool
+ReadTextLinesFunction::LinesItemSequence::LinesIterator::isOpen() const
+{
+  return theStream != 0;
+}
+
+//*****************************************************************************
+
 ExistsFunction::ExistsFunction(const FileModule* aModule)
   : FileFunction(aModule)
 {

=== modified file 'modules/org/expath/ns/file.xq.src/file.h'
--- modules/org/expath/ns/file.xq.src/file.h	2012-03-30 19:03:09 +0000
+++ modules/org/expath/ns/file.xq.src/file.h	2012-04-16 07:08:21 +0000
@@ -316,6 +316,70 @@
 
 //*****************************************************************************
 
+  class ReadTextLinesFunction : public FileFunction
+  {
+    public:
+      ReadTextLinesFunction(const FileModule* aModule);
+
+      virtual String
+      getLocalName() const { return "read-text-lines"; }
+  
+      virtual ItemSequence_t 
+      evaluate(const ExternalFunction::Arguments_t& args,
+               const StaticContext* aSctxCtx,
+               const DynamicContext* aDynCtx) const;
+
+    protected:
+      class LinesItemSequence : public ItemSequence
+      {
+        protected:
+          File_t            theFile;
+          String            theEncoding;
+          const ReadTextLinesFunction* theFunc;
+
+          class LinesIterator : public Iterator
+          {
+            protected:
+              const File_t&     theFile;
+              const String&     theEncoding;
+              const ReadTextLinesFunction* theFunc;
+
+              std::ifstream* theStream;
+
+            public:
+              LinesIterator(
+                  const File_t&,
+                  const String&,
+                  const ReadTextLinesFunction*);
+
+              virtual ~LinesIterator();
+
+              virtual void
+              open();
+
+              virtual bool
+              next(Item&);
+
+              virtual void
+              close();
+
+              virtual bool
+              isOpen() const;
+          };
+
+        public:
+          LinesItemSequence(
+              const File_t& aFile,
+              const String& aEncoding,
+              const ReadTextLinesFunction*);
+
+          Iterator_t
+          getIterator();
+      };
+  };
+
+//*****************************************************************************
+
   class WriteTextFunction : public WriterFileFunction
   {
     public:

=== modified file 'modules/org/expath/ns/file.xq.src/file_module.cpp'
--- modules/org/expath/ns/file.xq.src/file_module.cpp	2012-03-30 19:03:09 +0000
+++ modules/org/expath/ns/file.xq.src/file_module.cpp	2012-04-16 07:08:21 +0000
@@ -46,6 +46,8 @@
       lFunc = new ReadBinaryFunction(this);
     } else if (aLocalname == "read-text") {
       lFunc = new ReadTextFunction(this);
+    } else if (aLocalname == "read-text-lines") {
+      lFunc = new ReadTextLinesFunction(this);
     } else if (aLocalname == "exists") {
       lFunc = new ExistsFunction(this);
     } else if (aLocalname == "is-directory") {

=== added file 'test/rbkt/ExpQueryResults/zorba/file/file_read_text_lines.xml.res'
--- test/rbkt/ExpQueryResults/zorba/file/file_read_text_lines.xml.res	1970-01-01 00:00:00 +0000
+++ test/rbkt/ExpQueryResults/zorba/file/file_read_text_lines.xml.res	2012-04-16 07:08:21 +0000
@@ -0,0 +1,32 @@
+&lt;products&gt;
+  &lt;product&gt;
+    &lt;name&gt;broiler&lt;/name&gt;
+    &lt;category&gt;kitchen&lt;/category&gt;
+    &lt;price&gt;100&lt;/price&gt;
+    &lt;cost&gt;70&lt;/cost&gt;
+  &lt;/product&gt;
+  &lt;product&gt;
+    &lt;name&gt;toaster&lt;/name&gt;
+    &lt;category&gt;kitchen&lt;/category&gt;
+    &lt;price&gt;30&lt;/price&gt;
+    &lt;cost&gt;10&lt;/cost&gt;
+  &lt;/product&gt;
+  &lt;product&gt;
+    &lt;name&gt;blender&lt;/name&gt;
+    &lt;category&gt;kitchen&lt;/category&gt;
+    &lt;price&gt;50&lt;/price&gt;
+    &lt;cost&gt;25&lt;/cost&gt;
+  &lt;/product&gt;
+  &lt;product&gt;
+    &lt;name&gt;socks&lt;/name&gt;
+    &lt;category&gt;clothes&lt;/category&gt;
+    &lt;price&gt;10&lt;/price&gt;
+    &lt;cost&gt;2&lt;/cost&gt;
+  &lt;/product&gt;
+  &lt;product&gt;
+    &lt;name&gt;shirt&lt;/name&gt;
+    &lt;category&gt;clothes&lt;/category&gt;
+    &lt;price&gt;10&lt;/price&gt;
+    &lt;cost&gt;3&lt;/cost&gt;
+  &lt;/product&gt;
+&lt;/products&gt;

=== added file 'test/rbkt/Queries/zorba/file/file_read_text_lines.xq'
--- test/rbkt/Queries/zorba/file/file_read_text_lines.xq	1970-01-01 00:00:00 +0000
+++ test/rbkt/Queries/zorba/file/file_read_text_lines.xq	2012-04-16 07:08:21 +0000
@@ -0,0 +1,4 @@
+import module namespace file = "http://expath.org/ns/file";;
+
+string-join(file:read-text-lines(fn:resolve-uri("mydata.xml")), '
+')


Follow ups