launchpad-reviewers team mailing list archive
-
launchpad-reviewers team
-
Mailing list archive
-
Message #32794
[Merge] ~lgp171188/launchpad:parse-ppa-apache-access-logs-log-error-once-per-broken-line-not-error-out into launchpad:master
Guruprasad has proposed merging ~lgp171188/launchpad:parse-ppa-apache-access-logs-log-error-once-per-broken-line-not-error-out into launchpad:master.
Commit message:
Report an error parsing an apache2 log file line just once
This will allow the parsing to proceed past the broken lines, if any, in
the subsequent runs without getting stuck there every time till the log
file in question is rotated out. Also reduces the number of OOPSes
raised to one per broken log line.
LP: #2032180
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #2032180 in Launchpad itself: "The parse-ppa-apache-access-logs.py script should report an error parsing a log line just once"
https://bugs.launchpad.net/launchpad/+bug/2032180
For more details, see:
https://code.launchpad.net/~lgp171188/launchpad/+git/launchpad/+merge/489672
--
Your team Launchpad code reviewers is requested to review the proposed merge of ~lgp171188/launchpad:parse-ppa-apache-access-logs-log-error-once-per-broken-line-not-error-out into launchpad:master.
diff --git a/lib/lp/services/apachelogparser/base.py b/lib/lp/services/apachelogparser/base.py
index d6b3e00..691a8cb 100644
--- a/lib/lp/services/apachelogparser/base.py
+++ b/lib/lp/services/apachelogparser/base.py
@@ -152,10 +152,11 @@ def parse_file(fd, start_position, logger, get_download_key, parsed_lines=0):
daily_downloads[country_code] = 0
daily_downloads[country_code] += 1
except Exception as e:
- # Update parsed_bytes to the end of the last line we parsed
- # successfully, log this as an error and break the loop so that
- # we return.
- parsed_bytes -= len(line)
+ # We log an error here but leave the parsed_bytes
+ # unchanged so that in the next run, the remaining
+ # lines in the log file, if any, are parsed without
+ # getting stuck at the same broken line till the log
+ # file in question is rotated out.
logger.error('Error (%s) while parsing "%s"' % (e, line_text))
break
diff --git a/lib/lp/services/apachelogparser/tests/apache-log-files/librarian-log-with-format-error.log b/lib/lp/services/apachelogparser/tests/apache-log-files/librarian-log-with-format-error.log
new file mode 100644
index 0000000..3c6c573
--- /dev/null
+++ b/lib/lp/services/apachelogparser/tests/apache-log-files/librarian-log-with-format-error.log
@@ -0,0 +1,2 @@
+aa asdasdasd
+201.158.154.121 - - [13/Jun/2008:18:38:57 +0100] "GET /15166065/gnome-do-0.5.0.1.tar.gz HTTP/1.1" 200 479565 "-" "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.14) Gecko/20080208 Mandriva/2.0.0.14-3.1mdv2008.1 (2008.1) Firefox/2.0.0.14"
diff --git a/lib/lp/services/apachelogparser/tests/test_apachelogparser.py b/lib/lp/services/apachelogparser/tests/test_apachelogparser.py
index dee5327..08a5c03 100644
--- a/lib/lp/services/apachelogparser/tests/test_apachelogparser.py
+++ b/lib/lp/services/apachelogparser/tests/test_apachelogparser.py
@@ -294,7 +294,7 @@ class TestLogFileParsing(TestCase):
)
self.assertIn("Error", self.logger.getLogBuffer())
self.assertEqual(downloads, {})
- self.assertEqual(parsed_bytes, 0)
+ self.assertEqual(parsed_bytes, 9)
def _assertResponseWithGivenStatusIsIgnored(self, status):
"""Assert that responses with the given status are ignored."""
@@ -544,6 +544,39 @@ class TestParsedFilesDetection(TestCase):
# the first line ends.
self.assertEqual(len(first_line), position)
+ def test_parsed_file_incomplete_parsing_parser_error_previous_time(self):
+ logger = BufferLogger()
+ log_file = os.path.join(
+ here, "apache-log-files/librarian-log-with-format-error.log"
+ )
+ with open(log_file) as fd:
+ downloads, parsed_bytes, parsed_lines = parse_file(
+ fd,
+ start_position=0,
+ logger=logger,
+ get_download_key=get_path_download_key,
+ )
+ self.assertIn("Error", logger.getLogBuffer())
+ self.assertEqual(downloads, {})
+ self.assertEqual(parsed_bytes, 13)
+
+ with open(log_file) as fd:
+ lines = fd.readlines()
+
+ ParsedApacheLog(lines[0], len(lines[0]))
+ files_to_parse = list(get_files_to_parse([log_file]))
+ self.assertEqual(1, len(files_to_parse))
+ fd, position = files_to_parse[0]
+ downloads, parsed_bytes, parsed_lines = parse_file(
+ fd,
+ start_position=position,
+ logger=logger,
+ get_download_key=get_path_download_key,
+ )
+ self.assertEqual(1, len(downloads))
+ self.assertEqual(len(lines[1]), parsed_bytes - len(lines[0]))
+ self.assertEqual(1, parsed_lines)
+
def test_different_files_with_same_name(self):
# Thanks to log rotation, two runs of our script may see files with
# the same name but completely different content. If we see a file