
oem-qa team mailing list archive

[Fwd: Parse Wiki Script]

 

Using the wiki results script on wiki.canonical.com

-------- Original Message --------
Subject: Parse Wiki Script
Date: Sat, 21 Feb 2009 12:16:00 -0800
From: Leann Ogasawara <leann.ogasawara@xxxxxxxxxxxxx>
To: Chris Gregan <chris.gregan@xxxxxxxxxxxxx>

Hi Chris,

I've got the script working; you just need to do a small initial one-time
setup, which I'll describe below.  I'm basically using the same code that
editmoin has.

1) create a ~/.moin_ids file which contains the following line:

        https://wiki.canonical.com (paste your MOIN_ID here)

        You can find your MOIN_ID by first making sure you've logged
        into https://wiki.canonical.com .  Assuming you use Firefox,
        go to Edit->Preferences in your browser, click on the Privacy
        icon, then click on the "Show Cookies..." button.  Scroll down
        to wiki.canonical.com and expand the view.  Select the
        MOIN_SESSION cookie.  Your MOIN_ID is what's in the "Content:"
        field (it's a long string of letters and numbers).  Copy and
        paste that into the .moin_ids file so it'll look something
        like:

        https://wiki.canonical.com f7asd89asdfa-ys_k7oof2-_d-m
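
        If you want to double-check that entry before running the
        script, a quick snippet like this (just a sanity check, not
        part of the script) prints the ID it finds for
        wiki.canonical.com the same way the script's get_id() does:

        import os
        for line in open(os.path.expanduser("~/.moin_ids")):
            tokens = line.strip().split()
            if len(tokens) > 1 and tokens[0].startswith("https://wiki.canonical.com"):
                print tokens[1]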

That's pretty much all you need to do before running the script.  You'll
need to update the urls list in the script to include all the new links
for the test cases (see the example below).  I only put two in there for
now to test with.  Let me know if this new script works or not.
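
For example, the top of parse-wiki.py would end up looking something
like this once the other pages are added (the last two page names here
are just placeholders, not real pages):

urls = ["https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-boot",
        "https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-connect",
        # The two entries below are placeholders -- substitute the real
        # test case pages:
        "https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-suspend",
        "https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-audio"]

You can also pass pages on the command line with --url=url1,url2,...
instead of editing the script.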

Thanks,
Leann


--

Chris Gregan
QA Manager
OEM Services
Canonical USA Inc.
cgregan@xxxxxxxxxxxxx
cgregan@freenode
M-240-422-9224
#!/usr/bin/env python
"""
    Site-scraping script: pulls test case pages from wiki.canonical.com
    and prints their results tables in wiki or CSV format.
"""

import urllib
import re
import sys
import getopt
import string
import os
import time

IDFILENAME = os.path.expanduser("~/.moin_ids")

def get_id(moinurl):
    """Return the MOIN_SESSION id for moinurl from ~/.moin_ids, or None."""
    if os.path.isfile(IDFILENAME):
        file = open(IDFILENAME)
        for line in file.readlines():
            line = line.strip()
            if line and line[0] != "#":
                tokens = line.split()
                if len(tokens) > 1:
                    url, id = tokens[:2]
                else:
                    url, id = tokens[0], None
                if moinurl.startswith(url):
                    return id
    return None

def help():
    print "Usage:  python parse-wiki.py [options]"
    print "options:"
    print " --help"
    print " --url=url[,url,url. . .]"
    print " --format=[wiki|csv]"
    print " --print-last"

#default parameter options
format = "wiki"
print_last = 0

urls = ["https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-boot",
        "https://wiki.canonical.com/OEMServices/QA/Testing/Elvis/Elvis-connect"]

# Parse command-line options; anything unrecognized prints the usage text.
try:
    opts, args = getopt.getopt(sys.argv[1:], "hu:f:p",
                               ["help", "url=", "format=", "print-last"])
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            help()
            sys.exit(0)
        if opt in ("-u", "--url"):
            urls = arg.split(",")
        if opt in ("-f", "--format"):
            if arg == "csv":
                format = "csv"
        if opt in ("-p", "--print-last"):
            print_last = 1
except getopt.GetoptError:
    help()
    sys.exit(1)

for url in urls:
    url_orig = url
    id = get_id(url)
    # Fetch the raw wiki markup for the page, sending the MOIN_SESSION
    # cookie looked up from ~/.moin_ids so we're authenticated.
    url = url + "?action=raw"
    urlopener = urllib.FancyURLopener()
    urlopener.addheader("Cookie", "MOIN_SESSION=\"%s\"" % id)
    filename, headers = urlopener.retrieve(url)
    wiki_url = open(filename)
    # "=== ... ===" headings name the test cases; "|| ... ||" lines are
    # rows of the results tables.
    reg_exp = r"===(.*?)==="
    reg_exp2 = r"(\|\|.*\|\|)"
    wiki_table = []
    csv_table = []
    csv = ""
    wiki = ""
    for line in wiki_url.readlines():
        test_case = re.search(reg_exp, line)
        table_row = re.search(reg_exp2, line)
        if test_case != None:
            # New test case: record its heading and the page it came from.
            wiki += "\n=== %s ===\n" % (test_case.group(1))
            wiki += "%s\n" % (url_orig)
            wiki_table.append(test_case.group(1).strip() + "\n")
            wiki_table.append(url_orig + "\n")
            wiki_table.append("")
            csv += test_case.group(1).strip() + "\n"
            csv += "\t" + url_orig + "\n"
            csv_table.append(test_case.group(1).strip() + "\n")
            csv_table.append("\t" + url_orig + "\n")
            csv_table.append("")
        elif table_row != None:
            wiki += table_row.group(1) + "\n"
            # Only the most recent row is kept for --print-last output.
            wiki_table[-1] = table_row.group(1) + "\n"
            # Turn the "|| cell || cell ||" wiki row into comma-separated cells.
            line = line.strip().lstrip("|")
            cells = [x.strip() for x in line.split("||")]
            csv += "\t" + string.join(cells, ",") + "\n"
            csv_table[-1] = "\t" + string.join(cells, ",") + "\n"

    # Print either everything that was captured or, with --print-last,
    # just the heading, page URL, and last table row of each test case.
    if format == "csv":
        if print_last:
            print string.join(csv_table, "")
        else:
            print csv
    else:
        if print_last:
            print string.join(wiki_table, "")
        else:
            print wiki

    # Pause between pages so we don't hammer the wiki.
    time.sleep(15)
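
# Example invocation (assuming the script is saved as parse-wiki.py):
# "python parse-wiki.py --format=csv --print-last" prints just the heading,
# page URL, and most recent table row for each test case in CSV form, while
# running it with no options prints the full tables in wiki markup.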