← Back to team overview

elementary-translators team mailing list archive

State of translations site generator

 

Hi, everyone!

Being somewhat frustrated at not being able to get a good overview of the current state of the translations in each of the projects, and eager to learn some Python, I've created a simple script that generates a simple website, which will hopefully be useful.

It finds all of the translatable projects at https://translations.launchpad.net/elementary, then goes through each project, extracts the untranslated and needs-review numbers, and writes all of the information as HTML to a file called index.htm.

Some usage help:

# This will output information for all of the languages
python elementary_translations.py

# This will only output information for the Swedish language
python elementary_translations.py -l sv

# This will only output information for the Swedish and English (United Kingdom) languages
python elementary_translations.py -l sv -l en_GB

Attached is the script and a sample output.

/Marcus
<https://translations.launchpad.net/granite/trunk/+pots/granite/en_GB/+translate>
##########################################################################################
# Copyright (C) 2013 Marcus Lundgren
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software
# is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
##########################################################################################

from HTMLParser import HTMLParser
import urllib2
import re
import argparse

class ProjectParser(HTMLParser):
    """Collect per-language translation numbers from a project page.

    A language row is a ``<tr>`` whose ``class`` attribute contains
    ``language-<code>``.  Inside such a row the ``<td>`` cells are counted:
    the first cell supplies the link and the language name, the third the
    "untranslated" value, the fourth the "needs review" value, and reaching
    the fifth cell stores the finished entry in ``project["languages"]``.
    """

    # Prefix put in front of the href found in the first cell of a row.
    URL_PREFIX = "https://translations.launchpad.net"

    # 1-based <td> index within a language row -> key its text is stored under.
    COLUMN_KEYS = {1: "name", 3: "untranslated", 4: "needs_review"}

    def __init__(self):
        self.is_parsing_language = False
        self.retrieve_data = False
        # Initialised up front so the tag handlers never read an unset
        # attribute, e.g. when a row's first child is not a <td>.
        self.has_parsed_td = False
        self.td_count = 0
        self.current_language = {}
        self.project = {"languages": []}
        HTMLParser.__init__(self)

    def look_for_language(self, tag, attr):
        """Start a language row if ``tag`` is a ``<tr class="... language-xx">``."""
        if tag != "tr":
            return
        for name, value in attr:
            # ``value`` is None for an attribute written without a value.
            if name != "class" or not value:
                continue
            match = re.search(r"language-(\w+)", value)
            if match is None:
                continue
            # Start from a fresh entry so a previous, incomplete row cannot
            # leak its fields into this one.
            self.current_language = {"class": match.group(1)}
            self.is_parsing_language = True
            self.td_count = 0

    def look_for_translations(self, tag, attr):
        """Track the cells of the current language row and pick up its link."""
        if not self.is_parsing_language:
            return

        if tag == "td":
            self.td_count += 1
            self.has_parsed_td = False

        if self.has_parsed_td:
            return

        if self.td_count == 1 and tag == "a":
            for name, value in attr:
                if name == "href" and value is not None:
                    self.current_language["url"] = self.URL_PREFIX + value

        if self.td_count in self.COLUMN_KEYS:
            # The next non-blank text belongs to this cell.
            self.retrieve_data = True
        elif self.td_count == 5:
            # Fifth cell reached: the entry is complete.
            self.project["languages"].append(self.current_language)
            self.current_language = {}
            self.has_parsed_td = True

    def handle_data(self, data):
        """Store the first non-blank text of a cell that is being captured."""
        text = data.strip()
        if not self.retrieve_data or not text:
            return
        key = self.COLUMN_KEYS.get(self.td_count)
        if key is not None:
            self.current_language[key] = text
        self.retrieve_data = False
        self.has_parsed_td = True

    def handle_starttag(self, tag, attr):
        """Dispatch to row detection or cell tracking depending on state."""
        if not self.is_parsing_language:
            self.look_for_language(tag, attr)
        else:
            self.look_for_translations(tag, attr)

    def handle_endtag(self, tag):
        """Leave the current language row when its ``</tr>`` is seen."""
        if tag == "tr":
            self.is_parsing_language = False
            # An empty captured cell must not grab text outside the row.
            self.retrieve_data = False

    def parse(self, project):
        """Fetch ``project["url"]`` and fill ``project["languages"]``.

        ``project`` is modified in place (its ``"languages"`` key is
        replaced by a new list) and also returned.
        """
        self.project = project
        self.project["languages"] = []
        self.current_language = {}
        # The same parser instance is reused for several pages: drop any
        # unprocessed input and row state left over from the previous one.
        self.reset()
        self.is_parsing_language = False
        self.retrieve_data = False
        self.has_parsed_td = False
        self.td_count = 0

        page = urllib2.urlopen(project["url"])
        try:
            self.feed(page.read())
        finally:
            page.close()
        return self.project

class ProjectsFinder(HTMLParser):
    """Collect the project links listed on the translations overview page.

    Links are only collected inside the first ``<div>`` whose ``id`` is
    ``translatable-projects``.  Each ``<a>`` with an ``href`` found there
    becomes a ``{"url": ..., "name": ...}`` dict, where the name is the
    first piece of text seen after the link's start tag.
    """

    # Page that lists the translatable projects.
    PROJECTS_URL = "https://translations.launchpad.net/elementary"

    def __init__(self):
        self.is_in_div = False
        self.is_in_link = False
        self.has_been_in_div = False
        self.projects = []
        self.current_project = {}
        HTMLParser.__init__(self)

    def find_projects(self):
        """Download the overview page, parse it and return the project list."""
        page = urllib2.urlopen(self.PROJECTS_URL)
        try:
            self.feed(page.read())
        finally:
            page.close()
        return self.projects

    def look_for_div(self, tag, attr):
        """Enter collecting mode on ``<div id="translatable-projects">``."""
        if tag != "div":
            return
        for name, value in attr:
            if name == "id" and value == "translatable-projects":
                self.is_in_div = True

    def look_for_link(self, tag, attr):
        """Remember the target of an ``<a href=...>`` start tag."""
        if tag != "a":
            return
        for name, value in attr:
            if name == "href":
                self.is_in_link = True
                self.current_project["url"] = value

    def handle_starttag(self, tag, attr):
        """Look for the div first, then for links once inside it."""
        if self.has_been_in_div:
            # The project list has already been read; ignore the rest.
            return
        if self.is_in_div:
            self.look_for_link(tag, attr)
        else:
            self.look_for_div(tag, attr)

    def handle_data(self, data):
        """Use the text following a link's start tag as the project name."""
        if not self.is_in_link:
            return
        self.current_project["name"] = data
        self.projects.append(self.current_project)
        self.current_project = {}
        self.is_in_link = False

    def handle_endtag(self, tag):
        """Stop collecting at the first ``</div>`` seen while inside the div."""
        # NOTE(review): any nested </div> also ends collection; this relies
        # on the project list not containing inner divs.
        if self.is_in_div and tag == "div":
            self.has_been_in_div = True
            self.is_in_div = False

def translations_header_html():
    """Return the table header row with the three column titles."""
    titles = ("Language", "Untranslated", "Needs review")
    cells = ["<td><strong>{0}</strong></td>".format(title) for title in titles]
    # The row must be closed with </tr>; a second <tr> would open a new,
    # empty row instead.
    return "<tr>" + "".join(cells) + "</tr>"

def translation_html(translation):
    """Return one table row for a single language entry.

    ``translation`` must provide the keys ``"url"``, ``"name"``,
    ``"untranslated"`` and ``"needs_review"``; a missing key raises
    ``KeyError``.
    """
    link_cell = "<td><a href='{0}'>{1}</a></td>".format(translation["url"], translation["name"])
    number_cell = "<td style='text-align:right;'>{0}</td>"
    # The row must be closed with </tr>; a second <tr> would open a new,
    # empty row instead.
    return (
        "<tr>"
        + link_cell
        + number_cell.format(translation["untranslated"])
        + number_cell.format(translation["needs_review"])
        + "</tr>"
    )

def project_html(project, languages_to_extract):
    """Return the heading and translation table for one project.

    Every entry of ``project["languages"]`` is rendered when
    ``languages_to_extract`` is empty; otherwise only the entries whose
    ``"class"`` value is contained in ``languages_to_extract``.
    """
    pieces = [
        "<h3><a href='{0}'>{1}</a></h3>".format(project["url"], project["name"]),
        "<table>",
        translations_header_html(),
    ]
    for entry in project["languages"]:
        # Skip languages that were not asked for when a filter is given.
        if len(languages_to_extract) != 0 and entry["class"] not in languages_to_extract:
            continue
        pieces.append(translation_html(entry))
    pieces.append("</table>")
    return "".join(pieces)

def generate_html(projects, languages_to_extract):
    """Return the full HTML document with one section per project.

    ``languages_to_extract`` is passed through unchanged to
    ``project_html`` for every item of ``projects``.
    """
    sections = [project_html(entry, languages_to_extract) for entry in projects]
    opening = [
        "<html>",
        "<head><title>elementary - State of translations</title></head>",
        "<body>",
    ]
    closing = ["</body>", "</html>"]
    return "".join(opening + sections + closing)

if __name__ == '__main__':
    # Command-line entry point: find the projects, parse each project page,
    # render everything as HTML and write it to index.htm.
    parser = argparse.ArgumentParser(description="elementary - State of translations generator")
    parser.add_argument(
        "-l",
        help="If specified, will only extract those languages. e.g. [-l sv -l en_GB] to extract Swedish and English (United Kingdom)",
        required=False,
        dest="language",
        action="append"
    )
    args = parser.parse_args()

    # argparse leaves the value as None when -l was never given; an empty
    # list means "no filter" to the HTML generator.
    languages_to_extract = args.language or []

    # Single-argument print(...) prints the same text under Python 2's
    # print statement and Python 3's print function.
    print("Finding projects...")
    finder = ProjectsFinder()
    projects_found = finder.find_projects()

    print("Parsing projects (this might take a while)...")
    project_parser = ProjectParser()
    projects = [project_parser.parse(project) for project in projects_found]

    print("Generating html...")
    html = generate_html(projects, languages_to_extract)

    print("Writing to file...")
    # The with-block closes (and thereby flushes) the file even if the
    # write fails; the original never closed the handle.
    with open("index.htm", "w") as f:
        f.write(html)

    print("Done!")

Follow ups