elementary-translators team mailing list archive
-
elementary-translators team
-
Mailing list archive
-
Message #00116
State of translations site generator
Hi, everyone!
Being somewhat frustrated at not being able to get a good overview of
the current state of the translations in each of the projects, and
eager to learn some Python, I've created a simple script that
generates a small website which will hopefully be useful.
It finds all of the translatable projects at
https://translations.launchpad.net/elementary, then goes through
each of the projects, extracts the "untranslated" and "needs review" numbers,
and writes all of the information as HTML to a file called index.htm.
Some usage help:
# This will output information for all of the languages
python elementary_translations.py
# This will only output information for the Swedish language
python elementary_translations.py -l sv
# This will only output information for the Swedish and English (United
# Kingdom) languages
python elementary_translations.py -l sv -l en_GB
Attached is the script and a sample output.
/Marcus
<https://translations.launchpad.net/granite/trunk/+pots/granite/en_GB/+translate>
##########################################################################################
# Copyright (C) 2013 Marcus Lundgren
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the Software
# is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
##########################################################################################
from HTMLParser import HTMLParser
import urllib2
import re
import argparse
class ProjectParser(HTMLParser):
    """Collect per-language translation figures from a project's page.

    A language row is a ``<tr>`` whose ``class`` attribute contains
    ``language-<code>``.  Inside such a row the cells are counted as they
    open: cell 1 supplies the language name and the link to its translation
    page, cell 3 the untranslated count and cell 4 the needs-review count.
    When cell 5 opens, the collected row is appended to
    ``project["languages"]``.
    """

    _BASE_URL = "https://translations.launchpad.net"
    # Raw string so that "\w" is a regex class and not a string escape.
    _LANGUAGE_CLASS = re.compile(r"language-(\w+)")
    # Cell number -> key under which the text of that cell is stored.
    _CELL_KEYS = {1: "name", 3: "untranslated", 4: "needs_review"}

    def __init__(self):
        HTMLParser.__init__(self)
        self.project = None
        self._reset_state()

    def _reset_state(self):
        """Put all row-tracking attributes back to their initial values."""
        self.is_parsing_language = False
        self.retrieve_data = False
        self.has_parsed_td = False
        self.td_count = 0
        self.current_language = {}

    def look_for_language(self, tag, attr):
        """Start a new language row if *tag* is a ``<tr>`` marking one.

        tag  -- lower-cased tag name
        attr -- list of (name, value) pairs; value is None when the
                attribute was written without a value
        """
        if tag != "tr":
            return
        for name, value in attr:
            if name != "class" or not value:
                continue
            match = self._LANGUAGE_CLASS.search(value)
            if match is None:
                # "language-" followed by nothing usable: not a language row.
                continue
            # Start from an empty dict so that keys gathered for an earlier
            # row that ended before its fifth cell cannot leak into this one.
            self.current_language = {"class": match.group(1)}
            self.is_parsing_language = True
            self.td_count = 0

    def look_for_translations(self, tag, attr):
        """Track cells inside a language row and decide what to capture."""
        if not self.is_parsing_language:
            return
        if tag == "td":
            self.td_count += 1
            self.has_parsed_td = False
        if self.has_parsed_td:
            return
        if self.td_count == 1 and tag == "a":
            for name, value in attr:
                # Skip a valueless href instead of concatenating None.
                if name == "href" and value is not None:
                    self.current_language["url"] = self._BASE_URL + value
        if self.td_count in self._CELL_KEYS:
            # The next non-blank text belongs to this cell.
            self.retrieve_data = True
        elif self.td_count == 5:
            # Everything of interest has been seen; store the row.
            self.project["languages"].append(self.current_language)
            self.current_language = {}
            self.has_parsed_td = True

    def handle_data(self, data):
        """Store the first non-blank text of a cell that is being captured."""
        text = data.strip()
        if not self.retrieve_data or not text:
            return
        key = self._CELL_KEYS.get(self.td_count)
        if key is not None:
            self.current_language[key] = text
        self.retrieve_data = False
        self.has_parsed_td = True

    def handle_starttag(self, tag, attr):
        """Dispatch a start tag depending on whether a row is in progress."""
        if self.is_parsing_language:
            self.look_for_translations(tag, attr)
        else:
            self.look_for_language(tag, attr)

    def handle_endtag(self, tag):
        """Leave the current language row when its ``</tr>`` is reached."""
        if tag == "tr":
            self.is_parsing_language = False

    def parse(self, project):
        """Download ``project["url"]`` and fill in ``project["languages"]``.

        project -- dict with at least a "url" key; it is modified in place
                   and also returned.

        Each entry appended to ``project["languages"]`` is a dict with the
        keys "class" and, as far as the page provided them, "url", "name",
        "untranslated" and "needs_review".
        """
        self.project = project
        self.project["languages"] = []
        # The same instance may be used for several projects: drop any
        # input still buffered from the previous page and any half-read row.
        self.reset()
        self._reset_state()
        page = urllib2.urlopen(project["url"])
        try:
            self.feed(page.read())
        finally:
            page.close()
        return self.project
class ProjectsFinder(HTMLParser):
    """Find the translatable projects listed on the elementary overview page.

    Only links inside the first ``<div id="translatable-projects">`` are
    considered.  Every such link that has both an ``href`` and some text
    becomes a ``{"url": ..., "name": ...}`` dict in ``self.projects``.
    """

    _INDEX_URL = "https://translations.launchpad.net/elementary"

    def __init__(self):
        HTMLParser.__init__(self)
        self.is_in_div = False
        self.is_in_link = False
        self.has_been_in_div = False
        self.projects = []
        self.current_project = {}

    def find_projects(self):
        """Download the overview page and return the list of project dicts."""
        page = urllib2.urlopen(self._INDEX_URL)
        try:
            self.feed(page.read())
        finally:
            page.close()
        return self.projects

    def look_for_div(self, tag, attr):
        """Note when the ``translatable-projects`` div is entered."""
        if tag != "div":
            return
        for name, value in attr:
            if name == "id" and value == "translatable-projects":
                self.is_in_div = True

    def look_for_link(self, tag, attr):
        """Remember the target of an ``<a>`` found inside the div."""
        if tag != "a":
            return
        for name, value in attr:
            # value is None for a bare "href"; neither that nor an empty
            # href is a usable project address.
            if name == "href" and value:
                self.is_in_link = True
                self.current_project["url"] = value

    def handle_starttag(self, tag, attr):
        """Look for the div first, then for links inside it."""
        if self.has_been_in_div:
            return
        if self.is_in_div:
            self.look_for_link(tag, attr)
        else:
            self.look_for_div(tag, attr)

    def handle_data(self, data):
        """Use the first non-blank text of a link as the project name."""
        if not self.is_in_link:
            return
        text = data.strip()
        if not text:
            # Whitespace between the tag and the real text is not a name.
            return
        self.current_project["name"] = text
        self.projects.append(self.current_project)
        self.current_project = {}
        self.is_in_link = False

    def handle_endtag(self, tag):
        """Close the current link, or stop searching once the div ends."""
        if tag == "a":
            # A link that closed without any text must not hand its URL
            # to whatever text happens to come next.
            self.is_in_link = False
            self.current_project = {}
        elif self.is_in_div and tag == "div":
            self.has_been_in_div = True
            self.is_in_div = False
def translations_header_html():
    """Return the header row (``<tr>...</tr>``) of a translations table."""
    titles = ("Language", "Untranslated", "Needs review")
    cells = "".join("<td><strong>{0}</strong></td>".format(title) for title in titles)
    # The row is closed with </tr>; emitting a second <tr> here would leave
    # the row unterminated and start an empty one.
    return "<tr>" + cells + "</tr>"
def translation_html(translation):
    """Return one table row describing a single language.

    translation -- dict with the keys "url", "name", "untranslated" and
                   "needs_review"; a missing key raises KeyError.
    """
    cells = [
        "<td><a href='{0}'>{1}</a></td>".format(translation["url"], translation["name"]),
        "<td style='text-align:right;'>{0}</td>".format(translation["untranslated"]),
        "<td style='text-align:right;'>{0}</td>".format(translation["needs_review"]),
    ]
    # Close the row with </tr> rather than opening another <tr>.
    return "<tr>" + "".join(cells) + "</tr>"
def project_html(project, languages_to_extract):
    """Return the heading and translations table for one project.

    project              -- dict with "url", "name" and "languages" (a list
                            of per-language dicts)
    languages_to_extract -- language codes to include; an empty collection
                            means every language is included
    """
    pieces = [
        "<h3><a href='{0}'>{1}</a></h3>".format(project["url"], project["name"]),
        "<table>",
        translations_header_html(),
    ]
    for entry in project["languages"]:
        wanted = len(languages_to_extract) == 0 or entry["class"] in languages_to_extract
        if wanted:
            pieces.append(translation_html(entry))
    pieces.append("</table>")
    return "".join(pieces)
def generate_html(projects, languages_to_extract):
    """Return the complete HTML document covering all *projects*.

    projects             -- iterable of project dicts as accepted by
                            project_html
    languages_to_extract -- passed through to project_html for each project
    """
    opening = (
        "<html>"
        "<head><title>elementary - State of translations</title></head>"
        "<body>"
    )
    sections = [project_html(entry, languages_to_extract) for entry in projects]
    closing = "</body>" + "</html>"
    return opening + "".join(sections) + closing
if __name__ == '__main__':
    # Command-line entry point: fetch every project, then write index.htm
    # into the current working directory.
    parser = argparse.ArgumentParser(description="elementary - State of translations generator")
    parser.add_argument("-l", help="If specified, will only extract those languages. e.g. [-l sv -l en_GB] to extract Swedish and English (United Kingdom)", required=False, dest="language", action="append")
    args = parser.parse_args()
    # With action="append" the attribute stays None when -l is never given;
    # an empty list means "all languages" further down.
    languages_to_extract = args.language or []

    # print("...") with a single string behaves the same as the print
    # statement on Python 2 and is also valid syntax on Python 3.
    print("Finding projects...")
    projects_found = ProjectsFinder().find_projects()

    print("Parsing projects (this might take a while)...")
    # A fresh parser per project, so nothing buffered or half-parsed from
    # one page can influence the next.
    projects = [ProjectParser().parse(project) for project in projects_found]

    print("Generating html...")
    html = generate_html(projects, languages_to_extract)

    print("Writing to file...")
    # The with-block closes (and thereby flushes) the file even on error.
    with open("index.htm", "w") as f:
        f.write(html)
    print("Done!")
Follow ups