← Back to team overview

fenics team mailing list archive

Download statistics

 

I have added download statistics to fenics.org:

    http://www.fenics.org/statistics/downloads.html

The statistics are collected as unique pairs of (file name, IP number)
and are broken down by package and by month of download.

There are some interesting trends:

1. Most downloads are "on the diagonal", meaning that most people
download the version that is the most current one in the month of download.

2. The number of downloads of old versions is nonzero.

3. The downloads of Puffin spiked in November, probably due to a
course given by Johan Hoffman at KTH.

4. In total, we have around 200-300 downloads/month with 2500
downloads since February. (But we release often and there are multiple
packages, which helps boost the numbers.)

Take a look and see if you can spot any errors or want to add
something (see attached script).

/Anders
#!/usr/bin/python
#
# Copyright (C) 2005 Anders Logg.
# Licensed under the GNU GPL Version 2.
#
# Parse Apache log file and generate a summary of
# downloads in HTML format.
#
# First created: 2005-12-07

import string

# Apache access log to parse (relative path, so read from the current directory)
logfile = open("fenics-access.log")
# HTML report to generate; opened in "w" mode, so an existing file is overwritten
outfile = open("dtrack.html", "w")

# Substrings that mark a log line as a request for a package archive
suffixes = (".tar.gz", ".tgz")
# Canonical spelling of the known package names (used by mapname)
names = ("FIAT", "DOLFIN", "FFC", "Ko", "Puffin")
# File names that are excluded from the statistics. The trailing comma is
# required: without it this is a plain string, and "filename in ignores"
# becomes a substring test instead of a tuple membership test.
ignores = ("berakningsmatematik.tar.gz",)

def mapname(package):
    """Map a package name to its canonical spelling.

    The comparison is case-insensitive: if package equals one of the
    entries in names when both are lowercased, that entry is returned.
    Otherwise package is returned unchanged.
    """
    # Use the str method instead of the deprecated string.lower function,
    # and lowercase the argument only once
    lowered = package.lower()
    for name in names:
        if lowered == name.lower():
            return name
    return package

# First extract all downloads
print("Extracting downloads...")
downloads = []   # (filename, ipnumber, date) for each counted download
dates = []       # "month year" labels in order of first appearance in the log
counted = set()  # (filename, ipnumber) pairs that have already been counted
for line in logfile:

    # Get date: the timestamp is the bracketed field of the log line,
    # e.g. [07/Dec/2005:10:15:32 +0100], which is reduced to "Dec 2005"
    try:
        timestamp = line.split("[")[1].split("]")[0]
        fields = timestamp.split("/")
        date = fields[1] + " " + fields[2].split(":")[0]
    except IndexError:
        # Blank or malformed line without a parsable timestamp: skip it
        # rather than aborting the whole run
        continue
    if date not in dates:
        print("  Adding " + str(date))
        dates.append(date)

    # Only GET requests can be downloads. Test for "GET " including the
    # trailing space, since that is the separator used below to locate the
    # request path; testing for "GET" alone could let the split fail.
    if "GET " not in line:
        continue

    # Look for suffixes
    for suffix in suffixes:
        if suffix in line:
            # Last path component of the requested URL
            filename = line.split("GET ")[1].split(" ")[0].split("/")[-1]
            # The client address is the first space-separated field
            ipnumber = line.split(" ")[0]
            # Count each (file, client) pair only once and skip ignored files
            if (filename, ipnumber) not in counted and filename not in ignores:
                downloads.append((filename, ipnumber, date))
                counted.add((filename, ipnumber))

# The log has been read completely; release the file handle
logfile.close()

# Count package downloads
# stats maps package -> {(filename, date): number of downloads}
print("Generating statistics...")
stats = {}
for (filename, ipnumber, date) in downloads:
    # The package name is the part of the file name before the first dash
    package = mapname(filename.split("-")[0])
    if package not in stats:
        print("  Adding statistics for package " + package)
        stats[package] = {}
    key = (filename, date)
    stats[package][key] = stats[package].get(key, 0) + 1

# Sort package names
packages = sorted(stats)

# Summarize downloads for packages
datesums = {}     # package -> {date: downloads of that package for that date}
totals = {}       # package -> downloads of that package over all dates
total = 0         # downloads over all packages and all dates
packagesums = {}  # date -> downloads of all packages for that date
for date in dates:
    packagesums[date] = 0
for package in packages:
    datesums[package] = {}
    totals[package] = 0
    # Start every date at zero so dates without downloads still get a value
    for date in dates:
        datesums[package][date] = 0
    for (filename, date) in stats[package]:
        # Named count (not downloads) so that the list of extracted
        # downloads built earlier in the script is not overwritten
        count = stats[package][(filename, date)]
        datesums[package][date] += count
        totals[package] += count
        packagesums[date] += count
        total += count

# Print summary
# One table row per package and one column per date, plus row and column totals
cellsep = " </td><td align=\"center\"> "
outfile.write("<h2>Summary for all projects</h2><p>\n\n")
outfile.write("<center>\n")
outfile.write("<table border=\"1\">\n")
# Header row
outfile.write("<tr><td align=\"left\"> Project </td><td align=\"center\"> ")
outfile.write(cellsep.join(dates))
outfile.write(" </td><td> Total </td></tr>\n")
# One row per package
for package in packages:
    # Package names are derived from request paths found in the log, i.e.
    # from client-controlled input, so escape HTML metacharacters
    label = package.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
    outfile.write("<tr><td align=\"left\"> <b>" + label + "</b> </td><td align=\"center\"> ")
    outfile.write(cellsep.join([str(datesums[package][date]) for date in dates]))
    outfile.write(" </td><td align=\"center\"> <b>" + str(totals[package]) + "</b> </td></tr>\n")
# Totals row
outfile.write("<tr><td align=\"left\"> Total </td><td align=\"center\"> ")
outfile.write(cellsep.join([str(packagesums[date]) for date in dates]))
outfile.write(" </td><td align=\"center\"> <b>" + str(total) + "</b> </td></tr>\n")
outfile.write("</table>\n")
outfile.write("</center>\n\n")

# Print detailed stats
# One table per package, with one row per file name and one column per date

def _escape(text):
    "Escape HTML metacharacters in text."
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

for package in packages:

    packagestats = stats[package]

    # Find all file names and sort them (a set removes the duplicates)
    filenames = sorted(set([filename for (filename, date) in packagestats]))

    # Compute sum over dates for each file name
    filenamesums = {}
    for (filename, date) in packagestats:
        filenamesums[filename] = filenamesums.get(filename, 0) + packagestats[(filename, date)]

    # Compute total sum; it must agree with the total computed for the summary
    packagetotal = sum(filenamesums.values())
    assert(packagetotal == totals[package])

    # Write HTML table. Package and file names are derived from request
    # paths found in the log, i.e. from client-controlled input, so they
    # are escaped before being written.
    cellsep = " </td><td align=\"center\"> "
    outfile.write("<h2>Downloads for %s</h2><p>\n\n" % _escape(mapname(package)))
    outfile.write("<center>\n")
    outfile.write("<table border=\"1\">\n")
    outfile.write("<tr><td align=\"left\"> Filename </td><td align=\"center\"> ")
    outfile.write(cellsep.join(dates))
    outfile.write(" </td><td> Total </td></tr>\n")
    for filename in filenames:
        outfile.write("<tr><td align=\"left\"> " + _escape(filename) + " </td><td align=\"center\"> ")
        # A (filename, date) pair without any downloads is reported as 0
        counts = [str(packagestats.get((filename, date), 0)) for date in dates]
        outfile.write(cellsep.join(counts))
        outfile.write(" </td><td align=\"center\"> " + str(filenamesums[filename]) + " </td></tr>\n")
    outfile.write("<tr><td align=\"left\"> Total </td><td align=\"center\"> ")
    outfile.write(cellsep.join([str(datesums[package][date]) for date in dates]))
    outfile.write(" </td><td align=\"center\"> <b>" + str(packagetotal) + "</b> </td></tr>\n")
    outfile.write("</table>\n")
    outfile.write("</center>\n\n")

# The report is complete; flush and release the output file
outfile.close()