#!/usr/bin/env python
"""
Generate reports from raw comScore market-share data.
Requires that you have gnuplot installed.
-s = tabulate or plot marketshare trends
-u = tabulate or plot userbase trends
-d = tabulate or plot changes in userbase by month
-w = generate HTML table to stdout; without this, make a plot to a file
-t = generate text table to stdout
-T = include totals column
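-n = suppress deletion of generated data file
-m = print the last month covered by the data to stdout
-p PLATFORM = plot one platform's trend with a fitted straight line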
The raw data is assumed to be in comscore.dat.
"""
import os, sys, getopt, tempfile, copy
class comScore:
    """A comScore table plus the transformations used to report on it.

    data is a list of rows, each a list of cells.  Row 0 holds the column
    headers and column 0 of every row holds the row label; all other cells
    hold figures, with '-' marking a missing value.  The transforming
    methods modify self.data rather than returning a new table.
    """
    def __init__(self, data):
        self.data = data
    #
    # Framework code
    #
    def arithmetize(self):
        "Turn data to numeric, excluding top row, left column, and - entries."
        width = len(self.data[0])
        for row in self.data[1:]:
            for j in range(1, width):
                if row[j] != '-':
                    row[j] = float(row[j])
    def unarithmetize(self):
        "Turn numeric table data back to strings with two decimal places."
        width = len(self.data[0])
        for row in self.data[1:]:
            for j in range(1, width):
                if row[j] != '-':
                    row[j] = "%.2f" % row[j]
    def emit(self):
        """Ship transformed self.data to a file for plotting.

        Returns the name of a freshly created temporary file holding the
        table as tab-separated values.  The caller owns the file and is
        responsible for deleting it.
        """
        (handle, name) = tempfile.mkstemp()
        # Wrap the descriptor mkstemp() already opened; opening the path a
        # second time by name would leave that descriptor open for the
        # rest of the run.
        ofp = os.fdopen(handle, "w")
        try:
            self.textize(ofp)
        finally:
            ofp.close()
        return name
    def textize(self, ofp=sys.stdout):
        "Dump data as a tab-separated-values file; every cell must be a string."
        for row in self.data:
            ofp.write("\t".join(row) + "\n")
    def webize(self, ofp=sys.stdout):
        """Generate a table suitable for web display from self.data.

        The table is written transposed: each HTML row carries one column
        of self.data.  As a side effect, a space is inserted after the
        third character of every row label (header row included), so
        'Sep2010' becomes 'Sep 2010'.
        """
        width = len(self.data[0])
        for row in self.data:
            row[0] = row[0][:3] + " " + row[0][3:]
        # NOTE(review): the markup in these literals was missing from the
        # file as received (the strings were split across lines and did not
        # parse).  Plain table/tr/td tags are a reconstruction that matches
        # the surviving newline pattern; confirm against the intended
        # markup, including any attributes on the table tag.
        ofp.write("<table>\n")
        for j in range(width):
            ofp.write("<tr>")
            for row in self.data:
                ofp.write("<td>" + row[j] + "</td>")
            ofp.write("</tr>\n")
        ofp.write("</table>\n")
    def lastmonth(self):
        "Return last month for which report is valid (label of the final row)."
        return self.data[-1][0]
    def select(self, platform):
        """Select out data for a single platform.

        Reduces every row to [label, value-for-platform] and drops the
        header row.  Raises ValueError if platform is not a column header.
        """
        column = self.data[0].index(platform)
        # Slice assignment keeps the same list object, as the row-by-row
        # rewrite it replaces did.
        self.data[:] = [[row[0], row[column]] for row in self.data[1:]]
    #
    # Data reduction
    #
    def usercount(self):
        """Multiply market shares by smartphone userbase size (last column).

        Shares are percentages, so each product is divided by 100.  The
        last column is relabelled 'Total' and left unscaled.  Requires
        arithmetize() to have been called first.
        """
        last = len(self.data[0]) - 1
        self.data[0][last] = "Total"
        for row in self.data[1:]:
            userbase = row[last]
            for j in range(1, last):
                if row[j] == '-':
                    continue
                if userbase == '-':
                    # No userbase figure for this row: the user count is
                    # unknown, so mark it missing instead of trying to
                    # multiply a number by the '-' string.
                    row[j] = '-'
                else:
                    row[j] *= userbase
                    row[j] /= 100.0
    def deltas(self):
        """Turn self.data into a differences table.

        Each cell becomes its value minus the value one row earlier; a
        cell is '-' when either operand is missing.  Requires
        arithmetize() to have been called first.
        """
        width = len(self.data[0])
        differences = copy.deepcopy(self.data)
        for i in range(2, len(self.data)):
            previous = self.data[i-1]
            current = self.data[i]
            for j in range(1, width):
                if current[j] == '-' or previous[j] == '-':
                    differences[i][j] = '-'
                else:
                    differences[i][j] = current[j] - previous[j]
        # Remove first data row, for which there is no corresponding delta.
        self.data = differences[:1] + differences[2:]
def coreplot(n):
    """Return a gnuplot script template plotting columns 2 through n.

    Column 1 of the data file supplies the x-axis tick labels and each
    plotted column is titled from its header cell.  The result still
    contains a %(input)s placeholder for the data file name, which the
    caller is expected to fill in with %-formatting.
    """
    # The line continuation at the end of the literal keeps the whole
    # plot command, including the clauses appended below, on one line.
    preamble = """
set terminal png nocrop enhanced
set key outside right top vertical Right noreverse noenhanced autotitles nobox
set datafile missing '-'
set style data linespoints
set xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0
set xtics norangelimit
set xtics ()
plot '%(input)s' using 2:xtic(1) title columnheader(2), \
"""
    clause = " '' using %d:xtic(1) title columnheader(%d)"
    # Every clause but the last is followed by a separating comma.
    leading = [(clause + ", ") % (column, column) for column in range(3, n)]
    return preamble + "".join(leading) + clause % (n, n)
# gnuplot script template used for the -p (prediction) mode: it plots
# column 2 of the data file against column 1 as points and overlays the
# straight line f(x) whose parameters m and b gnuplot's "fit" derives from
# the same two columns.  The text is a %-format template filled in by
# gnuplot(): %(input)s becomes the data file name, and every literal
# percent sign is doubled, so '%%b%%Y' reaches gnuplot as '%b%Y'.
predictive = """
set terminal png nocrop enhanced
set datafile missing '-'
set style data points
set xtics border in scale 1,0.5 nomirror rotate by -45 offset character 0, 0, 0
set xtics norangelimit
set xtics ()
set xdata time
set timefmt '%%b%%Y'
set xtics format '%%b%%Y'
unset key
set grid
offset=10*365*24*60*60
f(x)=1.e-7*m*(x-offset)+b
fit f(x) '%(input)s' using 1:2 via m,b
plot '%(input)s' using 1:2, f(x)
"""
def gnuplot(inputname, plot):
    """Generate a derived plot by piping a script to gnuplot.

    inputname -- path of the data file, substituted for %(input)s.
    plot      -- gnuplot script used as a %-format template; literal
                 percent signs in it must be doubled.

    The scripts built in this file select the png terminal without a
    "set output", so gnuplot writes the image to the stdout it inherits
    from this process.  The expanded script is therefore echoed to
    stderr, keeping stdout free of anything but the image.
    """
    plot = plot % {"input" : inputname}
    sys.stderr.write(plot + "\n")
    ofp = os.popen("gnuplot -", "w")
    try:
        ofp.write(plot)
    finally:
        # close() waits for gnuplot and yields None on success, otherwise
        # its exit status.
        status = ofp.close()
    if status:
        sys.stderr.write("gnuplot exited with status %s\n" % status)
def grab(filename):
    """Grab the contents of a data file.

    Returns a list of rows, each a list of the tab-separated fields of
    one line with surrounding whitespace removed.  Lines beginning with
    '#' are comments and are skipped, as are blank lines, which would
    otherwise yield a one-cell row that breaks column indexing later.
    """
    lines = []
    # The with-block closes the file even if reading fails part-way.
    with open(filename) as ifp:
        for line in ifp:
            stripped = line.strip()
            if line.startswith('#') or not stripped:
                continue
            lines.append(stripped.split("\t"))
    return lines
if __name__ == '__main__':
    # Command-line driver: parse the options, apply the requested
    # reductions to the table read from comscore.dat, then either print
    # a table / the last month or hand a script to gnuplot.
    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "suwdtmp:Tn")
    except getopt.GetoptError as err:
        # Report a bad option with the usage text instead of a traceback.
        sys.stderr.write("%s\n%s" % (err, __doc__))
        sys.exit(2)
    tabulate = False
    textdump = False
    datedump = False
    share = False       # -s is accepted, but share is also the default mode
    user = False
    deltas = False
    total = False
    predict = None
    remove = True
    basedata = grab('comscore.dat')
    info = comScore(basedata)
    for (opt, val) in options:
        if opt == '-s':
            share = True
        elif opt == '-u':
            user = True
        elif opt == '-d':
            deltas = True
        elif opt == '-w':
            tabulate = True
        elif opt == '-t':
            textdump = True
        elif opt == '-m':
            datedump = True
        elif opt == '-p':
            predict = val
        elif opt == '-n':
            remove = False
        elif opt == '-T':
            total = True
    # The title and y-axis format end up inside a script that gnuplot()
    # runs through %-formatting, hence the doubled percent signs.
    if user:
        title = "Userbase by platform, "
    else:
        title = "Market-share per platform, "
    if deltas:
        title += "change per month, "
    if user:
        yformat = "set format y '%%.2fM'\n"
        title += "units of 1M users."
    else:
        yformat = "set format y '%%.0f%%%%'\n"
        title += "units of 1%%."
    # Both reductions need numbers; convert back to strings afterwards
    # because the output methods join cells as text.
    if user or deltas:
        info.arithmetize()
        if user:
            info.usercount()
        if deltas:
            info.deltas()
        info.unarithmetize()
    if tabulate:
        info.webize()
    elif textdump:
        info.textize()
    elif datedump:
        sys.stdout.write(info.lastmonth())
    else:
        # Every remaining mode is a plot; they differ only in the script.
        if predict:
            info.select(predict)
            title = 'set title "%s: %s (prediction)"\n' % (predict, title)
            script = title + yformat + predictive
        else:
            title = "set title '%s'\n" % title
            if total:
                lastcolumn = 6
            else:
                lastcolumn = 5
            script = title + yformat + coreplot(lastcolumn)
        table = info.emit()
        try:
            gnuplot(table, script)
        finally:
            # Clean up (or report) the data file even if plotting failed.
            if remove:
                os.remove(table)
            else:
                sys.stderr.write("Table at %s\n" % table)
# End