Data Visualization using PlotDevice
PlotDevice is a macOS application that lets you write Python scripts to generate 2D graphics using simple drawing commands. In this book I used it to create some of the chapter covers. The code for each cover is listed below, section by section:
1. Introduction
1.1 Beautiful Data and Human Behavior
# Chapter 1.1 cover: forty randomly placed "flowers" on a dark canvas.
size(1000, 200)
background('#1f2838')


def flower(x, y):
    """Draw one flower anchored at (x, y).

    A flower is ten strokes, each drawn after a further 36-degree rotation
    (10 x 36 = one full turn), and each ending in an oval. The fill colour and
    the oval diameter (between 5 and 15) are picked at random once per flower.
    """
    petal_color = color(0.1, random(.5, 1), random(.5))
    with fill(petal_color), stroke(.6), pen(1), transform(CORNER):
        translate(x, y)
        bud = random(10) + 5
        for _ in range(10):
            rotate(36)
            line(0, 0, 15, 15)
            oval(10, 10, bud, bud)


# Scatter the flowers uniformly over the whole canvas.
for _ in range(40):
    flower(random(WIDTH), random(HEIGHT))
1.2 Python for Data Analysis
from matplotlib.finance import quotes_historical_yahoo
# Chapter 1.2 cover: one price curve per ticker, each rotated a little further
# than the previous one and labelled with its symbol.
size(1000, 200)
background(0, 0.1171875, 0.234375)
color(HSV)  # switch colour mode after the background has been set

names = ['EMES', 'TSLA', 'FLT', 'FRF', 'SAN',
         'MTNOY', 'XLF', 'VIG', 'C', 'EUM']
date1 = (2010, 1, 1)
date2 = (2014, 7, 1)

Q = {}  # ticker -> [[sample index, canvas y], ...]
C = {}  # ticker -> colour object returned by stroke()
for n, ticker in enumerate(names):
    quotes = quotes_historical_yahoo(ticker, date1, date2)
    # Second-to-last field of every quote record (presumably the closing
    # price; confirm against the record layout of quotes_historical_yahoo).
    close = [record[-2] for record in quotes]
    # Flip vertically: larger values are drawn higher on the canvas.
    Q[ticker] = [[k, HEIGHT + 20 - value] for k, value in enumerate(close)]
    # Saturation and brightness both grow with the ticker's position in
    # `names`. The value returned by stroke() is reused below as the curve
    # and label colour.
    C[ticker] = stroke(0.12, (n/10.)**0.5, (n/10.)**0.5)

for n, ticker in enumerate(names):
    points = Q[ticker]
    if not points:
        # No quotes came back for this ticker, so there is no curve to draw
        # and no first point to anchor the label to.
        continue
    # The points are plain [x, y] lists, so unpack by position. They have no
    # .x/.y attributes.
    first_x, first_y = points[0]
    with nofill(), stroke(1), pen(2), transform(CORNER):
        rotate(3*n)
        bezier(points, stroke=C[ticker])
        fill(C[ticker])
        font("Helvetica")
        fontsize(n*3 + 5)
        # Labels are pushed right and up in proportion to the ticker's
        # position so they fan out instead of stacking.
        text(ticker, first_x + n*70, first_y - n*27)
2. Data Collection
2.1 Connecting to Twitter API
import numpy as np
# Chapter 2.1 cover: tweet authors placed along a curve, each marked by a
# circle sized from the numeric last column of the input file.
size(1000, 200)
background('#1f2838')

# author -> [content, day, fans]. The fourth tab-separated column is bound to
# `fans` because the drawing loop parses it with float() and uses it to size
# the circle. A later row for the same author replaces the earlier one.
t = {}
with open('/Users/csid/Desktop/tweets.txt', 'rb') as f:
    for row in f:
        author, content, day, fans = row.strip().split('\t')
        t[author] = [content, day, fans]

d = 50   # horizontal offset between the two guide curves
k = 1.2  # exponent applied to log(fans) to get the circle size

# First guide curve. Its path object is kept so points on it can be sampled.
with bezier(100-d, 20, fill=None, stroke=(0.9, random(.5, 1), random(.5))) as path1:
    curveto(600-d, 0, 400-d, 250, 900-d, 180)

total = len(t)
# Iterate the items once instead of indexing t.keys()/t.values() on every
# pass. The order is the same, without rebuilding two lists per author.
for i, (author, fields) in enumerate(t.items()):
    fansN = fields[-1]
    fade = 1 - float(i)/total  # 1.0 for the first author, approaching 0
    # NOTE(review): the curve parameter grows by 0.1 per author, so it goes
    # past 1.0 from the twelfth author on. Confirm the input is small enough.
    pt = path1.point(0.1*i)

    # Circle marker.
    clr = color(0.9, random(.5, 1), random(.5), fade)
    fill(clr)
    stroke(.9)
    diameter = np.log(float(fansN))**k
    oval(pt.x-2, pt.y-2, diameter, diameter)

    # Raw count, right-aligned to the left of the marker.
    align(RIGHT)
    font("Helvetica", 8)
    text(fansN, pt.x-10, pt.y+10)

    # Author name, shifted right by the curve offset and fading with rank.
    align(LEFT)
    fill('#f96f00', fade)
    text(author, pt.x-2+d, pt.y+2, font=15*fade)

# Second guide curve, mirrored to the other side of the offset.
with bezier(100+d, 20, fill=None, stroke=(0.9, random(.5, 1), random(.5))) as path1:
    curveto(600+d, 0, 400+d, 220, 900+d, 170)
2.2 Scraping Articles from The Washington Post
# Chapter 2.2 cover: the words of an article flowed across the canvas, with a
# handful of keywords redrawn larger and brighter.
size(1000, 200)
background(.15, .1, .1)

words = read('/Users/csid/Desktop/blog.txt').split(' ')
keywords = set(['problem', 'Clinton', 'forces',
                'defend', 'interrogation', 'Afghanistan'])


def _flow(tokens, width, advance=9, leading=20, first_baseline=10):
    """Yield (token, x, y) for every token, wrapping once x passes `width`.

    Each token advances the cursor by `advance` units per character; a wrap
    resets x to 0 and moves the baseline down by `leading`.
    """
    x, y = 0, first_baseline
    for token in tokens:
        if x > width:
            x = 0
            y += leading
        yield token, x, y
        x += len(token) * advance


# Compute the layout once and share it between both passes. Previously the
# body-text pass skipped keywords before advancing the cursor, so it placed
# the following words where the highlight pass put the keyword, and the
# highlighted words overprinted ordinary text.
placed = list(_flow(words, WIDTH))

# Pass 1: body text, leaving a gap where each keyword belongs.
for word, x, y in placed:
    if word in keywords:
        continue
    fill(.1, random(.5), random(.5, 1.))
    font(14)
    text(word, x, y)

# Pass 2: keywords, drawn last so they sit on top of the body text.
for word, x, y in placed:
    if word in keywords:
        fill('#edfdff')
        font(25)
        text(word, x, y)
2.3 Processing the Dataset of Stack Exchange
import numpy as np
# Chapter 2.3 cover: every Stack Exchange site as a ring on a stem, positioned
# horizontally by its age column.
size(1000, 200)
background(.15, .15, .25)

# site -> [age, nq, na], all parsed as floats.
t = {}
with open('/Users/csid/Desktop/SEsites.txt', 'rb') as f:
    for record in f:
        site, age, nq, na = record.strip().split('\t')
        t[site] = [float(field) for field in (age, nq, na)]
names = ['stackoverflow.com', 'math.stackexchange.com']

n = 2.2  # divisor that scales age to a horizontal distance from the right edge


def _site_geometry(age, nq, na):
    """Return (x, h, rq, ra) for one site.

    x  -- horizontal position, measured leftwards from the right edge
    h  -- height term derived from log(nq + na)
    rq -- inner radius, from log(nq)
    ra -- outer radius, from log(na)
    """
    x = WIDTH - age/n
    h = np.log(nq+na)**1.9 - 20
    rq = np.log(nq)**0.9
    ra = np.log(na)**1.3
    return x, h, rq, ra


# Circles: a stem, then an outer disc, then an inner disc in the background
# colour, which leaves a ring.
for stats in t.values():
    x, h, rq, ra = _site_geometry(*stats)
    stroke(1, 0.5)
    with pen(1):
        line(x, 195, x, 180-h+2*ra+2)
    with fill(0.2, random(.5), random(.5), 0.8):
        arc(x, 180-h+ra, ra)
    with fill(.15, .15, .25):
        arc(x, 180-h+ra, rq)

# Labels: only the sites listed in `names`, shown without their domain suffix.
for domain in names:
    x, h, rq, ra = _site_geometry(*t[domain])
    label = domain.split('.')[0]
    fill('#adaff2')
    font("Helvetica", 20)
    text(label, x+20, 180-h+ra)
2.4 Retrieving Raw Data from Figures
The above figure was not created by PlotDevice.
3. Data Analysis
3.1 Clustering Countries by Constitutions
The above figure was not created by PlotDevice.
3.2 Determining Influential Papers in Citation Networks
import numpy as np
from collections import Counter
# Chapter 3.2 cover: an arc diagram of a citation network. Papers sit on one
# baseline, spaced by their degree, and every citation is an arc between two
# of them.
size(1000, 200)
background(0, .1, .2)

# Edge list: one tab-separated pair of integer paper ids per line.
e = []
with open('/Users/csid/Desktop/citations.txt', 'rb') as f:
    for row in f:
        src, dst = row.strip().split('\t')
        e.append([int(src), int(dst)])

# Degree of every paper = number of edge endpoints that mention it.
k = Counter(node for edge in e for node in edge)

baseline = 180
total_degree = sum(k.values())
# Horizontal distance allotted per unit of degree. float() keeps the spacing
# fractional even where `/` is integer division, and the guard avoids
# dividing by zero when the edge file is empty.
u = float(WIDTH) / total_degree if total_degree else 0.0

# Lay the papers out left to right; each one advances the cursor by its degree.
pos = {}
x = 0
for node, degree in k.items():
    x += degree * u
    pos[node] = [x, baseline]

# Nodes: radius proportional to degree.
for node, (node_x, node_y) in pos.items():
    with fill(.9, random(.5, 1), random(.5, 1.), 0.7), stroke(.8):
        arc(node_x, node_y, k[node]*2)

# Edges: arcs drawn from the left endpoint to the right endpoint.
for a, b in e:
    x1, y1 = pos[a]
    x2, y2 = pos[b]
    if x1 < x2:
        sx, sy, ex, ey = x1, y1, x2, y2
    else:
        sx, sy, ex, ey = x2, y2, x1, y1
    d = (ex - sx) / 3
    if not d:
        # Both endpoints share an x position (e.g. a paper citing itself):
        # there is no span to draw and the pen width 30/d would divide by zero.
        continue
    h = d / 1.4  # arc height grows with the horizontal span
    # Longer arcs get a thinner pen.
    with nofill(), stroke(.3, random(.5, 1), random(.5, 1.)), pen(30/d):
        with bezier(sx, sy):
            curveto(sx+d, baseline-h, sx+2*d, baseline-h, ex, ey)
3.3 Measuring the Difficulty of Questions in Q&A sites
import numpy as np
from scipy.stats import norm
# Chapter 3.3 cover: a noisy bell curve drawn seventeen times, rotated a
# little further on each pass.
size(1000, 200)
background(.05, .1, .15)

mu = WIDTH/2
sigma = 250

# Sample the normal density at 100 points across three sigmas either side.
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
y = np.array(norm.pdf(x, mu, sigma))

# Roughly 15% of the samples get a random bump of up to 7; the rest get none.
noise = [random(7) if random() > 0.85 else 0 for _ in range(len(y))]

# Rescale by the smallest density, flip so the peak points upwards on the
# canvas, then add the bumps.
y = HEIGHT - y/y.min() + np.array(noise)
points = list(zip(x, y))

with nofill(), stroke(0.5), pen(1), transform(CORNER):
    translate(-150, 20)
    for _ in range(17):
        rotate(0.6)
        stroke(0.2, random(0.5, 1.0), random(0.5, 1.0))
        bezier(points)
3.4 Discovering the Hidden Structure of Global Remittance Flows
import numpy as np
# Chapter 3.4 cover: weighted flows drawn as S-shaped curves between pairs of
# points, with a dot at both ends of every flow.
size(1000, 200)
background(.1, .1, .15)

s = 100  # scale applied to the coordinates read from the file

# Each line holds x1, y1, x2, y2, w; y values are scaled by half as much as x.
e = []
with open('/Users/csid/Desktop/moneytree.txt', 'rb') as f:
    for row in f:
        x1, y1, x2, y2, w = map(float, row.strip().split('\t'))
        e.append([x1*s, y1*s/2, x2*s, y2*s/2, w])

for x1, y1, x2, y2, w in e:
    # Always draw left to right.
    if x1 < x2:
        (sx, sy), (ex, ey) = (x1, y1), (x2, y2)
    else:
        (sx, sy), (ex, ey) = (x2, y2), (x1, y1)
    d = (ex - sx)/3
    strength = np.log(w + 1)

    # The flow itself: control points keep the curve level at both ends.
    with nofill(), stroke(.3, random(.5, 1), random(.5, 1.), 0.2), pen(strength/10):
        with bezier(sx, sy):
            curveto(sx+d, sy, sx+2*d, ey, ex, ey)

    # Endpoint dots share one colour and one radius per flow.
    with stroke(0.8), fill(.9, random(.5, 1), random(.5, 1.), 0.6):
        r = strength/3
        arc(sx, sy, r)
        arc(ex, ey, r)
3.5 Modeling the Growth of Cities Using Satellite Images
The above figure was not created by PlotDevice.
4. Data Visualization
4.1 Networks
The above figure was not created by PlotDevice.
4.2 Text
The above figure was not created by PlotDevice.
4.3 Maps
# Chapter 4.3 cover: restaurants plotted as scored dots over a map image.
size(1000, 200)
# Alternative canvas size left by the author: size(1280, 1280)
image("/Users/csid/Desktop/tempe.png", 0, 0)

# name -> [x, y, score], all parsed as floats.
t = {}
with open('/Users/csid/Desktop/tempeRest.text', 'rb') as f:
    for row in f:
        name, x, y, score = row.strip().split('\t')
        t[name] = [float(field) for field in (x, y, score)]

# Ascending by score, so higher-scored dots are drawn later (on top).
names = sorted(t, key=lambda place: t[place][2])

for place in names:
    x, y, score = t[place]
    wash = 1 - (score/5.)**5  # falls towards 0 as the score approaches 5
    stroke(.8)
    fill(0.6, wash, wash, 0.9)
    # File y grows upwards; canvas y grows downwards.
    arc(x, HEIGHT - y, score**1.6)

# NOTE(review): with the ascending sort these are the three LOWEST-scored
# entries. Confirm that is the intended set to label.
for place in names[:3]:
    x, y, score = t[place]
    fill('#f96f00')
    font("Zapfino", 23)
    text(place, x, HEIGHT - y)
4.4 Data Visualization using PlotDevice
# Chapter 4.4 cover: fifty outlined glyphs at random sizes, angles and
# positions.
size(1000, 200)
background(.1, .1, .2)

data = '123456789xyz'  # pool the glyphs are picked from

with fill(1), stroke(.2), font('Zapfino'):
    for _ in range(50):
        # The rotation is never reset inside the loop, so every glyph is
        # turned further from wherever the previous one left off.
        font(size=random(300))
        rotate(random(3600))
        # y may be up to twice the canvas height, so some glyphs fall outside
        # the visible area.
        x, y = random(WIDTH), random(2*HEIGHT)
        glyph = choice(data)
        text(glyph, x, y, outline=True)