Data Visualization using PlotDevice

PlotDevice is a macOS application that lets you write Python scripts to generate 2D graphics using simple drawing commands. In this book I used it to create some of the chapter covers. The code for each cover is listed below:

1. Introduction

1.1 Beautiful Data and Human Behavior

# Canvas for the 1.1 cover: a 1000x200 banner filled with colour #1f2838.
size(1000,200)
background('#1f2838')

def flower(x, y):
    """Draw one flower whose origin is moved to (x, y).

    The flower is ten identical petals: the drawing origin is translated
    to (x, y) and, for each petal, rotated a further 36 units (ten steps of
    36 make a full turn if the rotation unit is degrees) before a short
    line and an oval are drawn. The fill colour and the oval size are
    randomised once per flower, so all ten petals of a flower match.
    """
    petal_color = color(0.1, random(.5, 1), random(.5))
    with fill(petal_color), stroke(.6), pen(1), transform(CORNER):
        translate(x, y)
        petal_size = random(10) + 5
        for _ in range(10):
            rotate(36)
            line(0, 0, 15, 15)
            oval(10, 10, petal_size, petal_size)

# Scatter 40 flowers at random positions across the canvas.
for _ in range(40):
    cx = random(WIDTH)
    cy = random(HEIGHT)
    flower(cx, cy)

1.2 Python for Data Analysis

from matplotlib.finance import quotes_historical_yahoo


# Cover for 1.2: one rotated price curve plus a label for each ticker symbol.
size(1000,200)
background(0, 0.1171875, 0.234375)
# presumably switches subsequent colour values to HSV -- confirm against
# the PlotDevice manual
color(HSV)

names = ['EMES','TSLA','FLT','FRF','SAN',
'MTNOY','XLF','VIG','C','EUM']
date1 = (2010, 1, 1)
date2 = (2014, 7, 1)


Q = {}  # ticker -> list of [x, y] curve points
C = {}  # ticker -> value returned by stroke(), reused below as a colour
for n, i in enumerate(names):
    quotes = quotes_historical_yahoo(i, date1, date2)
    # Second-to-last field of every quote row (called `close` by the
    # author; verify against the quote tuple layout).
    close = [j[-2] for j in quotes]
    # x is the sample index; y is flipped so larger values sit higher up.
    Q[i] = [[k, HEIGHT+20-price] for k, price in enumerate(close)]
    # Saturation and brightness grow with the ticker's position in `names`.
    C[i] = stroke(0.12, (n/10.)**0.5, (n/10.)**0.5)

for n, i in enumerate(names):
    if not Q[i]:
        # No quotes came back for this ticker: nothing to draw or label.
        continue
    with nofill(),stroke(1),pen(2),transform(CORNER):
        # Each successive curve is tilted a little further.
        rotate(3*n)
        bezier(Q[i], stroke=C[i])
    fill(C[i])
    font("Helvetica")
    fontsize(n*3+5)
    # The points are plain [x, y] lists, so unpack them; the original
    # `.x` / `.y` attribute access fails on a list.
    x0, y0 = Q[i][0]
    text(i, x0+n*70, y0-n*27)

2. Data Collection

2.1 Connecting to Twitter API

import numpy as np

# Cover for 2.1: authors laid out along a curve, each with a circle sized by
# the last column of the tweets file and a label that fades along the curve.
size(1000,200)
background('#1f2838')

# author -> [content, day, time]; one tab-separated record per line.
t = {}
with open('/Users/csid/Desktop/tweets.txt','rb') as f:
    for i in f:
        author,content,day,time = i.strip().split('\t')
        t[author] = [content,day,time]

# fans
d=50
with bezier(100-d, 20, fill=None, stroke=(0.9,random(.5,1),random(.5))) as path1:
    curveto(600-d, 0, 400-d, 250, 900-d, 180)

k=1.2
total = float(len(t))
# Walk the dict once; indexing t.keys()/t.values() inside the loop rebuilt
# both lists on every iteration.
for i, (author, fields) in enumerate(t.items()):
    # The last column is used as the follower count (it is unpacked as
    # `time` above -- confirm the column order of the data file).
    fansN = fields[-1]
    # 1.0 for the first author, approaching 0 for the last.
    fade = 1-i/total
    # NOTE(review): the curve parameter exceeds 1.0 once there are more
    # than 11 authors -- confirm how path1.point() treats that.
    pt = path1.point(0.1*i)
    clr = color(0.9,random(.5,1),random(.5),fade)
    fill(clr)
    stroke(.9)
    r = np.log(float(fansN))**k
    oval(pt.x-2, pt.y-2, r, r)
    align(RIGHT)
    font("Helvetica",8)
    text(fansN,pt.x-10, pt.y+10)
    align(LEFT)
    fill('#f96f00',fade)
    text(author,pt.x-2+d, pt.y+2,font=15*fade)

# Second curve, shifted right by 2*d relative to the first one.
with bezier(100+d, 20, fill=None, stroke=(0.9,random(.5,1),random(.5))):
    curveto(600+d, 0, 400+d, 220, 900+d, 170)

2.2 Scraping Articles from The Washington Post

# Cover for 2.2: the words of an article laid out as a wall of small text,
# with a handful of keywords drawn larger and brighter on top.
size(1000, 200)
background(.15, .1, .1)

words = read('/Users/csid/Desktop/blog.txt').split(' ')
keywords = ['problem', 'Clinton', 'forces',
            'defend', 'interrogation', 'Afghanistan']

# Pass 1: every non-keyword word in a random colour. Each word advances the
# cursor by 9 units per character; a new row starts 20 units lower once
# the cursor has passed the right edge.
col, row = 0, 10
for word in words:
    if word not in keywords:
        if col > WIDTH:
            col, row = 0, row + 20
        fill(.1, random(.5), random(.5, 1.))
        font(14)
        text(word, col, row)
        col += len(word) * 9

# Pass 2: keywords only, drawn larger in a light colour.
# NOTE(review): here the cursor advances for every word, whereas pass 1
# skips keywords without advancing, so the two passes do not share the
# same layout -- confirm this overlay effect is intended.
col, row = 0, 10
for word in words:
    if col > WIDTH:
        col, row = 0, row + 20
    if word in keywords:
        fill('#edfdff')
        font(25)
        text(word, col, row)
    col += len(word) * 9

2.3 Processing the Dataset of Stack Exchange

import numpy as np

# Cover for 2.3: one "lollipop" per site -- a vertical stem topped by two
# concentric discs -- with text labels for a couple of named sites.
size(1000,200)
background(.15,.15,.25)


def _site_geometry(age, nq, na):
    """Return (x, h, rq, ra) for one site.

    age, nq, na are the three numeric columns of the data file (the names
    suggest site age, question count and answer count -- confirm against
    the file). x is the horizontal position, h the height term used for
    the vertical placement, rq and ra the radii of the inner and outer
    discs. Shared by the circle pass and the label pass so both place a
    site at the same spot.
    """
    n = 2.2
    h = np.log(nq+na)**1.9-20
    rq = np.log(nq)**0.9
    ra = np.log(na)**1.3
    return WIDTH-age/n, h, rq, ra


# site -> [age, nq, na] as floats; one tab-separated record per line.
t = {}
with open('/Users/csid/Desktop/SEsites.txt','rb') as f:
    for i in f:
        site,age,nq,na = i.strip().split('\t')
        t[site] = [float(v) for v in (age,nq,na)]

names = ['stackoverflow.com','math.stackexchange.com']

#circles
for j in t:
    x, h, rq, ra = _site_geometry(*t[j])
    stroke(1,0.5)
    with pen(1):
        line(x,195,x,180-h+2*ra+2)
    # Outer disc in a random colour ...
    with fill(0.2, random(.5), random(.5),0.8):
        arc(x,180-h+ra,ra)
    # ... and an inner disc in the background colour on top of it.
    with fill(.15,.15,.25):
        arc(x,180-h+ra,rq)

#labels
for j in names:
    if j not in t:
        # A labelled site missing from the data file is skipped.
        continue
    x, h, rq, ra = _site_geometry(*t[j])
    fill('#adaff2')
    font("Helvetica",20)
    # Only the part of the host name before the first dot is shown.
    text(j.split('.')[0],x+20,180-h+ra)

2.4 Retrieving Raw Data from Figures

The above figure was not created by PlotDevice.

3. Data Analysis

3.1 Clustering Countries by Constitutions

The above figure was not created by PlotDevice.

3.2 Determining Influential Papers in Citation Networks

import numpy as np
from collections import Counter

# Cover for 3.2: ids placed along a baseline, each drawn as a disc sized by
# how often it appears in the edge list, with one arch per edge.
size(1000,200)
background(0,.1,.2)

# Edge list: one [a, b] pair of integer ids per tab-separated line.
e = []
with open('/Users/csid/Desktop/citations.txt','rb') as f:
    for i in f:
        a,b = i.strip().split('\t')
        e.append([int(a),int(b)])

# k[id] = number of edge endpoints that mention id.
k = Counter(i for j in e for i in j)
total = sum(k.values())
# Horizontal space per endpoint. float() keeps the division exact in case
# WIDTH is an int; an empty edge list leaves the unit at 0.
u = float(WIDTH)/total if total else 0.0

# Each id gets a slot whose width is proportional to its count.
x = 0;y=180
pos = {}
for i in k:
    x += k[i]*u
    pos[i] = [x,y]


for i in pos:
    with fill(.9, random(.5,1), random(.5,1.),0.7),stroke(.8):
        arc(pos[i][0],pos[i][1],k[i]*2)

for i,j in e:
    x1,y1 = pos[i]
    x2,y2 = pos[j]
    # Always draw from the left endpoint to the right one.
    if x1 < x2:
        sx,sy = x1,y1
        ex,ey = x2,y2
    else:
        sx,sy = x2,y2
        ex,ey = x1,y1
    d = (ex-sx)/3
    if d == 0:
        # Both ends coincide (e.g. an id paired with itself): there is no
        # arch to draw and 30/d below would divide by zero.
        continue
    h = d/1.4
    # Longer arches are taller (h grows with d) and thinner (pen is 30/d).
    with nofill(),stroke(.3, random(.5,1), random(.5,1.)),pen(30/d):
        with bezier(sx,sy):
            # Control points are lifted from the start point's baseline
            # (every node sits at y = 180, so sy equals the shared y).
            curveto(sx+d,sy-h,sx+2*d,sy-h,ex,ey)

3.3 Measuring the Difficulty of Questions in Q&A sites

import numpy as np
from scipy.stats import norm

# Cover for 3.3: a noisy bell-shaped curve drawn 17 times, each copy rotated
# a little further and stroked in a different random colour.
size(1000, 200)
background(.05, .1, .15)

mu = WIDTH/2
sigma = 250

# 100 samples of the normal density across three sigmas either side of mu.
xs = np.linspace(mu-3*sigma, mu+3*sigma, 100)
density = np.array(norm.pdf(xs, mu, sigma))

# A random bump (random(7)) on the samples where random() exceeds 0.85,
# zero elsewhere.
noise = [random(7) if random() > 0.85 else 0 for _ in range(len(density))]

# Scale by the smallest density value and measure from the canvas bottom.
ys = HEIGHT - density/min(density) + np.array(noise)

# Built once as a list so the same points can be drawn on every pass.
points = list(zip(xs, ys))

with nofill(), stroke(0.5), pen(1), transform(CORNER):
    translate(-150, 20)
    for _ in range(17):
        rotate(0.6)
        stroke(0.2, random(0.5, 1.0), random(0.5, 1.0))
        bezier(points)

3.4 Discovering the Hidden Structure of Global Remittance Flows

import numpy as np

# Cover for 3.4: every record of the data file becomes an S-shaped link
# between two points, with a disc at each end; pen width and disc radius
# both scale with log(w + 1).
size(1000, 200)
background(.1, .1, .15)
s = 100  # scale factor applied to the file's coordinates (y gets s/2)

# Each tab-separated line holds x1, y1, x2, y2, w as numbers.
e = []
with open('/Users/csid/Desktop/moneytree.txt','rb') as f:
    for row in f:
        ax, ay, bx, by, amount = [float(v) for v in row.strip().split('\t')]
        e.append([ax*s, ay*s/2, bx*s, by*s/2, amount])

for ax, ay, bx, by, amount in e:
    # Orient the link so that it runs from the left point to the right one.
    if ax < bx:
        (sx, sy), (ex, ey) = (ax, ay), (bx, by)
    else:
        (sx, sy), (ex, ey) = (bx, by), (ax, ay)
    third = (ex - sx)/3
    weight = np.log(amount + 1)

    # Leave the start horizontally at sy and arrive horizontally at ey.
    with nofill(), stroke(.3, random(.5, 1), random(.5, 1.), 0.2), pen(weight/10):
        with bezier(sx, sy):
            curveto(sx + third, sy, sx + 2*third, ey, ex, ey)

    with stroke(0.8), fill(.9, random(.5, 1), random(.5, 1.), 0.6):
        r = weight/3
        arc(sx, sy, r)
        arc(ex, ey, r)

3.5 Modeling the Growth of Cities Using Satellite Images

The above figure was not created by PlotDevice.

4. Data Visualization

4.1 Networks

The above figure was not created by PlotDevice.

4.2 Text

The above figure was not created by PlotDevice.

4.3 Maps

# Cover for 4.3: scored locations drawn as discs over a map image, with
# three of them labelled by name.
size(1000, 200)
#size(1280,1280)

image("/Users/csid/Desktop/tempe.png", 0, 0)

# name -> [x, y, score] as floats; one tab-separated record per line.
t = {}
with open('/Users/csid/Desktop/tempeRest.text','rb') as f:
    for row in f:
        name, px, py, rating = row.strip().split('\t')
        t[name] = [float(v) for v in (px, py, rating)]

# Names ordered by ascending score, so higher scores are drawn later.
names = sorted(t, key=lambda name: t[name][2])

for name in names:
    x, y, score = t[name]
    stroke(.8)
    # The second and third colour components fall as the score nears 5.
    tint = 1 - (score/5.)**5
    fill(0.6, tint, tint, 0.9)
    # y is measured from the bottom of the canvas; radius grows with score.
    arc(x, HEIGHT - y, score**1.6)


# NOTE(review): with an ascending sort these are the three LOWEST scores --
# confirm whether the highest three (names[-3:]) were meant.
for name in names[:3]:
    x, y, score = t[name]
    fill('#f96f00')
    font("Zapfino", 23)
    text(name, x, HEIGHT - y)

4.4 Data Visualization using PlotDevice

# Cover for 4.4: fifty randomly chosen characters drawn as outlines at
# random sizes, rotations and positions.
size(1000, 200)
background(.1, .1, .2)

glyphs = '123456789xyz'

with fill(1), stroke(.2), font('Zapfino'):
    for _ in range(50):
        font(size=random(300))
        rotate(random(3600))
        # y ranges over twice the canvas height, so some glyphs may land
        # below the visible area.
        px = random(WIDTH)
        py = random(2*HEIGHT)
        text(choice(glyphs), px, py, outline=True)

results matching ""

    No results matching ""