Machine Learning - Data visualization with R (II)
This article continues presenting different techniques that can be used to communicate data or information by encoding it in graphs.
Scatterplot Matrix
Scatter plots show many points plotted in a Cartesian plane. Each point represents a set of coordinates. A Scatterplot matrix shows a scatter plot for each pair of variables.
# load required packages.
install.packages("ggplot2");library("ggplot2");
install.packages("ggExtra");library("ggExtra");
install.packages("gclus");library("gclus");
install.packages("car");library("car");
install.packages("hexbin");library("hexbin");
install.packages("latticeExtra");library("latticeExtra");
install.packages("rgl");library("rgl");
# 4 quantitative variables (mpg, disp, drat, wt):
# base-graphics scatterplot matrix with one panel per pair of variables.
pairs(
  ~ mpg + disp + drat + wt,
  data = mtcars,
  main = "Simple Scatterplot Matrix"
)
# 4 quantitative variables and 1 categorical variable.
# The `| cyl` term of the formula conditions the panels on the number of
# cylinders.
# Note: car renamed scatterplot.matrix() to scatterplotMatrix(); the old
# name is defunct in current car releases, so the current name is used.
scatterplotMatrix(
  ~ mpg + disp + drat + wt | cyl,
  data = mtcars,
  main = "Three Cylinder Options"
)
# 4 quantitative variables, ordered and colored by correlation (gclus).
# Columns mpg, disp, drat and wt are columns 1, 3, 5 and 6 of mtcars.
dta <- mtcars[c("mpg", "disp", "drat", "wt")]
# Absolute pairwise correlations between the selected variables.
dta.r <- abs(cor(dta))
# Panel colors and variable order, both derived from the correlations
# and handed to cpairs() below.
dta.col <- dmat.color(dta.r, cm.colors(10))
dta.o <- order.single(dta.r)
cpairs(
  dta,
  dta.o,
  panel.colors = dta.col,
  gap = 0.5,
  main = "Variables Ordered and Colored by Correlation"
)
Simple Scatter Plots
# 2 quantitative variables: wt on the x axis, mpg on the y axis.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point()
# 3 quantitative variables: point size additionally encodes cyl.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, size = cyl)) +
  geom_point(shape = 21, fill = "red")
# 2 quantitative variables and 1, 2 or 3 categorical variables.
# cyl is converted to a factor and mapped to color, fill and shape.
# Columns are referenced by bare name inside aes() (not mtcars$cyl), so the
# mapping is evaluated against the plot's data; ggplot2 discourages `$`
# inside aes() because it breaks with faceting or a different layer data.
ggplot(
  mtcars,
  aes(
    x = wt, y = mpg,
    color = as.factor(cyl),
    fill = as.factor(cyl),
    shape = as.factor(cyl)
  )
) +
  geom_point()
# 3 quantitative variables and 1, 2 or 3 categorical variables.
# Same mapping as above, with point size additionally encoding hp.
ggplot(
  mtcars,
  aes(
    x = wt, y = mpg, size = hp,
    color = as.factor(cyl),
    fill = as.factor(cyl),
    shape = as.factor(cyl)
  )
) +
  geom_point()
More 2D Scatter Plots
# Scatter plots with smooth lines.
# One linear-model fit (with its confidence band, se = TRUE) per cyl group,
# plus rug marks along the axes.
# cyl is referenced by bare name inside aes() (not mtcars$cyl) so the mapping
# is evaluated against the plot's data, as ggplot2 recommends.
ggplot(
  mtcars,
  aes(
    x = wt, y = mpg,
    color = as.factor(cyl),
    fill = as.factor(cyl),
    shape = as.factor(cyl)
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  geom_rug()
# Scatter plots with normal confidence interval ellipses.
# One ellipse per cyl group at the 0.9 level.
ggplot(
  mtcars,
  aes(
    x = wt, y = mpg,
    color = as.factor(cyl),
    fill = as.factor(cyl),
    shape = as.factor(cyl)
  )
) +
  geom_point() +
  stat_ellipse(type = "norm", level = 0.9)
# Density curves
# 2D density contours, one set per cyl group. cyl is referenced by bare name
# inside aes() (not mtcars$cyl) so it is looked up in the plot's data.
ggplot(mtcars, aes(x = wt, y = mpg, colour = as.factor(cyl))) +
  stat_density_2d()
# Density bins
# Counts of observations in a 10-bin rectangular grid.
ggplot(mtcars, aes(x = wt, y = mpg)) +
  geom_bin2d(bins = 10)
# Density hexagons
# Counts of observations in hexagonal bins (requires the hexbin package).
# stat_bin_hex() is the current name of the legacy alias stat_binhex().
ggplot(mtcars, aes(x = wt, y = mpg)) +
  stat_bin_hex(bins = 10)
# Scatter plot with level plot
# lattice levelplot of cyl against wt and mpg, drawn with the
# panel.levelplot.points panel function; a panel.2dsmoother layer from
# latticeExtra is added with layer_().
levelplot(
  cyl ~ wt * mpg,
  data = mtcars,
  panel = panel.levelplot.points,
  cex = 1
) +
  layer_(panel.2dsmoother(..., n = 200))
3D Scatter Plots
In 3D scatter plots, each point represents three coordinates.
# 3 quantitative variables
# Points only: the fitted surface is switched off with surface = FALSE.
with(
  iris,
  scatter3d(
    x = Sepal.Length,
    y = Sepal.Width,
    z = Petal.Length,
    surface = FALSE,
    xlab = "SL", ylab = "SW", zlab = "PL"
  )
)
# 3 quantitative variables and 1 categorical variable
# The categorical variable (Species) is passed through `groups`; without it
# this call would be identical to the plain three-variable plot above.
scatter3d(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  z = iris$Petal.Length,
  groups = iris$Species,
  surface = FALSE,
  xlab = "SL", ylab = "SW", zlab = "PL"
)
# 3 quantitative variables with tendency plane.
# `surface` is left at its default here, unlike the calls that pass
# surface = FALSE.
with(
  iris,
  scatter3d(
    x = Sepal.Length,
    y = Sepal.Width,
    z = Petal.Length,
    xlab = "SL", ylab = "SW", zlab = "PL"
  )
)
# 3 quantitative variables and 1 categorical variable with confidence
# interval ellipsoid.
# Points are grouped by Species; surface and grid are disabled and an
# ellipsoid is drawn (ellipsoid = TRUE).
# brewer.pal() is called as RColorBrewer::brewer.pal() because none of the
# packages attached with library() in this script is guaranteed to put
# RColorBrewer on the search path.
scatter3d(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  z = iris$Petal.Length,
  groups = iris$Species,
  surface = FALSE,
  grid = FALSE,
  ellipsoid = TRUE,
  surface.col = RColorBrewer::brewer.pal(n = 3, name = "Set1"),
  xlab = "SL", ylab = "SW", zlab = "PL"
)
To be continued...
Share your experience and provide feedback!