R
- 1 Dictionaries in R
- 2 Rounding in R
- 3 Adding two random variables via convolution in R
- 4 Estimating parameters of Log Pearson III distribution in R
- 5 Interactive zooming with basic plotting functions
- 6 Reduce PDF file size of plots by filtering hidden objects
- 7 Data thinning
- 8 Ramer-Douglas-Peucker algorithm for simplifying a polyline
- 9 Moving data to/from the clipboard
- 10 lty chart
- 11 pch chart
- 12 Plot margins
- 13 Tutorials
1 Dictionaries in R
# https://stackoverflow.com/a/62700699
x = data.frame(row.names=c("Hi", "Why", "water"), val=c(1, 5, 4))
x["Why",]
[1] 5
2 Rounding in R
# http://andland.github.io/blog/2012/06/15/rounding-in-r
# rounding in commerce
cround <- function(x, digits=0) {
vorz <- sign(x)
z <- abs(x)*10^digits
z <- z+0.5
z <- trunc(z)
z <- z/10^digits
z*vorz
}
3 Adding two random variables via convolution in R
f.X <- function(x) dnorm(x,1,0.5) # normal (mu=1.5, sigma=0.5)
f.Y <- function(y) dlnorm(y,1.5, 0.75) # log-normal (mu=1.5, sigma=0.75)
# convolution integral
f.Z <- function(z) integrate(function(x,z) f.Y(z-x)*f.X(x),-Inf,Inf,z)$value
f.Z <- Vectorize(f.Z) # need to vectorize the resulting fn.
set.seed(1) # for reproducible example
X <- rnorm(1000,1,0.5)
Y <- rlnorm(1000,1.5,0.75)
Z <- X + Y
# compare the methods
hist(Z,freq=F,breaks=50, xlim=c(0,30))
z <- seq(0,50,0.01)
lines(z,f.Z(z),lty=2,col="red")
4 Estimating parameters of Log Pearson III distribution in R
The probability density function of the Pearson Type III distribution is defined by \[f(x)=\frac{1}{\lvert a\rvert\Gamma(b)}\left(\frac{x-c}{a}\right)^{b-1}\exp\left(-\frac{x-c}{a}\right)\] where \(a\), \(b\), and \(c\) are the scale, shape, and location parameters, respectively. The mean, variance, and coefficient of skewness are \[\mu=c+ba\qquad\sigma^2=ba^2\qquad\gamma=\frac{2a}{\lvert a\rvert\sqrt{b}},\] respectively.
dp3 <- function(x, a, b, c) dgamma(sign(a)*(x-c), b, 1/abs(a))
pp3 <- function(q, a, b, c) pgamma(sign(a)*(q-c), b, 1/abs(a), lower.tail=a>=0)
qp3 <- function(p, a, b, c) sign(a)*qgamma(p, b, 1/abs(a), lower.tail=a>=0)+c
rp3 <- function(n, a, b, c) sign(a)*rgamma(n, b, 1/abs(a))+c
There are some mistakes in this article. Please use the above code.
rm(list=ls())
# Simulation from a log-Pearson III
set.seed(1000)
rlogpearson <- function(n,a,b,c) return( exp(rgamma(n,shape=a,rate=b) - c) )
data<- rlogpearson(1000,3,3,5)
hist(data)
# Transformation of the data to obtain a shifted gamma
datat <- log(data)
# - log-likelihoood
ll <- function(par){
if(par[1]>0 & par[2]>0 & par[3]> -min(datat)) return( -sum(dgamma(datat+par[3],shape=par[1],rate=par[2],log=TRUE)) )
else return(Inf)
}
# optimisation step
optim(c(3,3,5),ll)
# MLE
optim(c(3,3,5),ll)$par
qlogpearson <- function(p,a,b,c) return( exp(qgamma(p,shape=a,rate=b) - c) )
qlogpearson(0.5,3,3,5)
param <- optim(c(3,3,5),ll)$par
qlogpearson(0.5,param[1],param[2],param[3])
samp<- rlogpearson(10000,param[1],param[2],param[3])
quantile(samp,0.5)
plogpearson <- function(x,a,b,c) return(pgamma(log(x)+c,shape=a,rate=b))
5 Interactive zooming with basic plotting functions
7 Data thinning
Handy little snippet of R code for thinning occurrence data thin.max.R
# Original Source: https://gist.github.com/danlwarren/271288d5bab45d2da549#file-thin-max-r-L39
#
# Function to rarefy point data in any number of dimensions. The goal here is to
# take a large data set and reduce it in size in such a way as to approximately maximize the
# difference between points. For instance, if you have 2000 points but suspect a lot of
# spatial autocorrelation between them, you can pass in your data frame, the names (or indices)
# of the lat/lon columns, and the number 200, and you get back 200 points from your original data
# set that are chosen to be as different from each other as possible given a randomly chosen
# starting point
# Input is:
#
# x, a data frame containing the columns to be used to calculate distances along with whatever other data you need
# cols, a vector of column names or indices to use for calculating distances
# npoints, the number of rarefied points to spit out
#
# e.g., thin.max(my.data, c("latitude", "longitude"), 200)
thin.max <- function(x, cols, npoints){
#Create empty vector for output
inds <- vector(mode="numeric")
#Create distance matrix
this.dist <- as.matrix(dist(x[,cols], upper=TRUE))
#Draw first index at random
inds <- c(inds, as.integer(runif(1, 1, length(this.dist[,1]))))
#Get second index from maximally distant point from first one
#Necessary because apply needs at least two columns or it'll barf
#in the next bit
inds <- c(inds, which.max(this.dist[,inds]))
while(length(inds) < npoints){
#For each point, find its distance to the closest point that's already been selected
min.dists <- apply(this.dist[,inds], 1, min)
#Select the point that is furthest from everything we've already selected
this.ind <- which.max(min.dists)
#Get rid of ties, if they exist
if(length(this.ind) > 1){
print("Breaking tie...")
this.ind <- this.ind[1]
}
inds <- c(inds, this.ind)
}
return(x[inds,])
}
8 Ramer-Douglas-Peucker algorithm for simplifying a polyline
9 Moving data to/from the clipboard
http://www.johndcook.com/blog/r_excel_clipboard/
writeClipboard(x)
x <- readClipboard()
x <- scan()
write.table(x, "clipboard", sep="\t", row.names=FALSE)
x <- read.table("clipboard")
# Paste a matrix from the clipboard into R.
paste.matrix <- function() as.matrix(read.table("clipboard", sep="\t"))
# Copy a matrix from R into the clipboard.
copy.matrix <- function(x) write.table(x, "clipboard", sep="\t", col.names=FALSE, row.names=FALSE)
10 lty chart
11 pch chart
http://statisticstoproveanything.blogspot.com/2010/09/charts-of-different-pch-values-in-r.html
12 Plot margins
Clockwise starting from the bottom.
http://research.stowers.org/mcm/efg/R/Graphics/Basics/mar-oma