R

Contents

1 Dictionaries in R
2 Rounding in R
3 Adding two random variables via convolution in R
4 Estimating parameters of Log Pearson III distribution in R
5 Interactive zooming with basic plotting functions
6 Reduce PDF file size of plots by filtering hidden objects
7 Data thinning
8 Ramer-Douglas-Peucker algorithm for simplifying a polyline
9 Moving data to/from the clipboard
10 lty chart
11 pch chart
12 Plot margins
13 Tutorials

3 Adding two random variables via convolution in R

f.X <- function(x) dnorm(x,1,0.5)        # normal (mu=1.5, sigma=0.5)
f.Y <- function(y) dlnorm(y,1.5, 0.75)   # log-normal (mu=1.5, sigma=0.75)
# convolution integral
f.Z <- function(z) integrate(function(x,z) f.Y(z-x)*f.X(x),-Inf,Inf,z)$value
f.Z <- Vectorize(f.Z)                    # need to vectorize the resulting fn.

set.seed(1)                              # for reproducible example
X <- rnorm(1000,1,0.5)
Y <- rlnorm(1000,1.5,0.75)
Z <- X + Y
# compare the methods
hist(Z,freq=F,breaks=50, xlim=c(0,30))
z <- seq(0,50,0.01)
lines(z,f.Z(z),lty=2,col="red")

Edit

4 Estimating parameters of Log Pearson III distribution in R

The probability density function of the Pearson Type III distribution is defined by \[f(x)=\frac{1}{\lvert a\rvert\Gamma(b)}\left(\frac{x-c}{a}\right)^{b-1}\exp\left(-\frac{x-c}{a}\right)\] where \(a\), \(b\), and \(c\) are the scale, shape, and location parameters, respectively. The mean, variance, and coefficient of skewness are \[\mu=c+ba\qquad\sigma^2=ba^2\qquad\gamma=\frac{2a}{\lvert a\rvert\sqrt{b}},\] respectively.

dp3 <- function(x, a, b, c) dgamma(sign(a)*(x-c), b, 1/abs(a))
pp3 <- function(q, a, b, c) pgamma(sign(a)*(q-c), b, 1/abs(a), lower.tail=a>=0)
qp3 <- function(p, a, b, c) sign(a)*qgamma(p, b, 1/abs(a), lower.tail=a>=0)+c
rp3 <- function(n, a, b, c) sign(a)*rgamma(n, b, 1/abs(a))+c

There are some mistakes in this article. Please use the above code.

rm(list=ls())
# Simulation from a log-Pearson III
set.seed(1000)
rlogpearson <- function(n,a,b,c) return( exp(rgamma(n,shape=a,rate=b) - c) )

data<- rlogpearson(1000,3,3,5)

hist(data)

# Transformation of the data to obtain a shifted gamma

datat <- log(data)

# - log-likelihoood

ll <- function(par){
if(par[1]>0 &  par[2]>0 & par[3]> -min(datat)) return( -sum(dgamma(datat+par[3],shape=par[1],rate=par[2],log=TRUE))  )
else return(Inf)
}

# optimisation step

optim(c(3,3,5),ll)

# MLE

optim(c(3,3,5),ll)$par

qlogpearson <- function(p,a,b,c) return( exp(qgamma(p,shape=a,rate=b) - c) )
qlogpearson(0.5,3,3,5)

param <- optim(c(3,3,5),ll)$par
qlogpearson(0.5,param[1],param[2],param[3])

samp<- rlogpearson(10000,param[1],param[2],param[3])
quantile(samp,0.5)

plogpearson <- function(x,a,b,c) return(pgamma(log(x)+c,shape=a,rate=b))

Edit

5 Interactive zooming with basic plotting functions

zoom.R

Edit

6 Reduce PDF file size of plots by filtering hidden objects

This is a good idea for reducing the number of data points.

set.seed(42)
DF <- data.frame(x=x<-runif(1e6),y=x+rnorm(1e6,sd=0.1))
plot(y~x,data=DF,pch=".",cex=4)

DF2 <- data.frame(x=round(DF$x,3),y=round(DF$y,3))
DF2 <- DF[!duplicated(DF2),]
nrow(DF2)
#[1] 373429
plot(y~x,data=DF2,pch=".",cex=4)

Edit

7 Data thinning

Handy little snippet of R code for thinning occurrence data thin.max.R

# Original Source: https://gist.github.com/danlwarren/271288d5bab45d2da549#file-thin-max-r-L39
#
# Function to rarefy point data in any number of dimensions.  The goal here is to 
# take a large data set and reduce it in size in such a way as to approximately maximize the 
# difference between points.  For instance, if you have 2000 points but suspect a lot of 
# spatial autocorrelation between them, you can pass in your data frame, the names (or indices)
# of the lat/lon columns, and the number 200, and you get back 200 points from your original data 
# set that are chosen to be as different from each other as possible given a randomly chosen
# starting point

# Input is:
#
# x, a data frame containing the columns to be used to calculate distances along with whatever other data you need
# cols, a vector of column names or indices to use for calculating distances
# npoints, the number of rarefied points to spit out
#
# e.g., thin.max(my.data, c("latitude", "longitude"), 200)


thin.max <- function(x, cols, npoints){
  #Create empty vector for output
  inds <- vector(mode="numeric")
  
  #Create distance matrix
  this.dist <- as.matrix(dist(x[,cols], upper=TRUE))
  
  #Draw first index at random
  inds <- c(inds, as.integer(runif(1, 1, length(this.dist[,1]))))
  
  #Get second index from maximally distant point from first one
  #Necessary because apply needs at least two columns or it'll barf
  #in the next bit
  inds <- c(inds, which.max(this.dist[,inds]))
  
  while(length(inds) < npoints){
    #For each point, find its distance to the closest point that's already been selected
    min.dists <- apply(this.dist[,inds], 1, min)
    
    #Select the point that is furthest from everything we've already selected
    this.ind <- which.max(min.dists)
    
    #Get rid of ties, if they exist
    if(length(this.ind) > 1){
      print("Breaking tie...")
      this.ind <- this.ind[1]
    }
    inds <- c(inds, this.ind)
  }
  
  return(x[inds,])
}

Edit

8 Ramer-Douglas-Peucker algorithm for simplifying a polyline

Use the DouglasPeucker function in the HistDAWass package.

Edit

9 Moving data to/from the clipboard

http://www.johndcook.com/blog/r_excel_clipboard/

writeClipboard(x)
x <- readClipboard()
x <- scan()
write.table(x, "clipboard", sep="\t", row.names=FALSE)
x <- read.table("clipboard")

# Paste a matrix from the clipboard into R.
paste.matrix <- function() as.matrix(read.table("clipboard", sep="\t"))

# Copy a matrix from R into the clipboard.
copy.matrix <- function(x) write.table(x, "clipboard", sep="\t", col.names=FALSE, row.names=FALSE)

Edit