 Repository for Ideas & Research
Open Source GIS, Hydrologic Modeling, Optimization

# R

## 1   Dictionaries in R

# https://stackoverflow.com/a/62700699
x = data.frame(row.names=c("Hi", "Why", "water"), val=c(1, 5, 4))
x["Why",]
 5

## 2   Rounding in R

# http://andland.github.io/blog/2012/06/15/rounding-in-r
# rounding in commerce
cround <- function(x, digits=0) {
vorz <- sign(x)
z <- abs(x)*10^digits
z <- z+0.5
z <- trunc(z)
z <- z/10^digits
z*vorz
}

## 3   Adding two random variables via convolution in R

f.X <- function(x) dnorm(x,1,0.5)        # normal (mu=1.5, sigma=0.5)
f.Y <- function(y) dlnorm(y,1.5, 0.75)   # log-normal (mu=1.5, sigma=0.75)
# convolution integral
f.Z <- function(z) integrate(function(x,z) f.Y(z-x)*f.X(x),-Inf,Inf,z)$value f.Z <- Vectorize(f.Z) # need to vectorize the resulting fn. set.seed(1) # for reproducible example X <- rnorm(1000,1,0.5) Y <- rlnorm(1000,1.5,0.75) Z <- X + Y # compare the methods hist(Z,freq=F,breaks=50, xlim=c(0,30)) z <- seq(0,50,0.01) lines(z,f.Z(z),lty=2,col="red") ## 4 Estimating parameters of Log Pearson III distribution in R The probability density function of the Pearson Type III distribution is defined by $f(x)=\frac{1}{\lvert a\rvert\Gamma(b)}\left(\frac{x-c}{a}\right)^{b-1}\exp\left(-\frac{x-c}{a}\right)$ where $a$, $b$, and $c$ are the scale, shape, and location parameters, respectively. The mean, variance, and coefficient of skewness are $\mu=c+ba\qquad\sigma^2=ba^2\qquad\gamma=\frac{2a}{\lvert a\rvert\sqrt{b}},$ respectively. dp3 <- function(x, a, b, c) dgamma(sign(a)*(x-c), b, 1/abs(a)) pp3 <- function(q, a, b, c) pgamma(sign(a)*(q-c), b, 1/abs(a), lower.tail=a>=0) qp3 <- function(p, a, b, c) sign(a)*qgamma(p, b, 1/abs(a), lower.tail=a>=0)+c rp3 <- function(n, a, b, c) sign(a)*rgamma(n, b, 1/abs(a))+c There are some mistakes in this article. Please use the above code. rm(list=ls()) # Simulation from a log-Pearson III set.seed(1000) rlogpearson <- function(n,a,b,c) return( exp(rgamma(n,shape=a,rate=b) - c) ) data<- rlogpearson(1000,3,3,5) hist(data) # Transformation of the data to obtain a shifted gamma datat <- log(data) # - log-likelihoood ll <- function(par){ if(par>0 & par>0 & par> -min(datat)) return( -sum(dgamma(datat+par,shape=par,rate=par,log=TRUE)) ) else return(Inf) } # optimisation step optim(c(3,3,5),ll) # MLE optim(c(3,3,5),ll)$par

qlogpearson <- function(p,a,b,c) return( exp(qgamma(p,shape=a,rate=b) - c) )
qlogpearson(0.5,3,3,5)

param <- optim(c(3,3,5),ll)$par qlogpearson(0.5,param,param,param) samp<- rlogpearson(10000,param,param,param) quantile(samp,0.5) plogpearson <- function(x,a,b,c) return(pgamma(log(x)+c,shape=a,rate=b)) ## 5 Interactive zooming with basic plotting functions zoom.R ## 6 Reduce PDF file size of plots by filtering hidden objects This is a good idea for reducing the number of data points. set.seed(42) DF <- data.frame(x=x<-runif(1e6),y=x+rnorm(1e6,sd=0.1)) plot(y~x,data=DF,pch=".",cex=4) DF2 <- data.frame(x=round(DF$x,3),y=round(DF\$y,3))
DF2 <- DF[!duplicated(DF2),]
nrow(DF2)
# 373429
plot(y~x,data=DF2,pch=".",cex=4)

## 7   Data thinning

# Original Source: https://gist.github.com/danlwarren/271288d5bab45d2da549#file-thin-max-r-L39
#
# Function to rarefy point data in any number of dimensions.  The goal here is to
# take a large data set and reduce it in size in such a way as to approximately maximize the
# difference between points.  For instance, if you have 2000 points but suspect a lot of
# spatial autocorrelation between them, you can pass in your data frame, the names (or indices)
# of the lat/lon columns, and the number 200, and you get back 200 points from your original data
# set that are chosen to be as different from each other as possible given a randomly chosen
# starting point

# Input is:
#
# x, a data frame containing the columns to be used to calculate distances along with whatever other data you need
# cols, a vector of column names or indices to use for calculating distances
# npoints, the number of rarefied points to spit out
#
# e.g., thin.max(my.data, c("latitude", "longitude"), 200)

thin.max <- function(x, cols, npoints){
#Create empty vector for output
inds <- vector(mode="numeric")

#Create distance matrix
this.dist <- as.matrix(dist(x[,cols], upper=TRUE))

#Draw first index at random
inds <- c(inds, as.integer(runif(1, 1, length(this.dist[,1]))))

#Get second index from maximally distant point from first one
#Necessary because apply needs at least two columns or it'll barf
#in the next bit
inds <- c(inds, which.max(this.dist[,inds]))

while(length(inds) < npoints){
#For each point, find its distance to the closest point that's already been selected
min.dists <- apply(this.dist[,inds], 1, min)

#Select the point that is furthest from everything we've already selected
this.ind <- which.max(min.dists)

#Get rid of ties, if they exist
if(length(this.ind) > 1){
print("Breaking tie...")
this.ind <- this.ind
}
inds <- c(inds, this.ind)
}

return(x[inds,])
}

## 9   Moving data to/from the clipboard

http://www.johndcook.com/blog/r_excel_clipboard/

• writeClipboard(x)
• x <- readClipboard()
• x <- scan()
• write.table(x, "clipboard", sep="\t", row.names=FALSE)
• x <- read.table("clipboard")
# Paste a matrix from the clipboard into R.
copy.matrix <- function(x) write.table(x, "clipboard", sep="\t", col.names=FALSE, row.names=FALSE)