#set workspace setwd("D:/GIS/Projekte/EO2H/sandbox/01_AQModel/Saxony") #load libraries library(maptools) library(rgdal) library(raster) library(fields) ####input definitions #set vector of pollutants vector.pollutants <- c('o3','pm10','no2','so2') #input stations for each pollutant list.stations <- list() list.stations[[vector.pollutants[1]]] <- list(sort(c('DESN019','DESN004','DESN014','DESN017','DESN001','DESN059','DESN053','DESN011','DESN052','DESN045','DESN051','DESN050','DESN049','DESN012','DESN024','DESN082','DESN080','DESN081','DESN085','DESN074','DESN079','DESN061','DESN076'))) list.stations[[vector.pollutants[2]]] <- list(sort(c('DESN019','DESN004','DESN014','DESN017','DESN059','DESN011','DESN045','DESN051','DESN050','DESN049','DESN012','DESN025','DESN024','DESN047','DESN083','DESN084','DESN077','DESN085','DESN075','DESN074','DESN060','DESN079','DESN061','DESN076','DESN020','DESN006'))) list.stations[[vector.pollutants[3]]] <- list(sort(c('DESN019','DESN004','DESN014','DESN017','DESN001','DESN059','DESN011','DESN052','DESN045','DESN051','DESN050','DESN012','DESN025','DESN024','DESN047','DESN083','DESN084','DESN077','DESN085','DESN075','DESN074','DESN060','DESN061','DESN076','DESN020','DESN006'))) list.stations[[vector.pollutants[4]]] <- list(sort(c('DESN014','DESN001','DESN053','DESN011','DESN052','DESN045','DESN051','DESN049','DESN025','DESN024','DESN047','DESN085','DESN074','DESN076','DESN020','DESN006'))) #station code attribute chr.stations.code <- 'code' #attribute vector vector.attributes <- c("betaO3","betaPM10","betaNO2","betaSO2","elev_mean","popdens","trafdens","trafcens") #weights for attribute vector (weights for attribute vector + weight for geometric distance) list.attributes.weight <- list() list.attributes.weight[[vector.pollutants[1]]] <- list(c(38,0,0,0,28,0,1,16,17)) list.attributes.weight[[vector.pollutants[2]]] <- list(c(0,19,0,0,30,5,3,18,25)) list.attributes.weight[[vector.pollutants[3]]] <- list(c(0,0,1,0,25,22,17,18,17)) list.attributes.weight[[vector.pollutants[4]]] <- list(c(0,0,0,12,29,4,4,8,43)) #boolean - if attribute for stations have to be calculated from surroundings bool.station.weightening <- T #input attributes for in situ station weighting (must correspond to vector.attributes) list.stations.attributes <- list() list.stations.attributes[[vector.attributes[1]]] <- list(c('betaO3_100','betaO3_1km','betaO3_2km','betaO3_3km')) list.stations.attributes[[vector.attributes[2]]] <- list(c('betaPM_100','betaPM_1km','betaPM_2km','betaPM_3km')) list.stations.attributes[[vector.attributes[3]]] <- list(c('betaNO_100','betaNO_1km','betaNO_2km','betaNO_3km')) list.stations.attributes[[vector.attributes[4]]] <- list(c('betaSO_100','betaSO_1km','betaSO_2km','betaSO_3km')) list.stations.attributes[[vector.attributes[5]]] <- list(c('elev_100m','elev_1km','elev_2km','elev_3km')) list.stations.attributes[[vector.attributes[6]]] <- list(c('popdens_10','popdens_1k','popdens_2k','popdens_3k')) list.stations.attributes[[vector.attributes[7]]] <- list(c('trafdens_1','trafdens_2','trafdens_3','trafdens_4')) list.stations.attributes[[vector.attributes[8]]] <- list(c('trafcens_1','trafcens_2','trafcens_3','trafcens_4')) #weights for station attributes (must correspond to list.stations.attributes) list.stations.attributes.weight <- list() list.stations.attributes.weight[[vector.attributes[1]]] <- list(c(0,0.2,0.6,0.2)) list.stations.attributes.weight[[vector.attributes[2]]] <- list(c(0,0.2,0.6,0.2)) list.stations.attributes.weight[[vector.attributes[3]]] <- list(c(0,0.2,0.6,0.2)) list.stations.attributes.weight[[vector.attributes[4]]] <- list(c(0,0.2,0.6,0.2)) list.stations.attributes.weight[[vector.attributes[5]]] <- list(c(0,0.2,0.5,0.3)) list.stations.attributes.weight[[vector.attributes[6]]] <- list(c(0.5,0,0.5,0)) list.stations.attributes.weight[[vector.attributes[7]]] <- list(c(0.5,0.3,0.2,0)) list.stations.attributes.weight[[vector.attributes[8]]] <- list(c(0.5,0.3,0.2,0)) #IDW power num.p <- 2 #set if only min 5 distances shall be used for pollutant estimtion bool.min5 <- T #load polygon raster shape (reference areas) shape.fishnet <- readOGR("shape", "sn_raster") #load in situ sensor stations shape shape.stations <- readOGR("shape", "sn_stations") ####pre-processing #replace whitespace in in situ station names shape.stations@data[,chr.stations.code] <- gsub(' ', '', shape.stations@data[,chr.stations.code]) #add coordinates to insitu stations (as attribute; used for distance calculation) centroids.stations <- coordinates(shape.stations) colnames(centroids.stations) <- c('coords.x','coords.y') shape.stations@data <- cbind(shape.stations@data, centroids.stations) rm(centroids.stations) #convert polygon raster shape to sp pixel file (intermediate step for creating rasters, must be gridded) centroids.fishnet <- coordinates(shape.fishnet) colnames(centroids.fishnet) <- c('coords.x','coords.y') shape.fishnet.point <- cbind(shape.fishnet@data, centroids.fishnet) coordinates(shape.fishnet.point) <- c('coords.x','coords.y') shape.fishnet.pixel <- as(shape.fishnet.point, "SpatialPixelsDataFrame") proj4string(shape.fishnet.pixel) = proj4string(shape.fishnet) rm(centroids.fishnet, shape.fishnet.point) #convert pixel file to raster (one for each attribute) raster.stack.fishnet <- stack() for(att in vector.attributes){ raster.stack.fishnet <- addLayer(raster.stack.fishnet, raster(shape.fishnet.pixel[,att])) } rm(shape.fishnet.pixel) ####calculate station attributes #function: calculate weighted mean for attribute columns function.calculateWeightedMean <- function(dataframe, newcol, vector.col, vector.weight){ dataframe[newcol] <- 0 for(i in 1:length(vector.col)){ dataframe[newcol] <- dataframe[newcol] + (vector.weight[i] * dataframe[, vector.col[i]]) } #calculate new column dataframe[newcol] <- dataframe[newcol] / sum(vector.weight) #remove old columns dataframe <- dataframe[, !names(dataframe) %in% vector.col] return(dataframe) } #calculate new station attributes from weighted surroundings if(bool.station.weightening){ for(att in vector.attributes){ shape.stations@data <- function.calculateWeightedMean(shape.stations@data, att, unlist(list.stations.attributes[[att]]), unlist(list.stations.attributes.weight[[att]])) } } #get max distances for each attribute for normalization list.attributes.max <- list() for(att in vector.attributes){ list.attributes.max[[att]] <- max(maxValue(raster.stack.fishnet[[att]]), max(shape.stations@data[att])) } #add column for geometry distance list.attributes.max[['geomdist']] <- max(rdist(coordinates(shape.stations), coordinates(shape.fishnet)) / 1000) #normalize station attributes shape.stations.norm <- shape.stations for(att in vector.attributes){ shape.stations.norm@data[att] <- shape.stations@data[att] / list.attributes.max[att] } rm(shape.stations) #normalize fishnet attributes raster.stack.fishnet.norm <- stack() for(att in vector.attributes){ raster.stack.fishnet.norm <- addLayer(raster.stack.fishnet.norm, raster.stack.fishnet[[att]] / list.attributes.max[[att]]) } #set layer names layerNames(raster.stack.fishnet.norm) <- layerNames(raster.stack.fishnet) rm(raster.stack.fishnet) ####calculate attribute distance between fishnet and stations #set list containing the weight rasters for every pollutant list.distances <- list() for(pol in vector.pollutants){ #log cat(paste('calculating attribute distance between fishnet and stations for', pol, '\n', sep=' ')) #set list containing the weights for every station for a single pollutant list.distances.pollutant <- list() #iterate through stations for(sta in unlist(list.stations[[pol]])){ #create raster for every attribute + geometric distance #iterate through attributes for(att in c(vector.attributes)){ #get attribute id for weight assignment att.id <- which(vector.attributes == att) #init raster in first run if(!exists("raster.dist")){ raster.dist <- unlist(list.attributes.weight[[pol]])[att.id] * (raster.stack.fishnet.norm[[att]] - shape.stations.norm@data[shape.stations.norm@data[,chr.stations.code] == sta, att])^num.p #add raster } else raster.dist <- raster.dist + unlist(list.attributes.weight[[pol]])[att.id] * (raster.stack.fishnet.norm[[att]] - shape.stations.norm@data[shape.stations.norm@data[,chr.stations.code] == sta, att])^num.p } #add geometry layer weight.geom <- unlist(list.attributes.weight[[pol]])[length(vector.attributes) + 1] raster.dist <- raster.dist + (weight.geom * ((distanceFromPoints(raster.stack.fishnet.norm[[att]], coordinates(shape.stations.norm[shape.stations.norm@data[,chr.stations.code] == sta, att]))/1000)/list.attributes.max[['geomdist']])^num.p) #get sum of weights num.weights.sum <- sum(unlist(list.attributes.weight[[pol]])) #divide raster.dist by sum of weigths raster.dist <- raster.dist / num.weights.sum #add raster to list list.distances.pollutant[[sta]] <- raster.dist #remove raster.dist to not influence run for next station rm(raster.dist) } #stack list and add to final list list.distances[[pol]] <- stack(list.distances.pollutant) } #clean up the mess rm(list.distances.pollutant,num.weights.sum,pol,sta,att,att.id,weight.geom) ####test plots (comment out when running the whole script at once) #plot distances # colours.dist <- colorRampPalette(c("green","yellow", "orange","red","violet", "violet"))(35) # cuts.dist <- seq(-0.1, 0.6, by=0.02) # for(pol in vector.pollutants){ # for(sta in unlist(list.stations[[pol]])){ # png(filename=paste(pol,'_',sta,'.png',sep=""), height=500, width=500, bg="white") # plot((list.distances[[pol]])[[sta]],legend=F, xaxt="n", yaxt="n", breaks=cuts.dist, col=colours.dist, main=sta) # dev.off() # } # } # rm(pol,sta,colours.dist,cuts.dist) ####get the 5th minimum distance to an in situ station for each raster cell (if boolean.min5 = T) #iterate through list.distances if(bool.min5 == T){ list.distances.min5 <- list() for(pol in vector.pollutants){ #log cat(paste('calculating 5th minimum distance for', pol, '\n', sep=' ')) #set raster stack stack.distances <- list.distances[[pol]] #get the 5th minimum distance raster.min5 <- stackApply(stack.distances, indices=seq(1,1,length.out=nlayers(stack.distances)), fun=function(vector,na.rm){ vector.sort = sort(vector, na.last=TRUE) return(vector.sort[5]) }, na.rm=TRUE) #subtract 5th minimum distance from stack stack.distances.min5 <- stack.distances - raster.min5 #set positive values in stack to NA stack.distances.min5 <- stackApply(stack.distances.min5, indices=c(1:nlayers(stack.distances.min5)), fun=function(value,na.rm){ value[value > 0] <- NA; return(value) }, na.rm=FALSE) #add 5th minimum distance to stack stack.distances.min5 <- stack.distances.min5 + raster.min5 #set layerNames layerNames(stack.distances.min5) <- layerNames(stack.distances) #add stack to list.distances.min5 list.distances.min5[[pol]] <- stack.distances.min5 } #clean up the mess rm(pol,stack.distances,raster.min5,stack.distances.min5) } #select list based on boolean.min5 if(bool.min5 == T){ list.distances.fin <- list.distances.min5 } else list.distances.fin <- list.distances ####get inverse distance weights for each station list.weights <- list() for(pol in vector.pollutants){ #log cat(paste('calculating inverse distances for', pol, '\n', sep=' ')) #get inverse distance stack.inv <- (1 / list.distances.fin[[pol]]) #get sum of distances stack.inv.sum <- stackApply(stack.inv, indices=seq(1, 1, length.out=nlayers(stack.inv)), fun=sum) #calculate weights stack.weights <- stack.inv / stack.inv.sum #set NA values to 0 stack.weights <- stackApply(stack.weights, indices=c(1:nlayers(stack.weights)), fun=function(value,na.rm){ value[is.na(value)] <- 0; return(value) }, na.rm=FALSE) #set layer names layerNames(stack.weights) <- layerNames(list.distances.fin[[pol]]) #add stack to weights list list.weights[[pol]] <- stack.weights } #clean up the whole workspace (everything is removed but list.weights) rm(list = setdiff(ls(), c("list.weights"))) ####functions for calculating actual pollutant concentrations #function calculate pollutant concentration from observations function.getPollutantConcentration <- function(vector.stations, vector.measurements, chr.pollutant){ #check if pollutant is supported if(!chr.pollutant %in% names(list.weights)) stop(paste('pollutant',chr.pollutant,'is not supported', sep=' ')) #check if length of stations and observations are equal if(length(vector.stations) != length(vector.measurements)) stop('station and measurement vectors must be of same length') dataframe.observations <- data.frame(vector.stations, vector.measurements, stringsAsFactors=F) #check if at least 60% of station measurements are available if(sum(vector.stations %in% layerNames(list.weights[[chr.pollutant]])) < round(length(layerNames(list.weights[[chr.pollutant]])) * 0.6)) stop('at least 60% of station measurements must be available') #calculate result raster for(raster.station in unstack(list.weights[[chr.pollutant]])){ #check if weigth raster stack contains station id if(layerNames(raster.station) %in% dataframe.observations[,1]){ #check if raster already exists, if not create it if(!exists("raster.result")){ raster.result <- dataframe.observations[dataframe.observations[,1] == layerNames(raster.station),2] * raster.station raster.weights <- raster.station #add weighted raster to result } else { raster.result <- raster.result + (dataframe.observations[dataframe.observations[,1] == layerNames(raster.station),2] * raster.station) raster.weights <- raster.weights + raster.station } } } #set 0 to NA raster.result[raster.result == 0] <- NA #divide by raster weights to outweights missing observations raster.result <- raster.result / raster.weights #return return(raster.result) } #function: get pollutant concentration from observations function.getPollutantConentrationAsGeoTiff <- function(vector.stations, vector.measurements, chr.pollutant, chr.file){ #calculate raster raster.result <- function.getPollutantConcentration(vector.stations, vector.measurements, chr.pollutant) #write result raster writeRaster(raster.result, filename=chr.file, format="GTiff", overwrite=TRUE) return(chr.file) } ####save image (pre-processing ended here...now we can calculate pollutant concentrations) gc() save.image(file="AirQualityMapping.RData") #test configuration chr.pollutant <- 'o3' vector.stations <- c('DESN019','DESN004','DESN014','DESN017','DESN001','DESN059','DESN053','DESN011','DESN052','DESN045','DESN051','DESN050','DESN049','DESN012','DESN024','DESN082','DESN080','DESN081','DESN085','DESN074','DESN079','DESN061','DESN076') vector.measurements <- c(10:32) plot(function.getPollutantConcentration(vector.stations,vector.measurements,chr.pollutant))