Exploring the structure of national consumption
This entry is a direct continuation of my:
The script organized for further exploration following work on March 10
#######################################
# LOADING PACKAGES
#######################################
# LOADING PACKAGES
library(ggplot2);library(Hmisc)
#######################################
# DEFINING FUNCTIONS
#######################################
# DEFINING FUNCTIONS
##### function to prepare data frame withs annual time series for summary analysis
prepare.dframe.fct<-function(i){
### cleaning data frame
i<-i[which(as.character(i$Country)!="Former USSR"),]#!(df1$id %in% idNums1)
i<-i[-which( i[,"y"] %in% c(1991,2000,2011)),]
i <- i[which(i[,"y"] < 2008), ]
i<-i[ which( i[,"Country"] %in% setdiff( i[,"Country"], Remove.country.vec) ), ]
i<-i[ -which( i[,"Country"] %in% From.1991.vec & i[,"y"] < 1991), ]
i<-i[ -which( i[,"Country"] == From.1993.vec & i[,"y"] < 1993), ]
### calculating structure of consumption
#### Territorial emissions as extraction
i[,"Extraction"]<-i[,"TerritorialEmissions"]
#### consumed resources that come from domestic extraction
i[,"Consum.extract"]<- (i[,"Extraction"]) -
(i[,"Exports"] * ((i[,"Extraction"]) /
(i[,"Extraction"] + i[,"Imports"])))
#### consumed resources that come from imports
i[,"Consum.import"]<- (i[,"Imports"]) -
(i[,"Exports"] * ((i[,"Imports"]) /
(i[,"Extraction"] + i[,"Imports"])))
#### consumption balance index
i[,"Consum.balance"]<-(i[,"Consum.extract"]-i[,"Consum.import"])/i[,"Consumption"]
i<-i[which(abs(i[,"Consum.balance"])<1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted or imported)
### associating population size and gdp data with data frame - then calculating basic metrics for later analysis
#### merging the gdp and population size data onto the energy consumption data frame
i<-merge(i,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)
i <- i[order(i[,"Country"],i[,"y"]),]
i[,"mean.gdp"]<-unlist(tapply(i[,"GDP"],i[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
i[,"mean.val"]<-unlist(tapply(i[,"val"],i[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
i[,"mean.gdp.per.cap"]<-i[,"mean.gdp"]/i[,"mean.val"]
i[,"log10.mean.gdp.per.cap"]<-log10(i[,"mean.gdp.per.cap"])
val.membership.df<-data.frame("mean.val"=unique(i[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))
i<-merge(i,val.membership.df,by="mean.val",all.x=TRUE)
### Calculating mean consumption balance
### mean consumption.balance index
i <- i[order(i[,"Country"],i[,"y"]),]
i[,"Consum.balance.avg"]<-unlist(by(i, i[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))
### Calculating trend in consumption balance
i <- i[order(i[,"Country"],i[,"y"]),]
i[,"ConsumBalance.y.trend"]<-unlist(by(i,i[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))
return(i)
}
##### function to apply 3d-plane and scatter plots to multiple summary data frames
PerspPlot.fct<-function(i){
attach(i)
model <- lm(ConsumBalance.y.trend ~ log10.mean.gdp.per.cap + Consum.balance.avg)
x <-range(log10.mean.gdp.per.cap)
x <- seq(x[1], x[2], length.out=50)
y <- range(Consum.balance.avg)
y <- seq(y[1], y[2], length.out=50)
z <- outer(x,y,
function(log10.mean.gdp.per.cap,Consum.balance.avg)
predict(model, data.frame(log10.mean.gdp.per.cap,Consum.balance.avg)))
p <- persp(x,y,z, theta=120, phi=10,
col="lightblue",expand = 0.5,shade = 0.2,
xlab="log gdp/cap", ylab="cons avg", zlab="cons trend",main=NAME[1])
#Project 3d points to 2d, so you can use points and segment:
obs <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg, ConsumBalance.y.trend, p)
pred <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg,fitted(model),p)
points(obs, col="green",pch=16,cex=0.5)
#segments(obs$x, obs$y, pred$x, pred$y,col="white",lwd=0.25)
detach(i)
rm("obs","pred","model","x","y","z","p")
}
#######################################
# READING IN DATA
#######################################
# READING IN DATA
## SETTING DIRECTORY FOR EORA DATA ON LOCAL HARD DRIVE
wd<-"G:/Documents/PostDocKVA/Data/Eora" ### data directory
setwd(wd)
### MATERIAL USE DATA - ENERGY DATASET
energy.df<-read.csv("TradeBalance_I-ENERGY.csv",header=TRUE)
### Reading in .csv file with annual gdp and population sizes
gdppop.df<-read.csv("gdppop.csv",header=TRUE,skip=1) #skipping the first line which includes a description of the file
### Reading in .csv file with various regional memberships
region.df<-read.csv("regionmembership.csv",header=T,skip=1) #skipping the first line which includes a description of the file
### Create continent data frame
continent.df<-region.df[which(region.df[,"region_type"]=="Continents"),c("region_name","name")]
### READING IN Wiedmann FOOTPRINT DATASETS
#### SETTING DIRECTORY FOR Wiedmann - material footprint - EORA DATA ON LOCAL HARD DRIVE
setwd(paste(wd,"/Wiedmann",sep=""))
#### I will read in the following files
#####"TradeBalance_CONSTMA.csv","TradeBalance_ORES.csv","TradeBalance_FFUEL.csv","TradeBalance_BIOMASS.csv","TradeBalance_I-CMFA-TOT.csv"
dframes.list<-list() ### We will read the files we are interested in into a list
#### FOSSIL FUELS
dframes.list[["FFUEL"]]<-read.csv("TradeBalance_FFUEL.csv",header=T)
dframes.list[["FFUEL"]][,"NAME"]<-"FFUEL"
#### BIOMASS
dframes.list[["BIOMASS"]]<-read.csv("TradeBalance_BIOMASS.csv",header=T)
dframes.list[["BIOMASS"]][,"NAME"]<-"BIOMASS"
#### CONSTRUNCTION MATERIALS
dframes.list[["CONSTMA"]]<-read.csv("TradeBalance_CONSTMA.csv",header=T)
dframes.list[["CONSTMA"]][,"NAME"]<-"CONSTMA"
#### ORES
dframes.list[["ORES"]]<-read.csv("TradeBalance_ORES.csv" ,header=T)
dframes.list[["ORES"]][,"NAME"]<-"ORES"
#### ALL MATERIALS - FOSSIL FUELS, BIOMASS, CONSTRUCTION MATERIALS, ORES
dframes.list[["TOTAL"]]<-read.csv("TradeBalance_I-CMFA-TOT.csv",header=T)
dframes.list[["TOTAL"]][,"NAME"]<-"TOTAL"
#######################################
# Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS
######################################
# Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS
## two ways of estimating domestic extraction
energy.df[,"Extraction"]<- energy.df[,"TerritorialEmissions"] #1
#energy.df[,"Extraction"]<- (energy.df[,"TerritorialEmissions"] + energy.df[,"DirectEmissions"]) #2 - These two categories both refer to domestically extracted resources, i thik. Check up with Wiedmann dataset.
### consumed resources that come from domestic extraction
##### Subtracting exports from imports and domestic extraction in proportion to their size.
energy.df[,"Consum.extract"]<- (energy.df[,"Extraction"]) -
(energy.df[,"Exports"] * ((energy.df[,"Extraction"]) /
(energy.df[,"Extraction"] + energy.df[,"Imports"]))) #### consum.extract <- extraction - (export * extraction/(import+extraction))
### consumed resources that come from imports
energy.df[,"Consum.import"]<- (energy.df[,"Imports"]) -
(energy.df[,"Exports"] * ((energy.df[,"Imports"]) /
(energy.df[,"Extraction"] + energy.df[,"Imports"]))) #### consum.import <- import - (export * import/(import+extraction))
### consumption balance index
energy.df[,"Consum.balance"]<-(energy.df[,"Consum.extract"]-energy.df[,"Consum.import"])/energy.df[,"Consumption"]
energy.df<-energy.df[-which(energy.df[,"Consum.balance"]>1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted)
#######################################
#CLEANING AND PREPARATION OF DATASET energy footprint data frame
######################################
## CLEANING AND PREPARATION OF DATASET energy footprint data frame
## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[which(energy.df[,"Consumption"]>0),]
energy.df<-energy.df[-which( energy.df[,"y"] %in% c(1991,2000,2011)),] # ecxluding the years 1991, 2000, 2011 with general large annual fluctuations
Consum.balance.BA2006.df<-do.call(rbind,by(energy.df,energy.df[,"Country"],function(x) c(mean(x[which(x[,"y"]%in%c(1970:2007)),"Consum.balance"]),
mean(x[which(x[,"y"]%in%c(2008:2010)),"Consum.balance"])))
)
Remove.0810.vec<-rownames(Consum.balance.BA2006.df[which(Consum.balance.BA2006.df[,1]>-1 & Consum.balance.BA2006.df[,2]==-1),]) # Exclude 2008-2010 for countries where it suddently drops to -1
Remove.country.vec<-c("Sudan","South Sudan","Montenegro","Gaza Strip","Former USSR","Statistical Discrepancies") ### exclude the two sudan's - Sudan, South Sudan - exclude Montenegro, Gaza Strip
### include Andorra, Monaco, San Marino, Liechtenstein despite constant -1 index
From.1991.vec<-c("Eritrea","Greenland") ## Eritrea 1991-2010, Greenland 1991-2007
From.1993.vec<-c("Czech Republic") ## Czech Repulibc 1993-2010
energy.df<-energy.df[-which( energy.df[,"Country"] %in% Remove.0810.vec & energy.df[,"y"] > 2007 ), ]
energy.df<-energy.df[ which( energy.df[,"Country"] %in% setdiff( energy.df[,"Country"], Remove.country.vec) ), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] %in% From.1991.vec & energy.df[,"y"] < 1991), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] == From.1993.vec & energy.df[,"y"] < 1993), ]
### We still see a lot of strange dynamics in 2008 possibly related to the financial crisis - so for this first analysis let's onlylook at years up to that time.
energy.df <- energy.df[which(energy.df[,"y"] < 2008), ]
#######################################
#calculate first order moments (means)
#######################################
# with the dataset cleaned up, we can now calculate first order moments (means)
### merging the gdp and population size data onto the energy consumption data frame
energy.df<-merge(energy.df,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)
### calculating mean gdp and population size
energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"mean.gdp"]<-unlist(tapply(energy.df[,"GDP"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.val"]<-unlist(tapply(energy.df[,"val"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.gdp.per.cap"]<-energy.df[,"mean.gdp"]/energy.df[,"mean.val"]
energy.df[,"log10.mean.gdp.per.cap"]<-log10(energy.df[,"mean.gdp.per.cap"])
### country membership of grouped population size
val.membership.df<-data.frame("mean.val"=unique(energy.df[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))
energy.df<-merge(energy.df,val.membership.df,by="mean.val",all.x=TRUE)
### mean consumption.balance index
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"Consum.balance.avg"]<-unlist(by(energy.df, energy.df[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))
### trend in energy consumption balance
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"ConsumBalance.y.trend"]<-unlist(by(energy.df,energy.df[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))
#### consumption avg - data frame
energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])
#### consumption balance trend - data frame
energy.consumtrend.df<-unique(energy.df[,c("Consum.balance.avg","mean.gdp.per.cap","ConsumBalance.y.trend","log10.mean.gdp.per.cap"),])
#######################################
# prepare wiedmann datasets for analysis and add energy dataset to their list
#######################################
# prepare wiedmann datasets for analysis
## apply the data frame preparation function
dframes.list<-lapply(dframes.list,prepare.dframe.fct)
## include long-term energy footprint data frame
dframes.list[["ENERGY"]]<-energy.df
dframes.list[["ENERGY"]][,"NAME"]<-"ENERGY"
#######################################
# summarize wiedmann and energy footprint datasets
#######################################
# prepare wiedmann datasets for analysis
## Get the calculated averages and trends for each data frame
ConsumTrend.df.list<-lapply(dframes.list,function(x) unique(x[,c("Country","Consum.balance.avg","ConsumBalance.y.trend","mean.val","mean.gdp","mean.gdp.per.cap","log10.mean.gdp.per.cap","NAME")]))
## Combine summary data frames in the list into one big dataframe
ConsumTrend.df<-do.call(rbind,ConsumTrend.df.list)
### associate continent membership with ConsumTrend.df
ConsumTrend.df<-merge(ConsumTrend.df,continent.df,by.x="Country",by.y="name",all.z=TRUE)
#### create new grouping of continents (lumping North and South America into the Americas and Australia and Asia into Australasia)
ConsumTrend.df[,"new_region_fac"] <- factor(ConsumTrend.df[,"region_name"])
levels(ConsumTrend.df[,"new_region_fac"]) <- c("Africa","Australasia","Australasia","Europe","Americas","Americas")
#######################################
Consistency of relationships among resource groups
# Relationship of consumption balance trend wtih gdp/cap and mean consumption balance
par(mfrow=c(3,2))
plots<-lapply(ConsumTrend.df.list,PerspPlot.fct)
# Let's just look at log gdp per cap to visualize the shared realtionship in a more graphically simple way.
# Ores is still the resource (along with ffuels) that diverges mostly from the hypothesized shared relationship.
ggplot(ConsumTrend.df,aes(y=ConsumBalance.y.trend,x=log10.mean.gdp.per.cap)) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 gdp/cap") + geom_smooth() + facet_wrap(~NAME, scales="free_y") + xlim(c(-1,2.1))# + ylim(c(-1,1))
# How about the average consumption balance?
### Fossil fuels and ores don't follow the hypothesis
ggplot(ConsumTrend.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=1) + ylab("Consumption balance") + xlab("log10 Population size") + geom_smooth() + facet_wrap(~NAME) + ylim(c(-1,1))
Consistency of relationships among continents
ggplot(ConsumTrend.df,aes(y=ConsumBalance.y.trend,x=log10.mean.gdp.per.cap)) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 gdp/cap") + geom_smooth(method="lm") + facet_grid(NAME ~ new_region_fac, scales="free") + xlim(c(-1,2.1)) + ylim(c(-0.03,0.03))
ggplot(ConsumTrend.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 population size") + geom_smooth(method="lm") + facet_grid(NAME ~ new_region_fac, scales="free") + ylim(c(-1,1))
Great to see that ores and fossil fuels break the relationship with log population size in Africa, Australasia and Europe. Obvisouly something else is going on. Endowments or area effects I think. Next time…