Exploring the structure of national consumption
This entry is a direct continuation of my first, second, third and fourth entry on the structure of national resource consumption.
This entry will be a more formal analysis of trends in the consumption index as function of country area, population size and GDP per capita.
library(ggplot2);library(Hmisc)
# READING IN DATA
## SETTING DIRECTORY FOR EORA DATA ON LOCAL HARD DRIVE
wd<-"G:/Documents/PostDocKVA/Data/Eora" ### data directory
setwd(wd)
## READING IN DATA
### MATERIAL USE DATA - ENERGY DATASET
energy.df<-read.csv("TradeBalance_I-ENERGY.csv",header=TRUE)
### Reading in .csv file with annual gdp and population sizes
gdppop.df<-read.csv("gdppop.csv",header=TRUE,skip=1) #skipping the first line which includes a description of the file
## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[which(energy.df[,"Consumption"]>0),]
## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[-which(as.character(energy.df$Country)=="Former USSR"),]
## merging the gdp and population size data onto the energy consumption data frame
energy.df<-merge(energy.df,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)
## To make consumption more comparable let's calculate per capita consumption by associating population data
### calculate per capita consumption and gdp consumption intensity by associating population data
energy.df[,"Consum.pop.int"]<-energy.df[,"Consumption"]/energy.df[,"val"]
energy.df[,"Consum.gdp.int"]<-energy.df[,"Consumption"]/energy.df[,"GDP"]
### calculating scaled consumption intensitities with respect to population size and gdp
energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"Consum.pop.int.scale"]<-unlist(by(energy.df,energy.df[,"Country"], function(x) scale(x[,"Consum.pop.int"],center=TRUE,scale=TRUE)))
energy.df[,"Consum.gdp.int.scale"]<-unlist(by(energy.df,energy.df[,"Country"], function(x) scale(x[,"Consum.gdp.int"],center=TRUE,scale=TRUE)))
energy.df<-energy.df[-which( energy.df[,"y"] %in% c(1991,2000,2011)),]
## Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS
### Subtracting exports from imports and domestic extraction in proportion to their size.
### Conceptual code
#### consum.extract <- extraction - (export * extraction/(import+extraction))
#### consum.import <- import - (export * import/(import+extraction))
### two ways of estimating domestic extraction
#energy.df[,"Extraction"]<- (energy.df[,"TerritorialEmissions"] + energy.df[,"DirectEmissions"]) ## These two categories both refer to domestically extracted resources, i thik. Check up with Wiedmann dataset.
energy.df[,"Extraction"]<- energy.df[,"TerritorialEmissions"]
### consumed resources that come from domestic extraction
energy.df[,"Consum.extract"]<- (energy.df[,"Extraction"]) -
(energy.df[,"Exports"] * ((energy.df[,"Extraction"]) /
(energy.df[,"Extraction"] + energy.df[,"Imports"])))
### consumed resources that come from imports
energy.df[,"Consum.import"]<- (energy.df[,"Imports"]) -
(energy.df[,"Exports"] * ((energy.df[,"Imports"]) /
(energy.df[,"Extraction"] + energy.df[,"Imports"])))
### consumption balance index
energy.df[,"Consum.balance"]<-(energy.df[,"Consum.extract"]-energy.df[,"Consum.import"])/energy.df[,"Consumption"]
energy.df<-energy.df[-which(energy.df[,"Consum.balance"]>1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted)
### calculating mean gdp and population size
energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"mean.gdp"]<-unlist(tapply(energy.df[,"GDP"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.val"]<-unlist(tapply(energy.df[,"val"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
### country membership of grouped population size
val.membership.df<-data.frame("mean.val"=unique(energy.df[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))
energy.df<-merge(energy.df,val.membership.df,by="mean.val",all.x=TRUE)
#####
## CLEANING OF DATASET
### exclude the two sudan's - Sudan, South Sudan
# exclude Montenegro, Gaza Strip
### include Andorra, Monaco, San Marino, Liechtenstein despite constant -1 index
## Include Czech Republic 1993 - 2010
## Eritrea 1991-2010, Greenland 1991-2007
# Exclude 2008-2010 for countries where it suddently drops to -1
Consum.balance.BA2006.df<-do.call(rbind,by(energy.df,energy.df[,"Country"],function(x) c(mean(x[which(x[,"y"]%in%c(1970:2007)),"Consum.balance"]),
mean(x[which(x[,"y"]%in%c(2008:2010)),"Consum.balance"])))
)
Remove.0810.vec<-rownames(Consum.balance.BA2006.df[which(Consum.balance.BA2006.df[,1]>-1 & Consum.balance.BA2006.df[,2]==-1),])
Remove.country.vec<-c("Sudan","South Sudan","Montenegro","Gaza Strip")
From.1991.vec<-c("Eritrea","Greenland")
From.1993.vec<-c("Czech Republic")
energy.df<-energy.df[-which( energy.df[,"Country"] %in% Remove.0810.vec & energy.df[,"y"] > 2007 ), ]
energy.df<-energy.df[ which( energy.df[,"Country"] %in% setdiff( energy.df[,"Country"], Remove.country.vec) ), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] %in% From.1991.vec & energy.df[,"y"] < 1991), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] == From.1993.vec & energy.df[,"y"] < 1993), ]
This was the plot we left off with in the last entry.
# including years after 2007
ggplot(energy.df[,],aes(x=y,y=Consum.balance)) + geom_line(aes(Group=Country,alpha=mean.gdp/mean.val)) + facet_wrap(~val.membership)
Now let’s assess the statistical support for two assumptions. Countries with largest populations are more self-sufficient. Countries with high GDP per capita are undergoing increasing resource subsidization
### We still see a lot of strange dynamics in 2008 possibly related to the financial crisis - so for this first analysis let's onlylook at years up to that time.
energy.df <- energy.df[which(energy.df[,"y"] < 2008), ]
### to look at whether countries with the larger population size are more self-sufficient we need the mean consumption.balance index
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"Consum.balance.avg"]<-unlist(by(energy.df, energy.df[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))
energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])
energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])
# raw population size
ggplot(energy.ConsumAvg.df,aes(y=Consum.balance.avg,x=mean.val)) + geom_point(color="grey50",sie=2) + ylab("Consumption balance") + xlab("log10 Population size")
## log population size
ggplot(energy.ConsumAvg.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=2) + ylab("Consumption balance") + xlab("log10 Population size")
ConBalAvg.logpop.model<-lm(Consum.balance.avg~log10(mean.val),data=energy.ConsumAvg.df)
summary(ConBalAvg.logpop.model)
##
## Call:
## lm(formula = Consum.balance.avg ~ log10(mean.val), data = energy.ConsumAvg.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.12964 -0.16127 0.02947 0.20275 0.77392
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.57936 0.18098 -8.727 1.65e-15 ***
## log10(mean.val) 0.30028 0.02687 11.176 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3269 on 182 degrees of freedom
## Multiple R-squared: 0.407, Adjusted R-squared: 0.4037
## F-statistic: 124.9 on 1 and 182 DF, p-value: < 2.2e-16
There is good support for some variable related to the magnitude of populations size (could be many other things related to country area and reosurces etc) - allows a population to be self-sufficient.
energy.df[,"mean.gdp.per.cap"]<-energy.df[,"mean.gdp"]/energy.df[,"mean.val"]
ConBalTrend.GDPperCAP.model<-lm(Consum.balance~y*mean.gdp.per.cap,data=energy.df)
summary(ConBalTrend.GDPperCAP.model)
##
## Call:
## lm(formula = Consum.balance ~ y * mean.gdp.per.cap, data = energy.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4062 -0.2025 0.1107 0.3096 0.9391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.281e+00 1.067e+00 2.137 0.03264 *
## y -8.855e-04 5.368e-04 -1.650 0.09907 .
## mean.gdp.per.cap 1.907e-01 6.190e-02 3.081 0.00208 **
## y:mean.gdp.per.cap -1.016e-04 3.113e-05 -3.263 0.00111 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4207 on 6530 degrees of freedom
## Multiple R-squared: 0.1437, Adjusted R-squared: 0.1433
## F-statistic: 365.2 on 3 and 6530 DF, p-value: < 2.2e-16
The here we see three thingts (1) that there is only a weak overall tendency for resource subsidization, (2) countries with higher gdp per capita are overall less subsized, but (3) have a larger trend toward subsidization.
Let’s visualize it
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"ConsumBalance.y.trend"]<-unlist(by(energy.df,energy.df[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))
## GDP capita
ggplot(energy.df,aes(y=ConsumBalance.y.trend,x=mean.gdp.per.cap)) + geom_point(color="grey50",size=2) + ylab("Temporal trend in consumption balance") + xlab("GDP per capita")
## log GDP per capita
ggplot(energy.df,aes(y=ConsumBalance.y.trend,x=log10(mean.gdp.per.cap))) + geom_point(color="grey50",size=2) + ylab("Temporal trend in consumption balance") + xlab("log 10 GDP per capita")
library(scatterplot3d)
#library(Rcmdr);library(rgl)
energy.consumtrend.df<-unique(energy.df[,c("Consum.balance.avg","mean.gdp.per.cap","ConsumBalance.y.trend"),])
attach(energy.consumtrend.df)
## The following objects are masked from energy.consumtrend.df (pos = 3):
##
## Consum.balance.avg, ConsumBalance.y.trend, mean.gdp.per.cap
s3d <- scatterplot3d(Consum.balance.avg, log10(mean.gdp.per.cap), ConsumBalance.y.trend, col.axis="blue",
col.grid="lightblue", main="Three-dimensional scatterplot",
color="blue", pch=19, box=F, cex.symbols=0.5, scale.y=0.5,angle=20)
fit <- lm(ConsumBalance.y.trend ~ log10(mean.gdp.per.cap)+Consum.balance.avg)
s3d$plane3d(fit)
#scatter3d(x=Consum.balance.avg, z=log10(mean.gdp.per.cap), y=ConsumBalance.y.trend,data=energy.consumtrend.df)
detach(energy.consumtrend.df)
energy.consumtrend.df[,"log10.mean.gdp.per.cap"]<-log10(energy.consumtrend.df[,"mean.gdp.per.cap"])
attach(energy.consumtrend.df)
## The following objects are masked from energy.consumtrend.df (pos = 3):
##
## Consum.balance.avg, ConsumBalance.y.trend, mean.gdp.per.cap
model <- lm(ConsumBalance.y.trend ~ log10.mean.gdp.per.cap + Consum.balance.avg)
x <-range(log10.mean.gdp.per.cap)
x <- seq(x[1], x[2], length.out=50)
y <- range(Consum.balance.avg)
y <- seq(y[1], y[2], length.out=50)
z <- outer(x,y,
function(log10.mean.gdp.per.cap,Consum.balance.avg)
predict(model, data.frame(log10.mean.gdp.per.cap,Consum.balance.avg)))
p <- persp(x,y,z, theta=120, phi=10,
col="lightblue",expand = 0.5,shade = 0.2,
xlab="log10 mean gdp per capita", ylab="Mean consumption balance", zlab="Consumption balance trend")
#Project 3d points to 2d, so you can use points and segment:
obs <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg, ConsumBalance.y.trend, p)
pred <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg,fitted(model),p)
points(obs, col="green",pch=16,cex=0.5)
#segments(obs$x, obs$y, pred$x, pred$y,col="black",lwd=0.5)
detach(energy.consumtrend.df)