09 March 2015

Exploring the structure of national consumption

This entry is a direct continuation of my first, second, third and fourth entry on the structure of national resource consumption.

This entry will be a more formal analysis of trends in the consumption index as function of country area, population size and GDP per capita.

library(ggplot2);library(Hmisc)
# READING IN DATA

## SETTING DIRECTORY FOR EORA DATA ON LOCAL HARD DRIVE
wd<-"G:/Documents/PostDocKVA/Data/Eora" ### data directory
setwd(wd)

## READING IN DATA
### MATERIAL USE DATA - ENERGY DATASET
energy.df<-read.csv("TradeBalance_I-ENERGY.csv",header=TRUE)
### Reading in .csv file with annual gdp and population sizes
gdppop.df<-read.csv("gdppop.csv",header=TRUE,skip=1) #skipping the first line which includes a description of the file

## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[which(energy.df[,"Consumption"]>0),]

## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[-which(as.character(energy.df$Country)=="Former USSR"),]


## merging the gdp and population size data onto the energy consumption data frame
energy.df<-merge(energy.df,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)


## To make consumption more comparable let's calculate per capita consumption by associating population data

### calculate per capita consumption and gdp consumption intensity by associating population data
energy.df[,"Consum.pop.int"]<-energy.df[,"Consumption"]/energy.df[,"val"]
energy.df[,"Consum.gdp.int"]<-energy.df[,"Consumption"]/energy.df[,"GDP"]

### calculating scaled consumption intensitities with respect to population size and gdp
energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"Consum.pop.int.scale"]<-unlist(by(energy.df,energy.df[,"Country"], function(x) scale(x[,"Consum.pop.int"],center=TRUE,scale=TRUE)))
energy.df[,"Consum.gdp.int.scale"]<-unlist(by(energy.df,energy.df[,"Country"], function(x) scale(x[,"Consum.gdp.int"],center=TRUE,scale=TRUE)))

energy.df<-energy.df[-which( energy.df[,"y"] %in% c(1991,2000,2011)),]


## Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS

### Subtracting exports from imports and domestic extraction in proportion to their size.

### Conceptual code
#### consum.extract <- extraction - (export * extraction/(import+extraction))
#### consum.import <- import - (export * import/(import+extraction))

### two ways of estimating domestic extraction

#energy.df[,"Extraction"]<- (energy.df[,"TerritorialEmissions"] + energy.df[,"DirectEmissions"]) ## These two categories both refer to domestically extracted resources, i thik. Check up with Wiedmann dataset.

energy.df[,"Extraction"]<- energy.df[,"TerritorialEmissions"]

### consumed resources that come from domestic extraction

energy.df[,"Consum.extract"]<- (energy.df[,"Extraction"]) - 
    (energy.df[,"Exports"] * ((energy.df[,"Extraction"]) /
       (energy.df[,"Extraction"] + energy.df[,"Imports"])))


### consumed resources that come from imports

energy.df[,"Consum.import"]<- (energy.df[,"Imports"]) - 
    (energy.df[,"Exports"] * ((energy.df[,"Imports"]) /
       (energy.df[,"Extraction"] + energy.df[,"Imports"])))


### consumption balance index

energy.df[,"Consum.balance"]<-(energy.df[,"Consum.extract"]-energy.df[,"Consum.import"])/energy.df[,"Consumption"]
energy.df<-energy.df[-which(energy.df[,"Consum.balance"]>1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted)


### calculating mean gdp and population size

energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"mean.gdp"]<-unlist(tapply(energy.df[,"GDP"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.val"]<-unlist(tapply(energy.df[,"val"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))


### country membership of grouped population size

val.membership.df<-data.frame("mean.val"=unique(energy.df[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))

energy.df<-merge(energy.df,val.membership.df,by="mean.val",all.x=TRUE)

#####

## CLEANING OF DATASET

### exclude the two sudan's - Sudan, South Sudan
# exclude Montenegro, Gaza Strip

### include Andorra, Monaco, San Marino, Liechtenstein despite constant -1 index

## Include Czech Republic 1993 - 2010
## Eritrea 1991-2010, Greenland 1991-2007

# Exclude 2008-2010 for countries where it suddently drops to -1


Consum.balance.BA2006.df<-do.call(rbind,by(energy.df,energy.df[,"Country"],function(x) c(mean(x[which(x[,"y"]%in%c(1970:2007)),"Consum.balance"]),
                                                mean(x[which(x[,"y"]%in%c(2008:2010)),"Consum.balance"])))
        )
Remove.0810.vec<-rownames(Consum.balance.BA2006.df[which(Consum.balance.BA2006.df[,1]>-1 & Consum.balance.BA2006.df[,2]==-1),])
Remove.country.vec<-c("Sudan","South Sudan","Montenegro","Gaza Strip")
From.1991.vec<-c("Eritrea","Greenland")
From.1993.vec<-c("Czech Republic")

energy.df<-energy.df[-which( energy.df[,"Country"] %in% Remove.0810.vec & energy.df[,"y"] > 2007 ), ]
energy.df<-energy.df[ which( energy.df[,"Country"] %in% setdiff( energy.df[,"Country"], Remove.country.vec) ), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] %in% From.1991.vec &  energy.df[,"y"] < 1991), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] == From.1993.vec &  energy.df[,"y"] < 1993), ]

This was the plot we left off with in the last entry.

# including years after 2007
ggplot(energy.df[,],aes(x=y,y=Consum.balance)) + geom_line(aes(Group=Country,alpha=mean.gdp/mean.val)) + facet_wrap(~val.membership)

Now let’s assess the statistical support for two assumptions. Countries with largest populations are more self-sufficient. Countries with high GDP per capita are undergoing increasing resource subsidization

### We still see a lot of strange dynamics in 2008 possibly related to the financial crisis - so for this first analysis let's onlylook at years up to that time.
energy.df <- energy.df[which(energy.df[,"y"] < 2008), ]

### to look at whether countries with the larger population size are more self-sufficient we need the mean consumption.balance index
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]

energy.df[,"Consum.balance.avg"]<-unlist(by(energy.df, energy.df[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))

energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])



energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])

# raw population size
ggplot(energy.ConsumAvg.df,aes(y=Consum.balance.avg,x=mean.val)) + geom_point(color="grey50",sie=2) + ylab("Consumption balance") + xlab("log10 Population size")

## log population size
ggplot(energy.ConsumAvg.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=2) + ylab("Consumption balance") + xlab("log10 Population size")

ConBalAvg.logpop.model<-lm(Consum.balance.avg~log10(mean.val),data=energy.ConsumAvg.df)

summary(ConBalAvg.logpop.model)
## 
## Call:
## lm(formula = Consum.balance.avg ~ log10(mean.val), data = energy.ConsumAvg.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.12964 -0.16127  0.02947  0.20275  0.77392 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.57936    0.18098  -8.727 1.65e-15 ***
## log10(mean.val)  0.30028    0.02687  11.176  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3269 on 182 degrees of freedom
## Multiple R-squared:  0.407,  Adjusted R-squared:  0.4037 
## F-statistic: 124.9 on 1 and 182 DF,  p-value: < 2.2e-16

There is good support for some variable related to the magnitude of populations size (could be many other things related to country area and reosurces etc) - allows a population to be self-sufficient.

energy.df[,"mean.gdp.per.cap"]<-energy.df[,"mean.gdp"]/energy.df[,"mean.val"]

ConBalTrend.GDPperCAP.model<-lm(Consum.balance~y*mean.gdp.per.cap,data=energy.df)

summary(ConBalTrend.GDPperCAP.model)
## 
## Call:
## lm(formula = Consum.balance ~ y * mean.gdp.per.cap, data = energy.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4062 -0.2025  0.1107  0.3096  0.9391 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         2.281e+00  1.067e+00   2.137  0.03264 * 
## y                  -8.855e-04  5.368e-04  -1.650  0.09907 . 
## mean.gdp.per.cap    1.907e-01  6.190e-02   3.081  0.00208 **
## y:mean.gdp.per.cap -1.016e-04  3.113e-05  -3.263  0.00111 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4207 on 6530 degrees of freedom
## Multiple R-squared:  0.1437, Adjusted R-squared:  0.1433 
## F-statistic: 365.2 on 3 and 6530 DF,  p-value: < 2.2e-16

The here we see three thingts (1) that there is only a weak overall tendency for resource subsidization, (2) countries with higher gdp per capita are overall less subsized, but (3) have a larger trend toward subsidization.

Let’s visualize it

energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]

energy.df[,"ConsumBalance.y.trend"]<-unlist(by(energy.df,energy.df[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))

## GDP capita
ggplot(energy.df,aes(y=ConsumBalance.y.trend,x=mean.gdp.per.cap)) + geom_point(color="grey50",size=2) + ylab("Temporal trend in consumption balance") + xlab("GDP per capita")

## log GDP per capita
ggplot(energy.df,aes(y=ConsumBalance.y.trend,x=log10(mean.gdp.per.cap))) + geom_point(color="grey50",size=2) + ylab("Temporal trend in consumption balance") + xlab("log 10 GDP per capita")

library(scatterplot3d)
#library(Rcmdr);library(rgl)

energy.consumtrend.df<-unique(energy.df[,c("Consum.balance.avg","mean.gdp.per.cap","ConsumBalance.y.trend"),])

attach(energy.consumtrend.df)
## The following objects are masked from energy.consumtrend.df (pos = 3):
## 
##     Consum.balance.avg, ConsumBalance.y.trend, mean.gdp.per.cap
s3d <- scatterplot3d(Consum.balance.avg, log10(mean.gdp.per.cap), ConsumBalance.y.trend, col.axis="blue",
col.grid="lightblue", main="Three-dimensional scatterplot", 
color="blue", pch=19,  box=F, cex.symbols=0.5, scale.y=0.5,angle=20)

fit <- lm(ConsumBalance.y.trend ~ log10(mean.gdp.per.cap)+Consum.balance.avg) 
s3d$plane3d(fit)

#scatter3d(x=Consum.balance.avg, z=log10(mean.gdp.per.cap), y=ConsumBalance.y.trend,data=energy.consumtrend.df)
detach(energy.consumtrend.df)


energy.consumtrend.df[,"log10.mean.gdp.per.cap"]<-log10(energy.consumtrend.df[,"mean.gdp.per.cap"])

attach(energy.consumtrend.df)
## The following objects are masked from energy.consumtrend.df (pos = 3):
## 
##     Consum.balance.avg, ConsumBalance.y.trend, mean.gdp.per.cap
model <- lm(ConsumBalance.y.trend ~ log10.mean.gdp.per.cap + Consum.balance.avg)
x <-range(log10.mean.gdp.per.cap)
x <- seq(x[1], x[2], length.out=50)    
y <- range(Consum.balance.avg)
y <- seq(y[1], y[2], length.out=50)


z <- outer(x,y, 
           function(log10.mean.gdp.per.cap,Consum.balance.avg)
                     predict(model, data.frame(log10.mean.gdp.per.cap,Consum.balance.avg)))

p <- persp(x,y,z, theta=120, phi=10, 
           col="lightblue",expand = 0.5,shade = 0.2,
           xlab="log10 mean gdp per capita", ylab="Mean consumption balance", zlab="Consumption balance trend")
#Project 3d points to 2d, so you can use points and segment:

obs <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg, ConsumBalance.y.trend, p)
pred <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg,fitted(model),p)
points(obs, col="green",pch=16,cex=0.5)