Exploring the structure of national consumption
This entry is a direct continuation of my first, second and third entries on the structure of national resource consumption.
In the last entry we saw from visual inspection that countries with higher per capita GDP had the strongest trends toward increasing subsidization and that countries with the largest population size had the largest proportion of energy consumption coming from domestic extraction.
This entry will consist of further cleaning of the data.
library(ggplot2);library(Hmisc)
# READING IN DATA
## Directory holding the Eora data on the local hard drive
wd <- "G:/Documents/PostDocKVA/Data/Eora" ### data directory
setwd(dir = wd)
## LOADING THE INPUT FILES (file names are relative to the directory set above)
### Material use data - energy dataset
energy.df <- read.csv(file = "TradeBalance_I-ENERGY.csv", header = TRUE)
### Annual GDP and population sizes. The first line of the file is a
### description of the file, so it is skipped and the header is taken from
### the line after it.
gdppop.df <- read.csv(file = "gdppop.csv", header = TRUE, skip = 1)
## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
### which() also drops rows whose Consumption is NA
energy.df <- energy.df[which(energy.df[, "Consumption"] > 0), ]
## REMOVING THE "Former USSR" ENTRIES
### A logical mask is used instead of -which(...): when no row matches,
### -which(...) is an empty index and selects zero rows, i.e. it would drop
### the whole data set instead of leaving it untouched.
energy.df <- energy.df[!(as.character(energy.df$Country) %in% "Former USSR"), ]
## merging the gdp and population size data onto the energy consumption data frame
### all.x = TRUE keeps every energy row; rows without a match in gdppop.df get
### NA in the columns that come from it
energy.df <- merge(
  energy.df, gdppop.df,
  by = c("CountryA3", "y", "Country"),
  all.x = TRUE
)
## To make consumption more comparable let's calculate per capita consumption by associating population data
### consumption per unit of `val` and per unit of `GDP`
### (`val` is used here as the population size column)
energy.df[, "Consum.pop.int"] <- energy.df[, "Consumption"] / energy.df[, "val"]
energy.df[, "Consum.gdp.int"] <- energy.df[, "Consumption"] / energy.df[, "GDP"]
### calculating scaled consumption intensities with respect to population size and gdp
### Each intensity is centred and scaled within its own country. ave() returns
### the values aligned with the rows of energy.df, so the result does not
### depend on the row order; the sort is kept only so the data frame stays
### ordered by country and year.
energy.df <- energy.df[order(energy.df[, "Country"], energy.df[, "y"]), ]
energy.df[, "Consum.pop.int.scale"] <- ave(
  energy.df[, "Consum.pop.int"], energy.df[, "Country"],
  FUN = function(v) as.numeric(scale(v, center = TRUE, scale = TRUE))
)
energy.df[, "Consum.gdp.int.scale"] <- ave(
  energy.df[, "Consum.gdp.int"], energy.df[, "Country"],
  FUN = function(v) as.numeric(scale(v, center = TRUE, scale = TRUE))
)
## REMOVING THE YEARS 1991, 2000 AND 2011
### NOTE(review): the reason for dropping exactly these years is not stated
### here - confirm. The scaled intensities above are computed before the drop,
### so these years still contribute to each country's mean and sd.
energy.df <- energy.df[!(energy.df[, "y"] %in% c(1991, 2000, 2011)), ]
## Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS
### Exports are subtracted from imports and from domestic extraction in
### proportion to each one's share of (imports + extraction).
### Conceptual code
#### consum.extract <- extraction - (export * extraction/(import+extraction))
#### consum.import <- import - (export * import/(import+extraction))
### two ways of estimating domestic extraction
#energy.df[,"Extraction"]<- (energy.df[,"TerritorialEmissions"] + energy.df[,"DirectEmissions"]) ## These two categories both refer to domestically extracted resources, I think. Check up with Wiedmann dataset.
energy.df[, "Extraction"] <- energy.df[, "TerritorialEmissions"]
### consumed resources that come from domestic extraction
### (rows where Extraction + Imports is 0 produce non-finite values here)
energy.df[, "Consum.extract"] <- with(
  energy.df,
  Extraction - (Exports * (Extraction / (Extraction + Imports)))
)
### consumed resources that come from imports
energy.df[, "Consum.import"] <- with(
  energy.df,
  Imports - (Exports * (Imports / (Extraction + Imports)))
)
### consumption balance index
### In this file -1 is treated as fully subsidized by imports and 1 as fully
### self-sufficient (all consumption from domestic extraction).
energy.df[, "Consum.balance"] <- with(
  energy.df,
  (Consum.extract - Consum.import) / Consumption
)
### removing entries whose index is above 1 (i.e. beyond "all resources being
### domestically extracted"). Rows whose index is NA are kept. A logical mask
### is used instead of -which(...), which would select zero rows (dropping
### everything) when no entry is above 1.
energy.df <- energy.df[!((energy.df[, "Consum.balance"] > 1) %in% TRUE), ]
### calculating mean gdp and population size
### ave() repeats each country's mean (NAs ignored) on every row of that
### country and returns it aligned with the rows, so the result does not
### depend on the rows being sorted by country.
energy.df <- energy.df[order(energy.df[, "Country"], energy.df[, "y"]), ]
energy.df[, "mean.gdp"] <- ave(
  energy.df[, "GDP"], energy.df[, "Country"],
  FUN = function(v) mean(v, na.rm = TRUE)
)
energy.df[, "mean.val"] <- ave(
  energy.df[, "val"], energy.df[, "Country"],
  FUN = function(v) mean(v, na.rm = TRUE)
)
### country membership of grouped population size
### One row per distinct mean population value; cut2(m = 20) bins those values
### into groups and as.numeric() turns the bins into integer group codes.
val.membership.df <- data.frame(
  "mean.val" = unique(energy.df[, "mean.val"]),
  "val.membership" = NA
)
val.membership.df[, "val.membership"] <- as.numeric(
  cut2(val.membership.df$mean.val, m = 20)
)
### Attach the group code to every row via the shared mean.val value.
### Note: merge() sorts the result on the key by default, so energy.df is no
### longer ordered by country and year after this line.
energy.df <- merge(energy.df, val.membership.df, by = "mean.val", all.x = TRUE)
This was the plot we left off with in the last entry.
# Consumption balance over time, one line per country, faceted by population
# size group; line transparency follows mean.gdp / mean.val.
# The grouping aesthetic is spelled `group` (lowercase): `Group` is not a
# ggplot2 aesthetic, so the per-country grouping was never explicitly set.
# Rows without a population size group are left out of both plots.
# excluding years after 2007
ggplot(
  energy.df[which(energy.df[, "y"] < 2008 & !is.na(energy.df[, "val.membership"])), ],
  aes(x = y, y = Consum.balance)
) +
  geom_line(aes(group = Country, alpha = mean.gdp / mean.val)) +
  facet_wrap(~val.membership)
# including years after 2007
ggplot(
  energy.df[which(!is.na(energy.df[, "val.membership"])), ],
  aes(x = y, y = Consum.balance)
) +
  geom_line(aes(group = Country, alpha = mean.gdp / mean.val)) +
  facet_wrap(~val.membership)
We need to inspect some of the odd time series that are completely stable for the entire period or a large part of it.
## number of countries with a balance index of exactly -1 in at least one year
with(energy.df, length(unique(Country[which(Consum.balance == -1)]))) ## 43 countries are completely subsidized at some point
## [1] 43
### Completely subsidized all years
## 1970 - 2010
### Andorra, Liechtenstein, Monaco, San Marino, South Sudan, Sudan
## 2008 - 2010
### Antigua, Aruba, Bahamas, Belize, Bermuda, British Virgin Islands, Burkina Faso, Burundi, Cape Verde, Cayman Islands, Central African Republic, Chad, Djibouti, French Polynesia, Gambia, Guinea
#Uganda, Vanuatu
### countries that are fully subsidized (index == -1) in any year
fully.subsidized.vec <- with(
  energy.df,
  unique(Country[which(Consum.balance == -1)])
)
### full time series of those countries, one panel per country
ggplot(
  subset(energy.df, Country %in% fully.subsidized.vec),
  aes(x = y, y = Consum.balance)
) +
  geom_line() +
  facet_wrap(~Country)
### which countries are fully subsidized in some year before 2008?
fully.subsidized.vec <- with(
  energy.df,
  unique(Country[which(y < 2008 & Consum.balance == -1)])
)
ggplot(
  subset(energy.df, Country %in% fully.subsidized.vec),
  aes(x = y, y = Consum.balance)
) +
  geom_line() +
  facet_wrap(~Country)
### which countries are fully subsidized in some year after 1991 and before 2008?
fully.subsidized.vec <- with(
  energy.df,
  unique(Country[which(y < 2008 & y > 1991 & Consum.balance == -1)])
)
ggplot(
  subset(energy.df, Country %in% fully.subsidized.vec),
  aes(x = y, y = Consum.balance)
) +
  geom_line() +
  facet_wrap(~Country)
## Based on inspections of these plots I will
### exclude the two Sudans - Sudan, South Sudan
### exclude Montenegro, Gaza Strip
### include Andorra, Monaco, San Marino, Liechtenstein despite constant -1 index
### include Czech Republic 1993 - 2010
### include Eritrea 1991-2010, Greenland 1991-2007
### exclude 2008-2010 for countries where it suddenly drops to -1
## Per-country mean balance index for 1970-2007 (column 1) and 2008-2010
## (column 2). mean() is called without na.rm, so a missing index value in a
## period makes that period's mean NA and the country is then not flagged.
Consum.balance.BA2006.df <- do.call(
  rbind,
  by(energy.df, energy.df[, "Country"], function(x) {
    c(
      mean(x[x[, "y"] %in% 1970:2007, "Consum.balance"]),
      mean(x[x[, "y"] %in% 2008:2010, "Consum.balance"])
    )
  })
)
## Countries whose mean index is above -1 up to 2007 and exactly -1 afterwards.
## The row names are indexed directly: subsetting the matrix first would
## collapse a single matching row to a plain vector and lose its row name.
Remove.0810.vec <- rownames(Consum.balance.BA2006.df)[
  which(Consum.balance.BA2006.df[, 1] > -1 & Consum.balance.BA2006.df[, 2] == -1)
]
Remove.country.vec <- c("Sudan", "South Sudan", "Montenegro", "Gaza Strip")
From.1991.vec <- c("Eritrea", "Greenland")
From.1993.vec <- c("Czech Republic")
## Every filter below uses a logical mask rather than -which(...): when no row
## matches, -which(...) is an empty index and would select zero rows, wiping
## the data set instead of leaving it unchanged.
### drop 2008-2010 (>= 2008, so that 2008 itself is removed as well) for the
### countries flagged above
energy.df <- energy.df[
  !(energy.df[, "Country"] %in% Remove.0810.vec & energy.df[, "y"] >= 2008),
]
### drop the excluded countries entirely
energy.df <- energy.df[!(energy.df[, "Country"] %in% Remove.country.vec), ]
### drop the years before 1991 for Eritrea and Greenland
### NOTE(review): the note above gives Greenland as 1991-2007; its later years
### are only removed if Greenland is among the flagged countries - confirm.
energy.df <- energy.df[
  !(energy.df[, "Country"] %in% From.1991.vec & energy.df[, "y"] < 1991),
]
### drop the years before 1993 for the Czech Republic
energy.df <- energy.df[
  !(energy.df[, "Country"] %in% From.1993.vec & energy.df[, "y"] < 1993),
]
### taking a look at the fully self-sufficient countries
### (number of countries with a balance index of exactly 1 in any year)
with(energy.df, length(unique(Country[which(Consum.balance == 1)])))
## [1] 0
#### No countries are fully self-sufficient with regard to energy consumption
### taking a look at the perfectly balanced countries
### (number of countries with a balance index of exactly 0 in any year)
with(energy.df, length(unique(Country[which(Consum.balance == 0)])))
## [1] 1
#### According to the recorded output, one country has at least one entry that is perfectly balanced (import == extraction) with regard to energy consumption