11 March 2015

Exploring the structure of national consumption

This entry is a direct continuation of my:

  1. entry
  2. entry
  3. entry
  4. entry
  5. entry
  6. entry

The script organized for further exploration following work on March 10

#######################################

# LOADING PACKAGES 
#######################################

# LOADING PACKAGES 
library(ggplot2);library(Hmisc)

#######################################

#  DEFINING FUNCTIONS
#######################################

#  DEFINING FUNCTIONS

##### function to prepare data frame withs annual time series for summary analysis
prepare.dframe.fct<-function(i){
### cleaning data frame
i<-i[which(as.character(i$Country)!="Former USSR"),]#!(df1$id %in% idNums1)
i<-i[-which( i[,"y"] %in% c(1991,2000,2011)),]
i <- i[which(i[,"y"] < 2008), ]
i<-i[ which( i[,"Country"] %in% setdiff( i[,"Country"], Remove.country.vec) ), ]
i<-i[ -which( i[,"Country"] %in% From.1991.vec &  i[,"y"] < 1991), ]
i<-i[ -which( i[,"Country"] == From.1993.vec &  i[,"y"] < 1993), ]

### calculating structure of consumption
#### Territorial emissions as extraction
i[,"Extraction"]<-i[,"TerritorialEmissions"]
#### consumed resources that come from domestic extraction
i[,"Consum.extract"]<- (i[,"Extraction"]) - 
    (i[,"Exports"] * ((i[,"Extraction"]) /
       (i[,"Extraction"] + i[,"Imports"])))
#### consumed resources that come from imports
i[,"Consum.import"]<- (i[,"Imports"]) - 
    (i[,"Exports"] * ((i[,"Imports"]) /
       (i[,"Extraction"] + i[,"Imports"])))
#### consumption balance index
i[,"Consum.balance"]<-(i[,"Consum.extract"]-i[,"Consum.import"])/i[,"Consumption"]
i<-i[which(abs(i[,"Consum.balance"])<1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted or imported)


### associating population size and gdp data with data frame - then calculating basic metrics for later analysis
#### merging the gdp and population size data onto the energy consumption data frame
i<-merge(i,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)

i <- i[order(i[,"Country"],i[,"y"]),]

i[,"mean.gdp"]<-unlist(tapply(i[,"GDP"],i[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
i[,"mean.val"]<-unlist(tapply(i[,"val"],i[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
i[,"mean.gdp.per.cap"]<-i[,"mean.gdp"]/i[,"mean.val"]
i[,"log10.mean.gdp.per.cap"]<-log10(i[,"mean.gdp.per.cap"])

val.membership.df<-data.frame("mean.val"=unique(i[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))

i<-merge(i,val.membership.df,by="mean.val",all.x=TRUE)


### Calculating mean consumption balance
### mean consumption.balance index
i <- i[order(i[,"Country"],i[,"y"]),]
i[,"Consum.balance.avg"]<-unlist(by(i, i[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))

### Calculating trend in consumption balance
i <- i[order(i[,"Country"],i[,"y"]),]
i[,"ConsumBalance.y.trend"]<-unlist(by(i,i[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))

return(i)
}

##### function to apply 3d-plane and scatter plots to multiple summary data frames
PerspPlot.fct<-function(i){
  
attach(i)
model <- lm(ConsumBalance.y.trend ~ log10.mean.gdp.per.cap + Consum.balance.avg)
x <-range(log10.mean.gdp.per.cap)
x <- seq(x[1], x[2], length.out=50)    
y <- range(Consum.balance.avg)
y <- seq(y[1], y[2], length.out=50)


z <- outer(x,y, 
           function(log10.mean.gdp.per.cap,Consum.balance.avg)
                     predict(model, data.frame(log10.mean.gdp.per.cap,Consum.balance.avg)))

p <- persp(x,y,z, theta=120, phi=10, 
           col="lightblue",expand = 0.5,shade = 0.2,
           xlab="log gdp/cap", ylab="cons avg", zlab="cons trend",main=NAME[1])
#Project 3d points to 2d, so you can use points and segment:

obs <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg, ConsumBalance.y.trend, p)
pred <- trans3d(log10.mean.gdp.per.cap, Consum.balance.avg,fitted(model),p)
points(obs, col="green",pch=16,cex=0.5)
#segments(obs$x, obs$y, pred$x, pred$y,col="white",lwd=0.25)


detach(i)

rm("obs","pred","model","x","y","z","p")

}

#######################################

# READING IN DATA
#######################################

# READING IN DATA

## SETTING DIRECTORY FOR EORA DATA ON LOCAL HARD DRIVE
wd<-"G:/Documents/PostDocKVA/Data/Eora" ### data directory
setwd(wd)

### MATERIAL USE DATA - ENERGY DATASET
energy.df<-read.csv("TradeBalance_I-ENERGY.csv",header=TRUE)
### Reading in .csv file with annual gdp and population sizes
gdppop.df<-read.csv("gdppop.csv",header=TRUE,skip=1) #skipping the first line which includes a description of the file
### Reading in .csv file with various regional memberships
region.df<-read.csv("regionmembership.csv",header=T,skip=1) #skipping the first line which includes a description of the file
### Create continent data frame
continent.df<-region.df[which(region.df[,"region_type"]=="Continents"),c("region_name","name")]


###     READING IN Wiedmann FOOTPRINT DATASETS
#### SETTING DIRECTORY FOR Wiedmann - material footprint - EORA DATA ON LOCAL HARD DRIVE
setwd(paste(wd,"/Wiedmann",sep=""))
#### I will read in the following files
#####"TradeBalance_CONSTMA.csv","TradeBalance_ORES.csv","TradeBalance_FFUEL.csv","TradeBalance_BIOMASS.csv","TradeBalance_I-CMFA-TOT.csv"
dframes.list<-list() ### We will read the files we are interested in into a list
#### FOSSIL FUELS
dframes.list[["FFUEL"]]<-read.csv("TradeBalance_FFUEL.csv",header=T)
dframes.list[["FFUEL"]][,"NAME"]<-"FFUEL"
#### BIOMASS
dframes.list[["BIOMASS"]]<-read.csv("TradeBalance_BIOMASS.csv",header=T)
dframes.list[["BIOMASS"]][,"NAME"]<-"BIOMASS"
#### CONSTRUNCTION MATERIALS
dframes.list[["CONSTMA"]]<-read.csv("TradeBalance_CONSTMA.csv",header=T)
dframes.list[["CONSTMA"]][,"NAME"]<-"CONSTMA"
#### ORES
dframes.list[["ORES"]]<-read.csv("TradeBalance_ORES.csv" ,header=T)
dframes.list[["ORES"]][,"NAME"]<-"ORES"
#### ALL MATERIALS - FOSSIL FUELS, BIOMASS, CONSTRUCTION MATERIALS, ORES
dframes.list[["TOTAL"]]<-read.csv("TradeBalance_I-CMFA-TOT.csv",header=T)
dframes.list[["TOTAL"]][,"NAME"]<-"TOTAL"

#######################################

# Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS
######################################


# Calculating CONSUMPTION BALANCE with respect to EXTRACTION and IMPORTS

## two ways of estimating domestic extraction

energy.df[,"Extraction"]<- energy.df[,"TerritorialEmissions"] #1

#energy.df[,"Extraction"]<- (energy.df[,"TerritorialEmissions"] + energy.df[,"DirectEmissions"]) #2 - These two categories both refer to domestically extracted resources, i thik. Check up with Wiedmann dataset.


### consumed resources that come from domestic extraction

##### Subtracting exports from imports and domestic extraction in proportion to their size.

energy.df[,"Consum.extract"]<- (energy.df[,"Extraction"]) - 
    (energy.df[,"Exports"] * ((energy.df[,"Extraction"]) /
       (energy.df[,"Extraction"] + energy.df[,"Imports"]))) #### consum.extract <- extraction - (export * extraction/(import+extraction))


### consumed resources that come from imports

energy.df[,"Consum.import"]<- (energy.df[,"Imports"]) - 
    (energy.df[,"Exports"] * ((energy.df[,"Imports"]) /
       (energy.df[,"Extraction"] + energy.df[,"Imports"]))) #### consum.import <- import - (export * import/(import+extraction))


### consumption balance index

energy.df[,"Consum.balance"]<-(energy.df[,"Consum.extract"]-energy.df[,"Consum.import"])/energy.df[,"Consumption"]
energy.df<-energy.df[-which(energy.df[,"Consum.balance"]>1),] ## removing entries that are above 1 (i.e. all resources being domestically extracted)

#######################################

#CLEANING AND PREPARATION OF DATASET energy footprint data frame
###################################### 

## CLEANING AND PREPARATION OF DATASET energy footprint data frame

## REMOVING NEGATIVE AND ZERO CONSUMPTION ENTRIES
energy.df<-energy.df[which(energy.df[,"Consumption"]>0),]

energy.df<-energy.df[-which( energy.df[,"y"] %in% c(1991,2000,2011)),] # ecxluding the years 1991, 2000, 2011 with general large annual fluctuations 


Consum.balance.BA2006.df<-do.call(rbind,by(energy.df,energy.df[,"Country"],function(x) c(mean(x[which(x[,"y"]%in%c(1970:2007)),"Consum.balance"]),
                                                mean(x[which(x[,"y"]%in%c(2008:2010)),"Consum.balance"])))
        )

Remove.0810.vec<-rownames(Consum.balance.BA2006.df[which(Consum.balance.BA2006.df[,1]>-1 & Consum.balance.BA2006.df[,2]==-1),]) # Exclude 2008-2010 for countries where it suddently drops to -1

Remove.country.vec<-c("Sudan","South Sudan","Montenegro","Gaza Strip","Former USSR","Statistical Discrepancies") ### exclude the two sudan's - Sudan, South Sudan - exclude Montenegro, Gaza Strip

### include Andorra, Monaco, San Marino, Liechtenstein despite constant -1 index

From.1991.vec<-c("Eritrea","Greenland")  ## Eritrea 1991-2010, Greenland 1991-2007
From.1993.vec<-c("Czech Republic")  ## Czech Repulibc 1993-2010

energy.df<-energy.df[-which( energy.df[,"Country"] %in% Remove.0810.vec & energy.df[,"y"] > 2007 ), ]
energy.df<-energy.df[ which( energy.df[,"Country"] %in% setdiff( energy.df[,"Country"], Remove.country.vec) ), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] %in% From.1991.vec &  energy.df[,"y"] < 1991), ]
energy.df<-energy.df[ -which( energy.df[,"Country"] == From.1993.vec &  energy.df[,"y"] < 1993), ]


### We still see a lot of strange dynamics in 2008 possibly related to the financial crisis - so for this first analysis let's onlylook at years up to that time.

energy.df <- energy.df[which(energy.df[,"y"] < 2008), ]

#######################################

#calculate first order moments (means)
#######################################

#      with the dataset cleaned up, we can now calculate first order moments (means)

### merging the gdp and population size data onto the energy consumption data frame
energy.df<-merge(energy.df,gdppop.df,by=c("CountryA3","y","Country"),all.x=TRUE)

### calculating mean gdp and population size
energy.df<-energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"mean.gdp"]<-unlist(tapply(energy.df[,"GDP"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.val"]<-unlist(tapply(energy.df[,"val"],energy.df[,"Country"],function(x) rep(mean(x,na.rm=TRUE),length(x))))
energy.df[,"mean.gdp.per.cap"]<-energy.df[,"mean.gdp"]/energy.df[,"mean.val"]
energy.df[,"log10.mean.gdp.per.cap"]<-log10(energy.df[,"mean.gdp.per.cap"])

### country membership of grouped population size
val.membership.df<-data.frame("mean.val"=unique(energy.df[,"mean.val"]),"val.membership"=NA)
val.membership.df[,"val.membership"]<-as.numeric(cut2(val.membership.df$mean.val,m=20))
energy.df<-merge(energy.df,val.membership.df,by="mean.val",all.x=TRUE)

### mean consumption.balance index
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"Consum.balance.avg"]<-unlist(by(energy.df, energy.df[,"Country"], function(x) rep(mean(x[,"Consum.balance"]),nrow(x))))

### trend in energy consumption balance
energy.df <- energy.df[order(energy.df[,"Country"],energy.df[,"y"]),]
energy.df[,"ConsumBalance.y.trend"]<-unlist(by(energy.df,energy.df[,"Country"],function(x) rep(coef(lm(x[,"Consum.balance"]~x[,"y"]))[2],nrow(x))))

#### consumption avg - data frame
energy.ConsumAvg.df<-unique(energy.df[,c("Country","Consum.balance.avg","mean.val","mean.gdp")])
#### consumption balance trend - data frame
energy.consumtrend.df<-unique(energy.df[,c("Consum.balance.avg","mean.gdp.per.cap","ConsumBalance.y.trend","log10.mean.gdp.per.cap"),])

#######################################

# prepare wiedmann datasets for analysis and add energy dataset to their list
#######################################

# prepare wiedmann datasets for analysis

## apply the data frame preparation function
dframes.list<-lapply(dframes.list,prepare.dframe.fct)

## include long-term energy footprint data frame
dframes.list[["ENERGY"]]<-energy.df
dframes.list[["ENERGY"]][,"NAME"]<-"ENERGY"

#######################################

# summarize wiedmann and energy footprint datasets
#######################################

# prepare wiedmann datasets for analysis

## Get the calculated averages and trends for each data frame
ConsumTrend.df.list<-lapply(dframes.list,function(x) unique(x[,c("Country","Consum.balance.avg","ConsumBalance.y.trend","mean.val","mean.gdp","mean.gdp.per.cap","log10.mean.gdp.per.cap","NAME")]))

                            
## Combine summary data frames in the list into one big dataframe
ConsumTrend.df<-do.call(rbind,ConsumTrend.df.list)

### associate continent membership with ConsumTrend.df
ConsumTrend.df<-merge(ConsumTrend.df,continent.df,by.x="Country",by.y="name",all.z=TRUE)

#### create new grouping of continents (lumping North and South America into the Americas and Australia and Asia into Australasia)
ConsumTrend.df[,"new_region_fac"] <- factor(ConsumTrend.df[,"region_name"])
levels(ConsumTrend.df[,"new_region_fac"]) <- c("Africa","Australasia","Australasia","Europe","Americas","Americas")

#######################################

Consistency of relationships among resource groups

# Relationship of consumption balance trend wtih gdp/cap and mean consumption balance
par(mfrow=c(3,2))
plots<-lapply(ConsumTrend.df.list,PerspPlot.fct)

# Let's just look at log gdp per cap to visualize the shared realtionship in a more graphically simple way.

# Ores is still the resource (along with ffuels) that diverges mostly from the hypothesized shared relationship.

ggplot(ConsumTrend.df,aes(y=ConsumBalance.y.trend,x=log10.mean.gdp.per.cap)) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 gdp/cap") + geom_smooth() + facet_wrap(~NAME, scales="free_y") + xlim(c(-1,2.1))# + ylim(c(-1,1)) 

# How about the average consumption balance?

### Fossil fuels and ores don't follow the hypothesis

ggplot(ConsumTrend.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=1) + ylab("Consumption balance") + xlab("log10 Population size") + geom_smooth() + facet_wrap(~NAME) + ylim(c(-1,1))


Consistency of relationships among continents

ggplot(ConsumTrend.df,aes(y=ConsumBalance.y.trend,x=log10.mean.gdp.per.cap)) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 gdp/cap") + geom_smooth(method="lm") + facet_grid(NAME ~ new_region_fac, scales="free") + xlim(c(-1,2.1)) + ylim(c(-0.03,0.03))

ggplot(ConsumTrend.df,aes(y=Consum.balance.avg,x=log10(mean.val))) + geom_point(color="grey50",size=1) + ylab("Consumption balance trend") + xlab("log10 population size") + geom_smooth(method="lm") + facet_grid(NAME ~ new_region_fac, scales="free") +  ylim(c(-1,1))

Great to see that ores and fossil fuels break the relationship with log population size in Africa, Australasia and Europe. Obvisouly something else is going on. Endowments or area effects I think. Next time…



blog comments powered by Disqus