# 02 July 2015
# Macro-socio-ecology-variables






# 0.0.1 UNIQUE VALUES OF SOCIAL VARIABLES

library(ggplot2);library(reshape);library(Hmisc)
#wd<-"G:/Documents/PostDocKVA/Labbook/projects/macro-socio-ecology"
#QoG.wd<-"G:/Documents/PostDocKVA/Data/QOG"
#WB.poverty.wd<-"G:/Documents/PostDocKVA/Data/WorldBank/PovertyEquityDB"
#eora.wd<-"G:/Documents/PostDocKVA/Data/Eora" ### eora data directory

# Project and data directories. These are absolute, machine-specific paths.
# QoG.wd and WB.poverty.wd are later tested against the `dir` column of
# social_variables.csv (via `data.dirs`), so their spelling must match the
# entries in that file exactly.
wd <- "G:\\Documents\\PostDocKVA\\Labbook\\projects\\macro-socio-ecology"
QoG.wd <- "G:\\Documents\\PostDocKVA\\Data\\QOG"
WB.poverty.wd <- "G:\\Documents\\PostDocKVA\\Data\\WorldBank\\PovertyEquityDB"
eora.wd <- "G:\\Documents\\PostDocKVA\\Data\\Eora" ### eora data directory


### Reading in .csv file with annual gdp and population sizes
# The Eora files are addressed with file.path() rather than a temporary
# setwd(eora.wd); the working directory is set once, to `wd`, below.
# skip = 1 drops the first line, which holds a description of the file.
gdppop.df <- read.csv(file.path(eora.wd, "gdppop.csv"), header = TRUE, skip = 1)
### Reading in .csv file with various regional memberships
region.df <- read.csv(file.path(eora.wd, "regionmembership.csv"), header = TRUE, skip = 1)
# Split the membership table by region type. %in% (unlike ==) never yields NA,
# so rows with a missing region_type are simply left out.
continent.df <- region.df[region.df[, "region_type"] %in% "Continents", c("region_name", "name", "a3")]
GDP.df <- region.df[region.df[, "region_type"] %in% "GDP Regions", c("region_name", "name", "a3")]

#ConsumTrend.df<-merge(ConsumTrend.df,continent.df,by.x="Country",by.y="name",all.z=TRUE)

# First and last year (both inclusive) kept when the datasets are filtered.
year.min <- 1992
year.max <- 2008

setwd(wd)

# Variable catalogue; empty cells are read as NA and rows without a varcode
# are discarded.
var.df <- read.csv("social_variables.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")
var.df <- var.df[!is.na(var.df[, "varcode"]), ]

# Distinct source directories / files referenced by the catalogue.
data.dirs <- unique(var.df$dir)
data.files <- unique(var.df$file)

## reading in QoG data
# Runs only when the QoG directory is listed in the variable catalogue.
# Produces QoG.basts.df (raw file), QoG.var.df (catalogue rows for QoG),
# QoG.sel.vars (selected variable codes) and QoG.sel.df (identifier columns plus
# the selected variables, rescaled and restricted to year.min..year.max).
# Side effect: the working directory is left at QoG.wd.
if (QoG.wd %in% data.dirs) {

  setwd(QoG.wd)
  QoG.basts.df <- read.csv("qog_bas_ts_jan15.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")

  QoG.var.df <- var.df[which(var.df$dataset == "QoG"), ]
  QoG.sel.vars <- unique(QoG.var.df[, "varcode"])

  QoG.sel.df <- QoG.basts.df[, c("ccode", "cname", "year", "ccodealp", "cname_year", "ccodealp_year",
                                 "ccodecow", "ccodewb", "version", QoG.sel.vars)]

  #c("ccode","cname","year","ccodealp","cname_year","ccodealp_year","ccodecow","ccodewb","version",QoG.sel.vars)[c("ccode","cname","year","ccodealp","cname_year","ccodealp_year","ccodecow","ccodewb","version",QoG.sel.vars)%in%names(QoG.basts.df)==FALSE]

  # Rescale every selected variable by its catalogue multiplier.
  # seq_along() keeps the loop from running when no variable is selected.
  for (i in seq_along(QoG.sel.vars)) {

    # match() returns exactly one index (the first catalogue row for this
    # varcode) or NA, so the test below is always a valid scalar condition even
    # if a varcode is listed more than once in the catalogue.
    row.i <- match(QoG.sel.vars[i], QoG.var.df[, "varcode"])

    if (!is.na(row.i)) {
      # NOTE: a missing (NA) multiplier turns the whole column into NA.
      QoG.sel.df[, QoG.sel.vars[i]] <- QoG.sel.df[, QoG.sel.vars[i]] * QoG.var.df[row.i, "multiplier"]
    }

  }

  # Restrict to the study period once, after all columns have been rescaled.
  QoG.sel.df <- QoG.sel.df[which(QoG.sel.df$year %in% c(year.min:year.max)), ]

}

## reading in World Bank poverty data
# Runs only when the World Bank poverty directory is listed in the variable
# catalogue. Produces WBpov.df (raw file), WBpov.var.df (catalogue rows for the
# "Wbpoverty" dataset), WBpov.sel.vars (selected variable codes) and
# WBpov.sel.df (country/year columns plus the selected variables, rescaled and
# restricted to year.min..year.max).
# Side effect: the working directory is left at WB.poverty.wd.
if (WB.poverty.wd %in% data.dirs) {

  setwd(WB.poverty.wd)
  WBpov.df <- read.csv("Poverty-Data.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")

  WBpov.var.df <- var.df[which(var.df$dataset == "Wbpoverty"), ]
  WBpov.sel.vars <- unique(WBpov.var.df[, "varcode"])

  WBpov.sel.df <- WBpov.df[, c("Country.Name", "Country.Code", "Year", WBpov.sel.vars)]

  # Rescale every selected variable by its catalogue multiplier.
  # seq_along() keeps the loop from running when no variable is selected.
  for (i in seq_along(WBpov.sel.vars)) {

    # match() returns exactly one index (the first catalogue row for this
    # varcode) or NA, so the test below is always a valid scalar condition even
    # if a varcode is listed more than once in the catalogue.
    row.i <- match(WBpov.sel.vars[i], WBpov.var.df[, "varcode"])

    if (!is.na(row.i)) {
      # NOTE: a missing (NA) multiplier turns the whole column into NA.
      WBpov.sel.df[, WBpov.sel.vars[i]] <- WBpov.sel.df[, WBpov.sel.vars[i]] * WBpov.var.df[row.i, "multiplier"]
    }

  }

  # Restrict to the study period.
  WBpov.sel.df <- WBpov.sel.df[which(WBpov.sel.df$Year %in% c(year.min:year.max)), ]

}


### merging QoG and WB dataset
# When both datasets were loaded, QoG.sel.df becomes the full outer join of the
# two on (country alpha code, year) and sel.vars lists the variables of both.
# && is used because `if` needs a single TRUE/FALSE, not an element-wise result.
if (WB.poverty.wd %in% data.dirs && QoG.wd %in% data.dirs) {
  QoG.sel.df <- merge(QoG.sel.df, WBpov.sel.df,
                      by.x = c("ccodealp", "year"), by.y = c("Country.Code", "Year"),
                      all = TRUE)
  sel.vars <- c(QoG.sel.vars, WBpov.sel.vars)
} else if (QoG.wd %in% data.dirs) {
  # Only QoG is available: the summaries below still need sel.vars defined.
  sel.vars <- QoG.sel.vars
}


### CALCULATING WITHIN COUNTRY COEFFICIENT OF VARIATION (|sd / mean|)
# For every selected variable and every country code, computes the absolute
# coefficient of variation over the rows of QoG.sel.df, reshapes the result to
# long format, attaches the catalogue information from var.df and plots the
# distribution per variable, faceted by the `social` column.

country.df <- data.frame("country" = sort(unique(QoG.sel.df$ccodealp)))
#country.df[,paste(QoG.sel.vars,"mean",sep=".")]<-NA
#country.df[,paste(QoG.sel.vars,"median",sep=".")]<-NA


QoG.sel.df <- QoG.sel.df[order(QoG.sel.df$ccodealp), ]

# apply() walks over the variable columns; tapply() groups each column by
# country code. The result has one row per country and one column per variable.
QoG.sd <- as.data.frame(apply(QoG.sel.df[, sel.vars], 2, function(x, y = QoG.sel.df$ccodealp) {
  tapply(x, y, function(z) abs(sd(z, na.rm = TRUE) / mean(z, na.rm = TRUE)))
}))

#names(QoG.sd)<-paste(names(QoG.sd),"sd",sep=".")

# Rows are bound by position: both sides are ordered by sorted country code.
country.df <- as.data.frame(cbind(country.df, QoG.sd))
#country.df[,paste(sel.vars,"sd",sep=".")]<-QoG.sd

# Long format: one row per (country, variable). The id column is named
# explicitly instead of leaving melt() to guess it.
country.melt.sd.df <- melt(country.df[, c("country", names(QoG.sd))], id.vars = "country")

#country.melt.sd.df[,"variable"]<-gsub(".sd","")
country.melt.sd.df <- merge(country.melt.sd.df, var.df, by.x = "variable", by.y = "varcode", all.x = TRUE)

# Drop two variables from the plot with a logical mask. Negative indexing with
# -which(...) would select zero rows whenever neither variable is present.
plot.rows <- !(country.melt.sd.df[, "variable"] %in% c("cam_inclusive", "wbgi_cce"))

#x11()
ggplot(country.melt.sd.df[plot.rows, ], aes(x = variable, y = value)) +
  geom_violin() +
  facet_wrap(~social, scales = "free", ncol = 2) +
  ylab("CV")

####

### CALCULATING WITHIN COUNTRY UNIQUE VALUES
# For every selected variable and every country code, counts the number of
# distinct non-missing values, reshapes the result to long format, attaches the
# catalogue information from var.df and plots the distribution per variable,
# faceted by the `social` column.

country.df <- data.frame("country" = sort(unique(QoG.sel.df$ccodealp)))
#country.df[,paste(sel.vars,"mean",sep=".")]<-NA
#country.df[,paste(sel.vars,"median",sep=".")]<-NA


QoG.sel.df <- QoG.sel.df[order(QoG.sel.df$ccodealp), ]

# unique() has no na.rm argument, so missing values are removed explicitly
# before counting the distinct values.
QoG.unique <- as.data.frame(apply(QoG.sel.df[, sel.vars], 2, function(x, y = QoG.sel.df$ccodealp) {
  tapply(x, y, function(z) length(unique(z[!is.na(z)])))
}))

#names(QoG.sd)<-paste(names(QoG.sd),"sd",sep=".")

# Rows are bound by position: both sides are ordered by sorted country code.
country.df <- as.data.frame(cbind(country.df, QoG.unique))
#country.df[,paste(sel.vars,"sd",sep=".")]<-QoG.sd

# Long format: one row per (country, variable). The id column is named
# explicitly instead of leaving melt() to guess it.
country.melt.unique.df <- melt(country.df[, c("country", names(QoG.unique))], id.vars = "country")

#country.melt.sd.df[,"variable"]<-gsub(".sd","")
country.melt.unique.df <- merge(country.melt.unique.df, var.df, by.x = "variable", by.y = "varcode", all.x = TRUE)

# Drop two variables from the plot with a logical mask. Negative indexing with
# -which(...) would select zero rows whenever neither variable is present.
plot.rows <- !(country.melt.unique.df[, "variable"] %in% c("cam_inclusive", "wbgi_cce"))

#x11()
ggplot(country.melt.unique.df[plot.rows, ], aes(x = variable, y = value)) +
  geom_violin() +
  facet_wrap(~social, scales = "free", ncol = 2) +
  ylab("unique values")

### CALCULATING WITHIN COUNTRY UNIQUE VALUES PER TIME SERIES LENGTH
# Divides the per-country number of distinct values (QoG.unique, computed in the
# previous section) by the per-country number of non-missing observations,
# reshapes the ratio to long format, attaches the catalogue information from
# var.df and draws one horizontal boxplot per variable.

country.df <- data.frame("country" = sort(unique(QoG.sel.df$ccodealp)))

QoG.sel.df <- QoG.sel.df[order(QoG.sel.df$ccodealp), ]

# Number of non-missing observations per country and variable. Built with the
# same apply()/tapply() layout as QoG.unique so the two tables line up
# cell by cell.
QoG.nobs <- as.data.frame(apply(QoG.sel.df[, sel.vars], 2, function(x, y = QoG.sel.df$ccodealp) {
  tapply(x, y, function(z) sum(!is.na(z)))
}))

# Element-wise ratio. A country with no observations for a variable gives
# 0 / 0 = NaN.
#QoG.relunique<-as.data.frame((as.matrix(QoG.unique)-1)/as.matrix(QoG.relunique))
QoG.relunique <- as.data.frame(as.matrix(QoG.unique) / as.matrix(QoG.nobs))



#names(QoG.sd)<-paste(names(QoG.sd),"sd",sep=".")

# Rows are bound by position: both sides are ordered by sorted country code.
country.df <- as.data.frame(cbind(country.df, QoG.relunique))
#country.df[,paste(sel.vars,"sd",sep=".")]<-QoG.sd

# Long format: one row per (country, variable). The id column is named
# explicitly instead of leaving melt() to guess it.
country.melt.relunique.df <- melt(country.df[, c("country", names(QoG.relunique))], id.vars = "country")

#country.melt.sd.df[,"variable"]<-gsub(".sd","")
country.melt.relunique.df <- merge(country.melt.relunique.df, var.df, by.x = "variable", by.y = "varcode", all.x = TRUE)

# Drop two variables from the plot with a logical mask. Negative indexing with
# -which(...) would select zero rows whenever neither variable is present.
plot.rows <- !(country.melt.relunique.df[, "variable"] %in% c("cam_inclusive", "wbgi_cce"))

#x11()
ggplot(country.melt.relunique.df[plot.rows, ],
       aes(x = paste(social, variable), y = value)) +
  geom_boxplot() +
  ylab("unique value ratio") +
  coord_flip() #+facet_wrap(~social,ncol=2)






# 0.0.2 MEDIAN VALUES IN FIRST AND SECOND TIME PERIOD

# Label every row with its time period: "a" for years before 2000, "b" for
# 2000 and later. A missing year gives a missing period, because ifelse()
# returns NA where its test is NA. No separate NA pre-fill or second range
# test is needed.
QoG.sel.df[, "period"] <- ifelse(QoG.sel.df[, "year"] < 2000, "a", "b")


QoG.sel.df<-QoG.sel