2013-04-04 54 views
3

我想在R中創建人口金字塔。我知道StackOverflow有很多例子,但我想創建一個也包含人口預測的例子,即按性別劃分每個年齡組的條形圖,按照性別和年齡組劃分線條作爲投影。R中的人口金字塔w投影

這裏你可以看到一個例子:http://geographyblog.eu/wp/the-worlds-population-pyramid-is-changing-shape/

enter image description here

如果有就如何更好地說明了一些建議,這(例如用平滑線),他們也歡迎,但我想說明目前的情況和預測。可在聯合國網站上找到示例數據:http://esa.un.org/wpp/population-pyramids/population-pyramids_absolute.htm

任何幫助將不勝感激。

+2

請注意,這是一個誤導性的圖形,因爲酒吧沒有堆疊,但重疊。如果2050(天藍色)值小於2010(深藍色)值,會發生什麼情況?這是發生在最左邊的酒吧嗎?還是2010年恰好等於2050年? – Spacedman 2013-04-04 21:39:58

回答

4

也許少一點的ad-hoc方法使用ggplot2geom_bargeom_step

的數據可以從包wpp2015被提取(或wpp2012wpp2010wpp2008如果你喜歡較舊版本)。

library("dplyr") 
library("tidyr") 
library("wpp2015") 

#load data in wpp2015 
data(popF) 
data(popM) 
data(popFprojMed) 
data(popMprojMed) 

#combine past and future female population 
df0 <- popF %>% 
    left_join(popFprojMed) %>% 
    mutate(gender = "female") 

#combine past and future male population, add on female population 
df1 <- popM %>% 
    left_join(popMprojMed) %>% 
    mutate(gender = "male") %>% 
    bind_rows(df0) %>% 
    mutate(age = factor(age, levels = unique(age))) 

#stack data for ggplot, filter World population and required years 
df2 <- df1 %>% 
    gather(key = year, value = pop, -country, -country_code, -age, -gender) %>% 
    mutate(pop = pop/1e03) %>% 
    filter(country == "World", year %in% c(1950, 2000, 2050, 2100)) 

#add extra rows and numeric age variable for geom_step used for 2100 
df2 <- df2 %>% 
    mutate(ageno = as.numeric(age) - 0.5) 

df2 <- df2 %>% 
    bind_rows(df2 %>% filter(year==2100, age=="100+") %>% mutate(ageno = ageno + 1)) 

df2 
# Source: local data frame [170 x 7] 
# 
# country country_code age gender year  pop ageno 
#  (fctr)  (int) (fctr) (chr) (chr)  (dbl) (dbl) 
# 1 World   900 0-4 male 1950 171.85124 0.5 
# 2 World   900 5-9 male 1950 137.99242 1.5 
# 3 World   900 10-14 male 1950 133.27428 2.5 
# 4 World   900 15-19 male 1950 121.69274 3.5 
# 5 World   900 20-24 male 1950 112.39438 4.5 
# 6 World   900 25-29 male 1950 96.59408 5.5 
# 7 World   900 30-34 male 1950 83.38595 6.5 
# 8 World   900 35-39 male 1950 80.59671 7.5 
# 9 World   900 40-44 male 1950 73.08263 8.5 
# 10 World   900 45-49 male 1950 63.13648 9.5 
# ..  ...   ... ... ... ...  ... ... 

使用標準ggplot功能,你可以得到類似的東西,從答案here適應:

enter image description here

library("ggplot2") 
ggplot(data = df2, aes(x = age, y = pop, fill = year)) + 
    #bars for all but 2100 
    geom_bar(data = df2 %>% filter(gender == "female", year != 2100) %>% arrange(rev(year)), 
      stat = "identity", 
      position = "identity") + 
    geom_bar(data = df2 %>% filter(gender == "male", year != 2100) %>% arrange(rev(year)), 
      stat = "identity", 
      position = "identity", 
      mapping = aes(y = -pop)) + 
    #steps for 2100 
    geom_step(data = df2 %>% filter(gender == "female", year == 2100), 
      aes(x = ageno)) + 
    geom_step(data = df2 %>% filter(gender == "male", year == 2100), 
      aes(x = ageno, y = -pop)) + 
    coord_flip() + 
    scale_y_continuous(labels = abs) 

注意:你需要做的arrange(rev(year))的酒吧覆蓋。

隨着ggthemes包,你可以非常接近原始的經濟學家的陰謀。

enter image description here

library("ggthemes") 
ggplot(data = df2, aes(x = age, y = pop, fill = year)) + 
    #bars for all but 2100 
    geom_bar(data = df2 %>% filter(gender == "female", year != 2100) %>% arrange(rev(year)), 
      stat = "identity", 
      position = "identity") + 
    geom_bar(data = df2 %>% filter(gender == "male", year != 2100) %>% arrange(rev(year)), 
      stat = "identity", 
      position = "identity", 
      mapping = aes(y = -pop)) + 
    #steps for 2100 
    geom_step(data = df2 %>% filter(gender == "female", year == 2100), 
     aes(x = ageno), size = 1) + 
    geom_step(data = df2 %>% filter(gender == "male", year == 2100), 
     aes(x = ageno, y = -pop), size = 1) + 
    coord_flip() + 
    #extra style shazzaz 
    scale_y_continuous(labels = abs, limits = c(-400, 400), breaks = seq(-400, 400, 100)) + 
    geom_hline(yintercept = 0) + 
    theme_economist(horizontal = FALSE) + 
    scale_fill_economist() + 
    labs(fill = "", x = "", y = "") 

(我相信你可以得到更接近,但我已經在這裏停了)。

2

你可以使用this question(這裏我使用了@ timriffle的回答以及我的答案)的答案輕鬆地做出了一些事情。
第一(從鏈接您提供的)一些數據:

wp <- structure(list(M.1990 = c(325814, 295272, 269351, 265163, 249651, 220027, 196523, 178295, 141789, 115097, 106579, 91763, 77150, 56845, 38053, 25716, 19442), M.2000 = c(319675, 317296, 317072, 290827, 262992, 256378, 241401, 212924, 188905, 169133, 131813, 103162, 90921, 72231, 53449, 32707, 25868), M.2010 = c(328759, 315119, 311456, 312831, 311077, 284258, 255596, 248575, 232217, 202633, 176241, 153494, 114194, 83129, 65266, 43761, 39223), F.1990 = c(308121, 281322, 257432, 254065, 238856, 211943, 188433, 170937, 138358, 112931, 106510, 93425, 82667, 67057, 47679, 37435, 36724), F.2000 = c(298455, 297012, 299757, 277706, 252924, 248127, 233583, 207518, 183646, 165444, 132307, 105429, 96681, 80227, 64956, 45832, 46413), F.2010 = c(307079, 293664, 290598, 293313, 295739, 273379, 247383, 241938, 226914, 201142, 176440, 156283, 121200, 92071, 77990, 56895, 66029)), .Names = c("M.1990", "M.2000", "M.2010", "F.1990", "F.2000", "F.2010"), row.names = c("0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80+"), class = "data.frame") 

wp 
     M.1990 M.2000 M.2010 F.1990 F.2000 F.2010 
0-4 325814 319675 328759 308121 298455 307079 
5-9 295272 317296 315119 281322 297012 293664 
10-14 269351 317072 311456 257432 299757 290598 
15-19 265163 290827 312831 254065 277706 293313 
20-24 249651 262992 311077 238856 252924 295739 
25-29 220027 256378 284258 211943 248127 273379 
30-34 196523 241401 255596 188433 233583 247383 
35-39 178295 212924 248575 170937 207518 241938 
40-44 141789 188905 232217 138358 183646 226914 
45-49 115097 169133 202633 112931 165444 201142 
50-54 106579 131813 176241 106510 132307 176440 
55-59 91763 103162 153494 93425 105429 156283 
60-64 77150 90921 114194 82667 96681 121200 
65-69 56845 72231 83129 67057 80227 92071 
70-74 38053 53449 65266 47679 64956 77990 
75-79 25716 32707 43761 37435 45832 56895 
80+ 19442 25868 39223 36724 46413 66029 

xrange <- range(c(0,wp)) 
yrange <- range(c(0,nrow(wp))) 

然後是繪製部(兩組):

par(mfcol=c(1,2)) 
par(mar=c(5,4,4,0)) 
plot(NA,type="n", main="Men", xlab="", ylab="", xaxs="i", 
    xlim=rev(xrange), ylim=yrange, axes=FALSE, yaxs="i") 
rect(xrange[1],yrange[1],xrange[2],yrange[2], col="cadetblue") 
abline(v=seq(0,xrange[2],by=1e5), col="white") 
# All years with bars you want to represent filled 
# should be entered in reverse order 
polygon(c(0,rep(wp$M.2000,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col="lightblue",border="lightblue") 
polygon(c(0,rep(wp$M.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col="darkblue",border="darkblue") 
# And those you want with just a border, afterwards: 
polygon(c(0,rep(wp$M.2010,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col=NA,border="darkred",lwd=2) 
axis(1, at=c(0,1e5,2e5,3e5), labels=format(c(0,1e5,2e5,3e5),scientific=FALSE)) 
axis(2, at=1:nrow(wp)-0.5,labels=row.names(wp),las=2) 
box() 

par(mar=c(5,0,4,4)) 
plot(NA,type="n", main="Women", xlab="", ylab="", xaxs="i", 
    xlim=xrange, ylim=yrange, axes=FALSE, yaxs="i") 
rect(xrange[1],yrange[1],xrange[2],yrange[2], col="cadetblue") 
abline(v=seq(0,xrange[2],by=1e5), col="white") 
polygon(c(0,rep(wp$F.2000,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col="lightblue",border="lightblue") 
polygon(c(0,rep(wp$F.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col="darkblue",border="darkblue") 
polygon(c(0,rep(wp$F.2010,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col=NA,border="darkred",lwd=2) 
axis(1, at=c(0,1e5,2e5,3e5), labels=format(c(0,1e5,2e5,3e5),scientific=FALSE)) 
axis(4, at=1:nrow(wp)-0.5,labels=row.names(wp),las=2) 
box() 

enter image description here

爲了規避@突出問題Spacedman在他的評論中說,你可以在某些年份使用alpha。

library(scales) 
[...] 
polygon(c(0,rep(wp$M.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col=alpha("darkblue",0.4),border="darkblue") 
[...] 
polygon(c(0,rep(wp$F.1990,each=2), 0), c(0,0,rep(1:nrow(wp),each=2)), 
     col=alpha("darkblue",0.4),border="darkblue") 
[...] 

enter image description here