2016-03-03 77 views
1

我有將嵌套列表轉換爲data.frame的問題。將嵌套列表轉換爲R中的data.frame

首先,我從數據API下載了JSON格式的數據集:

# Query the data API for the identifiers in `oibreq` (defined elsewhere);
# `encode = "json"` sends the request body as JSON.
request2 <- POST(
  url = "https://xxxx",
  add_headers(
    "x-dataapi-key" = "xxxx",
    "content-type" = "application/json"
  ),
  body = list(oib = oibreq),
  encode = "json"
)

# Parse the response body as JSON.
jsonContent2 <- content(request2, type = "application/json")

# Serialise the parsed content back to JSON text (NULL written as JSON null)
# and read that text again with flattening enabled.
json2 <- fromJSON(
  toJSON(jsonContent2, null = "null"),
  flatten = TRUE
)

對象json2具有嵌套列表的形式。這裏有一個數據集:

> sample <- dput(json2) 
structure(
    list(
    datumStanja = list(
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00", 
     "2016-03-02T00:00:00+01:00" 
    ), 
    oib = list(
     "00045103869", 
     "92680516748", 
     "18527887472", 
     "18527887472", 
     "18527887472", 
     "18527887472", 
     "00045103869", 
     "00045103869", 
     "18527887472", 
     "92680516748" 
    ), 
    iban = list(
     "HR0424840081101570980", 
     "HR8623400091110462926", 
     "HR9123400091110714260", 
     "HR5124850031100201015", 
     "HR4224910051100006698", 
     "HR7524810001100101268", 
     "HR8225000091101167416", 
     "HR3223400091110156505", 
     "HR6323400091110193874", 
     "HR4223300031100429609" 
    ), 
    blokada = list(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
        FALSE, FALSE), 
    vbdi = list(
     "2484008", 
     "2340009", 
     "2340009", 
     "2485003", 
     "2491005", 
     "2481000", 
     "2500009", 
     "2340009", 
     "2340009", 
     "2330003" 
    ), 
    brojRacuna = list(
     "1101570980", 
     "1110462926", 
     "1110714260", 
     "1100201015", 
     "1100006698", 
     "1100101268", 
     "1101167416", 
     "1110156505", 
     "1110193874", 
     "1100429609" 
    ), 
    banka = list(
     "RAIFFEISENBANK AUSTRIA d.d.", 
     "PRIVREDNA BANKA ZAGREB d.d.", 
     "PRIVREDNA BANKA ZAGREB d.d.", 
     "CROATIA BANKA d.d.", 
     "CREDO BANKA d.d.", 
     "KREDITNA BANKA ZAGREB d.d.", 
     "HYPO ALPE-ADRIA-BANK d.d.", 
     "PRIVREDNA BANKA ZAGREB d.d.", 
     "PRIVREDNA BANKA ZAGREB d.d.", 
     "SOCIETE GENERALE - SPLITSKA BANKA d.d." 
    ), 
    datumOtvaranja = list(
     "2003-02-19T00:00:00+01:00", 
     "2011-02-08T00:00:00+01:00", 
     "2015-03-30T00:00:00+02:00", 
     "2002-02-21T00:00:00+01:00", 
     "2002-04-16T00:00:00+02:00", 
     "2002-06-24T00:00:00+02:00", 
     "2004-07-19T00:00:00+02:00", 
     "2004-09-08T00:00:00+02:00", 
     "2005-09-28T00:00:00+02:00", 
     "2009-12-21T00:00:00+01:00" 
    ), 
    datumZatvaranja = list(
     NULL, 
     NULL, 
     NULL, 
     "2009-11-06T00:00:00+01:00", 
     "2009-02-17T00:00:00+01:00", 
     "2009-03-18T00:00:00+01:00", 
     "2008-08-14T00:00:00+02:00", 
     "2009-07-13T00:00:00+02:00", 
     "2013-09-18T00:00:00+02:00", 
     "2013-07-09T00:00:00+02:00" 
    ), 
    povijestBlokada = list(
     structure(
     list(
      pocetak = list(
      "2011-08-04T00:00:00+02:00", 
      "2011-09-06T00:00:00+02:00", 
      "2011-11-25T00:00:00+01:00", 
      "2011-12-30T00:00:00+01:00", 
      "2012-02-20T00:00:00+01:00", 
      "2012-03-23T00:00:00+01:00", 
      "2012-05-21T00:00:00+02:00" 
     ), 
      kraj = list(
      "2011-08-10T00:00:00+02:00", 
      "2011-09-13T00:00:00+02:00", 
      "2011-12-28T00:00:00+01:00", 
      "2012-01-16T00:00:00+01:00", 
      "2012-03-16T00:00:00+01:00", 
      "2012-05-16T00:00:00+02:00", 
      NULL 
     ), 
      brojDana = list(6L, 7L, 33L, 17L, 25L, 54L, 
          1381L) 
     ), 
     .Names = c("pocetak", "kraj", "brojDana"), 
     class = "data.frame", 
     row.names = c(NA, 7L) 
    ), 
     structure(
     list(
      pocetak = list(
      "2012-05-30T00:00:00+02:00", 
      "2012-06-21T00:00:00+02:00", 
      "2012-06-29T00:00:00+02:00", 
      "2012-09-06T00:00:00+02:00", 
      "2014-06-09T00:00:00+02:00" 
     ), 
      kraj = list(
      "2012-06-05T00:00:00+02:00", 
      "2012-06-26T00:00:00+02:00", 
      "2012-07-03T00:00:00+02:00", 
      "2013-03-06T00:00:00+01:00", 
      NULL 
     ), 
      brojDana = list(6L, 5L, 4L, 181L, 632L) 
     ), 
     .Names = c("pocetak", "kraj", 
        "brojDana"), 
     class = "data.frame", 
     row.names = c(NA, 5L) 
    ), 

     structure(
     list(
      pocetak = list("2015-03-31T00:00:00+02:00"), 
      kraj = list("2015-09-30T00:00:00+02:00"), 
      brojDana = list(183L) 
     ), 
     .Names = c("pocetak", "kraj", "brojDana"), 
     class = "data.frame", 
     row.names = 1L 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(), 
     .Names = character(0), 
     row.names = integer(0), 
     class = "data.frame" 
    ), 
     structure(
     list(
      pocetak = list(
      "2012-05-30T00:00:00+02:00", 
      "2012-06-21T00:00:00+02:00", 
      "2012-06-29T00:00:00+02:00", 
      "2012-09-06T00:00:00+02:00" 
     ), 
      kraj = list(
      "2012-06-05T00:00:00+02:00", 
      "2012-06-26T00:00:00+02:00", 
      "2012-07-03T00:00:00+02:00", 
      "2013-03-06T00:00:00+01:00" 
     ), 
      brojDana = list(6L, 
          5L, 4L, 181L) 
     ), 
     .Names = c("pocetak", "kraj", "brojDana"), 
     class = "data.frame", 
     row.names = c(NA, 4L) 
    ) 
    ), 
    isActive = list(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
        FALSE, FALSE) 
), 
    .Names = c(
    "datumStanja", 
    "oib", 
    "iban", 
    "blokada", 
    "vbdi", 
    "brojRacuna", 
    "banka", 
    "datumOtvaranja", 
    "datumZatvaranja", 
    "povijestBlokada", 
    "isActive" 
), 
    class = "data.frame", 
    row.names = c(NA, 
       10L) 
) 

這是以第一行爲例的結構:

str(json2[1,]) 
'data.frame': 1 obs. of 11 variables: 
$ datumStanja :List of 1 
    ..$ : chr "2016-03-02T00:00:00+01:00" 
$ oib   :List of 1 
    ..$ : chr "00045103869" 
$ iban   :List of 1 
    ..$ : chr "HR0424840081101570980" 
$ blokada  :List of 1 
    ..$ : logi TRUE 
$ vbdi   :List of 1 
    ..$ : chr "2484008" 
$ brojRacuna  :List of 1 
    ..$ : chr "1101570980" 
$ banka   :List of 1 
    ..$ : chr "RAIFFEISENBANK AUSTRIA d.d." 
$ datumOtvaranja :List of 1 
    ..$ : chr "2003-02-19T00:00:00+01:00" 
$ datumZatvaranja:List of 1 
    ..$ : NULL 
$ povijestBlokada:List of 1 
    ..$ :'data.frame': 7 obs. of 3 variables: 
    .. ..$ pocetak :List of 7 
    .. .. ..$ : chr "2011-08-04T00:00:00+02:00" 
    .. .. ..$ : chr "2011-09-06T00:00:00+02:00" 
    .. .. ..$ : chr "2011-11-25T00:00:00+01:00" 
    .. .. ..$ : chr "2011-12-30T00:00:00+01:00" 
    .. .. ..$ : chr "2012-02-20T00:00:00+01:00" 
    .. .. ..$ : chr "2012-03-23T00:00:00+01:00" 
    .. .. ..$ : chr "2012-05-21T00:00:00+02:00" 
    .. ..$ kraj :List of 7 
    .. .. ..$ : chr "2011-08-10T00:00:00+02:00" 
    .. .. ..$ : chr "2011-09-13T00:00:00+02:00" 
    .. .. ..$ : chr "2011-12-28T00:00:00+01:00" 
    .. .. ..$ : chr "2012-01-16T00:00:00+01:00" 
    .. .. ..$ : chr "2012-03-16T00:00:00+01:00" 
    .. .. ..$ : chr "2012-05-16T00:00:00+02:00" 
    .. .. ..$ : NULL 
    .. ..$ brojDana:List of 7 
    .. .. ..$ : int 6 
    .. .. ..$ : int 7 
    .. .. ..$ : int 33 
    .. .. ..$ : int 17 
    .. .. ..$ : int 25 
    .. .. ..$ : int 54 
    .. .. ..$ : int 1381 
$ isActive  :List of 1 
    ..$ : logi TRUE 

正如你所看到的,變量「povijestBlokada」內部還包含列表。我的目標是將此嵌套列表對象轉換爲data.frame,每種不同類型的值各佔一列。

我嘗試過使用data.tree包,但我無法使用as.Node函數轉換它。你有什麼建議嗎?

我補充了原始JSON數據(我用的是dput,肯定有更好的辦法,但我不知道該怎麼做):

structure("[{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR0424840081101570980\"],\"blokada\":[true],\"vbdi\":[\"2484008\"],\"brojRacuna\":[\"1101570980\"],\"banka\":[\"RAIFFEISENBANK AUSTRIA d.d.\"],\"datumOtvaranja\":[\"2003-02-19T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2011-08-04T00:00:00+02:00\"],\"kraj\":[\"2011-08-10T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2011-09-06T00:00:00+02:00\"],\"kraj\":[\"2011-09-13T00:00:00+02:00\"],\"brojDana\":[7]},{\"pocetak\":[\"2011-11-25T00:00:00+01:00\"],\"kraj\":[\"2011-12-28T00:00:00+01:00\"],\"brojDana\":[33]},{\"pocetak\":[\"2011-12-30T00:00:00+01:00\"],\"kraj\":[\"2012-01-16T00:00:00+01:00\"],\"brojDana\":[17]},{\"pocetak\":[\"2012-02-20T00:00:00+01:00\"],\"kraj\":[\"2012-03-16T00:00:00+01:00\"],\"brojDana\":[25]},{\"pocetak\":[\"2012-03-23T00:00:00+01:00\"],\"kraj\":[\"2012-05-16T00:00:00+02:00\"],\"brojDana\":[54]},{\"pocetak\":[\"2012-05-21T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[1389]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR8623400091110462926\"],\"blokada\":[true],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110462926\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB 
d.d.\"],\"datumOtvaranja\":[\"2011-02-08T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]},{\"pocetak\":[\"2014-06-09T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[640]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR9123400091110714260\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110714260\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2015-03-30T00:00:00+02:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2015-03-31T00:00:00+02:00\"],\"kraj\":[\"2015-09-30T00:00:00+02:00\"],\"brojDana\":[183]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR5124850031100201015\"],\"blokada\":[false],\"vbdi\":[\"2485003\"],\"brojRacuna\":[\"1100201015\"],\"banka\":[\"CROATIA BANKA d.d.\"],\"datumOtvaranja\":[\"2002-02-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2009-11-06T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR4224910051100006698\"],\"blokada\":[false],\"vbdi\":[\"2491005\"],\"brojRacuna\":[\"1100006698\"],\"banka\":[\"CREDO BANKA 
d.d.\"],\"datumOtvaranja\":[\"2002-04-16T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-02-17T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR7524810001100101268\"],\"blokada\":[false],\"vbdi\":[\"2481000\"],\"brojRacuna\":[\"1100101268\"],\"banka\":[\"KREDITNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2002-06-24T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-03-18T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR8225000091101167416\"],\"blokada\":[false],\"vbdi\":[\"2500009\"],\"brojRacuna\":[\"1101167416\"],\"banka\":[\"HYPO ALPE-ADRIA-BANK d.d.\"],\"datumOtvaranja\":[\"2004-07-19T00:00:00+02:00\"],\"datumZatvaranja\":[\"2008-08-14T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR3223400091110156505\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110156505\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2004-09-08T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-07-13T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR6323400091110193874\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110193874\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2005-09-28T00:00:00+02:00\"],\"datumZatvaranja\":[\"2013-09-18T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR4223300031100429609\"],\"blokada\":[false],\"vbdi\":[\"2330003\"],\"brojRacuna\":[\"1100429609\"],\"banka\":[\"SOCIETE GENERALE - SPLITSKA BANKA 
d.d.\"],\"datumOtvaranja\":[\"2009-12-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2013-07-09T00:00:00+02:00\"],\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]}],\"isActive\":[false]}]", class = "json") 
+0

你見過第二次[此帖](http://stackoverflow.com/questions/35444968/read-json-file-into-a-data-frame-without-nested-lists)?開始的數據結構似乎有點不同,但也許有一個答案會指出你在正確的方向。 – A5C1D2H2I1M1N2O1R2T1

+0

ananda mahto,我看到了那個答案。我想要的是類似面板數據的結構:對於嵌套列表中的每一組不同的值,其他columns的值會重複出現。 – Mislav

+0

您能否提供原始json? –

回答

0

我設法用dplyr庫對json2$povijestBlokada進行了unnest

  • 用as_data_frame將列表轉換爲tbl_df
  • 選擇嵌套元素povijestBlokada並對它進行unnest。
  • 問題在於某些列中存在NULL值,不過用"NA"替換它們就可以解決。如果一開始就是NA,我覺得你可以直接unnest
library(dplyr)
library(tidyr) # unnest() is exported by tidyr, not dplyr

# Wrap the parsed API result (`json2`, built earlier) as a tbl_df.
DT <- as_data_frame(json2)

# Keep only the nested history column and expand its per-row data frames
# into ordinary rows.
DT1 <- DT %>%
  select(povijestBlokada) %>%
  unnest()

# Row by row, swap NULL cells for the string "NA"; the result is printed
# here rather than assigned.
DT1 %>%
  rowwise() %>%
  mutate_each(funs(replace(., is.null(.), "NA")))
#> Source: local data frame [17 x 3] 
#> Groups: <by row> 
#> 
#>      pocetak      kraj brojDana 
#>      (chr)      (chr) (chr) 
#> 1 2011-08-04T00:00:00+02:00 2011-08-10T00:00:00+02:00  6 
#> 2 2011-09-06T00:00:00+02:00 2011-09-13T00:00:00+02:00  7 
#> 3 2011-11-25T00:00:00+01:00 2011-12-28T00:00:00+01:00  33 
#> 4 2011-12-30T00:00:00+01:00 2012-01-16T00:00:00+01:00  17 
#> 5 2012-02-20T00:00:00+01:00 2012-03-16T00:00:00+01:00  25 
#> 6 2012-03-23T00:00:00+01:00 2012-05-16T00:00:00+02:00  54 
#> 7 2012-05-21T00:00:00+02:00      NA  1381 
#> 8 2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00  6 
#> 9 2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00  5 
#> 10 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00  4 
#> 11 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00  181 
#> 12 2014-06-09T00:00:00+02:00      NA  632 
#> 13 2015-03-31T00:00:00+02:00 2015-09-30T00:00:00+02:00  183 
#> 14 2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00  6 
#> 15 2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00  5 
#> 16 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00  4 
#> 17 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00  181 
+0

我不確定這是否有幫助。首先,每個元素都是列表內的列表。例如'DT [['brojDana']]'的第一個元素是'[[1]] [[1]] 6'。其次,我希望這個表格成爲「父表格」的一部分,其中其他列的值針對「pocetak」、「kraj」或「brojDana」的每一行重複出現。這似乎是個難題。 – Mislav

相關問題