2015-07-21 65 views
2

我有一個別人替我準備好的數據框,其中有些列顯然是通過某種底層機制組合在一起的。怎樣才能以這種方式對列名進行分組,又該如何把它們重新拆開?也就是說:如何在此數據框中對列名進行分組?

使用 y.1$Address 可以訪問所有以「Address.XXX」開頭的列:

> y.1 
     Address.streetAddress Address.position.latitude Address.position.longitude Address.namedAreas Address.region.municipalityName Address.region.countyName Address.ocean nothing rent floor livingArea 
    19 Västmannagatan 85C     59.34500     18.04370   Vasastan      Stockholm   Stockholms län   2325 4100000 1586 1.0  40.0 
    29   Redargatan 3     59.30279     18.09048 Hammarby Sjöstad      Stockholm   Stockholms län   1570 2800000 2829 4.0  43.5 
    18 Doktor Abelins gata 6     59.31596     18.05454   Södermalm      Stockholm   Stockholms län   1223 4875000 3092 NA  70.0 
    75  Sibeliusgången 34     59.41581     17.91272    Akalla      Stockholm   Stockholms län   NA 1800000 4876 4.0  80.9 
    16  Standarvägen 1     59.27604     18.00459  Gamla Älvsjö      Stockholm   Stockholms län   6360 2950000 3983 1.0  91.0 
    32 Kungsbro Strand 17     59.33027     18.05143  Kungsholmen      Stockholm   Stockholms län   1086 1995000 2017 1.0  25.5 
    54  Pipersgatan 16     59.33057     18.04588  Kungsholmen      Stockholm   Stockholms län   1405 2195000 2105 3.0  27.0 
    22 Alva Myrdals gata 4     59.28650     17.95199 Fruängen-Hägersten      Stockholm   Stockholms län   NA 1995000 2587 3.0  37.0 
    35 Norr Mälarstrand 24     59.32687     18.04522  Kungsholmen      Stockholm   Stockholms län   1437 2195000 910 4.0  23.0 
    4 Beckbrännarbacken 7     59.31487     18.08901   Södermalm      Stockholm   Stockholms län   329 1395000 520 0.5  11.0 
> colnames(y.1)[1] <- "nothing" 
> y.1 
    nothing.streetAddress nothing.position.latitude nothing.position.longitude nothing.namedAreas nothing.region.municipalityName nothing.region.countyName nothing.ocean listPrice rent floor livingArea 
19 Västmannagatan 85C     59.34500     18.04370   Vasastan      Stockholm   Stockholms län   2325 4100000 1586 1.0  40.0 
29   Redargatan 3     59.30279     18.09048 Hammarby Sjöstad      Stockholm   Stockholms län   1570 2800000 2829 4.0  43.5 
18 Doktor Abelins gata 6     59.31596     18.05454   Södermalm      Stockholm   Stockholms län   1223 4875000 3092 NA  70.0 
75  Sibeliusgången 34     59.41581     17.91272    Akalla      Stockholm   Stockholms län   NA 1800000 4876 4.0  80.9 
16  Standarvägen 1     59.27604     18.00459  Gamla Älvsjö      Stockholm   Stockholms län   6360 2950000 3983 1.0  91.0 
32 Kungsbro Strand 17     59.33027     18.05143  Kungsholmen      Stockholm   Stockholms län   1086 1995000 2017 1.0  25.5 
54  Pipersgatan 16     59.33057     18.04588  Kungsholmen      Stockholm   Stockholms län   1405 2195000 2105 3.0  27.0 
22 Alva Myrdals gata 4     59.28650     17.95199 Fruängen-Hägersten      Stockholm   Stockholms län   NA 1995000 2587 3.0  37.0 
35 Norr Mälarstrand 24     59.32687     18.04522  Kungsholmen      Stockholm   Stockholms län   1437 2195000 910 4.0  23.0 
4 Beckbrännarbacken 7     59.31487     18.08901   Södermalm      Stockholm   Stockholms län   329 1395000 520 0.5  11.0 

> dput(y.1) 
structure(list(Address = structure(list(address = structure(list(
    streetAddress = c("Västmannagatan 85C", "Redargatan 3", "Doktor Abelins gata 6", 
    "Sibeliusgången 34", "Standarvägen 1", "Kungsbro Strand 17", 
    "Pipersgatan 16", "Alva Myrdals gata 4", "Norr Mälarstrand 24", 
    "Beckbrännarbacken 7")), .Names = "streetAddress", row.names = c(19L, 
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame"), 
    position = structure(list(latitude = c(59.3449965, 59.3027897, 
    59.3159556, 59.4158109, 59.27603539, 59.33027358, 59.330567, 
    59.28649604, 59.326869, 59.314867), longitude = c(18.0437004, 
    18.0904824, 18.054536, 17.91271847, 18.00459327, 18.05143325, 
    18.045882, 17.95199275, 18.045217, 18.089009)), .Names = c("latitude", 
    "longitude"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L, 
    54L, 22L, 35L, 4L), class = "data.frame"), namedAreas = list(
     "Vasastan", "Hammarby Sjöstad", "Södermalm", "Akalla", 
     "Gamla Älvsjö", "Kungsholmen", "Kungsholmen", "Fruängen-Hägersten", 
     "Kungsholmen", "Södermalm"), region = structure(list(
     municipalityName = c("Stockholm", "Stockholm", "Stockholm", 
     "Stockholm", "Stockholm", "Stockholm", "Stockholm", "Stockholm", 
     "Stockholm", "Stockholm"), countyName = c("Stockholms län", 
     "Stockholms län", "Stockholms län", "Stockholms län", 
     "Stockholms län", "Stockholms län", "Stockholms län", 
     "Stockholms län", "Stockholms län", "Stockholms län")), .Names = c("municipalityName", 
    "countyName"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L, 
    54L, 22L, 35L, 4L), class = "data.frame"), distance = structure(list(
     ocean = c(2325L, 1570L, 1223L, NA, 6360L, 1086L, 1405L, 
     NA, 1437L, 329L)), .Names = "ocean", row.names = c(19L, 
    29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame")), .Names = c("address", 
"position", "namedAreas", "region", "distance"), row.names = c(19L, 
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame"), 
    nothing = c(4100000L, 2800000L, 4875000L, 1800000L, 2950000L, 
    1995000L, 2195000L, 1995000L, 2195000L, 1395000L), rent = c(1586L, 
    2829L, 3092L, 4876L, 3983L, 2017L, 2105L, 2587L, 910L, 520L 
    ), floor = c(1, 4, NA, 4, 1, 1, 3, 3, 4, 0.5), livingArea = c(40, 
    43.5, 70, 80.9, 91, 25.5, 27, 37, 23, 11), source = structure(list(
     name = c("BOSTHLM", "Fastighetsbyrån", "Gripsholms Fastighetsförmedling", 
     "Fastighetsbyrån", "Fastighetsbyrån", "Mäklarhuset", 
     "SkandiaMäklarna", "Svenska Mäklarhuset", "Svensk Fastighetsförmedling", 
     "Svensk Fastighetsförmedling"), id = c(1499L, 1573L, 
     9895524L, 1573L, 1573L, 204L, 1570L, 58L, 713L, 713L), 
     type = c("Broker", "Broker", "Broker", "Broker", "Broker", 
     "Broker", "Broker", "Broker", "Broker", "Broker"), url = c("http://www.bosthlm.se/", 
     "http://www.fastighetsbyran.se/", "http://gripsholms.se/", 
     "http://www.fastighetsbyran.se/", "http://www.fastighetsbyran.se/", 
     "http://www.maklarhuset.se/", "http://www.skandiamaklarna.se/", 
     "http://www.svenskamaklarhuset.se/", "http://www.svenskfast.se/", 
     "http://www.svenskfast.se/")), .Names = c("name", "id", 
    "type", "url"), row.names = c(19L, 29L, 18L, 75L, 16L, 32L, 
    54L, 22L, 35L, 4L), class = "data.frame"), rooms = c(2, 1.5, 
    2.5, 3, 3.5, 1, 1, 2, 1, 1), published = structure(c(16632, 
    16631, 16631, 16629, 16626, 16626, 16626, 16626, 16626, 16626 
    ), class = "Date"), constructionYear = c(NA, 2008L, 1929L, 
    1977L, 1937L, 1934L, 1934L, NA, 1907L, 1929L), objectType = c("Lägenhet", 
    "Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet", 
    "Lägenhet", "Lägenhet", "Lägenhet", "Lägenhet"), booliId = c(1919949L, 
    1893141L, 1896584L, 1898347L, 1917520L, 1918305L, 1918270L, 
    1918145L, 1918063L, 1918049L), soldDate = structure(c(16635, 
    16633, 16636, 16630, 16636, 16632, 16632, 16635, 16632, 16636 
    ), class = "Date"), soldPrice = c(4100000L, 2950000L, 5175000L, 
    1800000L, 4200000L, 2510000L, 2610000L, 2500000L, 2950000L, 
    1850000L), url = c("https://www.booli.se/bostad/lagenhet/vasastan/vastmannagatan+85c/1919949", 
    "https://www.booli.se/bostad/lagenhet/hammarby+sjostad/redargatan+3/1893141", 
    "https://www.booli.se/bostad/lagenhet/sodermalm/doktor+abelins+gata+6/1896584", 
    "https://www.booli.se/bostad/lagenhet/akalla/sibeliusgangen+34/1898347", 
    "https://www.booli.se/bostad/lagenhet/gamla+alvsjo/standarvagen+1/1917520", 
    "https://www.booli.se/bostad/lagenhet/kungsholmen/kungsbro+strand+17/1918305", 
    "https://www.booli.se/bostad/lagenhet/kungsholmen/pipersgatan+16/1918270", 
    "https://www.booli.se/bostad/lagenhet/fruangen-hagersten/alva+myrdals+gata+4/1918145", 
    "https://www.booli.se/bostad/lagenhet/kungsholmen/norr+malarstrand+24/1918063", 
    "https://www.booli.se/bostad/lagenhet/sodermalm/beckbrannarbacken+7/1918049" 
    ), isNewConstruction = c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_), plotArea = c(NA, NA, NA, NA, NA, 
    0L, NA, 0L, NA, NA), additionalArea = c(NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), AreaSize = structure(c(4L, 
    4L, 7L, 8L, 9L, 2L, 2L, 3L, 2L, 1L), .Label = c("10", "20", 
    "30", "40", "50", "60", "70", "80", "90", "100", "110", "120", 
    "130"), class = "factor"), PriceDiff = c(0L, 150000L, 300000L, 
    0L, 1250000L, 515000L, 415000L, 505000L, 755000L, 455000L 
    )), .Names = c("Address", "nothing", "rent", "floor", "livingArea", 
"source", "rooms", "published", "constructionYear", "objectType", 
"booliId", "soldDate", "soldPrice", "url", "isNewConstruction", 
"plotArea", "additionalArea", "AreaSize", "PriceDiff"), row.names = c(19L, 
29L, 18L, 75L, 16L, 32L, 54L, 22L, 35L, 4L), class = "data.frame") 
+0

所以它不是一個 data.frame,參見 http://stackoverflow.com/questions/31533936/knitr-error-in-usemethodround-any-no-applicable-method-for-round-any-appl – zx8754

+0

用'dput(y)'更新你的帖子,所以我們可以重現完全相同的數據集。 – zx8754

+1

好的。執行 class(y.1) 返回的結果是 data.frame。我已按照您的要求,把 dput(y) 的輸出更新到帖子裏了。 – uncool

回答

0

這個數據結構看起來是從 JSON 解析出來的。你可以用 jsonlite 包的 flatten 函數把它展平:

library(jsonlite)
# Expand the nested data.frame columns of y.1 into ordinary top-level columns.
# The call is namespace-qualified so it is unambiguous which flatten() is meant.
flat <- jsonlite::flatten(y.1)
utils::str(flat)
# 'data.frame': 10 obs. of 28 variables: 
# $ nothing      : int 4100000 2800000 4875000 1800000 2950000 1995000 2195000 1995000 2195000 1395000 
# $ rent       : int 1586 2829 3092 4876 3983 2017 2105 2587 910 520 
# $ floor       : num 1 4 NA 4 1 1 3 3 4 0.5 
# ... 

這樣,flatten 函數就把它變成了一個普通的 data.frame,嵌套的 data.frame 已被展開成各自獨立的列。反過來做似乎更難,這個包看起來並沒有提供這樣的功能。不過,你可以用一個遞歸函數生成嵌套列表來得到類似的結構,其中的基本元素可以是 data.frame。data.frame 本質上就是列表,所以這種結構與原來的相差不遠。下面有兩個函數:一個生成嵌套的 data.frame,另一個生成嵌套列表(我認爲這是更標準的結構)。

## A function to create the nested data.frames structure.
##
## Columns whose names share the same prefix (the text before the first ".")
## are gathered into one nested data.frame column named after that prefix,
## with the prefix stripped from the inner column names. The function then
## recurses into each nested data.frame, so "a.b.c" / "a.b.d" end up as
## out$a$b$c and out$a$b$d. A prefix that occurs only once is left as an
## ordinary column under its full name.
##
## dat:     a data.frame with dot-separated column names (e.g. the result of
##          jsonlite::flatten()).
## returns: a data.frame holding the ungrouped columns first, followed by one
##          nested data.frame column per group, in the order given by table().
collapse <- function(dat) {
  col_names <- names(dat)
  # Prefix = everything before the first dot. Names without a dot (or that
  # start with a dot) are left unchanged and act as their own prefix.
  prefixes <- sub("^([^.]+)\\..*$", "\\1", col_names)
  counts <- table(prefixes)
  groups <- names(counts)[counts > 1]

  # drop = FALSE: a single remaining column must stay a data.frame, otherwise
  # the `out[[n]] <-` assignments below would operate on a bare vector.
  out <- dat[, !(prefixes %in% groups), drop = FALSE]

  for (n in groups) {
    # Select exactly the columns counted for this prefix; a regex search for
    # `n` anywhere in the name would also pick up unrelated columns.
    in_group <- prefixes == n
    inner_names <- col_names[in_group]
    lead <- paste0(n, ".")
    has_lead <- startsWith(inner_names, lead)
    # Strip the literal "<prefix>." (no regex, so special characters are safe).
    inner_names[has_lead] <- substring(inner_names[has_lead], nchar(lead) + 1L)
    sub_dat <- setNames(dat[, in_group, drop = FALSE], inner_names)
    out[[n]] <- collapse(sub_dat)
  }
  out
}

# Rebuild the nested data.frame structure from the flattened frame and
# inspect the result.
out <- collapse(dat = flat)
utils::str(out)

# 'data.frame': 10 obs. of 19 variables: 
# $ nothing   : int 4100000 2800000 4875000 1800000 2950000 1995000 2195000 1995000 2195000 1395000 
# $ rent    : int 1586 2829 3092 4876 3983 2017 2105 2587 910 520 
# $ floor   : num 1 4 NA 4 1 1 3 3 4 0.5 
# ... 
# $ source   :'data.frame': 10 obs. of 4 variables: 
# ..$ name: chr "BOSTHLM" "Fastighetsbyrån" "Gripsholms Fastighetsförmedling" "Fastighetsbyrån" ... 
# ..$ id : int 1499 1573 9895524 1573 1573 204 1570 58 713 713 
# ..$ type: chr "Broker" "Broker" "Broker" "Broker" ... 
# ... 

## Function to produce nested lists that may contain data.frames as
## base elements.
##
## Works like collapse(), but the outer container is a plain list: ungrouped
## columns become list elements, and every group of columns sharing a prefix
## (the text before the first ".") becomes a sub-list named after the prefix.
## Each group is built with collapse(), so deeper levels inside a group are
## nested data.frames.
##
## dat:     a data.frame with dot-separated column names.
## returns: a named list; ungrouped columns first, then one sub-list per
##          group, in the order given by table().
collapseNestedLists <- function(dat) {
  col_names <- names(dat)
  # Prefix = everything before the first dot. Names without a dot (or that
  # start with a dot) are left unchanged and act as their own prefix.
  prefixes <- sub("^([^.]+)\\..*$", "\\1", col_names)
  counts <- table(prefixes)
  groups <- names(counts)[counts > 1]

  # drop = FALSE: without it a single remaining column degrades to a vector
  # and as.list() would split it into one list element per row.
  out <- as.list(dat[, !(prefixes %in% groups), drop = FALSE])

  for (n in groups) {
    # Select exactly the columns counted for this prefix; a regex search for
    # `n` anywhere in the name would also pick up unrelated columns.
    in_group <- prefixes == n
    inner_names <- col_names[in_group]
    lead <- paste0(n, ".")
    has_lead <- startsWith(inner_names, lead)
    # Strip the literal "<prefix>." (no regex, so special characters are safe).
    inner_names[has_lead] <- substring(inner_names[has_lead], nchar(lead) + 1L)
    sub_dat <- setNames(dat[, in_group, drop = FALSE], inner_names)
    out[[n]] <- as.list(collapse(sub_dat))
  }
  out
}

# Same reconstruction, but with a plain list as the outer container.
outList <- collapseNestedLists(dat = flat)

這兩種數據結構的訪問方式基本相同。例如,如果你想取出 Address 中的 position(也就是原始結構裏嵌套在 data.frame 內部的 data.frame 元素),可以這樣寫:

# all.equal() only compares its first two arguments (`target`, `current`);
# any further positional arguments are taken as `tolerance` and `scale`.
# Each access path is therefore compared with the original value separately.
reference <- y.1$Address$position$latitude[1]
candidates <- list(
    outList$Address$position$latitude[1],
    out$Address$position$latitude[1],
    y.1[["Address"]][["position"]][["latitude"]][1]
)
all(vapply(
    candidates,
    function(value) isTRUE(all.equal(value, reference)),
    logical(1)
))
# TRUE 
相關問題