2013-04-23 73 views
1

我有一個長度爲 2 的命名 list「hhvrs」,其中包含名稱和對應的值。這兩個元素的名稱分別是「1920」和「1929」(年份)。我遇到了一個自己無法理解的奇怪 lapply 問題。

$`1920` 
     Nykvarn - 147 - 211920  Nykvarn - 262 - 211920 ... 
        1.235629      1.013191 ... 
$`1929` 
     Långed - 125 - 11929   Långed - 126 - 11929 ... 
        1.316499      1.026785 ... 

我還有一個 data.frame「data」,包含 1920 和 1929 這兩個年份的數據,其 dput 輸出見本帖底部。
然後,我想做反向匹配(即排除上述 list 中已存在的名稱)。換句話說,我想在數據框中只保留最後一列 uniquezCorrectCG 的值不在上述 list 名稱中的那些行。然後,我想對這些名稱不在 list 中的每家公司計算效率。

這裏是我的代碼:

# For every distinct year in `data`, drop the plants whose identifier is
# already a name in the matching element of `hhvrs`, then compute DEA
# efficiency scores (variable returns to scale) for the remaining plants.
# The result is a list with one efficiency vector per year.
hhvrsu=lapply(unique(data$year),function(x){ 
library(Benchmarking) 
# Rows of `data` belonging to the current year.
datat=data[data$year==x,] 
# NOTE(review): `x` is the raw year value taken from data$year, whereas hhvrs
# is keyed by the names "1920"/"1929". `[[` with a non-character index selects
# by position rather than by name -- confirm this lookup returns the intended
# element.
datat2=datat[!(datat$uniquezCorrectCG %in% names(hhvrs[[x]])),] 

# Output: kept as a one-column data frame; rows are labelled from columns 5
# and 4 (plant_name and plant_code in the dput column order).
y <- datat2[,"Ouput_ton",drop=FALSE] 
rownames(y)=paste(datat2[,5],"-",datat2[,4]) 

# Inputs: labour, capital, electricity and raw material, bound column-wise.
# This reuses the name `x`, so the year argument is overwritten from here on.
    x=with(datat2, 
    cbind(Labour_input_1000_hour, 
      Capital_input_1000_sek, 
      Electric_input_Mwh, 
      Rawmaterial_input_M3)) 

# Input rows are labelled from columns 5, 4 and 3 (plant_name, plant_code and
# year_cg_code in the dput column order).
rownames(x)=paste(datat2[,5],"-",datat2[,4],"-",datat2[,3]) 
e <- dea(x,y,RTS="vrs") 
return(e$eff) }    
)  
# Label each list element with its year.
names(hhvrsu)=unique(data$year) 

但是失敗了。例如,1929 年的 Långed - 125 - 11929 仍然出現在我的代碼輸出中,而它本應被丟棄,因爲 Långed - 125 - 11929 出現在上面的 list 中……

head(hhvrsu[["1929"]]) 

Billingsfors - 123 - 11929 Billingsfors - 124 - 11929  Långed - 125 - 11929  Långed - 126 - 11929  Långed - 127 - 11929 
       0.9975506     1.0000000     1.0000000     1.0000000     1.0000000 
    Hånsfors - 183 - 21929 
       0.9928677 

However, it works if I do it manually:

# Same steps as inside the lapply, run by hand for the year 1929 only.
datat <- data[data$year == 1929, ]

# Drop every plant whose identifier already appears among the names of
# hhvrs[["1929"]].
datat2 <- datat[!is.element(datat$uniquezCorrectCG, names(hhvrs[["1929"]])), ]

# Output: one-column data frame, rows labelled from columns 5 and 4.
y <- datat2[, "Ouput_ton", drop = FALSE]
rownames(y) <- paste(datat2[[5]], "-", datat2[[4]])

# Inputs: labour, capital, electricity and raw material, bound column-wise.
x <- cbind(
  Labour_input_1000_hour = datat2[["Labour_input_1000_hour"]],
  Capital_input_1000_sek = datat2[["Capital_input_1000_sek"]],
  Electric_input_Mwh = datat2[["Electric_input_Mwh"]],
  Rawmaterial_input_M3 = datat2[["Rawmaterial_input_M3"]]
)
rownames(x) <- paste(datat2[[5]], "-", datat2[[4]], "-", datat2[[3]])

# DEA efficiency under variable returns to scale.
e <- dea(x, y, RTS = "vrs")

head(e$eff) 
Billingsfors - 123 - 11929 Billingsfors - 124 - 11929  Hånsfors - 183 - 21929 Hällefors - 237 - 21929  Grycksbo - 350 - 21929 
       0.9984071     1.0000000     1.0000000     0.5863832     0.9813024 
    Brättne - 100 - 31929 
       0.9915349 

在上面的 e$eff 中,Långed - 125 - 11929 已被丟棄!

EDIT

如果我用 as.character(x) 代替單純的 x,它就能正常工作:

# Benchmarking provides dea(); load it once instead of on every iteration.
library(Benchmarking)

# For every distinct year in `data`, drop the plants whose identifier is
# already a name in the matching element of `hhvrs`, then compute DEA
# efficiency scores (variable returns to scale) for the remaining plants.
# The result is a list with one efficiency vector per year, named by year.
hhvrsu <- lapply(unique(data$year), function(x) {
  datat <- data[data$year == x, ]

  # hhvrs is keyed by the year *name* ("1920", "1929"). A non-character
  # index would be used as a position by `[[`, so convert the year to
  # character to look the element up by name.
  listed <- names(hhvrs[[as.character(x)]])
  datat2 <- datat[!(datat$uniquezCorrectCG %in% listed), ]

  # Output: kept as a one-column data frame, rows labelled from columns 5
  # and 4.
  y <- datat2[, "Ouput_ton", drop = FALSE]
  rownames(y) <- paste(datat2[, 5], "-", datat2[, 4])

  # Inputs: stored under their own name so the year argument `x` is not
  # overwritten.
  inputs <- with(
    datat2,
    cbind(
      Labour_input_1000_hour,
      Capital_input_1000_sek,
      Electric_input_Mwh,
      Rawmaterial_input_M3
    )
  )
  rownames(inputs) <- paste(
    datat2[, 5], "-", datat2[, 4], "-", datat2[, 3]
  )

  dea(inputs, y, RTS = "vrs")$eff
})
names(hhvrsu) <- unique(data$year)

有誰能解釋爲什麼需要 as.character(x) 嗎?

Dputs:

dput(hhvrs) 
structure(list(`1920` = structure(c(1.23562876282578, 1.01319073788091, 
1.55783496400001, 1.06191988898698, 1.12744927131341, 1.08504615635299, 
1.25725741409574, 2.03370195312046, 1.00667697472372, 1.00260726981462, 
1.3050604346423, 1.3594555255334, 1.55671945006842, 1.0072581093466, 
1.65164991096899, 2.47385616808447, 1.18471196771314, 1.24186522915967, 
1.65133103063843, Inf, 1.16498198151401, 1.07017484481922), .Names = c("Nykvarn - 147 - 211920", 
"Nykvarn - 262 - 211920", "Tumba - 68 - 381920", "Byske - 294 - 451920", 
"Långed - 127 - 571920", "Väja - 270 - 691920", "Ljusfors - 141 - 731920", 
"Skärblacka - 370 - 731920", "Sätra - 152 - 781920", "Krokfors - 129 - 871920", 
"Åsen - 207 - 1011920", "Åsen - 208 - 1011920", "Lagerfors - 225 - 10121920", 
"Lindefors - 243 - 10281920", "Munksjö - 253 - 10281920", "Qvill - 211 - 10431920", 
"Esseltewell - 375 - 10521920", "Esseltewell - 376 - 10521920", 
"Ulriksfors - 205 - 10541920", "Sellnäs - 352 - 10541920", "Vivstavarv - 314 - 10751920", 
"Älvsborg - 369 - 10791920")), `1929` = structure(c(1.31649939189229, 
1.02678542256861, 1.50667886828221, 1.06101596031178, 1.00477142430659, 
Inf, 1.00038550231904, 1.10347307305662, 1.53782048667181, 1.80890790261425, 
1.06103833744605, 1.00036736526695, 1.01053736983199, 1.01119078294682, 
1.00295000872313, 1.01778128036389, 1.22049428994262, 1.15078822074877, 
1.00346763843347, 1.2192497185324, 1.03195112444193, 1.71491513543284, 
1.00168840525869, 1.00575972592046, 1.105483053952, 1.00427057272637, 
1.94482017228275, 1.00388363163126), .Names = c("Långed - 125 - 11929", 
"Långed - 126 - 11929", "Långed - 127 - 11929", "Hällefors - 234 - 21929", 
"Göteborg-Dals - 156 - 91929", "Papyrus - 280 - 231929", "Sofiehem - 330 - 271929", 
"Tollare - 66 - 361929", "Tumba - 68 - 381929", "Alstermo - 4 - 491929", 
"Billerud - 106 - 571929", "Fengersfors - 135 - 711929", "Gamlestaden - 153 - 821929", 
"Gransholm - 228 - 851929", "Åsen - 207 - 1011929", "Nykvarn - 262 - 1101929", 
"Haga - 24 - 10041929", "Ljusne - 218 - 10181929", "Husum - 232 - 10251929", 
"Munksjö - 253 - 10281929", "Pauliström - 239 - 10311929", "Qvill - 211 - 10431929", 
"Esseltewell - 375 - 10521929", "Ställdalen - 356 - 10531929", 
"Kvarnsveden - 343 - 10541929", "Skutskär - 345 - 10541929", 
"Sellnäs - 352 - 10541929", "Vivstavarv - 314 - 10751929"))), .Names = c("1920", 
"1929")) 

Dput data.frame

dput(data[data$year==1929,][1:5,]) 

structure(list(company_code = c(1L, 1L, 1L, 1L, 1L), company_name = c("AB Billingsfors-Långed", 
"AB Billingsfors-Långed", "AB Billingsfors-Långed", "AB Billingsfors-Långed", 
"AB Billingsfors-Långed"), year_cg_code = c(11929L, 11929L, 11929L, 
11929L, 11929L), plant_code = 123:127, plant_name = c("Billingsfors", 
"Billingsfors", "Långed", "Långed", "Långed"), plant_location = c("Billingsfors", 
"Billingsfors", "Dals Långed", "Dals Långed", "Dals Långed"), 
    plant_location_by_municipal = c("Bengtsfors", "Bengtsfors", 
    "Bengtsfors", "Bengtsfors", "Bengtsfors"), year = c(1929L, 
    1929L, 1929L, 1929L, 1929L), Output_value_1000_sek = c(720L, 
    2304L, 531L, 3040L, 2079L), Labour_cost_1000_sek = c(102L, 
    348L, 93L, 199L, 225L), Capital_cost_1000_sek = c(108L, 468L, 
    126L, 304L, 180L), Electricity_cost_1000_sek = c(130L, 90L, 
    10L, 120L, 40L), Raw_material_cost_1000_sek = c(174L, 744L, 
    177L, 1824L, 1080L), Output_price_1_sek.ton = c(220L, 220L, 
    220L, 220L, 220L), Output_price__sek.ton = c(196L, 196L, 
    196L, 196L, 196L), Labour_price_sek.hour = c(1, 1.208333333, 
    2.657142857, 1.093406593, 2.083333333), Capital_price_interest.rate = c(4.556666667, 
    4.556666667, 4.556666667, 4.556666667, 4.556666667), Motive_Power_pricekr.MwH = c(43.10344828, 
    67.61833208, 31.54574132, 93.45794393, 45.14672686), Electricity_price_kr.MwH = c(24.34456929, 
    24.19354839, 13.88888889, 25.26315789, 22.22222222), Raw_Material_price_kr.m3 = c(14.5, 
    15.5, 11.8, 19, 12), Mean_raw.material_price = c(14.3, 14.3, 
    14.3, 14.3, 14.3), Output_capacity_ton = c(6000L, 12000L, 
    3000L, 9500L, 9000L), Ouput_ton = c(3272L, 10472L, 2413L, 
    13818L, 9450L), Labour_input_1000_hour = c(102L, 288L, 35L, 
    182L, 108L), Capital_input_1000_sek = c(2853L, 1975L, 219L, 
    2634L, 878L), Motive_Power_Mwh = c(3016L, 1331L, 317L, 1284L, 
    886L), Electric_input_Mwh = c(5340, 3720, 720, 4750, 1800 
    ), Rawmaterial_input_M3 = c(12000, 48000, 15000, 96000, 90000 
    ), Capacity_Utilization = c(54.53333333, 87.26666667, 80.43333333, 
    145.4526316, 105), Labour_cost_share = c(14.16666667, 15.10416667, 
    17.51412429, 6.546052632, 10.82251082), Capital_cost_share = c(15, 
    20.3125, 23.72881356, 10, 8.658008658), Electricity_cost_share = c(18.05555556, 
    3.90625, 1.883239171, 3.947368421, 1.924001924), Raw_Material_cost_share = c(24.16666667, 
    32.29166667, 33.33333333, 60, 51.94805195), Labour_productivity = c(1.433165382, 
    1.624502304, 3.080154233, 3.392008925, 3.909230144), Capital_productivity = c(4.8, 
    22.1, 45.8, 21.9, 44.8), Power_productivity = c(0.24, 1.73, 
    1.68, 2.37, 2.35), Electricity_productivity = c(0.303469526, 
    1.39421497, 1.659846295, 1.440769899, 2.60017364), Raw.material.productivity = c(1.439189112, 
    1.151527229, 0.849086388, 0.759730866, 0.554210966), uniquezCorrect = c("Billingsfors - 123", 
    "Billingsfors - 124", "Långed - 125", "Långed - 126", "Långed - 127" 
    ), uniquezCorrectCG = c("Billingsfors - 123 - 11929", "Billingsfors - 124 - 11929", 
    "Långed - 125 - 11929", "Långed - 126 - 11929", "Långed - 127 - 11929" 
    )), .Names = c("company_code", "company_name", "year_cg_code", 
"plant_code", "plant_name", "plant_location", "plant_location_by_municipal", 
"year", "Output_value_1000_sek", "Labour_cost_1000_sek", "Capital_cost_1000_sek", 
"Electricity_cost_1000_sek", "Raw_material_cost_1000_sek", "Output_price_1_sek.ton", 
"Output_price__sek.ton", "Labour_price_sek.hour", "Capital_price_interest.rate", 
"Motive_Power_pricekr.MwH", "Electricity_price_kr.MwH", "Raw_Material_price_kr.m3", 
"Mean_raw.material_price", "Output_capacity_ton", "Ouput_ton", 
"Labour_input_1000_hour", "Capital_input_1000_sek", "Motive_Power_Mwh", 
"Electric_input_Mwh", "Rawmaterial_input_M3", "Capacity_Utilization", 
"Labour_cost_share", "Capital_cost_share", "Electricity_cost_share", 
"Raw_Material_cost_share", "Labour_productivity", "Capital_productivity", 
"Power_productivity", "Electricity_productivity", "Raw.material.productivity", 
"uniquezCorrect", "uniquezCorrectCG"), row.names = 6:10, class = "data.frame") 
+1

你能縮短你的帖子嗎(只貼 'dput' 的結果就夠了,不必同時粘貼打印輸出和 dput)?另外,你能否把「然後我想反向匹配名稱……」這部分的需求解釋得更清楚一些? – Arun 2013-04-23 14:33:11

+0

@Arun 完成。我想知道爲什麼手動方式有效,而 'lapply' 方式卻無效。即爲什麼 'lapply' 的結果仍然包含 'list' 中的名字,而手動方式能正確地刪除它們。 – user1665355 2013-04-23 14:36:02

+0

@ user1665355你刪除了'dput'!這是前兩個代碼塊太大/沒有增加太多的問題 - 只是發佈相關位 - 這個問題目前很難讀,主要是因爲它太長了。 – 2013-04-23 14:39:06

回答

1

我會用稍微不同的方式來做(完全不使用 lapply)。首先,我會用 stack 從 hhvrs 構建一個 data.frame,如下:

# Build a lookup data frame of (year, uniquezCorrectCG) pairs from hhvrs.
# stack() flattens the list; only its `ind` column (the list element name,
# i.e. the year) is kept here.
my.df <- stack(hhvrs)[, c("ind"), drop = FALSE] 
names(my.df) <- c("year") 
# NOTE(review): this relies on stack() carrying the element names of each
# vector over as row names -- confirm on the R version in use.
my.df <- transform(my.df, uniquezCorrectCG = rownames(my.df)) 
# The identifiers now live in a column, so the row names can be reset.
rownames(my.df) <- NULL 

現在找出 data 中哪些 year 和 uniquezCorrectCG 的組合已存在於 my.df 中,並只保留不存在的那些條目:

# Stack the my.df keys on top of the matching key columns of data, flag every
# row that repeats an earlier row, drop the flags that belong to my.df itself,
# and keep only the rows of data that were not flagged. Rows of data that
# repeat an earlier (year, uniquezCorrectCG) pair of data are flagged as well.
data[!duplicated(rbind(my.df, data[, c("year", 
      "uniquezCorrectCG")]))[-seq_len(nrow(my.df))], ] 
+0

不錯,但我也想創建一個效率列表,上面有'e $ eff':)所以我在'lapply'結尾使用了 – user1665355 2013-04-23 14:58:03

+0

的lapply ..''return(e $ eff)' – user1665355 2013-04-23 15:00:05

+0

不,我的意思是你的'hhvrs'和'data'列中的'eff'? – Arun 2013-04-23 15:00:38