2017-08-02 112 views
-3

如何使用 R 從電子表格(其中也包含垃圾數據)中提取表格數據?(點擊查看電子表格)使用 R 提取 Excel 表格中的表格數據

我可以在 R 中這樣寫:

# Read columns 1-3 of the first worksheet, starting at spreadsheet row 9.
xcelfile <- read.xlsx(
  "LT257-Refuel 3 March2017.xlsx",
  sheetIndex = 1,
  startRow = 9,
  colIndex = c(1, 2, 3)
)

這樣寫可以讀取數據,但我希望有一種更通用的方法,能適用於任何這類文件,而不必在讀取時實際指定起始行。

回答

0

根據您的屏幕截圖,看起來您的文件頂部沒有比實際數據列更靠右的任何數據。如果這個假設成立,您可以使用下面的代碼來動態確定 startRow 的值:

# Index of the first imported row that has a value in every column.
min(
  which(
    complete.cases(read_xlsx("LT257-Refuel3March2017.xlsx"))
  )
)

爲什麼這樣可行

# Import the sheet (dput() output of `test` is at the bottom of this answer).
test <- read_xlsx("test.xlsx")

# Logical vector: TRUE for every row with no missing value in any column.
cc <- complete.cases(test)

# Positions of the TRUE entries, i.e. the indices of the complete rows.
cc <- which(cc)

# Smallest of those indices: the earliest row with a value in each column.
cc <- min(cc)

實例數據

structure(
    list(
    `No copyright 2017` = c(
     "Made by Andrew Breza", 
     NA, 
     NA, 
     "a", 
     "0.20163499083126724", 
     "0.22825468034535024", 
     "5.0982213542989485E-2", 
     "0.10281072980496486", 
     "0.64909268750884885", 
     "0.70355689088752538", 
     "0.86041365822441651", 
     "0.17703894784688834", 
     "0.17436477104724901", 
     "7.8210087926938621E-2", 
     "0.62840230879652859", 
     "0.47390519345520465", 
     "0.17709432355627197", 
     "0.21680508964340539", 
     "0.42013016672732617", 
     "0.27558526667241368", 
     "0.73421400056622432", 
     "0.3008573777830944" 
    ), 
    X__1 = c(
     NA, 
     NA, 
     NA, 
     "b", 
     "0.10530273672328228", 
     "0.57362069130780791", 
     "8.6190788221684689E-2", 
     "0.93970062859291503", 
     "0.98313592372911485", 
     "0.69934659461539217", 
     "0.67775289068563083", 
     "0.18487308246868062", 
     "0.13727414072992095", 
     "0.97886937472912772", 
     "0.33494682878761595", 
     "0.38691583143160924", 
     "0.22538745867671395", 
     "0.78993890542175837", 
     "0.54424684747789254", 
     "0.6211545954458173", 
     "0.52315172220522255", 
     "0.9249434890424244" 
    ), 
    X__2 = c(
     NA, 
     NA, 
     NA, 
     "c", 
     "1.798394389715019E-2", 
     "0.48852685697950471", 
     "0.511135945041712", 
     "0.15620166014476378", 
     "0.18400930969357332", 
     "0.56153224896656539", 
     "0.20932061985098849", 
     "0.73934609434573473", 
     "0.6563390016659193", 
     "0.44542823830895195", 
     "0.40431302050322948", 
     "0.69817110559609552", 
     "0.22619605667502185", 
     "0.31567993543368122", 
     "0.66165692952282684", 
     "0.49190019375595861", 
     "0.85287531618000434", 
     "0.95659649277422809" 
    ), 
    X__3 = c(
     NA, 
     NA, 
     NA, 
     "d", 
     "0.52978180616887405", 
     "0.26248657154304644", 
     "0.92492633566696458", 
     "0.28845813234934969", 
     "0.45675400607977801", 
     "9.6399518093580605E-2", 
     "0.70848994854955472", 
     "0.47467197462769017", 
     "0.54266110899628384", 
     "0.15242767673773072", 
     "0.62962185530884729", 
     "0.79948863441696005", 
     "0.97921292674380367", 
     "0.95155896584036115", 
     "0.61046228678401782", 
     "0.36630750428703296", 
     "0.92034619245493643", 
     "0.30251636944016935" 
    ), 
    X__4 = c(
     NA, 
     NA, 
     NA, 
     "e", 
     "0.44520622146439093", 
     "0.25763202918954686", 
     "0.57157196576664482", 
     "0.13855752627816653", 
     "0.36486076569906845", 
     "0.59198411405761164", 
     "0.11101644469753646", 
     "0.44819906683171207", 
     "0.84090939808845988", 
     "0.35965505274703058", 
     "0.84780342228842287", 
     "0.16713792608147138", 
     "7.9855497761844196E-2", 
     "0.59425596116302071", 
     "0.20810432506625398", 
     "0.50403916185284026", 
     "0.349651850034845", 
     "0.67659808045569292" 
    ), 
    X__5 = c(
     NA, 
     NA, 
     NA, 
     "f", 
     "9.8930955361685835E-2", 
     "0.96547240641836818", 
     "0.46440533344650992", 
     "0.74076741685833791", 
     "0.30181122840373564", 
     "0.56538643074614503", 
     "0.56788732925203778", 
     "0.56375788819366213", 
     "0.43462600598328394", 
     "0.14261157268114866", 
     "0.62537286750674947", 
     "0.7437112765392242", 
     "0.71769176920920108", 
     "0.55005462908684066", 
     "0.80882942650644041", 
     "0.87673165877980208", 
     "0.25589697540607192", 
     "7.8168903385850808E-2" 
    ), 
    X__6 = c(
     NA, 
     NA, 
     NA, 
     "g", 
     "0.26582177818716768", 
     "0.38097648784489491", 
     "0.68959666363898542", 
     "0.3320245351431016", 
     "0.88082273061309824", 
     "0.16884962926524261", 
     "0.69780989348413147", 
     "0.10538108497255883", 
     "0.201521157744733", 
     "0.11884857868727472", 
     "0.23764189907094002", 
     "0.16247009289992365", 
     "0.50209884800617577", 
     "0.72617734144415036", 
     "0.28696520971352502", 
     "0.31238776292429182", 
     "0.98877064276115301", 
     "0.11280008220991433" 
    ), 
    X__7 = c(
     NA, 
     "Yup here", 
     NA, 
     "h", 
     "0.14841467723151391", 
     "3.9819114921856968E-2", 
     "0.56837064320267927", 
     "0.85395489379904677", 
     "0.69661506623045577", 
     "0.98474764955059735", 
     "2.5098690275612845E-2", 
     "0.94616226982864171", 
     "0.80519284764792409", 
     "0.89294962679085987", 
     "0.1608941340883776", 
     "0.89888281402638381", 
     "0.98458500475763688", 
     "0.11892983339802443", 
     "0.16990338725067256", 
     "0.89301913292795732", 
     "0.48977127816954613", 
     "0.481585804429011" 
    ) 
), 
    .Names = c(
    "No copyright 2017", 
    "X__1", 
    "X__2", 
    "X__3", 
    "X__4", 
    "X__5", 
    "X__6", 
    "X__7" 
), 
    class = c("tbl_df", 
      "tbl", "data.frame"), 
    row.names = c(NA,-22L) 
) 
0

試試這個:

# An .xlsx workbook is not delimited text, so read.delim() cannot parse it;
# read the first worksheet with read.xlsx() instead.
xcelfile <- read.xlsx("LT257-Refuel3March2017.xlsx", sheetIndex = 1)

另外,請嘗試讓 .xlsx 文件的名稱中不含空格。

相關問題