2016-11-27 60 views
-1

我手上有一項任務:在 R 中將副檔名爲「.chs」的文件轉換爲 CSV。Google 上找不到太多幫助。或者請告訴我,是否有辦法直接在 R 中讀取「.chs」文件?(標題:在 R 中將 CHS 轉換爲 CSV)

+0

如果你能提供一個示例文件,那就更好了。如果你所說的 `*.chs` 文件是指 [Photoshop文件](http://www.openthefile.net/extension/csh),可以參考[這個討論](https://www.ps-scripts.com/viewtopic.php?f=53&t=3795&sid=6dc6ee6d1570ed27e56fc8df368cba4a);把它當作[二進制文件](http://www.ats.ucla.edu/stat/r/faq/read_binary.htm)來讀取可能更容易。另外,請參考[本討論](http://stackoverflow.com/q/5963269/1655567),使你的問題可重現。將Photoshop文件讀入R並不是常見任務;我認爲你的文件可能是別的東西(?) – Konrad

回答

0

假設這是一個ASCII bufkit整體模型(例如http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs):

# Parse an ASCII BUFKIT profile file (".chs") into one long data frame:
# one row per layer of every station section, with the section-level
# metadata (station, timestamp, projection, precipitation/pressure summary)
# repeated on each of that section's rows.
library(purrr)

# Sample file; fetched once and cached in the working directory.
URL <- "http://weather.uncc.edu/data/bufkit/ascii/2016021917_hrrr.prof.chs"
fil <- basename(URL)
if (!file.exists(fil)) download.file(URL, fil)

l <- readLines(fil)

# Every section begins with a "STATION" line. Lines starting with "***"
# come in pairs per section; the second of each pair closes the section.
starts <- grep("^STATION", l)
ends <- grep("^\\*\\*\\*", l)
# Logical mask instead of seq(2, length(ends), 2): seq() throws when fewer
# than two "***" lines are present.
ends <- ends[seq_along(ends) %% 2 == 0]

if (length(starts) != length(ends)) {
  stop(
    "Unexpected file layout: ", length(starts), " STATION header(s) but ",
    length(ends), " section terminator(s).",
    call. = FALSE
  )
}

map2_df(starts, ends, function(start, end) {

  dat <- l[start:end]

  # The layer table sits between the two "***" lines of the section,
  # two lines in from each of them.
  dat_se <- grep("^\\*\\*\\*", dat) + c(2, -2)
  tab <- dat[dat_se[1]:dat_se[2]]
  df <- read.table(text = tab, header = TRUE, stringsAsFactors = FALSE)

  # Line 1 of the section: whitespace-separated tokens 3..5 are taken as
  # latitude, longitude and station id.
  st <- scan(text = dat[1], what = character(), quiet = TRUE)[3:5]
  df$lat <- st[1]
  df$lon <- st[2]
  df$station <- st[3]

  # Line 2: a YYMMDDHHMM stamp followed by a second token stored as `X1`.
  ts <- scan(text = dat[2], what = character(), quiet = TRUE)
  stamp <- ts[1]
  # NOTE(review): as.POSIXct() without `tz` uses the session's local time
  # zone -- confirm which zone the file's times are actually in.
  df$timestamp <- as.POSIXct(sprintf(
    "20%s-%s-%s %s:%s",
    substr(stamp, 1, 2),
    substr(stamp, 3, 4),
    substr(stamp, 5, 6),
    substr(stamp, 7, 8),
    substr(stamp, 9, 10)
  ))
  df$X1 <- ts[2]

  # Line 4: the last token carries "PROJECTION=<value>"; keep the value.
  proj <- tail(scan(text = dat[4], what = character(), quiet = TRUE), 1)
  df$projection <- gsub("PROJECTION=", "", proj)

  # Second-to-last line of the section: the first three numeric runs are
  # stored as hourly precipitation, total precipitation and surface pressure.
  # Base regmatches()/gregexpr() replaces stringi::stri_match_all_regex(),
  # which was called without stringi ever being attached.
  summary_line <- dat[length(dat) - 1]
  prcp <- as.numeric(
    regmatches(summary_line, gregexpr("[[:digit:].]+", summary_line))[[1]]
  )
  df$prcp_hr <- prcp[1]
  df$prcp_tot <- prcp[2]
  df$sf_pres <- prcp[3]

  setNames(df, tolower(colnames(df)))

}) %>% dplyr::glimpse()
## Observations: 800 
## Variables: 17 
## $ lyr  <int> 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 3... 
## $ temp  <dbl> -56.3, -57.8, -59.3, -61.3, -62.9, -64.6, -67.7, ... 
## $ depr  <dbl> 30.3, 27.6, 25.1, 22.1, 99.0, 99.0, 99.0, 99.0, 9... 
## $ kts  <dbl> 7.2, 9.0, 7.6, 5.7, 6.0, 8.1, 15.4, 27.5, 36.9, 4... 
## $ dir  <dbl> 88, 62, 23, 329, 291, 303, 313, 305, 289, 293, 29... 
## $ pres  <dbl> 22, 27, 33, 38, 44, 51, 58, 65, 73, 82, 93, 106, ... 
## $ rh   <dbl> 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 18,... 
## $ omeg  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 
## $ lat  <chr> "32.90N", "32.90N", "32.90N", "32.90N", "32.90N",... 
## $ lon  <chr> "80.03W", "80.03W", "80.03W", "80.03W", "80.03W",... 
## $ station <chr> "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", "CHS", ... 
## $ timestamp <dttm> 2016-02-19 17:00:00, 2016-02-19 17:00:00, 2016-0... 
## $ x1   <chr> "722080", "722080", "722080", "722080", "722080",... 
## $ projection <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",... 
## $ prcp_hr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 
## $ prcp_tot <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... 
## $ sf_pres <dbl> 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1033, 1... 

如果這確實是所述的文件類型,我可以補充說明(如果這不是正確的數據,就不花時間寫說明了)。