2012-04-03 160 views
1

例如:如何將字符串拆分爲字符串數組?

輸入(字符串):foo $$ foo ## foo []

搜索(字符串):foo

輸出(數組):$$ ,## ,[]

我嘗試了:

/* Attempt to split str on every occurrence of s, collect the pieces in
 * buffer, and print them one per line. */
char * str = "foo $$ foo ## foo []"; 
    char * s = "foo"; 

    int buf_len = 0; 
    int len = strlen(s); 
    int i = 0; 

    /* buffer holds the piece pointers, tmpbuf holds the non-delimiter
     * characters; each allocation is MAX_BUFFER_SIZE bytes. */
    char ** buffer = malloc(MAX_BUFFER_SIZE); 
    char * tmpbuf = malloc(MAX_BUFFER_SIZE); 
    char * p = str; 
    char ** buf = buffer; 
    char * tbuf = tmpbuf; 

    while(*p) 
    { 
     /* Current character equals the first character of s: try to match s. */
     if(*p == *s) 
     { 
      /* NOTE(review): this condition tests neither *p != '\0' nor i < len. */
      while(*p == *(s + i)) 
      { 
       i++; 
       p++; 
      } 

      /* Complete match. i is reset only inside this branch. */
      if(i == len) 
      { 
       /* Stores the current write position inside tmpbuf, not a copy. */
       *buf ++ = tbuf; 
       /* Zeroes buf_len bytes starting at the write position tbuf. */
       memset(tbuf,0,buf_len); 
       i = buf_len = 0; 
      } 
     } 
     else 
     { 
      /* Not the start of a delimiter: append the character to tmpbuf. */
      *tbuf ++= *p; 
      buf_len ++; 
     } 

     /* Runs on both branches, so after a match attempt the character that
      * stopped the inner loop is stepped over as well. */
     p++; 
    } 

    /* NULL-terminate the pointer array for the print loop below. */
    *buf ++= NULL; 

    int x; 
    for(x = 0; buffer[x]; x++) 
    { 
     printf("%s\n", buffer[x]); 
    } 

    free(buffer); 
    free(tmpbuf); 

顯示以下輸出:

$$ ## [] 
## [] 
[] 

,但預期是:

$$ 
## 
[] 

如何解決這一問題?

+0

why -1?可以解釋一下嗎? – Jack 2012-04-03 20:20:55

+0

可能的重複[在C#中拆分字符串](http://stackoverflow.com/questions/4545621/split-string-at-in-c) – 2012-04-03 22:19:37

+0

@BoPersson:不。這不是'char'問題。我想分割'string'。 – Jack 2012-04-04 00:39:40

回答

3

那是因爲當你寫下下面這行代碼時,並沒有把 tbuf 的內容複製到 buf 中:

*buf ++ = tbuf; 

你所做的只是保存了一個指向 tbuf(或者說 tmpbuf)當前位置的引用。

tmpbuf得到除了分隔符以外的所有東西。

在循環結束時,情況大致如下:

 <- offset 
tmpbuf = "$$ ## []" 

buf[0] = tmpbuf+0; 
buf[1] = tmpbuf+3; 
buf[2] = tmpbuf+6; 

或者用一張極度簡化的內存表來表示:

 memory 
     address  value 
tmpbuf -> 0x01  [ $] <- buffer[0] points here 
      0x02  [ $] 
      0x03  [ ] 
      0x04  [ #] <- buffer[1] points here 
      0x05  [ #] 
      0x06  [ ] 
      0x07  [ [] <- buffer[2] points here 
      0x08  [ ]] 
      0x09  [ ] 
      ... 
buffer -> 0x3A  [0x01] 
      0x3B  [0x04] 
      0x3C  [0x07] 
      0x3D  [ ] 
      0x3E  [ ] 
      ... 

編輯

純屬好玩:下面是一種使用指針、動態分配內存、且不使用 strstr() 的做法:

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 

/*
 * Report whether the string `hay` begins with the string `needle`.
 *
 * Returns 1 when every character of `needle` matches the corresponding
 * character of `hay`, and 0 otherwise. An empty `needle` is a prefix of
 * every string, so it yields 1.
 */
int is_needle(char *hay, char *needle)
{
    /*
     * Compare starting with the first character and stop as soon as the end
     * of `needle` is reached, so neither pointer is advanced past its
     * terminator.
     */
    while (*needle != '\0' && *hay == *needle) {
        ++hay;
        ++needle;
    }
    return *needle == '\0';
}

/*
 * Locate the first occurrence of `needle` inside `hay`.
 *
 * Returns a pointer to the start of the match, or a pointer to the
 * terminating '\0' of `hay` when nothing matches.
 */
char *find(char *hay, char *needle)
{
    char *cursor;

    for (cursor = hay; *cursor != '\0'; ++cursor) {
        /* Cheap first-character test before the full comparison. */
        if (*cursor != *needle)
            continue;
        if (is_needle(cursor, needle))
            return cursor;
    }
    return cursor;
}

/*
 * Append a copy of the first `slen` bytes of `val` to the NULL-terminated
 * string vector `*vs`, which currently holds `*vslen` strings.
 *
 * The copy is NUL-terminated, `*vslen` is incremented, the vector is grown
 * by one slot and the new last slot is set to NULL. `*vs` may change
 * because the vector is reallocated.
 *
 * Returns the new number of strings. On allocation failure a message is
 * printed with perror() and the process exits with status 1.
 */
int pushstr(char ***vs, size_t *vslen, char *val, size_t slen)
{
    /* The slot that currently holds the vector's terminator. */
    char **slot = &(*vs)[*vslen];

    *slot = realloc(*slot, slen + 1);
    if (*slot == NULL) {
        perror("pushstr.1");
        exit(1);
    }

    memcpy(*slot, val, slen);
    (*slot)[slen] = '\0';

    /* Grow the vector so there is room for the new terminator. */
    *vslen += 1;
    *vs = realloc(*vs, sizeof(char*) * (*vslen + 1));
    if (*vs == NULL) {
        perror("pushstr.2");
        exit(1);
    }
    (*vs)[*vslen] = NULL;

    return *vslen;
}

/*
 * Demo driver: split a fixed sample string on every occurrence of "foo",
 * print each non-empty piece, then release all allocated memory.
 */
int main(void)
{
    char *hay = "foo $$ foo ## foo [] fox @@ foo ??";
    char *needle = "foo";
    size_t nlen = strlen(needle);
    size_t vslen = 0;
    size_t i;
    char *np;
    char **vs = malloc(sizeof(char*));

    if (vs == NULL) {
        perror("main");
        return 1;
    }
    vs[0] = NULL;

    /* find() returns the end of the string when no delimiter is left. */
    for (np = find(hay, needle); *np != '\0'; np = find(hay, needle)) {
        /* Only keep non-empty text in front of the delimiter. */
        if (np != hay)
            pushstr(&vs, &vslen, hay, np - hay);
        hay = np + nlen;
    }

    /* Whatever follows the last delimiter is the final piece. */
    if (np != hay)
        pushstr(&vs, &vslen, hay, np - hay);

    for (i = 0; vs[i] != NULL; i++)
        printf("V: '%s'\n", vs[i]);

    for (i = 0; vs[i] != NULL; i++)
        free(vs[i]);
    free(vs);

    return 0;
}
0

strtok 函數正是爲這類任務而設計的:

#include <string.h> 
... 
char *token;
/*
 * strtok() overwrites delimiter characters in the string it scans with
 * '\0', so the input has to be a modifiable array; modifying a string
 * literal is undefined behavior.
 */
char line[] = "LINE TO BE SEPARATED";
char *search = " ";


/* Token will point to "LINE". */
token = strtok(line, search);


/* Token will point to "TO". */
token = strtok(NULL, search);
+0

這不起作用,因爲 strtok 會把分隔符字符串中的每個字符都單獨視爲有效的分隔符。使用 "foo" 作爲 strtok 的分隔符字符串,會把 "f $$ f ## f []" 這樣的輸入也拆分爲三個令牌,這不是 OP 想要的。 – 2012-04-03 20:35:02

+0

謝謝,我沒有注意到他的多字符分隔符。儘管如此,使用其他標準函數,比如'strstr'和'strcpy',將會使任務變得更加清晰。 – 2012-04-03 20:46:01

+0

@KarlBielefeldt:你能告訴我一個簡單的例子嗎? – Jack 2012-04-04 00:44:14

1

這是strstr()的任務。我改了一下你的代碼來利用它。

/*
 * Store a NUL-terminated copy of at most the first `len` bytes of `str` in
 * the slot that `*buf` points at, then advance `*buf` to the next slot.
 *
 * Returns 1 when a string was stored. Returns 0, leaving `*buf` and the
 * slot untouched, when `len` is 0 or the allocation fails. The stored copy
 * is heap-allocated; the caller is responsible for free()ing it.
 */
int add_to_buf(char *str, size_t len, char ***buf)
{
    char *copy;

    if (len == 0) return 0;

    /* One extra byte for the terminator that strncpy() does not write. */
    copy = malloc (len + 1);
    if (copy == NULL) return 0;

    strncpy (copy, str, len);
    copy[len] = '\0';

    **buf = copy;
    ++*buf;
    return 1;
}

/*
 * Split a fixed sample string on every occurrence of "foo", print each
 * non-empty piece on its own line, then release all allocated memory.
 */
int main()
{
    char *str = "foo $$ foo ## foo []";
    char *s = "foo";

    char **buffer = malloc (MAX_BUFFER_SIZE*sizeof(*buffer)), **buf = buffer;
    char *start, *end;

    size_t s_len = strlen (s);

    if (buffer == NULL) {
        perror ("malloc");
        return 1;
    }

    start = str;
    end = strstr (str, s);
    while (end) {
        /* Text between the previous delimiter and this one. */
        add_to_buf (start, end-start, &buf);
        start = end + s_len;
        end = strstr (start, s);
    }
    /* Whatever follows the last delimiter is the final piece. */
    add_to_buf (start, strlen (start), &buf);
    *buf = NULL;

    for (buf = buffer; *buf; ++buf)
        printf ("%s\n", *buf);

    /* Every piece was allocated by add_to_buf(); free them before the array. */
    for (buf = buffer; *buf; ++buf)
        free (*buf);
    free (buffer);
    return 0;
}
1

你在一個簡單的程序中使用了太多指針,而且使用它們的方式讓代碼很難理解。我看到的一個明顯錯誤是:你把 buffer 當作 char **(字符串數組)使用,但你只分配了一個字符串的空間。你用這個字符串數組來存儲令牌,這會在某處造成內存訪問違規。

由於您要打印令牌,因此不需要將它們存儲在單獨的數組中。這樣做:

#include<stdio.h> 
#include<string.h> 

/* Print every space-separated word of the sample text that is not "foo". */
int main(int ac, char*argv[]) {
    char text[] = "foo $$ foo ## foo []";
    char *skip = "foo";
    char *word;

    /* strtok() cuts the array in place at each space and yields the words. */
    for (word = strtok(text, " "); word != NULL; word = strtok(NULL, " ")) {
        /* A non-zero strcmp() result means the word differs from skip. */
        if (strcmp(word, skip) != 0)
            printf("%s\n", word);
    }

    return 0;
}

請注意,這裏的分隔符是空格,這使得它更容易在這裏。

5

下面是字符串分割爲字符串數組的函數:

#include <assert.h> 
#include <string.h> 

/*
 * Split `str` on every occurrence of `delim` (which must not be empty).
 *
 * Each piece is copied, NUL-terminated, into @bytes_out, and a pointer to
 * the start of each copy is stored in @pointers_out, so @pointers_out ends
 * up as an array of C strings. Empty pieces (leading, trailing or between
 * adjacent delimiters) are kept. The caller must supply buffers large
 * enough for all of the pieces; no bounds are checked here.
 *
 * Returns the number of strings written to @pointers_out (always >= 1).
 */
size_t explode(const char *delim, const char *str,
               char **pointers_out, char *bytes_out)
{
    const size_t step = strlen(delim);
    const char *hit;
    size_t count = 0;

    assert(step > 0);

    while ((hit = strstr(str, delim)) != NULL) {
        /* The next piece starts at the current output position. */
        pointers_out[count++] = bytes_out;

        /* Copy the bytes in front of the delimiter and terminate them. */
        for (; str < hit; ++str, ++bytes_out)
            *bytes_out = *str;
        *bytes_out++ = '\0';

        /* Resume scanning right after the delimiter. */
        str += step;
    }

    /* No delimiter left: the remainder of the input is the last piece. */
    pointers_out[count++] = bytes_out;
    strcpy(bytes_out, str);

    return count;
}

用法:

#include <stdio.h> 
/* ... */ 

#define BIG_ENOUGH 1000 

/* Demo: split the sample string on "foo" and print each piece in quotes. */
int main(void)
{
    char   storage[BIG_ENOUGH];
    char  *pieces[BIG_ENOUGH];
    size_t total = explode("foo", "foo $$ foo ## foo []", pieces, storage);
    size_t k = 0;

    while (k < total) {
        printf("\"%s\"\n", pieces[k]);
        k++;
    }

    return 0;
}

輸出:

"" 
" $$ " 
" ## " 
" []" 

這不會產生你所要求的精確輸出,因爲我不確定你想如何處理周圍的空格,以及出現在字符串開頭的項目分隔符(在你的例子中是 "foo")。所以我轉而模仿了 PHP 的 explode 函數。


我想指出我的 explode 函數在內存管理上的做法:調用者必須確保緩衝區足夠大。對於一個快速的腳本來說這很好,但在更嚴肅的程序中可能會令人討厭,因爲你必須做一些計算才能正確使用這個函數。我本可以編寫一個更「健壯」的實現,讓它自己執行內存分配,但是:

  • 這會讓實現變得雜亂。

  • 它沒有給調用者使用他們自己的內存分配器的選項。

所以,我實現 explode 的方式是「壞」的,因爲它難以正確使用,更糟的是,很容易被誤用。另一方面,它又是「好」的,因爲它把功能和內存管理這兩個關注點分離開來。