c中的字符串處理

我的程序的目標是查看字符串，並能夠取出對話框的問題和答案。c中的字符串處理

例如： ("do you like me?" ("yes" ("we're friends")) ("no" ("i hate you")) )

該程序會取出「你喜歡我嗎？」這個問題，並讓你選擇「是」或「否」。一旦你做出選擇，它就會相應地輸出「我們是朋友」或「我恨你」。

有沒有任何函式庫（library）或現成的解決方案可以做到這一點？

來源

2013-03-12 dLiGHT

您嘗試了什麼？http://mattgemmell.com/2008/12/08/what-have-you-tried/ – kay 2013-03-12 20:04:38

凱，你有漂亮的網址！我會保存它:) – troyane 2013-03-12 20:06:28

@troyane（還有@Kay）作者爲域名開了個好頭：http://whathaveyoutried.com – iamnotmaynard 2013-03-12 20:09:45

如果我說錯了請糾正我，但一個Lisp解析器就能很好地完成這件事。:P 說正經的，這看起來就是由括號括起來的字符串列表或其他括號表達式。一個簡單的遞歸解析器就足夠了，你只需要設計一個適合自己需求的數據結構來充當解析樹。我不得不承認，在晚上10點到12點之間正確地寫出一個哪怕非常簡單的解析器，也不是一件容易的事。

/* 
* lrparser.c 
* LR-parser 
* A recursive Lisp-subset parser 
* that has a misleading name (it's not an LALR, but a recursive descent one). 
* 
* Originally written to answer 
* http://stackoverflow.com/questions/15371008/string-processing-in-c/ 
* 
* Made in some *really* bored hours by Árpád Goreity (H2CO3) 
* on 12-03-2013 
* 
* Language: C99 (not sure if POSIX) 
*/ 

#include <stdlib.h> 
#include <ctype.h> 
#include <string.h> 
#include <stdio.h> 
#include <unistd.h> 
#include <assert.h> 
#include <stdarg.h> 
#include <stdbool.h> 

// Kinds of AST node; the value is stored in ParseTree.type
enum {
    NODE_STRING = 0, // leaf node carrying a string
    NODE_LIST = 1    // interior node carrying child nodes
};

// Token kinds produced by the tokenizer
enum {
    TOKEN_INVALID = -1, // unrecognized input; has no entry in toknames[]
    TOKEN_LPAREN = 0,
    TOKEN_RPAREN = 1,
    TOKEN_STRING = 2,
    TOKEN_END = 3
};

// Human-readable token names for debugging and error reporting,
// indexed by token kind (TOKEN_LPAREN .. TOKEN_END)
static const char *toknames[] = {
    [TOKEN_LPAREN] = "Left paren",
    [TOKEN_RPAREN] = "Right paren",
    [TOKEN_STRING] = "String",
    [TOKEN_END] = "End"
};

// One node of the parse tree (AST): either a string leaf or a list of child nodes
struct ParseTree {
    int type; // NODE_STRING or NODE_LIST
    char *string; // string payload; assigned by node_new() only for NODE_STRING nodes, free()'d by node_free()
    struct ParseTree **children; // array of child node pointers, grown by node_add(); NULL until the first child is added
    size_t n_children; // number of entries in children
};

/*
 * Allocate and initialize a parse-tree node.
 *
 * type: NODE_STRING or NODE_LIST.
 * ...:  for NODE_STRING, exactly one `char *` argument: a heap-allocated,
 *       NUL-terminated string. Ownership transfers to the node, and
 *       node_free() will free() it. No variadic argument is read for
 *       any other type.
 *
 * Returns a node with no children. Never returns NULL: the process is
 * aborted if the allocation fails.
 */
static struct ParseTree *node_new(int type, ...)
{
    struct ParseTree *node = malloc(sizeof *node);
    if (node == NULL) {
        // Explicit check: unlike assert(), this survives -DNDEBUG builds
        fprintf(stderr, "Error: out of memory\n");
        abort();
    }

    node->type = type;
    node->string = NULL; // list nodes carry no string; never leave the field indeterminate
    node->children = NULL;
    node->n_children = 0;

    if (type == NODE_STRING) {
        va_list args;
        va_start(args, type);
        node->string = va_arg(args, char *);
        va_end(args);
    }

    return node;
}

/*
 * Recursively release a node and everything it owns.
 *
 * NODE_STRING: frees the string payload.
 * NODE_LIST:   frees every child (recursively) and the children array.
 * Any other type only produces a warning on stderr; the node itself is
 * still freed.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void node_free(struct ParseTree *tree)
{
    if (tree == NULL) {
        return;
    }

    switch (tree->type) {
    case NODE_STRING:
        free(tree->string);
        break;
    case NODE_LIST:
        // size_t index: n_children is a size_t, so an int index would be
        // a signed/unsigned comparison and could overflow on huge lists
        for (size_t i = 0; i < tree->n_children; i++) {
            node_free(tree->children[i]);
        }
        free(tree->children);
        break;
    default:
        fprintf(stderr, "Warning: unknown node type %d\n", tree->type);
        break;
    }

    free(tree);
}

/*
 * Append `child` as the last child of `parent`.
 *
 * The children array grows by exactly one slot per call (no geometric
 * growth, for clarity), so adding n children performs n reallocations.
 * The process is aborted if the array cannot be grown.
 */
void node_add(struct ParseTree *parent, struct ParseTree *child)
{
    assert(parent != NULL);
    assert(child != NULL);

    size_t count = parent->n_children + 1;

    // Keep the old pointer until realloc() succeeds; on failure the
    // original array is still valid and must not be overwritten with NULL
    struct ParseTree **grown = realloc(parent->children, count * sizeof *grown);
    if (grown == NULL) {
        // Explicit check: unlike assert(), this survives -DNDEBUG builds
        fprintf(stderr, "Error: out of memory\n");
        abort();
    }

    grown[count - 1] = child;
    parent->children = grown;
    parent->n_children = count; // published only after the slot is filled
}

// Tokenizer/parser state lives in file-scope statics, so parsing is neither reentrant nor thread-safe
static const char *s = NULL; // read cursor into the string being parsed; set by parse(), advanced by lex()
static char *curstr = NULL; // malloc()'d text of the most recent string token, set by lex()
static int curtok; // kind of the most recent token stored by lex()

/*
 * The tokenizer: scan the next token starting at the cursor `s`.
 *
 * Skips leading whitespace, then recognizes '(', ')', a double-quoted
 * string, or the end of input. The token kind is stored in `curtok`
 * and also returned. The cursor `s` is advanced past the token.
 *
 * For TOKEN_STRING, `curstr` is set to a freshly malloc()'d,
 * NUL-terminated copy of the text between the quotes; whoever consumes
 * the token takes ownership of that buffer (lex() never frees it).
 *
 * Returns TOKEN_INVALID, without advancing the cursor, for any other
 * character and for a string literal with no closing quote.
 */
static int lex(void)
{
    // Whitespace doesn't matter.
    // The cast matters: passing a negative char to isspace() is undefined.
    while (isspace((unsigned char)s[0])) {
        s++;
    }

    switch (s[0]) {
    case '\0':
        // Store the end marker too, so the parser never acts on a stale token
        return curtok = TOKEN_END;

    case '(':
        s++;
        return curtok = TOKEN_LPAREN;

    case ')':
        s++;
        return curtok = TOKEN_RPAREN;

    case '"': {
        const char *begin = s + 1; // first character after the opening quote
        const char *end = strchr(begin, '"');
        if (end == NULL) {
            // Unterminated literal: report it instead of scanning past the NUL
            return curtok = TOKEN_INVALID;
        }

        size_t len = (size_t)(end - begin);
        char *text = malloc(len + 1);
        if (text == NULL) {
            fprintf(stderr, "Error: out of memory\n");
            abort();
        }
        memcpy(text, begin, len);
        text[len] = '\0';
        curstr = text;

        // skip trailing quotation mark (")
        s = end + 1;
        return curtok = TOKEN_STRING;
    }

    default:
        return curtok = TOKEN_INVALID;
    }
}

/*
 * Require the current token to be `tok`, then advance to the next token.
 *
 * On a mismatch, prints a diagnostic to stderr and aborts the process.
 */
void expect(int tok)
{
    if (curtok != tok) {
        // TOKEN_INVALID is -1 and has no toknames[] entry, so both
        // indices are range-checked before use
        const int n_names = (int)(sizeof toknames / sizeof toknames[0]);
        const char *want = (tok >= 0 && tok < n_names) ? toknames[tok] : "Invalid";
        const char *got = (curtok >= 0 && curtok < n_names) ? toknames[curtok] : "Invalid";

        fprintf(stderr, "Error: expected token %s, got %s\n", want, got);
        abort();
    }

    lex();
}

/*
 * Recursive-descent parse of one parenthesized list.
 *
 * Expects the current token to be '(' and consumes tokens up to and
 * including the matching ')'. Each string token becomes a NODE_STRING
 * child; each nested '(' is parsed by a recursive call and becomes a
 * NODE_LIST child.
 *
 * Returns the new NODE_LIST node. On any other token, prints a
 * diagnostic to stderr and aborts the process.
 */
static struct ParseTree *recurse_and_descend(void)
{
    expect(TOKEN_LPAREN);

    struct ParseTree *node = node_new(NODE_LIST);

    while (curtok != TOKEN_RPAREN) {
        struct ParseTree *child = NULL;

        if (curtok == TOKEN_LPAREN) {
            child = recurse_and_descend();
        } else if (curtok == TOKEN_STRING) {
            child = node_new(NODE_STRING, curstr);
            curstr = NULL; // the node owns the buffer now; don't keep an alias
            lex();
        } else {
            // TOKEN_INVALID is -1 and has no toknames[] entry, so the
            // index is range-checked before use
            const int n_names = (int)(sizeof toknames / sizeof toknames[0]);
            const char *name = (curtok >= 0 && curtok < n_names) ? toknames[curtok] : "Invalid";

            fprintf(stderr, "Unexpected token '%s'\n", name);
            // lazy programmer's safety system, let the kernel do the dirty work
            abort();
        }

        node_add(node, child);
    }

    expect(TOKEN_RPAREN);

    return node;
}

/*
 * Entry point of the parser: parse `str` as a single parenthesized list.
 *
 * Points the tokenizer cursor at `str`, reads the first token, and parses
 * the root list. Returns the root node of the tree. Malformed input makes
 * the process abort inside the parsing routines. Input remaining after the
 * root list is not checked here.
 */
static struct ParseTree *parse(const char *str)
{
    s = str;  // aim the tokenizer at the input
    lex();    // prime the first token

    struct ParseTree *root = recurse_and_descend();
    return root;
}

// Write `indent` tab characters to stdout (nothing when indent <= 0)
static void dump_indent(int indent)
{
    int remaining = indent;

    while (remaining > 0) {
        fputs("\t", stdout);
        remaining--;
    }
}

/*
 * Print a human-readable dump of `tree` to stdout, one node per line.
 *
 * Each node is preceded by `indent` tab characters; the children of a
 * list are printed one level deeper. A NULL tree prints nothing.
 */
static void dump_tree(struct ParseTree *tree, int indent)
{
    if (tree == NULL) {
        return;
    }

    dump_indent(indent);

    switch (tree->type) {
    case NODE_STRING:
        printf("<String \"%s\">\n", tree->string);
        break;
    case NODE_LIST:
        printf("<List>\n");
        // size_t index: n_children is a size_t, so an int index would be
        // a signed/unsigned comparison
        for (size_t i = 0; i < tree->n_children; i++) {
            dump_tree(tree->children[i], indent + 1);
        }
        break;
    default:
        printf("Unknown node\n");
        break;
    }
}

/*
 * Parse the expression given as the first command-line argument, dump
 * the resulting tree to stdout, and release it.
 *
 * Returns 0 on success, or EXIT_FAILURE (after printing a usage line to
 * stderr) when no expression was supplied.
 */
int main(int argc, char *argv[])
{
    // argv[1] is NULL when no argument is given; never hand that to the parser
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <expression>\n", argc > 0 ? argv[0] : "lrparser");
        return EXIT_FAILURE;
    }

    struct ParseTree *tree = parse(argv[1]);
    dump_tree(tree, 0);
    node_free(tree);

    return 0;
}

用法：

h2co3-macbook:~ h2co3$ ./lrparser "(\"do you like me?\" (\"yes\" (\"we're friends\")) (\"no\" (\"i hate you\" \"me too\")))" 
<List> 
    <String "do you like me?"> 
    <List> 
     <String "yes"> 
     <List> 
      <String "we're friends"> 
    <List> 
     <String "no"> 
     <List> 
      <String "i hate you"> 
      <String "me too">

來源

2013-03-12 20:06:03

@EdS。謝謝：D但是，嚴重的是，這看起來不像Common Lisp的一個真正的子集？ – 2013-03-12 20:10:27

它的確如此。 – 2013-03-12 20:11:04

你是對的，但是爲Lisp解決C中的一個問題是一種美麗，優雅，實用的元範圍蠕變。 – 2013-03-12 20:28:48

如果你只想要「能用的東西」而不在乎是否健壯，有很多技巧都行得通。如果你真的想讓它可靠地工作，你需要先研究一下LALR(1)解析器，然後再決定這個問題是否簡單到可以自己動手寫解析器（確實如此），還是想使用YACC之類的工具。

這個問題的上下文無關文法看起來大致如下：

QUESTION => '(' '"' TEXT '"' RESPONSES ')' 
RESPONSES => null | RESPONSE RESPONSES 
RESPONSE => '(' '"' TEXT '"' '(' '"' TEXT '"' ')' ')' 
TEXT => all characters except '(' '"' ')'

然後你需要分析，在處理上述語言的各種變體時哪些地方可能出問題。基本上，RESPONSES要麼歸結爲空，要麼歸結爲以 '(' 開頭的內容；這意味着處理到這一點時，只要查看「前瞻」字符（尚未被解析的字符）是 '(' 還是 ')'，就能區分是需要解析一個新的RESPONSE，還是QUESTION已經結束。

在某個模式內進行解析相當簡單：如果期望的字符是固定的，只需檢查它是否與預期相符，然後把已解析位置的索引向前推進。如果字符不是固定的（例如TEXT中的字符），就用一個例程檢查它是否在允許的範圍之內；任何超出預期的字符都應該讓解析器進入錯誤狀態。

來源

2013-03-12 20:27:22

雖然這當然是「正確的做法」，但我認爲它幾乎無法幫助C初學者理解字符串解析的世界：/ – 2013-03-12 20:36:48

@DR那麼，我們都必須從某個地方開始。最簡單的答案已經覆蓋了可行的黑客，但沒有提到基本要素。 – 2013-03-12 20:40:53

c中的字符串處理

回答

相關問題