我的程序的目標是讀取一個字符串,並從中取出對話框的問題和答案(C 語言中的字符串處理)。
例如: ("do you like me?" ("yes" ("we're friends")) ("no" ("i hate you")) )
該程序會取出「do you like me?」並讓你選擇 yes 或 no。一旦你做出了選擇,它就會輸出相應的「we're friends」或「i hate you」。
有沒有任何函式庫或其他解決方案可以做到這一點?
我的程序的目標是讀取一個字符串,並從中取出對話框的問題和答案(C 語言中的字符串處理)。
例如: ("do you like me?" ("yes" ("we're friends")) ("no" ("i hate you")) )
該程序會取出「do you like me?」並讓你選擇 yes 或 no。一旦你做出了選擇,它就會輸出相應的「we're friends」或「i hate you」。
有沒有任何函式庫或其他解決方案可以做到這一點?
如果我錯了請糾正我,但一個 Lisp 解析器就能很好地完成這件事。:P 說正經的,這看起來很像由字符串和其他括號表達式組成的括號列表。一個簡單的遞歸解析器就足夠了,只需要建立一個適合您需求的數據結構來保存解析樹。不過我不得不承認,在晚上10點到12點之間,即使是一個非常簡單的解析器,要把它寫對也不是一件容易的事情。
/*
* lrparser.c
* LR-parser
* A recursive Lisp-subset parser
* that has a misleading name (it's not an LALR, but a recursive descent one).
*
* Originally written to answer
* http://stackoverflow.com/questions/15371008/string-processing-in-c/
*
* Made in some *really* bored hours by Árpád Goreity (H2CO3)
* on 12-03-2013
*
* Language: C99 (not sure if POSIX)
*/
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
// Kinds of AST node: a leaf that carries a string, or a list of child nodes
enum {
    NODE_STRING = 0,
    NODE_LIST   = 1
};
// Token kinds produced by the lexer.
// The non-negative values double as indices into toknames[] below.
enum {
    TOKEN_INVALID = -1,
    TOKEN_LPAREN  = 0,
    TOKEN_RPAREN  = 1,
    TOKEN_STRING  = 2,
    TOKEN_END     = 3
};

// Human-readable token names, used for debugging and error reporting.
// TOKEN_INVALID (-1) has no entry, so it must not be used as an index.
static const char *toknames[] = {
    [TOKEN_LPAREN] = "Left paren",
    [TOKEN_RPAREN] = "Right paren",
    [TOKEN_STRING] = "String",
    [TOKEN_END]    = "End"
};
// One node of the parse tree (in other words, an AST node).
// A node is either a string leaf or a list holding an array of child nodes.
struct ParseTree {
int type; // NODE_STRING or NODE_LIST
char *string; // string contents; only assigned for NODE_STRING nodes, released by node_free()
struct ParseTree **children; // array of child pointers, grown by node_add(); NULL until a child is added
size_t n_children; // number of entries in children
};
/*
 * Allocate and initialise a new AST node of the given type.
 *
 * type: NODE_STRING or NODE_LIST.
 * ...:  for NODE_STRING, exactly one `char *` argument: a heap-allocated
 *       string whose ownership passes to the node (node_free() will
 *       free() it). For any other type no variadic argument is read.
 *
 * Returns the new node, which has no children yet.
 * Aborts the process if the allocation fails.
 */
static struct ParseTree *node_new(int type, ...)
{
    struct ParseTree *node = malloc(sizeof *node);
    if (node == NULL) {
        // Explicit check: an assert() would vanish when built with NDEBUG
        fprintf(stderr, "Error: out of memory\n");
        abort();
    }

    node->type = type;
    node->string = NULL; // never leave this field indeterminate for list nodes
    node->children = NULL;
    node->n_children = 0;

    if (type == NODE_STRING) {
        // Only string nodes carry a variadic payload
        va_list args;
        va_start(args, type);
        node->string = va_arg(args, char *);
        va_end(args);
    }
    return node;
}
/*
 * Recursively release a parse tree.
 *
 * tree: root of the (sub)tree to free; NULL is accepted and ignored,
 *       mirroring free(NULL).
 *
 * String nodes free their string; list nodes free every child and then the
 * child array. A node of unknown type produces a warning on stderr and only
 * the node itself is freed.
 */
void node_free(struct ParseTree *tree)
{
    if (tree == NULL) {
        return;
    }

    switch (tree->type) {
    case NODE_STRING:
        free(tree->string);
        break;
    case NODE_LIST:
        // size_t index: n_children is a size_t, avoid a signed/unsigned compare
        for (size_t i = 0; i < tree->n_children; i++) {
            node_free(tree->children[i]);
        }
        free(tree->children);
        break;
    default:
        fprintf(stderr, "Warning: unknown node type %d\n", tree->type);
        break;
    }
    free(tree);
}
/*
 * Append `child` to the end of `parent`'s child array.
 *
 * parent: node receiving the child; must not be NULL.
 * child:  node to append; must not be NULL. The parent keeps the pointer,
 *         and node_free() on a list parent also frees its children.
 *
 * The array grows by exactly one slot per call (the usual geometric
 * expansion is omitted for clarity). Aborts the process if the array
 * cannot be grown.
 */
void node_add(struct ParseTree *parent, struct ParseTree *child)
{
    assert(parent != NULL);
    assert(child != NULL);

    size_t new_count = parent->n_children + 1;

    // Grow into a temporary so the old array is not lost if realloc() fails,
    // and only publish the new count once the storage really exists.
    struct ParseTree **grown = realloc(parent->children, sizeof(parent->children[0]) * new_count);
    if (grown == NULL) {
        fprintf(stderr, "Error: out of memory\n");
        abort();
    }

    grown[new_count - 1] = child;
    parent->children = grown;
    parent->n_children = new_count;
}
// Parser state lives in file-scope variables (which is what breaks thread
// safety): only one parse can be in progress at a time.
static const char *s = NULL; // the string to be parsed; lex() advances this cursor as it consumes input
static char *curstr = NULL; // heap-allocated contents of the string value of the current token
static int curtok; // the current token (one of the TOKEN_* values)
/*
 * The tokenizer: read the next token from the global cursor `s`.
 *
 * Skips leading whitespace, then recognises '(', ')' and double-quoted
 * strings (no escape sequences). Every result, including TOKEN_END, is
 * stored in `curtok` and also returned.
 *
 * For TOKEN_STRING, `curstr` is set to a freshly malloc()'d copy of the
 * text between the quotes; the caller takes ownership of it.
 *
 * Returns TOKEN_INVALID for an unrecognised character (the cursor is not
 * advanced) and for a string with no closing quote (the cursor is moved to
 * the end of the input). Aborts the process if memory cannot be allocated.
 */
static int lex(void)
{
    // Whitespace doesn't matter.
    // <ctype.h> functions need an unsigned char value; a negative char is UB.
    while (isspace((unsigned char)s[0])) {
        s++;
    }

    // End of string: record it in curtok too, otherwise the parser keeps
    // seeing the previous token forever on truncated input.
    if (s[0] == '\0') {
        return curtok = TOKEN_END;
    }

    if (s[0] == '(') {
        s++;
        return curtok = TOKEN_LPAREN;
    }

    if (s[0] == ')') {
        s++;
        return curtok = TOKEN_RPAREN;
    }

    if (s[0] == '"') {
        const char *begin = s + 1;            // first character after the opening quote
        const char *end = strchr(begin, '"'); // closing quote, if there is one

        if (end == NULL) {
            // Unterminated string: stop at the NUL instead of scanning past it
            s += strlen(s);
            return curtok = TOKEN_INVALID;
        }

        size_t sz = (size_t)(end - begin);
        char *copy = malloc(sz + 1);
        if (copy == NULL) {
            fprintf(stderr, "Error: out of memory\n");
            abort();
        }
        memcpy(copy, begin, sz);
        copy[sz] = '\0';
        curstr = copy;

        // Skip the trailing quotation mark (")
        s = end + 1;
        return curtok = TOKEN_STRING;
    }

    return curtok = TOKEN_INVALID;
}
/*
 * Require the current token to be `tok`, then advance to the next token.
 *
 * tok: the TOKEN_* value the parser needs at this point.
 *
 * On a mismatch an error naming both tokens is printed to stderr and the
 * process is aborted.
 */
void expect(int tok)
{
    if (curtok != tok) {
        // toknames[] has no slot for TOKEN_INVALID (-1), so range-check
        // both indices before using them.
        const int n_names = (int)(sizeof(toknames) / sizeof(toknames[0]));
        const char *wanted = (tok >= 0 && tok < n_names) ? toknames[tok] : "Invalid";
        const char *got = (curtok >= 0 && curtok < n_names) ? toknames[curtok] : "Invalid";

        fprintf(stderr, "Error: expected token %s, got %s\n", wanted, got);
        abort();
    }
    // Store the result explicitly so an end-of-input token is never lost
    curtok = lex();
}
/*
 * a. k. a. "parse()": parse one parenthesised list starting at the current
 * token, using simple recursive descent (the root is always a list).
 *
 * Expects `curtok` to be TOKEN_LPAREN on entry and consumes everything up
 * to and including the matching TOKEN_RPAREN.
 *
 * Returns a newly allocated NODE_LIST node whose children are the strings
 * and nested lists found inside the parentheses, in order. Any other token
 * (including end of input) prints an error to stderr and aborts.
 */
static struct ParseTree *recurse_and_descend(void)
{
    expect(TOKEN_LPAREN);

    struct ParseTree *node = node_new(NODE_LIST);

    while (curtok != TOKEN_RPAREN) {
        struct ParseTree *child;

        if (curtok == TOKEN_LPAREN) {
            child = recurse_and_descend();
        } else if (curtok == TOKEN_STRING) {
            // Ownership of curstr moves into the new node
            child = node_new(NODE_STRING, curstr);
            // Store the result explicitly: if end of input went unnoticed
            // here, the loop would reuse the same curstr pointer forever.
            curtok = lex();
        } else {
            // toknames[] has no slot for TOKEN_INVALID (-1): range-check first
            const int n_names = (int)(sizeof(toknames) / sizeof(toknames[0]));
            const char *name = (curtok >= 0 && curtok < n_names) ? toknames[curtok] : "Invalid";

            fprintf(stderr, "Unexpected token '%s'\n", name);
            // lazy programmer's safety system, let the kernel do the dirty work
            abort();
        }

        node_add(node, child);
    }

    expect(TOKEN_RPAREN);
    return node;
}
static struct ParseTree *parse(const char *str)
{
s = str; // poor man's initialization
lex(); // The first breath of fresh token makes the baby's heart beat
return recurse_and_descend(); // Let's do the Harlem shake!
}
// Small helper: write `indent` tab characters to stdout
// (nothing is written when indent is zero or negative)
static void dump_indent(int indent)
{
    int remaining = indent;

    while (remaining > 0) {
        putchar('\t');
        remaining--;
    }
}
/*
 * Print a human-readable dump of a parse tree to stdout
 * (because 0x7f502a00 is not very meaningful for the human eye).
 *
 * tree:   node to print; NULL is accepted and prints nothing.
 * indent: nesting depth; each level is rendered as one tab.
 *
 * String nodes print as <String "...">, list nodes as <List> followed by
 * their children one level deeper.
 */
static void dump_tree(struct ParseTree *tree, int indent)
{
    if (tree == NULL) {
        return;
    }

    dump_indent(indent);

    switch (tree->type) {
    case NODE_STRING:
        printf("<String \"%s\">\n", tree->string);
        break;
    case NODE_LIST:
        printf("<List>\n");
        // size_t index: n_children is a size_t, avoid a signed/unsigned compare
        for (size_t i = 0; i < tree->n_children; i++) {
            dump_tree(tree->children[i], indent + 1);
        }
        break;
    default:
        printf("Unknown node\n");
        break;
    }
}
/*
 * Entry point: parse the expression given as the first command-line
 * argument and print its parse tree to stdout.
 *
 * Returns 0 on success, or EXIT_FAILURE with a usage message on stderr
 * when no expression was supplied.
 */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        // Without this check argv[1] is NULL and the lexer dereferences it
        fprintf(stderr, "Usage: %s \"(expression)\"\n", argc > 0 ? argv[0] : "lrparser");
        return EXIT_FAILURE;
    }

    struct ParseTree *tree = parse(argv[1]);
    dump_tree(tree, 0);
    node_free(tree);
    return 0;
}
用法:
h2co3-macbook:~ h2co3$ ./lrparser "(\"do you like me?\" (\"yes\" (\"we're friends\")) (\"no\" (\"i hate you\" \"me too\")))"
<List>
	<String "do you like me?">
	<List>
		<String "yes">
		<List>
			<String "we're friends">
	<List>
		<String "no">
		<List>
			<String "i hate you">
			<String "me too">
@EdS。謝謝:D但是,嚴重的是,這看起來不像Common Lisp的一個真正的子集? – 2013-03-12 20:10:27
它的確如此。 – 2013-03-12 20:11:04
你是對的,但是爲Lisp解決C中的一個問題是一種美麗,優雅,實用的元範圍蠕變。 – 2013-03-12 20:28:48
如果你只想要「能用就行」但並不健壯的東西,很多技巧都行得通。如果你真的想讓它可靠地工作,你需要研究一下 LALR(1) 解析器(parsers),然後決定這個語法是否簡單到可以自己動手寫解析器(確實如此),還是想使用 YACC 之類的工具。
這個輸入的上下文無關文法看起來大致如下:
QUESTION => '(' '"' TEXT '"' RESPONSES ')'
RESPONSES => null | RESPONSE RESPONSES
RESPONSE => '(' '"' TEXT '"' '(' '"' TEXT '"' ')' ')'
TEXT => all characters except '(' '"' ')'
接下來分析這個文法,看看處理上述各條規則的組合時需要做哪些調整。基本上,RESPONSES 要麼歸約爲空,要麼是以 '(' 開頭的內容;這意味着在處理到這一點時,只要查看前瞻字符(尚未被解析的下一個字符)是 '(' 還是 ')',就能區分是需要解析一個新的 RESPONSE,還是 QUESTION 已經結束。
規則內部的解析非常簡單:如果字符是固定的,只需檢查它是否符合預期,然後把解析位置的索引向前推進。如果字符不是固定的(就像 TEXT 中那樣),就用一個例程檢查它是否在允許的範圍內;任何超出預期的輸入都應該將解析器置於錯誤狀態。
雖然這當然是「正確的做法」,但我認爲它幾乎無法幫助 C 初學者理解字符串解析的世界 :/ – 2013-03-12 20:36:48
@DR那麼,我們都必須從某個地方開始。最簡單的答案已經覆蓋了可行的黑客,但沒有提到基本要素。 – 2013-03-12 20:40:53
您嘗試了什麼?http://mattgemmell.com/2008/12/08/what-have-you-tried/ – kay 2013-03-12 20:04:38
凱,你有漂亮的網址!我會保存它:) – troyane 2013-03-12 20:06:28
@troyane(還有@Kay)作者爲域名開了個好頭:http://whathaveyoutried.com – iamnotmaynard 2013-03-12 20:09:45