i3
|
00001 /* 00002 * vim:ts=4:sw=4:expandtab 00003 * 00004 * i3 - an improved dynamic tiling window manager 00005 * © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE) 00006 * 00007 * commands_parser.c: hand-written parser to parse commands (commands are what 00008 * you bind on keys and what you can send to i3 using the IPC interface, like 00009 * 'move left' or 'workspace 4'). 00010 * 00011 * We use a hand-written parser instead of lex/yacc because our commands are 00012 * easy for humans, not for computers. Thus, it’s quite hard to specify a 00013 * context-free grammar for the commands. A PEG grammar would be easier, but 00014 * there’s downsides to every PEG parser generator I have come accross so far. 00015 * 00016 * This parser is basically a state machine which looks for literals or strings 00017 * and can push either on a stack. After identifying a literal or string, it 00018 * will either transition to the current state, to a different state, or call a 00019 * function (like cmd_move()). 00020 * 00021 * Special care has been taken that error messages are useful and the code is 00022 * well testable (when compiled with -DTEST_PARSER it will output to stdout 00023 * instead of actually calling any function). 00024 * 00025 */ 00026 #include <stdio.h> 00027 #include <stdlib.h> 00028 #include <string.h> 00029 #include <unistd.h> 00030 #include <stdbool.h> 00031 #include <stdint.h> 00032 00033 #include "all.h" 00034 00035 /******************************************************************************* 00036 * The data structures used for parsing. Essentially the current state and a 00037 * list of tokens for that state. 00038 * 00039 * The GENERATED_* files are generated by generate-commands-parser.pl with the 00040 * input parser-specs/commands.spec. 00041 ******************************************************************************/ 00042 00043 #include "GENERATED_enums.h" 00044 00045 typedef struct token { 00046 char *name; 00047 char *identifier; 00048 /* This might be __CALL */ 00049 cmdp_state next_state; 00050 union { 00051 uint16_t call_identifier; 00052 } extra; 00053 } cmdp_token; 00054 00055 typedef struct tokenptr { 00056 cmdp_token *array; 00057 int n; 00058 } cmdp_token_ptr; 00059 00060 #include "GENERATED_tokens.h" 00061 00062 /******************************************************************************* 00063 * The (small) stack where identified literals are stored during the parsing 00064 * of a single command (like $workspace). 00065 ******************************************************************************/ 00066 00067 struct stack_entry { 00068 /* Just a pointer, not dynamically allocated. */ 00069 const char *identifier; 00070 char *str; 00071 }; 00072 00073 /* 10 entries should be enough for everybody. */ 00074 static struct stack_entry stack[10]; 00075 00076 /* 00077 * Pushes a string (identified by 'identifier') on the stack. We simply use a 00078 * single array, since the number of entries we have to store is very small. 00079 * 00080 */ 00081 static void push_string(const char *identifier, char *str) { 00082 for (int c = 0; c < 10; c++) { 00083 if (stack[c].identifier != NULL) 00084 continue; 00085 /* Found a free slot, let’s store it here. */ 00086 stack[c].identifier = identifier; 00087 stack[c].str = str; 00088 return; 00089 } 00090 00091 /* When we arrive here, the stack is full. This should not happen and 00092 * means there’s either a bug in this parser or the specification 00093 * contains a command with more than 10 identified tokens. */ 00094 fprintf(stderr, "BUG: commands_parser stack full. This means either a bug " 00095 "in the code, or a new command which contains more than " 00096 "10 identified tokens.\n"); 00097 exit(1); 00098 } 00099 00100 // XXX: ideally, this would be const char. need to check if that works with all 00101 // called functions. 00102 static char *get_string(const char *identifier) { 00103 DLOG("Getting string %s from stack...\n", identifier); 00104 for (int c = 0; c < 10; c++) { 00105 if (stack[c].identifier == NULL) 00106 break; 00107 if (strcmp(identifier, stack[c].identifier) == 0) 00108 return stack[c].str; 00109 } 00110 return NULL; 00111 } 00112 00113 static void clear_stack(void) { 00114 DLOG("clearing stack.\n"); 00115 for (int c = 0; c < 10; c++) { 00116 if (stack[c].str != NULL) 00117 free(stack[c].str); 00118 stack[c].identifier = NULL; 00119 stack[c].str = NULL; 00120 } 00121 } 00122 00123 // TODO: remove this if it turns out we don’t need it for testing. 00124 #if 0 00125 /******************************************************************************* 00126 * A dynamically growing linked list which holds the criteria for the current 00127 * command. 00128 ******************************************************************************/ 00129 00130 typedef struct criterion { 00131 char *type; 00132 char *value; 00133 00134 TAILQ_ENTRY(criterion) criteria; 00135 } criterion; 00136 00137 static TAILQ_HEAD(criteria_head, criterion) criteria = 00138 TAILQ_HEAD_INITIALIZER(criteria); 00139 00140 /* 00141 * Stores the given type/value in the list of criteria. 00142 * Accepts a pointer as first argument, since it is 'call'ed by the parser. 00143 * 00144 */ 00145 static void push_criterion(void *unused_criteria, const char *type, 00146 const char *value) { 00147 struct criterion *criterion = malloc(sizeof(struct criterion)); 00148 criterion->type = strdup(type); 00149 criterion->value = strdup(value); 00150 TAILQ_INSERT_TAIL(&criteria, criterion, criteria); 00151 } 00152 00153 /* 00154 * Clears the criteria linked list. 00155 * Accepts a pointer as first argument, since it is 'call'ed by the parser. 00156 * 00157 */ 00158 static void clear_criteria(void *unused_criteria) { 00159 struct criterion *criterion; 00160 while (!TAILQ_EMPTY(&criteria)) { 00161 criterion = TAILQ_FIRST(&criteria); 00162 free(criterion->type); 00163 free(criterion->value); 00164 TAILQ_REMOVE(&criteria, criterion, criteria); 00165 free(criterion); 00166 } 00167 } 00168 #endif 00169 00170 /******************************************************************************* 00171 * The parser itself. 00172 ******************************************************************************/ 00173 00174 static cmdp_state state; 00175 #ifndef TEST_PARSER 00176 static Match current_match; 00177 #endif 00178 static struct CommandResult subcommand_output; 00179 static struct CommandResult command_output; 00180 00181 #include "GENERATED_call.h" 00182 00183 00184 static void next_state(const cmdp_token *token) { 00185 if (token->next_state == __CALL) { 00186 DLOG("should call stuff, yay. call_id = %d\n", 00187 token->extra.call_identifier); 00188 subcommand_output.json_output = NULL; 00189 subcommand_output.needs_tree_render = false; 00190 GENERATED_call(token->extra.call_identifier, &subcommand_output); 00191 if (subcommand_output.json_output) { 00192 DLOG("Subcommand JSON output: %s\n", subcommand_output.json_output); 00193 char *buffer; 00194 /* In the beginning, the contents of json_output are "[\0". */ 00195 if (command_output.json_output[1] == '\0') 00196 sasprintf(&buffer, "%s%s", command_output.json_output, subcommand_output.json_output); 00197 else sasprintf(&buffer, "%s, %s", command_output.json_output, subcommand_output.json_output); 00198 free(command_output.json_output); 00199 command_output.json_output = buffer; 00200 DLOG("merged command JSON output: %s\n", command_output.json_output); 00201 } 00202 /* If any subcommand requires a tree_render(), we need to make the 00203 * whole parser result request a tree_render(). */ 00204 if (subcommand_output.needs_tree_render) 00205 command_output.needs_tree_render = true; 00206 clear_stack(); 00207 return; 00208 } 00209 00210 state = token->next_state; 00211 if (state == INITIAL) { 00212 clear_stack(); 00213 } 00214 } 00215 00216 /* TODO: Return parsing errors via JSON. */ 00217 struct CommandResult *parse_command(const char *input) { 00218 DLOG("new parser handling: %s\n", input); 00219 state = INITIAL; 00220 command_output.json_output = sstrdup("["); 00221 command_output.needs_tree_render = false; 00222 00223 const char *walk = input; 00224 const size_t len = strlen(input); 00225 int c; 00226 const cmdp_token *token; 00227 bool token_handled; 00228 00229 // TODO: make this testable 00230 #ifndef TEST_PARSER 00231 cmd_criteria_init(¤t_match, &subcommand_output); 00232 #endif 00233 00234 /* The "<=" operator is intentional: We also handle the terminating 0-byte 00235 * explicitly by looking for an 'end' token. */ 00236 while ((walk - input) <= len) { 00237 /* skip whitespace and newlines before every token */ 00238 while ((*walk == ' ' || *walk == '\t' || 00239 *walk == '\r' || *walk == '\n') && *walk != '\0') 00240 walk++; 00241 00242 DLOG("remaining input = %s\n", walk); 00243 00244 cmdp_token_ptr *ptr = &(tokens[state]); 00245 token_handled = false; 00246 for (c = 0; c < ptr->n; c++) { 00247 token = &(ptr->array[c]); 00248 DLOG("trying token %d = %s\n", c, token->name); 00249 00250 /* A literal. */ 00251 if (token->name[0] == '\'') { 00252 DLOG("literal\n"); 00253 if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) { 00254 DLOG("found literal, moving to next state\n"); 00255 if (token->identifier != NULL) 00256 push_string(token->identifier, sstrdup(token->name + 1)); 00257 walk += strlen(token->name) - 1; 00258 next_state(token); 00259 token_handled = true; 00260 break; 00261 } 00262 continue; 00263 } 00264 00265 if (strcmp(token->name, "string") == 0 || 00266 strcmp(token->name, "word") == 0) { 00267 DLOG("parsing this as a string\n"); 00268 const char *beginning = walk; 00269 /* Handle quoted strings (or words). */ 00270 if (*walk == '"') { 00271 beginning++; 00272 walk++; 00273 while (*walk != '\0' && (*walk != '"' || *(walk-1) == '\\')) 00274 walk++; 00275 } else { 00276 if (token->name[0] == 's') { 00277 /* For a string (starting with 's'), the delimiters are 00278 * comma (,) and semicolon (;) which introduce a new 00279 * operation or command, respectively. Also, newlines 00280 * end a command. */ 00281 while (*walk != ';' && *walk != ',' && 00282 *walk != '\0' && *walk != '\r' && 00283 *walk != '\n') 00284 walk++; 00285 } else { 00286 /* For a word, the delimiters are white space (' ' or 00287 * '\t'), closing square bracket (]), comma (,) and 00288 * semicolon (;). */ 00289 while (*walk != ' ' && *walk != '\t' && 00290 *walk != ']' && *walk != ',' && 00291 *walk != ';' && *walk != '\r' && 00292 *walk != '\n' && *walk != '\0') 00293 walk++; 00294 } 00295 } 00296 if (walk != beginning) { 00297 char *str = scalloc(walk-beginning + 1); 00298 /* We copy manually to handle escaping of characters. */ 00299 int inpos, outpos; 00300 for (inpos = 0, outpos = 0; 00301 inpos < (walk-beginning); 00302 inpos++, outpos++) { 00303 /* We only handle escaped double quotes to not break 00304 * backwards compatibility with people using \w in 00305 * regular expressions etc. */ 00306 if (beginning[inpos] == '\\' && beginning[inpos+1] == '"') 00307 inpos++; 00308 str[outpos] = beginning[inpos]; 00309 } 00310 if (token->identifier) 00311 push_string(token->identifier, str); 00312 DLOG("str is \"%s\"\n", str); 00313 /* If we are at the end of a quoted string, skip the ending 00314 * double quote. */ 00315 if (*walk == '"') 00316 walk++; 00317 next_state(token); 00318 token_handled = true; 00319 break; 00320 } 00321 } 00322 00323 if (strcmp(token->name, "end") == 0) { 00324 DLOG("checking for the end token.\n"); 00325 if (*walk == '\0' || *walk == ',' || *walk == ';') { 00326 DLOG("yes, indeed. end\n"); 00327 next_state(token); 00328 token_handled = true; 00329 /* To make sure we start with an appropriate matching 00330 * datastructure for commands which do *not* specify any 00331 * criteria, we re-initialize the criteria system after 00332 * every command. */ 00333 // TODO: make this testable 00334 #ifndef TEST_PARSER 00335 if (*walk == '\0' || *walk == ';') 00336 cmd_criteria_init(¤t_match, &subcommand_output); 00337 #endif 00338 walk++; 00339 break; 00340 } 00341 } 00342 } 00343 00344 if (!token_handled) { 00345 /* Figure out how much memory we will need to fill in the names of 00346 * all tokens afterwards. */ 00347 int tokenlen = 0; 00348 for (c = 0; c < ptr->n; c++) 00349 tokenlen += strlen(ptr->array[c].name) + strlen("'', "); 00350 00351 /* Build up a decent error message. We include the problem, the 00352 * full input, and underline the position where the parser 00353 * currently is. */ 00354 char *errormessage; 00355 char *possible_tokens = smalloc(tokenlen + 1); 00356 char *tokenwalk = possible_tokens; 00357 for (c = 0; c < ptr->n; c++) { 00358 token = &(ptr->array[c]); 00359 if (token->name[0] == '\'') { 00360 /* A literal is copied to the error message enclosed with 00361 * single quotes. */ 00362 *tokenwalk++ = '\''; 00363 strcpy(tokenwalk, token->name + 1); 00364 tokenwalk += strlen(token->name + 1); 00365 *tokenwalk++ = '\''; 00366 } else { 00367 /* Any other token is copied to the error message enclosed 00368 * with angle brackets. */ 00369 *tokenwalk++ = '<'; 00370 strcpy(tokenwalk, token->name); 00371 tokenwalk += strlen(token->name); 00372 *tokenwalk++ = '>'; 00373 } 00374 if (c < (ptr->n - 1)) { 00375 *tokenwalk++ = ','; 00376 *tokenwalk++ = ' '; 00377 } 00378 } 00379 *tokenwalk = '\0'; 00380 sasprintf(&errormessage, "Expected one of these tokens: %s", 00381 possible_tokens); 00382 free(possible_tokens); 00383 00384 /* Contains the same amount of characters as 'input' has, but with 00385 * the unparseable part highlighted using ^ characters. */ 00386 char *position = smalloc(len + 1); 00387 for (const char *copywalk = input; *copywalk != '\0'; copywalk++) 00388 position[(copywalk - input)] = (copywalk >= walk ? '^' : ' '); 00389 position[len] = '\0'; 00390 00391 printf("%s\n", errormessage); 00392 printf("Your command: %s\n", input); 00393 printf(" %s\n", position); 00394 00395 free(position); 00396 free(errormessage); 00397 clear_stack(); 00398 break; 00399 } 00400 } 00401 00402 char *buffer; 00403 sasprintf(&buffer, "%s]", command_output.json_output); 00404 free(command_output.json_output); 00405 command_output.json_output = buffer; 00406 DLOG("command_output.json_output = %s\n", command_output.json_output); 00407 DLOG("command_output.needs_tree_render = %d\n", command_output.needs_tree_render); 00408 return &command_output; 00409 } 00410 00411 /******************************************************************************* 00412 * Code for building the stand-alone binary test.commands_parser which is used 00413 * by t/187-commands-parser.t. 00414 ******************************************************************************/ 00415 00416 #ifdef TEST_PARSER 00417 00418 /* 00419 * Logs the given message to stdout while prefixing the current time to it, 00420 * but only if the corresponding debug loglevel was activated. 00421 * This is to be called by DLOG() which includes filename/linenumber 00422 * 00423 */ 00424 void debuglog(uint64_t lev, char *fmt, ...) { 00425 va_list args; 00426 00427 va_start(args, fmt); 00428 fprintf(stderr, "# "); 00429 vfprintf(stderr, fmt, args); 00430 va_end(args); 00431 } 00432 00433 int main(int argc, char *argv[]) { 00434 if (argc < 2) { 00435 fprintf(stderr, "Syntax: %s <command>\n", argv[0]); 00436 return 1; 00437 } 00438 parse_command(argv[1]); 00439 } 00440 #endif