00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085 #include "soarkernel.h"
00086 #include <ctype.h>
00087 #include <errno.h>
00088 #include <string.h>
00089 #include <stdlib.h>
00090
00091 bool constituent_char[256];
00092 bool whitespace[256];
00093 bool number_starters[256];
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106 void start_lex_from_file(char *filename, FILE * already_opened_file)
00107 {
00108 lexer_source_file *lsf;
00109
00110 lsf = allocate_memory(sizeof(lexer_source_file), MISCELLANEOUS_MEM_USAGE);
00111 lsf->saved_lexeme = current_agent(lexeme);
00112 lsf->saved_current_char = current_agent(current_char);
00113 lsf->parent_file = current_agent(current_file);
00114 current_agent(current_file) = lsf;
00115 lsf->filename = make_memory_block_for_string(filename);
00116 lsf->file = already_opened_file;
00117 lsf->fake_rparen_at_eol = FALSE;
00118 lsf->allow_ids = TRUE;
00119 lsf->parentheses_level = 0;
00120 lsf->column_of_start_of_last_lexeme = 0;
00121 lsf->line_of_start_of_last_lexeme = 0;
00122 lsf->current_line = 0;
00123 lsf->current_column = 0;
00124 lsf->buffer[0] = 0;
00125 current_agent(current_char) = ' ';
00126 }
00127
00128 void stop_lex_from_file(void)
00129 {
00130 lexer_source_file *lsf;
00131
00132 if (reading_from_top_level()) {
00133 print("Internal error: tried to stop_lex_from_file at top level\n");
00134 return;
00135 }
00136 lsf = current_agent(current_file);
00137 current_agent(current_file) = current_agent(current_file)->parent_file;
00138 current_agent(current_char) = lsf->saved_current_char;
00139 current_agent(lexeme) = lsf->saved_lexeme;
00140
00141 free_memory_block_for_string(lsf->filename);
00142 free_memory(lsf, MISCELLANEOUS_MEM_USAGE);
00143 }
00144
00145
00146
00147
00148
00149
00150
00151
00152 void get_next_char(void)
00153 {
00154 char *s;
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 if (current_agent(alternate_input_exit) &&
00167 (current_agent(alternate_input_string) == NULL) && (current_agent(alternate_input_suffix) == NULL)) {
00168 current_agent(current_char) = EOF_AS_CHAR;
00169 control_c_handler(0);
00170 return;
00171 }
00172
00173 if (current_agent(alternate_input_string) != NULL) {
00174 current_agent(current_char) = *current_agent(alternate_input_string)++;
00175
00176 if (current_agent(current_char) == '\0') {
00177 current_agent(alternate_input_string) = NIL;
00178 current_agent(current_char) = *current_agent(alternate_input_suffix)++;
00179 }
00180 } else if (current_agent(alternate_input_suffix) != NULL) {
00181 current_agent(current_char) = *current_agent(alternate_input_suffix)++;
00182
00183 if (current_agent(current_char) == '\0') {
00184 current_agent(alternate_input_suffix) = NIL;
00185
00186
00187 if (current_agent(alternate_input_exit)) {
00188 current_agent(current_char) = EOF_AS_CHAR;
00189 control_c_handler(0);
00190 return;
00191 }
00192
00193 current_agent(current_char) = current_agent(current_file)->buffer
00194 [current_agent(current_file)->current_column++];
00195 }
00196 } else {
00197 current_agent(current_char) = current_agent(current_file)->buffer
00198 [current_agent(current_file)->current_column++];
00199 }
00200
00201 if (current_agent(current_char))
00202 return;
00203
00204 if ((current_agent(current_file)->current_column == BUFSIZE) &&
00205 (current_agent(current_file)->buffer[BUFSIZE - 2] != '\n') &&
00206 (current_agent(current_file)->buffer[BUFSIZE - 2] != EOF_AS_CHAR)) {
00207 char msg[512];
00208 snprintf(msg, MESSAGE_SIZE,
00209 "lexer.c: Error: line too long (max allowed is %d chars)\nFile %s, line %lu\n",
00210 MAX_LEXER_LINE_LENGTH, current_agent(current_file)->filename,
00211 current_agent(current_file)->current_line);
00212 msg[MESSAGE_SIZE - 1] = 0;
00213 abort_with_fatal_error(msg);
00214 }
00215
00216 s = fgets(current_agent(current_file)->buffer, BUFSIZE, current_agent(current_file)->file);
00217
00218 if (s) {
00219 current_agent(current_file)->current_line++;
00220 if (reading_from_top_level()) {
00221 tell_printer_that_output_column_has_been_reset();
00222 if (current_agent(logging_to_file))
00223 print_string_to_log_file_only(current_agent(current_file)->buffer);
00224 }
00225 } else {
00226
00227 if (!feof(current_agent(current_file)->file)) {
00228 if (reading_from_top_level()) {
00229
00230 control_c_handler(0);
00231
00232 return;
00233 } else {
00234 print("I/O error while reading file %s; ignoring the rest of it.\n",
00235 current_agent(current_file)->filename);
00236 }
00237 }
00238 current_agent(current_file)->buffer[0] = EOF_AS_CHAR;
00239 current_agent(current_file)->buffer[1] = 0;
00240 }
00241 current_agent(current_char) = current_agent(current_file)->buffer[0];
00242 current_agent(current_file)->current_column = 1;
00243 }
00244
00245
00246
00247
00248
00249
00250
/*
 * Remembers the line and (zero-based) column at which the lexeme about to be
 * read starts.  current_column has already been advanced past the current
 * character, hence the "- 1".
 * Wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define record_position_of_start_of_lexeme() do { \
    current_agent(current_file)->column_of_start_of_last_lexeme = \
        current_agent(current_file)->current_column - 1; \
    current_agent(current_file)->line_of_start_of_last_lexeme = \
        current_agent(current_file)->current_line; \
} while (0)
00256
00257
00258
00259
00260
00261
00262
00263
/*
 * store_and_advance(): appends the current character to the lexeme text and
 * reads the next character.  No bounds check is made on lexeme.length here.
 * finish(): NUL-terminates the lexeme text at its current length.
 * Both are wrapped in do { } while (0) so they behave as single statements.
 */
#define store_and_advance() do { \
    current_agent(lexeme).string[current_agent(lexeme).length++] = \
        (char)current_agent(current_char); \
    get_next_char(); \
} while (0)

#define finish() do { \
    current_agent(lexeme).string[current_agent(lexeme).length] = 0; \
} while (0)
00270
00271 void read_constituent_string(void)
00272 {
00273 #ifdef __SC__
00274 char *buf;
00275 int i, len;
00276 #endif
00277
00278 while ((current_agent(current_char) != EOF_AS_CHAR) &&
00279 constituent_char[(unsigned char) current_agent(current_char)])
00280 store_and_advance();
00281 finish();
00282 }
00283
00284 void read_rest_of_floating_point_number(void)
00285 {
00286
00287 store_and_advance();
00288 while (isdigit(current_agent(current_char)))
00289 store_and_advance();
00290 if ((current_agent(current_char) == 'e') || (current_agent(current_char) == 'E')) {
00291 store_and_advance();
00292 if ((current_agent(current_char) == '+') || (current_agent(current_char) == '-'))
00293 store_and_advance();
00294 while (isdigit(current_agent(current_char)))
00295 store_and_advance();
00296 }
00297 finish();
00298
00299 #ifdef __SC__
00300 if (strcmp("soar>", current_agent(lexeme).string)) {
00301 if (!(strncmp("soar>", current_agent(lexeme).string, 5))) {
00302
00303 buf =
00304 (char *) allocate_memory((len = (strlen(current_agent(lexeme).string) + 1)) * sizeof(char),
00305 STRING_MEM_USAGE);
00306 for (i = 0; i <= len; i++) {
00307 buf[i] = current_agent(lexeme).string[i];
00308 }
00309 for (i = 5; i <= len; i++) {
00310 current_agent(lexeme).string[i - 5] = buf[i];
00311 }
00312 free_memory_block_for_string(buf);
00313 }
00314 }
00315 #endif
00316 }
00317
00318 void determine_type_of_constituent_string(void)
00319 {
00320 bool possible_id, possible_var, possible_sc, possible_ic, possible_fc;
00321 bool rereadable;
00322
00323 determine_possible_symbol_types_for_string(current_agent(lexeme).string,
00324 current_agent(lexeme).length,
00325 &possible_id,
00326 &possible_var, &possible_sc, &possible_ic, &possible_fc, &rereadable);
00327
00328
00329 if (possible_var) {
00330 current_agent(lexeme).type = VARIABLE_LEXEME;
00331 return;
00332 }
00333
00334
00335 if (possible_ic) {
00336 errno = 0;
00337 current_agent(lexeme).type = INT_CONSTANT_LEXEME;
00338 current_agent(lexeme).int_val = strtol(current_agent(lexeme).string, NULL, 10);
00339 if (errno) {
00340 print("Error: bad integer (probably too large)\n");
00341 print_location_of_most_recent_lexeme();
00342 current_agent(lexeme).int_val = 0;
00343 }
00344 return;
00345 }
00346
00347
00348 if (possible_fc) {
00349 errno = 0;
00350 current_agent(lexeme).type = FLOAT_CONSTANT_LEXEME;
00351
00352 current_agent(lexeme).float_val = (float) strtod(current_agent(lexeme).string, NULL);
00353 if (errno) {
00354 print("Error: bad floating point number\n");
00355 print_location_of_most_recent_lexeme();
00356 current_agent(lexeme).float_val = 0.0;
00357 }
00358 return;
00359 }
00360
00361
00362 if (current_agent(current_file)->allow_ids && possible_id) {
00363 current_agent(lexeme).id_letter = (char) toupper(current_agent(lexeme).string[0]);
00364 errno = 0;
00365 current_agent(lexeme).type = IDENTIFIER_LEXEME;
00366 current_agent(lexeme).id_number = strtoul(&(current_agent(lexeme).string[1]), NULL, 10);
00367 if (errno) {
00368 print("Error: bad number for identifier (probably too large)\n");
00369 print_location_of_most_recent_lexeme();
00370 current_agent(lexeme).id_number = 0;
00371 }
00372 return;
00373 }
00374
00375
00376 if (possible_sc) {
00377 current_agent(lexeme).type = SYM_CONSTANT_LEXEME;
00378 if (current_agent(sysparams)[PRINT_WARNINGS_SYSPARAM]) {
00379 if (current_agent(lexeme).string[0] == '<') {
00380 if (current_agent(lexeme).string[1] == '<') {
00381 print("Warning: Possible disjunctive encountered in reading symbolic constant\n");
00382 print(" If a disjunctive was intended, add a space after <<\n");
00383 print(" If a constant was intended, surround constant with vertical bars\n");
00384 print_location_of_most_recent_lexeme();
00385 } else {
00386 print("Warning: Possible variable encountered in reading symbolic constant\n");
00387 print(" If a constant was intended, surround constant with vertical bars\n");
00388 print_location_of_most_recent_lexeme();
00389 }
00390 } else {
00391 if (current_agent(lexeme).string[current_agent(lexeme).length - 1] == '>') {
00392 if (current_agent(lexeme).string[current_agent(lexeme).length - 2] == '>') {
00393 print("Warning: Possible disjunctive encountered in reading symbolic constant\n");
00394 print(" If a disjunctive was intended, add a space before >>\n");
00395 print(" If a constant was intended, surround constant with vertical bars\n");
00396 print_location_of_most_recent_lexeme();
00397 } else {
00398 print("Warning: Possible variable encountered in reading symbolic constant\n");
00399 print(" If a constant was intended, surround constant with vertical bars\n");
00400 print_location_of_most_recent_lexeme();
00401 }
00402 }
00403 }
00404 }
00405 return;
00406 }
00407
00408
00409
00410
00411
00412
00413
00414
00415 current_agent(lexeme).type = QUOTED_STRING_LEXEME;
00416
00417
00418
00419
00420
00421
00422 }
00423
00424 void do_fake_rparen(void)
00425 {
00426 record_position_of_start_of_lexeme();
00427 current_agent(lexeme).type = R_PAREN_LEXEME;
00428 current_agent(lexeme).length = 1;
00429 current_agent(lexeme).string[0] = ')';
00430 current_agent(lexeme).string[1] = 0;
00431 if (current_agent(current_file)->parentheses_level > 0)
00432 current_agent(current_file)->parentheses_level--;
00433 current_agent(current_file)->fake_rparen_at_eol = FALSE;
00434 }
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444
/*
 * Dispatch table used by get_lexeme(): one lexing routine per unsigned
 * character code.  Filled in by init_lexer().
 */
void (*lexer_routines[256])(void);
00446
00447 void lex_eof(void)
00448 {
00449 if (current_agent(current_file)->fake_rparen_at_eol) {
00450 do_fake_rparen();
00451 return;
00452 }
00453 store_and_advance();
00454 finish();
00455 current_agent(lexeme).type = EOF_LEXEME;
00456 }
00457
00458 void lex_at(void)
00459 {
00460 store_and_advance();
00461 finish();
00462 current_agent(lexeme).type = AT_LEXEME;
00463 }
00464
00465 void lex_tilde(void)
00466 {
00467 store_and_advance();
00468 finish();
00469 current_agent(lexeme).type = TILDE_LEXEME;
00470 }
00471
00472 void lex_up_arrow(void)
00473 {
00474 store_and_advance();
00475 finish();
00476 current_agent(lexeme).type = UP_ARROW_LEXEME;
00477 }
00478
00479 void lex_lbrace(void)
00480 {
00481 store_and_advance();
00482 finish();
00483 current_agent(lexeme).type = L_BRACE_LEXEME;
00484 }
00485
00486 void lex_rbrace(void)
00487 {
00488 store_and_advance();
00489 finish();
00490 current_agent(lexeme).type = R_BRACE_LEXEME;
00491 }
00492
00493 void lex_exclamation_point(void)
00494 {
00495 store_and_advance();
00496 finish();
00497 current_agent(lexeme).type = EXCLAMATION_POINT_LEXEME;
00498 }
00499
00500 void lex_comma(void)
00501 {
00502 store_and_advance();
00503 finish();
00504 current_agent(lexeme).type = COMMA_LEXEME;
00505 }
00506
00507 void lex_equal(void)
00508 {
00509
00510
00511
00512 read_constituent_string();
00513 if (current_agent(lexeme).length == 1) {
00514 current_agent(lexeme).type = EQUAL_LEXEME;
00515 return;
00516 }
00517 determine_type_of_constituent_string();
00518 }
00519
00520 void lex_ampersand(void)
00521 {
00522
00523
00524
00525 read_constituent_string();
00526 if (current_agent(lexeme).length == 1) {
00527 current_agent(lexeme).type = AMPERSAND_LEXEME;
00528 return;
00529 }
00530 determine_type_of_constituent_string();
00531 }
00532
00533 void lex_lparen(void)
00534 {
00535 store_and_advance();
00536 finish();
00537 current_agent(lexeme).type = L_PAREN_LEXEME;
00538 current_agent(current_file)->parentheses_level++;
00539 }
00540
00541 void lex_rparen(void)
00542 {
00543 store_and_advance();
00544 finish();
00545 current_agent(lexeme).type = R_PAREN_LEXEME;
00546 if (current_agent(current_file)->parentheses_level > 0)
00547 current_agent(current_file)->parentheses_level--;
00548 }
00549
00550 void lex_greater(void)
00551 {
00552
00553
00554
00555 read_constituent_string();
00556 if (current_agent(lexeme).length == 1) {
00557 current_agent(lexeme).type = GREATER_LEXEME;
00558 return;
00559 }
00560 if (current_agent(lexeme).length == 2) {
00561 if (current_agent(lexeme).string[1] == '>') {
00562 current_agent(lexeme).type = GREATER_GREATER_LEXEME;
00563 return;
00564 }
00565 if (current_agent(lexeme).string[1] == '=') {
00566 current_agent(lexeme).type = GREATER_EQUAL_LEXEME;
00567 return;
00568 }
00569 }
00570 determine_type_of_constituent_string();
00571 }
00572
00573 void lex_less(void)
00574 {
00575
00576
00577
00578 read_constituent_string();
00579 if (current_agent(lexeme).length == 1) {
00580 current_agent(lexeme).type = LESS_LEXEME;
00581 return;
00582 }
00583 if (current_agent(lexeme).length == 2) {
00584 if (current_agent(lexeme).string[1] == '>') {
00585 current_agent(lexeme).type = NOT_EQUAL_LEXEME;
00586 return;
00587 }
00588 if (current_agent(lexeme).string[1] == '=') {
00589 current_agent(lexeme).type = LESS_EQUAL_LEXEME;
00590 return;
00591 }
00592 if (current_agent(lexeme).string[1] == '<') {
00593 current_agent(lexeme).type = LESS_LESS_LEXEME;
00594 return;
00595 }
00596 }
00597 if (current_agent(lexeme).length == 3) {
00598 if ((current_agent(lexeme).string[1] == '=') && (current_agent(lexeme).string[2] == '>')) {
00599 current_agent(lexeme).type = LESS_EQUAL_GREATER_LEXEME;
00600 return;
00601 }
00602 }
00603 determine_type_of_constituent_string();
00604
00605 }
00606
00607 void lex_period(void)
00608 {
00609 store_and_advance();
00610 finish();
00611
00612
00613 if (isdigit(current_agent(current_char)))
00614 read_rest_of_floating_point_number();
00615 if (current_agent(lexeme).length == 1) {
00616 current_agent(lexeme).type = PERIOD_LEXEME;
00617 return;
00618 }
00619 determine_type_of_constituent_string();
00620 }
00621
00622 void lex_plus(void)
00623 {
00624
00625
00626 int i;
00627 bool could_be_floating_point;
00628
00629 read_constituent_string();
00630
00631
00632 if (current_agent(current_char) == '.') {
00633 could_be_floating_point = TRUE;
00634 for (i = 1; i < current_agent(lexeme).length; i++)
00635 if (!isdigit(current_agent(lexeme).string[i]))
00636 could_be_floating_point = FALSE;
00637 if (could_be_floating_point)
00638 read_rest_of_floating_point_number();
00639 }
00640 if (current_agent(lexeme).length == 1) {
00641 current_agent(lexeme).type = PLUS_LEXEME;
00642 return;
00643 }
00644 determine_type_of_constituent_string();
00645 }
00646
00647 void lex_minus(void)
00648 {
00649
00650
00651 int i;
00652 bool could_be_floating_point;
00653
00654 read_constituent_string();
00655
00656
00657 if (current_agent(current_char) == '.') {
00658 could_be_floating_point = TRUE;
00659 for (i = 1; i < current_agent(lexeme).length; i++)
00660 if (!isdigit(current_agent(lexeme).string[i]))
00661 could_be_floating_point = FALSE;
00662 if (could_be_floating_point)
00663 read_rest_of_floating_point_number();
00664 }
00665 if (current_agent(lexeme).length == 1) {
00666 current_agent(lexeme).type = MINUS_LEXEME;
00667 return;
00668 }
00669 if (current_agent(lexeme).length == 3) {
00670 if ((current_agent(lexeme).string[1] == '-') && (current_agent(lexeme).string[2] == '>')) {
00671 current_agent(lexeme).type = RIGHT_ARROW_LEXEME;
00672 return;
00673 }
00674 }
00675 determine_type_of_constituent_string();
00676 }
00677
00678 void lex_digit(void)
00679 {
00680 int i;
00681 bool could_be_floating_point;
00682
00683 read_constituent_string();
00684
00685
00686 if (current_agent(current_char) == '.') {
00687 could_be_floating_point = TRUE;
00688 for (i = 1; i < current_agent(lexeme).length; i++)
00689 if (!isdigit(current_agent(lexeme).string[i]))
00690 could_be_floating_point = FALSE;
00691 if (could_be_floating_point)
00692 read_rest_of_floating_point_number();
00693 }
00694 determine_type_of_constituent_string();
00695 }
00696
00697 void lex_unknown(void)
00698 {
00699 if (reading_from_top_level() && current_agent(current_char) == 0) {
00700 } else {
00701 print("Error: Unknown character encountered by lexer, code=%d\n", current_agent(current_char));
00702 print("File %s, line %lu, column %lu.\n", current_agent(current_file)->filename,
00703 current_agent(current_file)->current_line, current_agent(current_file)->current_column);
00704
00705
00706
00707
00708
00709
00710
00711
00712
00713
00714
00715
00716
00717 }
00718 get_next_char();
00719 get_lexeme();
00720 }
00721
/*
 * lex_constituent_string
 *
 * Default routine for constituent characters: reads the whole constituent
 * string and then lets determine_type_of_constituent_string() classify it.
 */
void lex_constituent_string(void)
{
    read_constituent_string();              /* gather the text */
    determine_type_of_constituent_string(); /* decide what it is */
}
00727
00728 void lex_vbar(void)
00729 {
00730 current_agent(lexeme).type = SYM_CONSTANT_LEXEME;
00731 get_next_char();
00732
00733 for (;;) {
00734 if ((current_agent(current_char) == EOF_AS_CHAR) || (current_agent(lexeme).length == MAX_LEXEME_LENGTH)) {
00735 print("Error: opening '|' without closing '|'\n");
00736 print_location_of_most_recent_lexeme();
00737
00738 current_agent(lexeme).type = EOF_LEXEME;
00739 current_agent(lexeme).string[0] = EOF_AS_CHAR;
00740 current_agent(lexeme).string[1] = 0;
00741 current_agent(lexeme).length = 1;
00742 return;
00743 }
00744 if (current_agent(current_char) == '\\') {
00745 get_next_char();
00746 current_agent(lexeme).string[current_agent(lexeme).length++] = (char) current_agent(current_char);
00747 get_next_char();
00748 } else if (current_agent(current_char) == '|') {
00749 get_next_char();
00750 break;
00751 } else {
00752 current_agent(lexeme).string[current_agent(lexeme).length++] = (char) current_agent(current_char);
00753 get_next_char();
00754 }
00755 }
00756
00757 current_agent(lexeme).string[current_agent(lexeme).length] = 0;
00758 }
00759
00760 void lex_quote(void)
00761 {
00762 current_agent(lexeme).type = QUOTED_STRING_LEXEME;
00763 get_next_char();
00764 for (;;) {
00765 if ((current_agent(current_char) == EOF_AS_CHAR) || (current_agent(lexeme).length == MAX_LEXEME_LENGTH)) {
00766 print("Error: opening '\"' without closing '\"'\n");
00767 print_location_of_most_recent_lexeme();
00768
00769 current_agent(lexeme).type = EOF_LEXEME;
00770 current_agent(lexeme).string[0] = EOF_AS_CHAR;
00771 current_agent(lexeme).string[1] = 0;
00772 current_agent(lexeme).length = 1;
00773 return;
00774 }
00775 if (current_agent(current_char) == '\\') {
00776 get_next_char();
00777 current_agent(lexeme).string[current_agent(lexeme).length++] = (char) current_agent(current_char);
00778 get_next_char();
00779 } else if (current_agent(current_char) == '"') {
00780 get_next_char();
00781 break;
00782 } else {
00783 current_agent(lexeme).string[current_agent(lexeme).length++] = (char) current_agent(current_char);
00784 get_next_char();
00785 }
00786 }
00787 current_agent(lexeme).string[current_agent(lexeme).length] = 0;
00788 }
00789
00790
00791
00792
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807 void lex_dollar(void)
00808 {
00809 current_agent(lexeme).type = DOLLAR_STRING_LEXEME;
00810 current_agent(lexeme).string[0] = '$';
00811 current_agent(lexeme).length = 1;
00812 get_next_char();
00813 while ((current_agent(current_char) != '\n') &&
00814 (current_agent(current_char) != EOF_AS_CHAR) && (current_agent(lexeme).length < MAX_LEXEME_LENGTH - 1)) {
00815 current_agent(lexeme).string[current_agent(lexeme).length++] = current_agent(current_char);
00816 get_next_char();
00817 }
00818 current_agent(lexeme).string[current_agent(lexeme).length] = '\0';
00819 }
00820
00821
00822
00823
00824
00825
00826
00827
00828
00829
00830
00831
00832
00833
00834
00835
00836
00837
00838
00839 void get_lexeme(void)
00840 {
00841
00842
00843 if (current_agent(lex_alias)) {
00844 current_agent(lexeme) = current_agent(lex_alias)->lexeme;
00845 current_agent(lex_alias) = current_agent(lex_alias)->next;
00846 return;
00847 }
00848
00849
00850 current_agent(lexeme).length = 0;
00851 current_agent(lexeme).string[0] = 0;
00852
00853
00854
00855
00856
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870 current_agent(load_errors_quit) = FALSE;
00871
00872 while (current_agent(load_errors_quit) == FALSE) {
00873 if (current_agent(current_char) == EOF_AS_CHAR)
00874 break;
00875 if (whitespace[(unsigned char) current_agent(current_char)]) {
00876 if (current_agent(current_char) == '\n') {
00877 if (current_agent(current_file)->fake_rparen_at_eol) {
00878 do_fake_rparen();
00879 return;
00880 }
00881
00882
00883
00884
00885
00886
00887
00888
00889
00890
00891
00892
00893 }
00894 get_next_char();
00895 continue;
00896 }
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907 if (current_agent(current_char) == ';') {
00908
00909 get_next_char();
00910 continue;
00911 }
00912 if (current_agent(current_char) == '#') {
00913
00914 while ((current_agent(current_char) != '\n') && (current_agent(current_char) != EOF_AS_CHAR))
00915 get_next_char();
00916 if (current_agent(current_file)->fake_rparen_at_eol) {
00917 do_fake_rparen();
00918 return;
00919 }
00920 if (current_agent(current_char) != EOF_AS_CHAR)
00921 get_next_char();
00922 continue;
00923 }
00924
00925 break;
00926 }
00927
00928 record_position_of_start_of_lexeme();
00929 if (current_agent(current_char) != EOF_AS_CHAR)
00930 (*(lexer_routines[(unsigned char) current_agent(current_char)])) ();
00931 else
00932 lex_eof();
00933 }
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943 char extra_constituents[] = "$%&*+-/:<=>?_";
00944
00945 void init_lexer(void)
00946 {
00947 unsigned int i;
00948
00949
00950 for (i = 0; i < 256; i++)
00951 if (isalnum(i))
00952 constituent_char[i] = TRUE;
00953 else
00954 constituent_char[i] = FALSE;
00955 for (i = 0; i < strlen(extra_constituents); i++)
00956 constituent_char[(int) extra_constituents[i]] = TRUE;
00957
00958
00959 for (i = 0; i < 256; i++)
00960 if (isspace(i))
00961 whitespace[i] = TRUE;
00962 else
00963 whitespace[i] = FALSE;
00964
00965
00966 for (i = 0; i < 256; i++)
00967 if (isdigit(i))
00968 number_starters[i] = TRUE;
00969 else
00970 number_starters[i] = FALSE;
00971 number_starters['+'] = TRUE;
00972 number_starters['-'] = TRUE;
00973 number_starters['.'] = TRUE;
00974
00975
00976 for (i = 0; i < 256; i++)
00977 lexer_routines[i] = lex_unknown;
00978 for (i = 0; i < 256; i++)
00979 if (constituent_char[i])
00980 lexer_routines[i] = lex_constituent_string;
00981 for (i = 0; i < 256; i++)
00982 if (isdigit(i))
00983 lexer_routines[i] = lex_digit;
00984 lexer_routines['@'] = lex_at;
00985 lexer_routines['('] = lex_lparen;
00986 lexer_routines[')'] = lex_rparen;
00987 lexer_routines['+'] = lex_plus;
00988 lexer_routines['-'] = lex_minus;
00989 lexer_routines['~'] = lex_tilde;
00990 lexer_routines['^'] = lex_up_arrow;
00991 lexer_routines['{'] = lex_lbrace;
00992 lexer_routines['}'] = lex_rbrace;
00993 lexer_routines['!'] = lex_exclamation_point;
00994 lexer_routines['>'] = lex_greater;
00995 lexer_routines['<'] = lex_less;
00996 lexer_routines['='] = lex_equal;
00997 lexer_routines['&'] = lex_ampersand;
00998 lexer_routines['|'] = lex_vbar;
00999 lexer_routines[','] = lex_comma;
01000 lexer_routines['.'] = lex_period;
01001 lexer_routines['"'] = lex_quote;
01002 lexer_routines['$'] = lex_dollar;
01003
01004
01005 start_lex_from_file("[standard input]", stdin);
01006 }
01007
01008
01009
01010
01011
01012
01013
01014
01015
01016
01017
01018
01019
01020 void print_location_of_most_recent_lexeme(void)
01021 {
01022 int i;
01023
01024 if (current_agent(current_file)->line_of_start_of_last_lexeme == current_agent(current_file)->current_line) {
01025
01026 if (!reading_from_top_level()) {
01027 print("File %s, line %lu:\n", current_agent(current_file)->filename,
01028 current_agent(current_file)->current_line);
01029
01030 }
01031 if (current_agent(current_file)->buffer[strlen(current_agent(current_file)->buffer) - 1] == '\n')
01032 print_string(current_agent(current_file)->buffer);
01033 else
01034 print("%s\n", current_agent(current_file)->buffer);
01035 for (i = 0; i < current_agent(current_file)->column_of_start_of_last_lexeme; i++)
01036 print_string("-");
01037 print_string("^\n");
01038
01039
01040
01041
01042
01043
01044
01045
01046
01047
01048
01049
01050
01051
01052
01053
01054
01055
01056
01057 } else {
01058
01059 print("File %s, line %lu, column %lu.\n", current_agent(current_file)->filename,
01060 current_agent(current_file)->line_of_start_of_last_lexeme,
01061 current_agent(current_file)->column_of_start_of_last_lexeme + 1);
01062
01063
01064
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076 }
01077 }
01078
01079
01080
01081
01082
01083
01084
01085
01086
01087
01088
01089
01090
01091
01092 int current_lexer_parentheses_level(void)
01093 {
01094 return current_agent(current_file)->parentheses_level;
01095 }
01096
01097 void skip_ahead_to_balanced_parentheses(int parentheses_level)
01098 {
01099 for (;;) {
01100 if (current_agent(lexeme).type == EOF_LEXEME)
01101 return;
01102 if ((current_agent(lexeme).type == R_PAREN_LEXEME) &&
01103 (parentheses_level == current_agent(current_file)->parentheses_level))
01104 return;
01105 get_lexeme();
01106 }
01107 }
01108
01109 void fake_rparen_at_next_end_of_line(void)
01110 {
01111 current_agent(current_file)->parentheses_level++;
01112 current_agent(current_file)->fake_rparen_at_eol = TRUE;
01113 }
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123 void set_lexer_allow_ids(bool allow_identifiers)
01124 {
01125 current_agent(current_file)->allow_ids = allow_identifiers;
01126 }
01127
01128
01129
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
01140 void determine_possible_symbol_types_for_string(char *s,
01141 int length_of_s,
01142 bool * possible_id,
01143 bool * possible_var,
01144 bool * possible_sc,
01145 bool * possible_ic, bool * possible_fc, bool * rereadable)
01146 {
01147 char *ch;
01148 bool rereadability_dead, rereadability_questionable;
01149
01150 *possible_id = FALSE;
01151 *possible_var = FALSE;
01152 *possible_sc = FALSE;
01153 *possible_ic = FALSE;
01154 *possible_fc = FALSE;
01155 *rereadable = FALSE;
01156
01157
01158 if (number_starters[(unsigned char) (*s)]) {
01159 ch = s;
01160 if ((*ch == '+') || (*ch == '-'))
01161 ch++;
01162 while (isdigit(*ch))
01163 ch++;
01164 if ((*ch == 0) && (isdigit(*(ch - 1))))
01165 *possible_ic = TRUE;
01166 if (*ch == '.') {
01167 ch++;
01168 while (isdigit(*ch))
01169 ch++;
01170 if ((*ch == 'e') || (*ch == 'E')) {
01171 ch++;
01172 if ((*ch == '+') || (*ch == '-'))
01173 ch++;
01174 while (isdigit(*ch))
01175 ch++;
01176 }
01177 if (*ch == 0)
01178 *possible_fc = TRUE;
01179 }
01180 }
01181
01182
01183 for (ch = s; *ch != 0; ch++)
01184 if (!constituent_char[(unsigned char) (*ch)])
01185 return;
01186
01187
01188 rereadability_questionable = FALSE;
01189 rereadability_dead = FALSE;
01190 for (ch = s; *ch != 0; ch++) {
01191 if (islower(*ch) || isdigit(*ch))
01192 continue;
01193 if (isupper(*ch)) {
01194 rereadability_dead = TRUE;
01195 break;
01196 }
01197 rereadability_questionable = TRUE;
01198 }
01199 if (!rereadability_dead) {
01200 if ((!rereadability_questionable) ||
01201 (length_of_s >= LENGTH_OF_LONGEST_SPECIAL_LEXEME) || ((length_of_s == 1) && (*s == '*')))
01202 *rereadable = TRUE;
01203 }
01204
01205
01206 *possible_sc = TRUE;
01207
01208
01209 if ((*s == '<') && (*(s + length_of_s - 1) == '>'))
01210 *possible_var = TRUE;
01211
01212
01213 if (isalpha(*s)) {
01214
01215 ch = s + 1;
01216 while (isdigit(*ch))
01217 ch++;
01218 if ((*ch == 0) && (isdigit(*(ch - 1))))
01219 *possible_id = TRUE;
01220 }
01221 }