-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathywl.html
1195 lines (1068 loc) · 60.2 KB
/
ywl.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<title>LEX-YACC</title>
<meta charset="UTF-8">
<link rel="stylesheet" href="css/style.css">
<script src="js/jquery-1.12.1.min.js" charset="utf-8"></script>
<script src="js/bootstrap.min.js" charset="utf-8"></script>
<script src="js/sticky_sidebar.js" charset="utf-8"></script>
</head>
<body>
<header class="center clearfix" id="navtop"> <a href="index.html" class="logo fleft"><img src="img/logo.png" alt=""></a>
<nav class="fright">
<ul>
<li><a href="index.html">Home</a></li>
<li><a href="about.html">About</a></li>
<li><a href="roadmap.html">Roadmap</a></li>
<li><a href="documentation.html" class="navactive">Documentation</a></li>
</ul>
</nav>
</header>
<div class="about center part clearfix">
<header class="title">
<h3 class="fleft">Contents</h3>
</header>
<aside class="column4 mright">
<menu>
<ul>
<li><a href="#navily" class="sec">Integrating LEX with YACC</a></li>
<li><a href="#navdect" class="sec">Declaring tokens</a></li>
<li><a href="#navytabh" class="sec">y.tab.h</a></li>
<li><a href="#navdeft" class="sec">Defining tokens</a></li>
<li><a href="#navptlp" class="sec">Passing tokens from the Lexer to the Parser</a></li>
<li><a href="#navintro" class="sec">Introduction to attributes</a></li>
<li><a href="#navattrstack" class="sec">The Attribute Stack</a></li>
<li><!--For blank space between links and download button--> </li>
<!-- <li><a href="https://deadlink.com" class="button"> Download as PDF </a></li> -->
<li><!--Blank line for space between download button and main title--> </li>
</ul>
</menu>
</aside>
<section class="columnthird content"> <h1 class="mright">USING LEX WITH YACC</h1>
<article id="navily" class="detail">
<h2>Integrating LEX with YACC</h2>
<p> In the previous documents, we have noted that YACC is used to generate a parser (<a href = "yacc.html">YACC documentation</a>) and LEX is used to generate a lexical analayzer (<a href = "lex.html">LEX documentation</a>). YACC generates the definition for yyparse() in y.tab.c and LEX generates the definition for yylex() in lex.yy.c. We have also noted that yyparse() repetitively calls yylex() to read tokens from the input stream. Till now, for simplicity, we had written a <a href="yacc.html#navexy0al">user-defined yylex()</a> in the YACC program. In this section of the document we will use LEX to generate the definition of yylex() and make YACC use this definition for retrieving tokens. </p>
<pre>
/* Declarations section */
int yylex();
%%
/* Rules */
%%
/* Auxiliary Functions */
</pre>
<p id="compile"> We should now compile it as <i> gcc y.tab.c lex.yy.c -o <objectfilename&rt;</i></p>
<p> NOTE: We <b>must not</b> provide a <a href="lex.html#navexl0">main() definition in the LEX program</a> calling yylex(), as there already exists a <a href="yacc.html#navexy0al">main() function in the YACC program</a> which calls yyparse() which in turn calls yylex().</p>
<!--<h3>YACC to LEX Communication</h3>-->
<p>Recall that yyparse() attempts to parse the given input by calling yylex() to obtain tokens. In the <a href="yacc.html#navexy1">infix to postfix conversion example</a> in the YACC documentation, we had used a <a href="yacc.html#yylex">user defined yylex()</a> in the YACC program. In that example, the YACC program contains the declaration for the token DIGIT in the <a href="yacc.html#navexy1d">declarations section</a> . The definition of the token DIGIT is given in the <a href="yacc.html#navexy1a">auxiliary functions section</a> under the function yylex(). Instead, we will now use LEX to generate yylex().
</p>
<p id="navexctok">First, we will write a YACC program to declare the tokens and generate yyparse().</p>
</artcile>
<br>
<div class="up grid col-one-third" style="float:right">
<a href="#navtop" title="Go back up"> top ↑</a>
</div>
<article id="navdect" class="detail">
<h2>Declaring tokens</h2>
The token DIGIT must be declared in the <i>declaration section</i> to be used in the <i>rules section</i>. The declaration for a token must be made by specifying it in the <a href="yacc.html#decl">YACC declarations section</a> using the <b>%token</b> feature offered by YACC. The following example shows the declaration of the token DIGIT in a YACC program. </p>
<big>in2post.y</big>
<div id="navy" class>
<pre id="navyd">
%{
#include <stdio.h>
%}
%token DIGIT NEWLINE
<c id="navyr"></c>
%%
start : expr NEWLINE {
printf("\nComplete\n");
exit(1);
}
;
expr: expr '+' expr {printf("+ ");}
| expr '-' expr {printf("- ");}
| '(' expr ')'
| DIGIT {printf("%d ",$1);}
;
%%
<c id="navya"></c>
void yyerror(char const *s)
{
printf("yyerror %s\n",s);
return ;
}
int main()
{
yyparse();
return 1;
}
</pre>
</div>
<p id="navlittok">The YACC program given above contains the declaration of the token DIGIT in the <i>declarations section</i>. Note that the grammar contains other terminals like '+', '-', '(' and ')' that also are tokens, but are not declared in the <i>declaration section</i>. These tokens are called <b>literal tokens</b>. Literal tokens are tokens with fixed lexemes. This means that the <a href="lex.html#navyytext">lexeme</a> corresponding to a literal token is a single character or a character string. Such a token do not require an expicit declaration in the YACC program. </p>
<p><b>NOTE</b>: Conceptually, the lexeme of a literal token can be a character or a string. But, not all versions of YACC support string literal tokens. Hence, in our project we will use only single character literal tokens.
</p>
<p> Examples of literal tokens:</p>
<div class="syntax">
'+' '*' '-'
</div>
<p>A lexical analyzer returns a token when it finds a corresponding lexeme. In the case of a literal token, the lexical analyzer returns the lexeme itself as the token ( A type coercion to integer is done so that the value returned by yylex() is of integer type.). For example in the <a href="#navyr">above</a> YACC program, on encoutering the pattern '+' in the input file, yylex() returns '+' itself as the token.</p>
<p>In the parser, an expression like :</p>
<div class="syntax">
expr: expr '+' expr
</div>
<p>is valid because YACC automatically identifies '+' as the literal token.</p>
<p>We must now write a LEX program that contains the <a href="lex.html#regdef">regular definition</a> for DIGIT and the literal tokens.</p>
</article>
<div class="up grid col-one-third" style="float:right">
<a href="#navtop" title="Go back up"> top ↑</a>
</div>
<article id="navytabh" class="detail">
<h2>y.tab.h</h2>
<p> Before writing the LEX program, there must be some way by which the YACC program can tell the LEX program that DIGIT is a valid token that has been declared in the YACC program. This communication is facilitated by the file "y.tab.h" which contains the declarations of all the tokens in the YACC program. The "y.tab.h" is automatically generated by YACC when the 'yacc' command is executed with the -d flag.</p>
<p id="navdflag">In order to generate <a href="#navily">y.tab.c</a> and y.tab.h for the YACC program in in2post.y, do:</p>
<div class="syntax">
user@localhost:~$ yacc -d in2post.y <br>
</div>
<p>An example of the contents of y.tab.h file is shown below. </p>
<div class="syntax">
#define DIGIT 253
</div>
<p>Note that '253' is a YACC generated constant to represent DIGIT. The constant may vary at different executions of YACC. YACC represents a token by defining a <a href="http://gcc.gnu.org/onlinedocs/cpp/Macros.html">macro identifier</a> corresponding to it. </p>
<p>The y.tab.h file must be <i>included</i> in the declarations section of the LEX program. This makes the token declarartions accessible to the LEX program. We will see an example in the next section.</p>
</article>
<div class="up grid col-one-third" style="float:right">
<a href="#navtop" title="Go back up"> top ↑</a>
</div>
<article id="navdeft" class="detail">
<h2>Defining tokens</h2>
<p>The next example example shows the definition of DIGIT and the literal tokens in the LEX program.</p>
<big>in2post.l</big>
<div id="navl">
<pre>
%{
#include <q><</q>stdio.h<q>></q> <!--Dummy tags to display '<' and '>' -->
#include "y.tab.h"
%}
%%
[0-9]+ {
yylval = atoi(yytext);
return DIGIT;
}
"+" return *yytext;
"-" return *yytext;
[()] return *yytext;
[\n] return NEWLINE;
%%
<a id="navyywrapexp" href="lex.html#navyywrap">yywrap</a>()
{
return 1;
}
</pre>
</div>
<p>No explicit declaration of the token DIGIT is requied in the LEX program as y.tab.h (which contains the declaration of DIGIT) has been included in the declarations section.</p>
<p id="navlittokdef"><b>NOTE</b>: As noted earlier we return the lexeme found in case of <a href="#navlittok">literal tokens</a>: '+','*','(',')'. Note that yylex() is a function of return type int but the above LEX program makes yylex() return *yytext where yytext is a character pointer. *yytext de-references to the character value pointed by yytext. Returning a character value does not cause an error because the C compiler type-casts the value to integer automatically.</p>
</p>
<p>To generate lex.yy.c, do:</p>
<div class="syntax">
user@localhost:~$ lex in2post.l <br>
</div>
<p> Once y.tab.c and lex.yy.c files have been generated by YACC and LEX respectively, they can be linked and compiled using the following commands as mentioned <a href="#compile">earlier</a>. The compilation steps and sample input/output of the above example are shown below: </p>
<div class="syntax">
user@localhost:~$ gcc lex.yy.c y.tab.c -o in2post.exe <br>
user@localhost:~$ ./in2post.exe <br>
11+22-33 <br>
11 22 33 - + <br>
user@localhost:~$
</div>
<!-- <p>There exists an alternative method in which the definition of yylex() can be made visible to yyparse(). It can be done by compiling both the .c files (lex.yy.c & y.tab.c) by providing multiple arguments to gcc. An example has been shown below: </p>
<div class="syntax">
gcc lex.yy.c y.tab.c
</div>
<p> <b>NOTE:</b> In this case, the programmer need not explictly include the lex.yyc file in the auxiliary functions section. You may choose between any of the two compilation methods.</p>-->
<div class="up grid col-one-third" style="float:right">
<a href="#navtop" title="Go back up"> top ↑</a>
</div>
</article>
<article id="navptlp" class="detail">
<h2>Passing tokens from the Lexer to the Parser</h2>
<p>Let us consider the <a href="#navy">YACC</a> and <a href="#navl">LEX</a> programs above.</p>
<p>When the input</p>
<div class="syntax">
11+22-33
</div>
is given to the executable file (in2post.exe) <br><br>1. The <a href="#navya">main() function</a> in y.tab.c begins execution. It calls yyparse() which inturn calls yylex() for tokens. <br>2. yylex() reads the input and finds that "11" found in the input matches with the pattern for token DIGIT and returns DIGIT.<br>3. yyparse() which obtains the token DIGIT, shifts it to the parser stack. <br>4. A reduction (corresponding to the rule <i>expr: DIGIT</i>) takes place. This results in the terminal getting replaced with the non-terminal(<i>expr</i>) in the parser stack<br>5. The C statement (semantic action) corresponding to the production is executed (i.e., <i>printf("%d ",$1);</i> is executed.). This prints 11. </p>
<p>We will see what '$1' means and why printing '$1' results in printing the value 11 in detail in the next section.</p>
<p>The execution continues in a similar fashion to complete parsing the entire input.
</p>
<p>
A complete illustration of all the shift and reduce steps is given <a href="#navattrstack">later</a>. The parsing steps have been summarised in the below table for now.
</p>
<table class="tg">
<tr>
<th class="tg-e3zv">I/P Buffer<br></th>
<th class="tg-e3zv">yylex() returns<br></th>
<th class="tg-e3zv">Parser stack<br></th>
<th class="tg-e3zv">Parser action on stack<br></th>
<th class="tg-e3zv">C Action executed<br></th>
<th class="tg-e3zv">Output Stream<br></th>
</tr>
<tr>
<td class="tg-031e">11 + 22 - 33</td>
<td class="tg-031e"></td>
<td class="tg-031e"><br></td>
<td class="tg-031e">_</td>
<td class="tg-e31e"><br></td>
<td class="tg-e31e"><br></td>
</tr>
<tr>
<td class="tg-031e">+ 22 - 33</td>
<td class="tg-031e">DIGIT</td>
<td class="tg-031e">DIGIT<br></td>
<td class="tg-031e">SHIFT</td>
<td class="tg-e31e"><br></td>
<td class="tg-e31e"><br></td>
</tr>
<tr>
<td class="tg-031e">+ 22 - 33</td>
<td class="tg-031e"></td>
<td class="tg-031e">expr<br></td>
<td class="tg-031e">REDUCE</td>
<td class="tg-031e">printf("%d ",$1);</td>
<td class="tg-e31e">11<br></td>
</tr>
<tr>
<td class="tg-031e">22 - 33</td>
<td class="tg-031e">+</td>
<td class="tg-031e">expr +</td>
<td class="tg-031e">SHIFT</td>
<td class="tg-e31e"></td>
<td class="tg-e31e">11</td>
</tr>
<tr>
<td class="tg-031e">- 33</td>
<td class="tg-031e">DIGIT</td>
<td class="tg-031e">expr + 22</td>
<td class="tg-031e">SHIFT</td>
<td class="tg-e31e"></td>
<td class="tg-e3ze">11</td>
</tr>
<tr>
<td class="tg-031e">- 33</td>
<td class="tg-031e"></td>
<td class="tg-031e">expr + expr<br></td>
<td class="tg-031e">REDUCE</td>
<td class="tg-031e">printf("%d ",$1);</td>
<td class="tg-e31e">11 22</td>
</tr>
<tr>
<td class="tg-031e">33</td>
<td class="tg-031e">-</td>
<td class="tg-031e">expr + expr -</td>
<td class="tg-031e">SHIFT</td>
<td class="tg-e31e"></td>
<td class="tg-e31e">11 22</td>
</tr>
<tr>
<td class="tg-031e"><br></td>
<td class="tg-031e">DIGIT</td>
<td class="tg-031e">expr + expr - DIGIT</td>
<td class="tg-031e">SHIFT</td>
<td class="tg-e31e"></td>
<td class="tg-e31e">11 22</td>
</tr>
<tr>
<td class="tg-031e"><br></td>
<td class="tg-031e">0</td>
<td class="tg-031e">expr + expr - expr</td>
<td class="tg-031e">REDUCE</td>
<td class="tg-031e">printf("%d ",$1);</td>
<td class="tg-e31e">11 22 33</td>
</tr>
<tr>
<td class="tg-031e"><br></td>
<td class="tg-031e"></b></td>
<td class="tg-031e">expr + expr</td>
<td class="tg-031e">REDUCE</td>
<td class="tg-e31e">printf("- ");</td>
<td class="tg-e31e">11 22 33 -</td>
</tr>
<tr>
<td class="tg-031e"><br></td>
<td class="tg-031e"></b></td>
<td class="tg-031e">expr<br></td>
<td class="tg-031e">REDUCE</td>
<td class="tg-031e">printf("+ ");</td>
<td class="tg-031e">11 22 33 - +</td>
</tr>
</table>
<br>
<p>Note that yylex() makes a call to yywrap(), when 'End of file' is encountered. We have defined yywrap() to return 1 (We have provided the definition for <a href="#navyywrapexp">yywrap()</a> in our LEX file). <a href="lex.html#navyywrap">Recall</a> that when yylex() receives non-zero value from yywrap(), it returns zero to yyparse(). Also <a href="yacc.html#navyyparse">recall</a> that yyparse() does not call yylex() once it has returned 0. It return zero to main() function to indicate successful parsing.</p>
<p>We have noted how to integrate the lexical analyzer generated by LEX with the parser generated by YACC. Now, we will learn more about managing attributes using LEX and YACC..</p>
</article>
<div class="up grid col-one-third" style="float:right">
<a href="#navtop" title="Go back up"> top ↑</a>
</div>
</article>
<article id="navintro" class="detail">
<h2>Introduction to attributes</h2>
<p>In the <a href="yacc.html#navpassingvalues">last section</a> of the YACC documentation we have noted that it is possible to pass values associated with tokens from yylex() to yyparse(). We had described the term 'attribute' as a value associated with a token. YACC uses yylval to facilitate passing values from the lexical analyzer to the parser. We will now explore how YACC associates attribute values to terminals and non-terminals in a production. We will also explore the usage of YYSTYPE to define custom (user defined )attribute types.
</p>
<p><a href="yacc.html#navyylval">Recall</a> that yylval is a global variable declared in y.tab.c of type YYSTYPE (YYSTYPE is integer unless defined otherwise. We will let YYSTYPE take its default type of integer since it is simpler to understand how attributes are processed in this case. Later we will see how more complex attribute types can be defined and handled).
</p><p> In the YACC documentation, we had seen an <a href="yacc.html#navexy1">example</a> which illustrates the passing of attributes from yylex() to yyparse(). We use the variable yylval to hold the attribute to be passed. If the programmer were to use LEX to generate yylex(), then the attributes will have to be passed to yyparse() using the same mechanism i.e, using yylval (see example below). </p>
<p>In the LEX program, yylex() returns each token by its name. The attribute associated with each token is assigned to yylval and thus becomes accessible to yyparse(). Note that, all tokens except literal tokens must be declared in the declarations section of the YACC program. The following example is a LEX program which returns a token DIGIT when it finds a number.
</p>
<div id="navpairexp">
<pre>
%{
#include "y.tab.h"
#include<e><</e>stdlib.h<e>></e>
#include<e><</e>stdio.h<e>></e>
%}
number [0-9]+
%%
{number}{
yylval = atoi(<a href="lex.html#navyytext">yytext</a>);
return DIGIT;
}
. return *yytext;
%%
</pre>
</div>
<p>In this example, we want to return the token DIGIT when an integer is found in the input stream. In addition to the token, we need to pass the value found in the input stream to yyparse(). The lexeme found in the input stream is a string which contains the integer found. <a href="http://en.cppreference.com/w/cpp/string/byte/atoi">atoi()</a> is a built-in function of return type <i>int</i> defined in the <i>stdlib.h</i> header file. We use atoi() to obtain the integer equivalent of the lexeme found. The obtained integer value is then assigned to yylval.
</p>
<p> The following code segment demonstrates how yyparse() receives the attribute value corresponding to the token DIGIT passed by yylex(). Note that YACC must be run with the <a href="ywl.html#navdflag">-d flag</a> to generate y.tab.h. The LEX program above includes the y.tab.h file in the auxiliary declarations section to <i>import</i> the declarations from y.tab.h.
</p>
<pre>
%{
#include <<e>stdio.h</e>>
int yyerror();
%}
%token DIGIT
%%
start : expr '\n' {printf("\nComplete");exit(1);}
;
expr: expr '+' expr {printf("+ ");}
| expr '*' expr {printf("* ");}
| '(' expr ')'
| DIGIT {printf("%d ",$1);}
;
%%
int yyerror()
{
printf("Error");
}
int main()
{
yyparse();
return 1;
}
</pre>
<p>Note the semantic action for the production <i>expr:DIGIT</i>
<div class="syntax">
DIGIT {printf("%d ",$1);}
</div>
<p>The value corresponding to the token DIGIT, that was assigned to yylval by lex is accessed in YACC using the symbol $1. Recall that values corresponding to the symbols in the handle of a grammar may be accessed using $1, $2, etc according to its position in the production rule.
</p><p>Generally, we say that in the YACC program, the attribute of a grammar symbol in a production can be accessed using the following syntax: $1 for the first symbol in the body of a production, $2 for the second symbol, $3 for the third and so on. For example consider the following example of a YACC rule.
</p>
<div class="syntax">
X: A B C
</div>
<p>The attribute value of 'A' is accessed by the symbol $1, value of ‘B' by $2 and 'C' can by $3. The symbol $$ refers to the attribute value of ‘X’ which is the head of the production. Note that the head of a production must be a non-terminal. Hence, it becomes possible to assign an attribute value to the head of a production by assigning a value to $$. In the above example, an attribute value can be assigned to X through an assignment to $$. Hence we extend our notion of an attribute to: <i>"An attribute is a value associated with a terminal or non-terminal grammar symbol".</i></p>
<p>We will make this clear with an example.</p>
<p>Consider the problem of displaying two numbers in an input stream (ending with a ‘\n’) if they occur as a pair separated by a comma. Also suppose that the numbers must be displayed ONLY after a pair is found. Let us look at a YACC program that solves the problem.</p>
<p>Example: pair.y</p>
<pre>
%{
#include <stdio.h>
int yyerror();
%}
%token DIGIT
%%
start : pair '\n' {printf("\nComplete"); }
;
pair: num ',' num { printf("pair(%d,%d),$1,$3"); }
;
num: DIGIT { $$=$1; }
;
%%
int yyerror()
{
printf("Error");
}
int main()
{
yyparse();
return 1;
}
</pre>
<p>We will use the same <a href="#navpairexp">lex program </a> to receive tokens and token values (attributes).</p>
<p>Note: We have assumed that the attribute values of each symbol is an integer. Later we will see how to allow more complex attributes.</p>
<p>In the above program segment, the first rule displays the value of the numbers for each pair in the input stream. In the action part of the rule, $1 refers to the attribute value of the first num and $3 refers to the attribute value of the and the second num. (Note that $2 refers to the attribute value of the literal token ',' which is the token itself). Since num is a non-terminal, its attribute cannot be set by yylex(). Recall that every non-terminal symbol in the CFG must have at least one production with the non-terminal as the head. The attribute value of a non-terminal must be set by writing semantic rules to set the value of $$ in such productions. Such an attribute value which is “synthesized” by the semantics actions in a production is called a synthesized attribute. In the example, the attribute value of the non-terminal num is synthesized by the following rule:</p>
<div class="syntax">
num: DIGIT { $$=$1; }
</div>
<p>The action of the rule sets the attribute value of num (referred to using $$) to the attribute value of DIGIT (referred to using $1).
</p><p>Sample I/O:</p>
<div class="syntax">
I: 2,5
O: pair(2,5)
I: 3,5,7
O: syntax error
</div>
</article>
<article id="navattrsyn" class="detail">
<h3>Attribute Synthesis</h3>
<p>We have seen that attributes of terminals can be passed from yylex() to yyparse(), whereas attributes of a non-terminal can be synthesized. An attribute of a non-terminal grammar symbol is said to be synthesized if it has been calculated from the attribute values of it's children in the parse tree. Thus the (synthesized) attribute associated with a non-terminal is calculated using the attribute values of the symbols in the handle that it replaces. For example, consider the following grammar:
</p>
<div class="syntax">
Z: X {printf("Result=%d",$1);}<br>
X: A '+' B { $$ = $1 + $3; }
</div>
<p>The attribute value of X is a synthesized attribute as it has been calculated using the attribute values of the symbols in the handle (A ‘+’ B) that it replaces.
</p><p>We will look at an example now.</p>
<p>This is a YACC program that evaluates an expression:</p>
<pre>
%{
#include <stdio.h>
int yyerror();
%}
%token DIGIT
%left '+'
%left '*'
%%
start : expr '\n' { printf("Expression value = %d",$1);}
;
expr: expr '+' expr {$$ = $1 + $3;}
| expr '*' expr {$$ = $1 * $3;}
| '(' expr ')' {$$ = $2;}
| DIGIT {$$ = $1;}
;
%%
int yyerror()
{
printf("Error");
}
int main()
{
yyparse();
return 1;
}
</pre>
<p>Sample I/O:</p>
<div class="syntax">
I: 2+3*(4+5)<br>
O: 29
</div>
<p>Each of the semantic actions in the following rules synthesizes the attribute value for expr by assignment to $$.</p>
<div class="syntax">
expr: expr '+' expr {$$ = $1 + $3;}<br>
| expr '*' expr {$$ = $1 * $3;} <br>
| '(' expr ')' {$$ = $2;}<br>
| DIGIT {$$ = $1;} <br>
;
</div>
<p>We will now see how attribute synthesis is managed internally.</p>
<article id="navattrstack" class="detail">
<h2>The Attribute Stack</h2>
<p><a href="yacc.html#navshiftreduce">Recall</a> that YACC maintains a parse stack to achieve shift-reduce parsing. The parse stack contains grammar symbols (both terminal and non-terminal ) representing the current configuration of the parser. Similar to the parse stack, YACC also maintains an attribute stack to store the attribute value of each grammar symbol in the parse stack.
</p><p>The attribute stack is synchronous with the parse stack -- synchronous because the i'th value on the attribute stack will be the attribute value of the i'th symbol on the parse stack.
</p><p>We will see how attribute synthesis is done on input 2+3*(4+5).
</p>
<p>1. The main() function in y.tab.c begins execution. It calls yyparse() which in turn calls yylex() for tokens.<br>
2. yylex() reads the input and finds that the lexeme "2" matches with the pattern for the token DIGIT. It assigns ‘2’ to yylval and returns DIGIT. Note that YYSTYPE is assumed to take its default value of integer and hence yylval is set to integer type by YACC.<br>
3. yyparse() which obtains the token DIGIT and its attribute value inside the variable yylval, <a href="yacc.html#navshiftreduce">shifts</a> the token DIGIT to the parser stack and pushes the value of yylval (2) to the attribute stack.
</p>
<left><img src="img/ywl1.png" style="max-width=50%" alt= "INITIAL PARSER STACK"></left>
<right><img src="img/ywl1.png" style="max-width=50%" alt="INITIAL ATTRIBUTE STACK"></right>
<p> INITIAL PARSE STACK INITIAL ATTRIBUTE STACK</p>
<left><img src="img/ywl2.png" style="max-width=50%" alt= "PARSER STACK-AFTER SHIFT"></left>
<right><img src="img/ywl3.png" style="max-width=50%" alt="ATTRIBUTE STACK-AFTER SHIFT"></right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>5. A <a href="yacc.html#navshiftreduce">reduction</a> (corresponding to the rule expr: DIGIT) takes place. This results in the following events:<br>
1. The terminal ‘ DIGIT’ gets replaced with the non-terminal expr in the parser stack.<br>
2. The semantic action {$$=$1} for the corresponding reduction is executed. (This sets the (attribute) value of the non terminal at the head of the rule (‘expr’) to the (attribute) value of the first symbol in the handle (DIGIT).)<br>
3. The value of DIGIT (2) is popped from the attribute stack and the synthesized value of ‘expr’(2) is pushed into it.</p>
<p>Note that at any point in the parser’s execution, the symbols $1, $2, $3 etc., refers to the first, second, third etc. attribute values (of the corresponding tokens) on top of the stack. $$ refers to the attribute value of the non-terminal which is the head of the production. When the non-terminal is pushed on to the parse stack, the value of $$ is pushed on to the attribute stack. $$ refers to the symbol on top of the stack after a reduction has taken place.</p>
<left><img src="img/ywl4.png" style="max-width=50%" </left>
<right><img src="img/ywl5.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl6.png" style="max-width=50%" </left>
<right><img src="img/ywl7.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>6. The parser executes a shift action. Now Lex reads and returns the token ‘+’. Since this is a literal token, its value, ‘+’ gets pushed into both the parse stack and the attribute stack after implicit type coercion .
</p>
<left><img src="img/ywl8.png" style="max-width=50%" </left>
<right><img src="img/ywl9.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>7. Since there are no possible reductions to be performed, parser executes another shift operation. Lex returns the token DIGIT again as it encounters ‘3’. The token DIGIT gets pushed to the parser stack and its value, ‘3’, gets pushed to the attribute stack.
</p>
<left><img src="img/ywl10.png" style="max-width=50%" </left>
<right><img src="img/ywl11.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>8. The reduction by the rule expr: DIGIT takes place. The token DIGIT in parse stack is replaced by ‘expr’. The semantic action {$$=$1} sets the value of ‘expr’ to ‘3. In the attribute stack, the value of DIGIT (3) gets replaced by the value of expr (3 itself).
</p>
<left><img src="img/ywl12.png" style="max-width=50%" </left>
<right><img src="img/ywl13.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl14.png" style="max-width=50%" </left>
<right><img src="img/ywl15.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>9. Now even though a valid reduction is possible for expr + expr, the parser executes a shift action. This is because shift/reduce conflict is resolved by looking at operator precedence. <a href="yacc.html#navlookahead" >Recall</a> shift/reduce parsing. The next token, ‘*’ is returned by Lex. This is again a literal token and is pushed into both the parse stack and attribute stack.
</p>
<left><img src="img/ywl16.png" style="max-width=50%" </left>
<right><img src="img/ywl17.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>10. Since there are no matching handles in any of the rules, another shift action is executed. Lex returns ‘(‘ which is again a literal token. The configuration is now
</p>
<left><img src="img/ywl18.png" style="max-width=50%" </left>
<right><img src="img/ywl19.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>11. Again, there are no matching rules. So another shift action is executed. Lex returns DIGIT for ‘4’.
</p>
<left><img src="img/ywl20.png" style="max-width=50%" </left>
<right><img src="img/ywl21.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>12. A reduction by expr:DIGIT takes place.</p>
<left><img src="img/ywl22.png" style="max-width=50%" </left>
<right><img src="img/ywl23.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl24.png" style="max-width=50%" </left>
<right><img src="img/ywl25.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>13. Since there are no matching rules, a shift action is executed. The literal token ‘+’ is returned by Lex and pushed into both stacks by YACC.
</p>
<left><img src="img/ywl26.png" style="max-width=50%" </left>
<right><img src="img/ywl27.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>14. Since there are no matching rules, another shift action is executed. Lex returns DIGIT for ‘5’.</p>
<left><img src="img/ywl28.png" style="max-width=50%" </left>
<right><img src="img/ywl29.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>15. A reduction by the rule expr:DIGIT takes place.</p>
<left><img src="img/ywl30.png" style="max-width=50%" </left>
<right><img src="img/ywl31.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl32.png" style="max-width=50%" </left>
<right><img src="img/ywl33.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>16. The parse stack now contains ‘expr + expr’. Now a reduction by the rule expr : expr ‘+’ expr takes place. The tokens ‘expr’, ‘+’ and ‘expr’ in the parse stack are replaced by a single ‘expr’. The semantic action {$$=$1+$3} executes. $1 and $3 refer to the first and third values in the attribute stack , that is, 4 and 5 respectively. Hence the value of the head($$), ‘expr’, is set to 4+5(=9). ‘4’ ,’+’, and ‘5’ are popped out from the stack and ‘9’ is pushed in.</p>
<left><img src="img/ywl34.png" style="max-width=50%" </left>
<right><img src="img/ywl35.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl36.png" style="max-width=50%" </left>
<right><img src="img/ywl37.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>17. Since there are no matching reductions, a shift action takes place. Lexer returns the literal token ‘)’ which is pushed to both parser stack and attribute stack. </p>
<left><img src="img/ywl38.png" style="max-width=50%" </left>
<right><img src="img/ywl39.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<p>18. Now a reduction by the rule expr: ‘(‘ expr ‘)’ takes place. The tokens ‘(‘ , expr and ‘)’ in the parser stack are replace by a single expr and the symbols ‘(‘ ,’9’ and ‘)’ in the attribute stack are replaced by ‘9’. (Since the semantic action sets $$ to $2 which is ‘9’).
</p>
<left><img src="img/ywl40.png" style="max-width=50%" </left>
<right><img src="img/ywl41.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl42.png" style="max-width=50%" </left>
<right><img src="img/ywl43.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>19. Now we have expr*expr on the top of the parser stack. Reduction by the rule expr: expr ‘*’ expr occurs. The tokens ‘expr’, ‘*’ and ‘expr’ are removed from the parse stack and a single ‘expr’ is pushed instead. The symbols ‘3’, ‘*’ and ‘9’ are replaced by ‘27’ (that is, 3*9) in the attribute stack.
</p>
<left><img src="img/ywl44.png" style="max-width=50%" </left>
<right><img src="img/ywl45.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl46.png" style="max-width=50%" </left>
<right><img src="img/ywl47.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>20. Reduction by the rule expr: expr ‘+’ expr takes place. Now we have a single ‘expr’ in the parser stack and ‘29’ in the attribute stack.
</p>
<left><img src="img/ywl48.png" style="max-width=50%" </left>
<right><img src="img/ywl49.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>
<left><img src="img/ywl50.png" style="max-width=50%" </left>
<right><img src="img/ywl51.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>21. Finally, lexer returns the ‘\n’ character and the final reduction to ‘start’ occurs by the rule start: expr ‘\n’. The semantic action prints ‘29’. </p>
<left><img src="img/ywl52.png" style="max-width=50%" </left>
<right><img src="img/ywl53.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER SHIFT ATTRIBUTE STACK-AFTER SHIFT</p>
<left><img src="img/ywl54.png" style="max-width=50%" </left>
<right><img src="img/ywl55.png" style="max-width=50%" </right>
<p>PARSE STACK-BEFORE READ ATTRIBUTE STACK-BEFORE READ</p>,/a
<left><img src="img/ywl56.png" style="max-width=50%" </left>
<right><img src="img/ywl57.png" style="max-width=50%" </right>
<p>PARSE STACK-AFTER READ ATTRIBUTE STACK-AFTER READ</p>
<p>22. Lexer now encounters end of input (You need to enter Ctrl+D to indicate end of input as input is being read from stdout.) As a result yylex calls yywrap() which returns a non-zero value indication end of input. <a href="lex.html#navyywraplink"> yylex() </a>returns 0. (The $ in the input buffer stands for the end of input marker.)
</p>
<p>23. When yyparse receives 0 from lexer, it returns 0 to main function to indicate that parsing was successfull.
</p>
<p>The following table shows the configuration of the parse stack and the attribute stack at every
step of the parsing process. Assume that whenever yylex() returns a token with no attribute, yyparse() pushes a '.' to the attribute stack.
</p>
<table class="tg">
<tr>
<th class="tg-e3zv">PARSE STACK<br></th>
<th class="tg-e3zv">ATTRIBUTE STACK<br></th>
<th class="tg-e3zv">I/P BUFFER<br></th>
<th class="tg-e3zv">PARSER-ACTION EXECUTED<br></th>
</tr>
<tr>
<td class="tg-031e"></td>
<td class="tg-031e"></td>
<td class="tg-031e">2 + 3 * (4 + 5) $<br></td>
<td class="tg-031e">_</td>
</tr>
<tr>
<td class="tg-031e">DIGIT</td>
<td class="tg-031e">2</td>
<td class="tg-031e">+ 3 * ( 4 + 5 ) $<br></td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr</td>
<td class="tg-031e">2</td>
<td class="tg-031e">+ 3 * ( 4 + 5 ) $</td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr +<br></td>
<td class="tg-031e">2 <b>.</b></td>
<td class="tg-031e">3 * ( 4 + 5 ) $</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + DIGIT<br></td>
<td class="tg-031e">2 <b>.</b> 3</td>
<td class="tg-031e">* ( 4 + 5 ) $</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr<br></td>
<td class="tg-031e">2 <b>.</b> 3</td>
<td class="tg-031e">* ( 4 + 5) $<br></td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr + expr *<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>.</b></td>
<td class="tg-031e">( 4 + 5 ) $</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * (<br></td>
<td class="tg-031e">2 <b>.</b> 3<b> . .</b></td>
<td class="tg-031e">4 + 5 ) $<br></td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( DIGIT<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 4</td>
<td class="tg-031e">+ 5 ) $</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 4 </td>
<td class="tg-031e">+ 5 ) $</td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr +<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 4 <b>.</b></td>
<td class="tg-031e">5 ) $</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr + DIGIT<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 4 <b>.</b> 5</td>
<td class="tg-031e">) $<br></td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr + expr <br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 4 <b>.</b> 5</td>
<td class="tg-031e">) $<br></td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 9</td>
<td class="tg-031e">) $<br></td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * ( expr )<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>. .</b> 9 .</td>
<td class="tg-031e">$</td>
<td class="tg-031e">SHIFT</td>
</tr>
<tr>
<td class="tg-031e">expr + expr * expr<br></td>
<td class="tg-031e">2 <b>.</b> 3 <b>.</b> 9</td>
<td class="tg-031e">$</td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr + expr <br></td>
<td class="tg-031e">2 <b>.</b> 27</td>
<td class="tg-031e">$</td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">expr</td>
<td class="tg-031e">29</td>
<td class="tg-031e">$</td>
<td class="tg-031e">REDUCE</td>
</tr>
<tr>
<td class="tg-031e">$expr</td>
<td class="tg-031e">29</td>
<td class="tg-031e">$</td>
<td class="tg-031e">ACCEPT</td>
</tr>
</table>
</article>
<article id="navyystype" class="detail">
<h2>Customising Attribute Types</h2>
<h3>YYSTYPE</h3>
<p>The attribute stack consists of attributes of tokens as well as synthesized attributes. The macro YYSTYPE denotes the type of the attribute stack. For example, in the above production, $$,$1 and $3 are all of the type YYSTYPE. YYSTYPE is <i>int </i>by default. The macro definition
</p>
<div class="syntax">
#define YYSTYPE int
</div>
<p>can be found in the y.tab.c file. YACC automatically declares yylval to be of the type YYSTYPE.
</p><p>Since by default, YACC defines YYSTYPE to be the type int, only integer valued attributes can be passed from yylex() to yyparse() using the variable <i>yylval</i> and only integer attributes can be synthesized by default. If we were to attempt to assign any other value to <i>yylval</i> or any of the attribute stack variables, a type error would be flagged on compiling y.tab.c using gcc.
</p><p>We will now see how to handle attributes of types other than integer.
</p><p>The default definition of YYSTYPE can overriden with any built-in or userdefined data type. For example if we wanted to print the prefix form of an expression:
</p>
<div class="syntax">
expr: expr OP expr { printf("%c %c %c",$2,$1,$3);}
</div>
<p>The type of YYSTYPE can be overriden manually as shown below. The following line has to be added to the declarations section of the YACC program. This may be used (not recommended) to change the type of all the attributes from int to some other type.
</p>
<div class="syntax">#define YYSTYPE char</div>
<p>In general, YACC sets the type of yylval to that defined by YYSTYPE. Hence, in this case, only character variables and constants can be assigned to yylval.</p>
<p>But in order to have multiple custom attribute values, YACC offers a useful feature called <i>%union</i> declaration to customize the type of YYSTYPE. <i>%union</i> declaration is useful when we require to have different tokens return attributes of different types using <i>yylval</i>. For example if we wanted some tokens to be of the type int and some tokens to be of the type char, the following code segment may be added to the declaration section of the YACC program.</p>
<pre>
/* YACC Auxiliary declarations*/
/* YACC Declarations*/
%union
{
char character;
int integer;
};
%token OP
%token NUMBER
%type <character> OP
%type <integer> NUMBER
%%
expr: expr OP expr { printf("%c %d %d",$<character>2,$<integer>1,$<integer>3); }
| DIGIT { $<integer>$=$<integer>1; }
;
%%
/* Auxiliary functions */
</pre>
<p>Note that the type of the attribute of each token must be mentioned when the token is being declared using the following syntax.
</p>
<div class="syntax">
%token tokenname <br/>
%type <token-type> tokenname<br/>
</div>
<p>
'token-type' must be declared under %union prior to use in the declaration of a token. If the type of a token is not explicitly mentioned, no attribute value can be assigned to the token i.e, it is assumed to be of type void.
</p>
<h3>Exercise:</h3>
<p><b>Use the %union feature for doing the following exercises</b></p>
<p> 1.Do Infix to postfix conversion where lexemes are either operators or single characters instead of numbers.
</p><p><b>Sample input:</b> a+b*c</p>
<p> <b> Sample output:</b> abc*+</p>
<p>The %union feature may be used as follows:</p>
<pre>
%union{
char c;
}
</pre>
<p> Hint: This exercise is similar to infix to postfix conversion in stage 2. Here we need to output the lexemes of a token instead of just the token names. Here each lexeme is a single character. Use yylval to pass the lexemes as the attribute values for each token.
</p>
<p> 2.Do symbolic infix to postfix conversion: </p>
<p><b>Sample input:</b> hello+my*world</p>
<p><b>Sample output:</b> hello my world * +</p>
<p> 3.Do symbolic infix to prefix conversion: </p>
<p><b>Sample input:</b> hello+my*world</p>
<p><b>Sample output:</b> + hello * my world</p>
<p>IMPORTANT NOTE: Now the attribute values to be passed are strings like “hello”. The simple way to do this is to set YYSTYPE has to be set to char* and pass strings from the lexer to the parser using <i>yylval</i>. To achieve this, we may declare:
<pre>
%union{
char *c;
}
</pre>
YACC sets the type of yylval to char*. Hence yylval can hold a pointer to a character string. Note that yytext holds the lexeme that was most recently read by yylex(). Hence, if we were to assign yytext directly to yylval, then yylval would point to this lexeme as required. When yylex() returns the token to yyparse(), this pointer gets pushed to the attribute stack corresponding to the token. However, this method fails because the location that yytext points to gets overwritten when the next lexeme is read from the input by yylex(). Hence the previously read string would be lost from that location. This corrupts the data referenced by the pointer in the attribute stack. To avoid this problem, separate memory should be allocated (using malloc) and the string in yytext should be copied (using strcpy) to this memory and yylval should be set to the newly allocated store. (Alternately the library function strdup may be used. This function allocates a new space, duplicates the string provided as input into this space and returns pointer to it.)
</p>
</article>
<article id="navexptree" class="detail">
<h3> Example </h3>
<p>Let us look at an example program that creates an expression tree using union.
</p>
<p>Sample input: 33+42*(21-16)
</p>
<p>Intermediate data structure:
<img src="img/ywlexp0.png" style="max-width=50%" >
</p>
<p>Sample output: 243 </p>
<p>To build such a data structure, we will use a user defined type tnode containing the following fields:
<br><pre>
<br><i>int</i> flag - We will set this to 0 or 1 to indicate whether the node is a leaf node storing an integer value or an internal node.
<br><i>int</i> val – To store the value in case of leaf node.
<br><i>char</i> op- To store the operator in case of internal node
<br><i>struct</i> tnode *right- To store pointer to right child.
<br><i>struct</i> tnode *left- To store pointer to left child.
</p></pre>
<p>We will create a header file by the name exptree.h for the necessary declarations. This file is to be included in the lex and yacc programs.
</p>
<p>
NOTE : Always keep declarations in a header file, function definitions in .c file and include them in your yacc file. This would keep your code clean.
</p>
<p>exprtree.h</p>
<!--<script src="js/e10adeb0a6e91fbcc02b.js"></script>-->
<pre>
typedef struct tnode{
int val; //value of the expression tree
char *op; //indicates the opertor
struct tnode *left,*right; //left and right branches
}tnode;
/*Make a leaf tnode and set the value of val field*/
struct tnode* makeLeafNode(int n);
/*Make a tnode with opertor, left and right branches set*/
struct tnode* makeOperatorNode(char c,struct tnode *l,struct tnode *r);
/*To evaluate an expression tree*/
int evaluate(struct tnode *t);
</pre>
<p> As the lexer scans the input, it should recognise two types of tokens – numbers and operators. ( In the following example we have used literal tokens to indicate each of the operators '+' , '-', '*', '/'. ) The attribute value corresponding to these tokens can be made to indicate which number/operator was read. We will pack this information in the node structure tnode mentioned above .</p>
<p>exprtree.l</p>
<!--<script src="js/e0fe5bb9afa1e35ccff3.js"></script>-->
<pre>
%{
#include <stdlib.h>
#include <stdio.h>
#include "y.tab.h"
#include "exprtree.h"
int number;
%}
%%
[0-9]+ {number = atoi(yytext); yylval.no = makeLeafNode(number); return NUM;}
"+" {return PLUS;}
"-" {return MINUS;}
"*" {return MUL;}
"/" {return DIV;}
[ \t] {}
[()] {return *yytext;}
[\n] {return END;}
. {yyerror("unknown character\n");exit(1);}
%%
int yywrap(void) {
return 1;
}
</pre>
<p>Notice that yylval is assigned a pointer to newly allocated (using malloc) node of type node. For each token that is a number (DIGIT) or operator(returned as literal tokens '+', '-' , '*', '/' ) that is recognized by the lexer, we pack the information in a node structure and a pointer to this node is passed as attribute to the parser. During reductions, the semantic actions specified in the parser will set the left and the right pointers of these nodes appropriately to complete the creation of the expression tree. We will see how these actions are executed in detail next.</p>
<p>exprtree.y</p>
<!--<script src="js/5031b7ea3f3ae455902c.js"></script>-->
<pre>
%{
#include <stdlib.h>
#include <stdio.h>
#include "exprtree.h"
#include "exprtree.c"
int yylex(void);
%}
%union{
struct tnode *no;
}
%type <no> expr NUM program END
%token NUM PLUS MINUS MUL DIV END
%left PLUS MINUS
%left MUL DIV
%%
program : expr END {
$$ = $2;
printf("Answer : %d\n",evaluate($1));
exit(1);
}
;
expr : expr PLUS expr {$$ = makeOperatorNode('+',$1,$3);}
| expr MINUS expr {$$ = makeOperatorNode('-',$1,$3);}