-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathChapter16.scala
235 lines (210 loc) · 6.51 KB
/
Chapter16.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import scala.collection.mutable
import scala.xml._
import scala.xml.dtd.DocType
import scala.xml.parsing.XhtmlParser
import scala.xml.transform.{RewriteRule, RuleTransformer}
object Chapter16 {
/**
* Task 1:
*
* What is `<fred/>(0)`? `<fred/>(0)(0)`? Why?
*
* Solution:
*
* Since `Elem` extends `Node` and `Node` extends `NodeSeq`, which is in turn extends
* `Seq[Node]`, in other words an XML element is represented as node sequence of one item.
* So, expression `elem(0)` will always return the same `elem`:
* {{{
* elem(0) == elem
* elem(0)(0) == elem
* elem(0)(0)(0) == elem
* ...
* }}}
*/
/**
* Task 2:
*
* What is the result of
* {{{
* <ul>
* <li>Opening bracket: [</li>
* <li>Closing bracket: ]</li>
* <li>Opening brace: {</li>
* <li>Closing brace: }</li>
* </ul>
* }}}
* How do you fix it?
*
* Solution:
*
* The given snippet produces an error in the third `li` element: "No closing Tag", because
* the brace `{` symbol is interpreted by Scala compiler as start of Scala expression.
* To fix it we can escape braces by using '{{' and '}}'.
*
* @see Chapter16Spec.scala
*/
/**
* Task 3:
*
* Contrast
* {{{
* <li>Fred</li> match { case <li>{Text(t)}</li> => t }
* }}}
* and
* {{{
* <li>{"Fred"}</li> match { case <li>{Text(t)}</li> => t }
* }}}
* Why do they act differently?
*
* Solution:
*
* Since embedded strings, like `{"Fred"}` don't get turned into `Text` nodes we cannot
* properly pattern match using `Text` node. That's why our second expression failed.
* To fix it we should either rewrite our patten match expression or we can wrap embedded strings
* into `Text` node:
* {{{
* <li>{Text("Fred")}</li> match { case <li>{Text(t)}</li> => t }
* }}}
*/
/**
 * Task 4:
 *
 * Read an XHTML file and print all `img` elements that don't have an `alt` attribute.
 *
 * The file is opened with `getClass.getResourceAsStream` and parsed with `XML.load`; every
 * matching `img` element is printed to standard output with `println`.
 *
 * @param file name of the resource to read
 * @throws java.io.FileNotFoundException if the resource cannot be found
 */
def printImgWithoutAlt(file: String): Unit = {
  // getResourceAsStream returns null for a missing resource; fail with a clear message
  // instead of handing null to the XML loader.
  val in = Option(getClass.getResourceAsStream(file)).getOrElse(
    throw new java.io.FileNotFoundException(s"Resource not found: $file"))
  // The document is fully parsed by XML.load, so the stream can be closed right afterwards.
  val root = try XML.load(in) finally in.close()
  (root \\ "img").filter(_.attribute("alt").isEmpty).foreach(println)
}
/**
 * Task 5:
 *
 * Print the names of all images in an XHTML file. That is, print all `src` attribute values
 * inside `img` elements.
 *
 * The file is opened with `getClass.getResourceAsStream` and parsed with `XML.load`; `img`
 * elements without a `src` attribute are skipped.
 *
 * @param file name of the resource to read
 * @throws java.io.FileNotFoundException if the resource cannot be found
 */
def printAllImg(file: String): Unit = {
  // getResourceAsStream returns null for a missing resource; fail with a clear message
  // instead of handing null to the XML loader.
  val in = Option(getClass.getResourceAsStream(file)).getOrElse(
    throw new java.io.FileNotFoundException(s"Resource not found: $file"))
  // The document is fully parsed by XML.load, so the stream can be closed right afterwards.
  val root = try XML.load(in) finally in.close()
  for {
    img <- root \\ "img"
    src <- img.attribute("src") // None when the element has no `src`, which skips it
  } println(src.text)
}
/**
 * Task 6:
 *
 * Read an XHTML file and print a table of all hyperlinks in the file, together with their URLs.
 * That is, print the child text and the `href` attribute of each a element.
 *
 * Only `a` elements that carry an `href` attribute are listed. The link text is built from the
 * element's children: text nodes are trimmed, any other node is rendered with `toString`, and
 * the pieces are concatenated without a separator. Both columns are padded with spaces to the
 * width of their longest entry.
 *
 * @param file name of the resource to read
 * @throws java.io.FileNotFoundException if the resource cannot be found
 */
def printAllHyperlinks(file: String): Unit = {
  // getResourceAsStream returns null for a missing resource; fail with a clear message
  // instead of handing null to the XML loader.
  val in = Option(getClass.getResourceAsStream(file)).getOrElse(
    throw new java.io.FileNotFoundException(s"Resource not found: $file"))
  // The document is fully parsed by XML.load, so the stream can be closed right afterwards.
  val root = try XML.load(in) finally in.close()

  // extract (text, href) pairs; anchors without an `href` are dropped by the Option generator
  val links: List[(String, String)] = for {
    anchor <- (root \\ "a").toList
    hrefAttr <- anchor.attribute("href")
  } yield {
    val text = anchor.child.map {
      case Text(item) => item.trim
      case other => other.toString()
    }.mkString
    (text, hrefAttr.text)
  }

  // column widths; folding from 0 keeps this safe when there are no links at all
  val maxTextLen = links.map(_._1.length).foldLeft(0)(_ max _)
  val maxHrefLen = links.map(_._2.length).foldLeft(0)(_ max _)

  val headerAndFooter = "+" + "-" * maxTextLen + "--+--" + "-" * maxHrefLen + "+"

  // print extracted hyperlinks as table
  println(headerAndFooter)
  for ((text, href) <- links) {
    val textPad = " " * (maxTextLen - text.length)
    val hrefPad = " " * (maxHrefLen - href.length)
    println("| " + text + textPad + " | " + href + hrefPad + " |")
  }
  println(headerAndFooter)
}
/**
 * Task 7:
 *
 * Write a function that has a parameter of type `Map[String, String]` and returns a `dl` element
 * with a `dt` for each key and `dd` for each value. For example,
 * {{{
 * Map("A" -> "1", "B" -> "2")
 * }}}
 * should yield `<dl><dt>A</dt><dd>1</dd><dt>B</dt><dd>2</dd></dl>`.
 *
 * @param map entries to render; they are emitted in the map's iteration order
 * @return a `dl` element containing one `dt`/`dd` pair per entry
 */
def mapToXml(map: Map[String, String]): Elem = {
  // Each entry contributes two sibling nodes, so flatten the pairs into one sequence.
  val items = map.toSeq.flatMap { case (term, definition) =>
    Seq(<dt>{term}</dt>, <dd>{definition}</dd>)
  }
  <dl>{items}</dl>
}
/**
 * Task 8:
 *
 * Write a function that takes a `dl` element and turns it into a `Map[String, String]`.
 * This function should be the inverse of the function in the preceding exercise, provided
 * all `dt` children are distinct.
 *
 * Children are processed in document order: each `dt` sets the current key and each `dd` is
 * stored under the most recent key (or under the empty string when no `dt` came before it).
 * Keys and values are the trimmed text of the respective element. Any other child, such as
 * the whitespace text nodes of a pretty-printed document, is ignored.
 *
 * @param elem the element to convert
 * @return the extracted entries, or an empty map when `elem` is not a `dl` element
 */
def xmlToMap(elem: Elem): Map[String, String] = elem match {
  case <dl>{children @ _*}</dl> =>
    // The fold state is (current key, entries collected so far).
    children.foldLeft(("", Map.empty[String, String])) {
      case ((_, entries), dt: Elem) if dt.label == "dt" =>
        (dt.text.trim, entries)
      case ((key, entries), dd: Elem) if dd.label == "dd" =>
        (key, entries + (key -> dd.text.trim))
      // Anything else (whitespace, comments, unrelated elements) leaves the state untouched
      // instead of failing the match.
      case (state, _) => state
    }._2
  case _ => Map.empty
}
/**
 * Task 9:
 *
 * Transform an XHTML document by adding an `alt="TODO"` attribute to all img elements without
 * an `alt` attribute, preserving everything else.
 *
 * An `img` element whose `alt` attribute is present but has empty text is treated the same way
 * as one without the attribute: its `alt` is set to `"TODO"`.
 *
 * @param root root node of the document to transform
 * @return the first node produced by applying the rewrite rule to `root`
 */
def transformXhtml(root: Node): Node = {
  val addMissingAlt = new RewriteRule {
    override def transform(n: Node): Seq[Node] = n match {
      // A typed pattern avoids the unchecked cast and also covers `img` elements that happen
      // to have child nodes. `forall` is true both for a missing and for an empty `alt`.
      case img: Elem if img.label == "img" && img.attribute("alt").forall(_.text.isEmpty) =>
        img % new UnprefixedAttribute("alt", "TODO", Null)
      case other => other
    }
  }
  new RuleTransformer(addMissingAlt).transform(root).head
}
/**
 * Task 10:
 *
 * Write a function that reads an XHTML document, carries out the transformation of
 * the preceding exercise, and saves the result.
 * Be sure to preserve the DTD and any CDATA sections.
 *
 * The input is opened with `getClass.getResourceAsStream`, parsed with `XhtmlParser` and passed
 * through `transformXhtml`; the result is written with `XML.save` using UTF-8. When the input
 * has a document type declaration, its external ID and declarations are written back under
 * the label of the transformed root element.
 *
 * @param inFile  name of the resource to read
 * @param outFile path of the file to write
 * @throws java.io.FileNotFoundException if the input resource cannot be found
 */
def transformXhtmlFile(inFile: String, outFile: String): Unit = {
  // getResourceAsStream returns null for a missing resource; fail with a clear message
  // instead of a NullPointerException deep inside the parser.
  val in = Option(getClass.getResourceAsStream(inFile)).getOrElse(
    throw new java.io.FileNotFoundException(s"Resource not found: $inFile"))
  // Decode explicitly as UTF-8 so the input side matches the encoding used for saving,
  // independent of the platform default charset.
  val source = scala.io.Source.fromInputStream(in, "UTF-8")
  // Closing the source also closes the underlying stream.
  val doc = try new XhtmlParser(source).initialize.document() finally source.close()
  val xhtml = transformXhtml(doc.docElem)
  // `doc.dtd` is null when the input has no DOCTYPE; in that case save without one.
  val docType = Option(doc.dtd)
    .map(dtd => DocType(xhtml.label, dtd.externalID, dtd.decls))
    .orNull
  XML.save(outFile, xhtml, enc = "UTF-8", doctype = docType)
}
}
// Application objects for Tasks 4-6. Each one passes the corresponding `Chapter16` method to
// `Utils.FileApp`.
// NOTE(review): `Utils.FileApp` is defined outside this file; presumably it is a runnable app
// that supplies the file name argument to the given function - confirm against `Utils`.

// Task 4: print `img` elements without an `alt` attribute.
object Chapter16PrintImgWithoutAltApp extends Utils.FileApp(Chapter16.printImgWithoutAlt)
// Task 5: print the `src` attribute values of `img` elements.
object Chapter16PrintAllImgApp extends Utils.FileApp(Chapter16.printAllImg)
// Task 6: print a table of hyperlinks.
object Chapter16PrintAllHyperlinksApp extends Utils.FileApp(Chapter16.printAllHyperlinks)