// Source: GitHub repository page, tag: heroku-devcent…
// Havoc Pennington, September 29, 2011
package com.typesafe.webwords.web

import scala.collection.mutable
import scala.xml
import scala.xml.Attribute
import akka.actor.{ Index => _, _ }
import akka.actor.Actor.actorOf
import akka.http._
import com.typesafe.webwords.common._
import java.net.URL
import java.net.URI
import java.net.MalformedURLException
import java.net.URISyntaxException
import javax.servlet.http.HttpServletResponse

// Trivial endpoint that only exists for testing a simple case:
// a Get is answered with a fixed greeting, any other RequestMethod
// is answered with NotAllowed.
class HelloActor extends Actor {
    override def receive = {
        case hello: Get =>
            hello.OK("hello!")
        case other: RequestMethod =>
            other.NotAllowed("unsupported request")
    }
}

// Fallback endpoint: every path that is not recognized is sent here
// and answered with NotFound.
class Custom404Actor extends Actor {
    override def receive = {
        // this is just an example; you could do something nicer ;-)
        case unmatched: RequestMethod =>
            unmatched.NotFound("Nothing here!")
    }
}

// this actor handles the main page.
class WordsActor(config: WebWordsConfig) extends Actor {
    // Actor that handleGet asks for the index of a URL (GetIndex messages).
    // It is started in preStart and stopped in postStop.
    private val client = actorOf(new ClientActor(config))

    // Message this actor sends to itself when an index lookup has produced
    // a reply or has timed out; receive hands it to handleFinish, which
    // completes the HTTP request.
    //   request   - the pending HTTP request to complete
    //   url       - the URL that was looked up
    //   index     - the index from the reply; None on timeout
    //   cacheHit  - cache-hit flag from the reply; false on timeout
    //   startTime - System.currentTimeMillis taken when the lookup started
    case class Finish(request: RequestMethod, url: String, index: Option[Index],
        cacheHit: Boolean, startTime: Long)

    private def form(url: String, skipCache: Boolean, badUrl: Boolean = false) = {
        <div>
            <form action="/words" method="get">
                <fieldset>
                    <div>
                        <label for="url">Site</label>
                        <input type="text" id="url" name="url" value={ url } style="min-width: 300px;"></input>
                        {
                            if (badUrl) {
                                <div style="font-color: red;">Invalid or missing URL</div>
                            }
                        }
                    </div>
                    <div>
                        {
                            <input type="checkbox" id="skipCache" name="skipCache"></input> %
                                (if (skipCache) Attribute("checked", xml.Text(""), xml.Null) else xml.Null)
                        }
                        <label for="skipCache">Skip cache</label>
                    </div>
                    <div>
                        <button>Spider &amp; Index</button>
                    </div>
                </fieldset>
            </form>
        </div>
    }

    private def results(url: String, index: Index, cacheHit: Boolean, elapsed: Long) = {
        // world's ugliest word cloud!
        def countToStyle(count: Int) = {
            val maxCount = (index.wordCounts.headOption map { _._2 }).getOrElse(1)
            val font = 6 + ((count.doubleValue / maxCount.doubleValue) * 24).intValue
            Attribute("style", xml.Text("font-size: " + font + "pt;"), xml.Null)
        }

        <div>
            <p>
                <a href={ url }>{ url }</a>
                spidered and indexed.
            </p>
            <p>{ elapsed }ms elapsed.</p>
            <p>{ index.links.size } links scraped.</p>
            {
                if (cacheHit)
                    <p>Results were retrieved from cache.</p>
                else
                    <p>Results newly-spidered (not from cache).</p>
            }
        </div>
        <h3>Word Counts</h3>
        <div style="max-width: 600px; margin-left: 100px; margin-top: 20px; margin-bottom: 20px;">
            {
                val nodes = xml.NodeSeq.newBuilder
                for ((word, count) <- index.wordCounts) {
                    nodes += <span title={ count.toString }>{ word }</span> % countToStyle(count)
                    nodes += xml.Text(" ")
                }
                nodes.result
            }
        </div>
        <div style="font-size: small">(hover to see counts)</div>
        <h3>Links Found</h3>
        <div style="margin-left: 50px;">
            <ol>
                {
                    val nodes = xml.NodeSeq.newBuilder
                    for ((text, url) <- index.links)
                        nodes += <li><a href={ url }>{ text }</a></li>
                    nodes.result
                }
            </ol>
        </div>
    }

    // Wraps the form and, when present, the results into the complete
    // HTML page.
    //   formNode    - markup placed in the first inner <div>
    //   resultsNode - markup placed in a second inner <div>; that <div> is
    //                 only produced when resultsNode is non-empty
    def wordsPage(formNode: xml.NodeSeq, resultsNode: xml.NodeSeq) = {
        <html>
            <head>
                <title>Web Words!</title>
            </head>
            <body style="max-width: 800px;">
                <div>
                    <div>
                        { formNode }
                    </div>
                    {
                        // no else branch: the results wrapper exists only
                        // when there is something to show
                        if (resultsNode.nonEmpty)
                            <div>
                                { resultsNode }
                            </div>
                    }
                </div>
            </body>
        </html>
    }

    private def completeWithHtml(request: RequestMethod, html: xml.NodeSeq) = {
        request.response.setContentType("text/html")
        request.response.setCharacterEncoding("utf-8")
        request.OK("<!DOCTYPE html>\n" + html)
    }

    private def handleFinish(finish: Finish) = {
        val elapsed = System.currentTimeMillis - finish.startTime
        finish match {
            case Finish(request, url, Some(index), cacheHit, startTime) =>
                val html = wordsPage(form(url, skipCache = false), results(url, index, cacheHit, elapsed))

                completeWithHtml(request, html)

            case Finish(request, url, None, cacheHit, startTime) =>
                request.OK("Failed to index url in " + elapsed + "ms (try reloading)")
        }
    }

    private def parseURL(s: String): Option[URL] = {
        val maybe = try {
            new URI(s) // we want it to be a valid URI also
            Some(new URL(s))
        } catch {
            case e: MalformedURLException => None
            case e: URISyntaxException => None
        }
        maybe.orElse({
            if (s.startsWith("http"))
                None
            else
                parseURL("http://" + s)
        })
    }

    private def handleGet(get: RequestMethod) = {
        val skipCacheStr = Option(get.request.getParameter("skipCache")).getOrElse("false")
        val skipCache = Seq("true", "on", "checked").contains(skipCacheStr.toLowerCase)
        val urlStr = Option(get.request.getParameter("url"))
        val url = parseURL(urlStr.getOrElse(""))

        if (url.isDefined) {
            val startTime = System.currentTimeMillis
            val futureGotIndex = client ? GetIndex(url.get.toExternalForm, skipCache)

            futureGotIndex foreach {
                // now we're in another thread, so we just send ourselves
                // a message, don't touch actor state
                case GotIndex(url, indexOption, cacheHit) =>
                    self ! Finish(get, url, indexOption, cacheHit, startTime)
            }

            // we have to worry about timing out also.
            futureGotIndex onTimeout { _ =>
                // again in another thread - most methods on futures are in another thread!
                self ! Finish(get, url.get.toExternalForm, index = None, cacheHit = false, startTime = startTime)
            }
        } else {
            val html = wordsPage(form(urlStr.getOrElse(""), skipCache, badUrl = urlStr.isDefined),
                resultsNode = xml.NodeSeq.Empty)

            completeWithHtml(get, html)
        }
    }

    override def receive = {
        case get: Get =>
            handleGet(get)
        case request: RequestMethod =>
            request NotAllowed "unsupported request"
        case finish: Finish =>
            handleFinish(finish)
    }

    override def preStart = {
        client.start
    }

    override def postStop = {
        client.stop
    }
}

// Entry point for HTTP requests: it does no work itself and only hands
// each path to the actor responsible for it.
// Libraries such as Spray (https://github.com/spray/spray/wiki) need less
// typing for this, but this example shows how to do it manually.
class WebBootstrap(rootEndpoint: ActorRef, config: WebWordsConfig) extends Actor with Endpoint {
    // path -> handler actor
    private val handlers = Map(
        "/hello" -> actorOf[HelloActor],
        "/words" -> actorOf(new WordsActor(config)))

    // receives every path that has no handler of its own
    private val custom404 = actorOf[Custom404Actor]

    // Caution: this callback does not run in the actor thread,
    // so has to be thread-safe. We keep it simple and only touch
    // immutable values so there's nothing to worry about.
    //
    // Routing: "/" is served by the "/words" handler, a registered path
    // by its own handler, and anything else by custom404.
    private val handlerFactory: PartialFunction[String, ActorRef] = {
        case "/" =>
            handlers("/words")
        case path: String =>
            handlers.getOrElse(path, custom404)
    }

    override def receive = handleHttpRequest

    override def preStart = {
        // start up our handlers
        for (handler <- handlers.values)
            handler.start
        custom404.start

        // register ourselves with the akka-http RootEndpoint actor.
        // In Akka 2.0, Endpoint.Attach takes a partial function,
        // in 1.2 it still takes two separate functions.
        // So in 2.0 this can just be Endpoint.Attach(handlerFactory)
        rootEndpoint ! Endpoint.Attach(
            path => handlerFactory.isDefinedAt(path),
            path => handlerFactory(path))
    }

    override def postStop = {
        // shut down everything preStart started
        for (handler <- handlers.values)
            handler.stop
        custom404.stop
    }
}
Something went wrong with that request. Please try again.