Skip to content
Browse files

Initial commit

  • Loading branch information...
0 parents commit 23ff84ac066758972ca0054c6e286f4b96393d60 Markus Weimer committed Feb 23, 2012
Showing with 2,061 additions and 0 deletions.
  1. +157 −0 LICENSE.txt
  2. +10 −0 NOTICE.txt
  3. +99 −0 pom.xml
  4. +26 −0 scalops-dsl.iml
  5. +64 −0 src/main/scala/com/yahoo/Scalops.scala
  6. +78 −0 src/main/scala/com/yahoo/scalops/dsl/Queryable.scala
  7. +30 −0 src/main/scala/com/yahoo/scalops/dsl/actions/Action.scala
  8. +18 −0 src/main/scala/com/yahoo/scalops/dsl/actions/CollectAction.scala
  9. +18 −0 src/main/scala/com/yahoo/scalops/dsl/actions/CountAction.scala
  10. +20 −0 src/main/scala/com/yahoo/scalops/dsl/actions/IsEmptyAction.scala
  11. +18 −0 src/main/scala/com/yahoo/scalops/dsl/actions/ReduceAction.scala
  12. +45 −0 src/main/scala/com/yahoo/scalops/dsl/oper/Scaloop.scala
  13. +65 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/CoGroupTransformation.scala
  14. +20 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/EmptyTableTransformation.scala
  15. +27 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/FilterTransformation.scala
  16. +18 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/GroupTransformation.scala
  17. +44 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/JoinBranch.scala
  18. +82 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/JoinTransformation.scala
  19. +20 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/LoadTransformation.scala
  20. +24 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/MapTransformation.scala
  21. +27 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/OrderTransformation.scala
  22. +18 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/TableTransformation.scala
  23. +16 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/Transformation.scala
  24. +18 −0 src/main/scala/com/yahoo/scalops/dsl/transformations/UpdateTransformation.scala
  25. +36 −0 src/main/scala/com/yahoo/scalops/dsl/types/BooleanType.scala
  26. +17 −0 src/main/scala/com/yahoo/scalops/dsl/types/CompositeType.scala
  27. +28 −0 src/main/scala/com/yahoo/scalops/dsl/types/ListType.scala
  28. +55 −0 src/main/scala/com/yahoo/scalops/dsl/types/NumericType.scala
  29. +26 −0 src/main/scala/com/yahoo/scalops/dsl/types/NumericTypeArithmetics.scala
  30. +20 −0 src/main/scala/com/yahoo/scalops/dsl/types/ObjectType.scala
  31. +48 −0 src/main/scala/com/yahoo/scalops/dsl/types/Operator.scala
  32. +20 −0 src/main/scala/com/yahoo/scalops/dsl/types/StringType.scala
  33. +36 −0 src/main/scala/com/yahoo/scalops/dsl/types/Type.scala
  34. +58 −0 src/main/scala/com/yahoo/scalops/dsl/types/VectorType.scala
  35. +62 −0 src/main/scala/com/yahoo/scalops/examples/App.scala
  36. +89 −0 src/main/scala/com/yahoo/scalops/examples/BatchGradientDescent.scala
  37. +84 −0 src/main/scala/com/yahoo/scalops/examples/DenseVector.scala
  38. +28 −0 src/main/scala/com/yahoo/scalops/examples/GradientFunctions.scala
  39. +69 −0 src/main/scala/com/yahoo/scalops/examples/KMeans.scala
  40. +24 −0 src/main/scala/com/yahoo/scalops/examples/ML.scala
  41. +25 −0 src/main/scala/com/yahoo/scalops/examples/MLTypes.scala
  42. +140 −0 src/main/scala/com/yahoo/scalops/examples/MatrixFactorization.scala
  43. +41 −0 src/main/scala/com/yahoo/scalops/examples/PageRank.scala
  44. +76 −0 src/main/scala/com/yahoo/scalops/examples/ParallelStochasticGradientDescent.scala
  45. +47 −0 src/main/scala/com/yahoo/scalops/examples/Pregel.scala
  46. +70 −0 src/main/scala/com/yahoo/scalops/examples/Vector.scala
157 LICENSE.txt
@@ -0,0 +1,157 @@
+Apache License Version 2.0, January 2004 http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+"License" shall mean the terms and conditions for use, reproduction, and
+distribution as defined by Sections 1 through 9 of this document.
+
+"Licensor" shall mean the copyright owner or entity authorized by the
+copyright owner that is granting the License.
+
+"Legal Entity" shall mean the union of the acting entity and all other
+entities that control, are controlled by, or are under common control with
+that entity. For the purposes of this definition, "control" means (i) the
+power, direct or indirect, to cause the direction or management of such
+entity, whether by contract or otherwise, or (ii) ownership of fifty percent
+(50%) or more of the outstanding shares, or (iii) beneficial ownership of such
+entity.
+
+"You" (or "Your") shall mean an individual or Legal Entity exercising
+permissions granted by this License.
+
+"Source" form shall mean the preferred form for making modifications,
+including but not limited to software source code, documentation source, and
+configuration files.
+
+"Object" form shall mean any form resulting from mechanical transformation or
+translation of a Source form, including but not limited to compiled object
+code, generated documentation, and conversions to other media types.
+
+"Work" shall mean the work of authorship, whether in Source or Object form,
+made available under the License, as indicated by a copyright notice that is
+included in or attached to the work (an example is provided in the Appendix
+below).
+
+"Derivative Works" shall mean any work, whether in Source or Object form, that
+is based on (or derived from) the Work and for which the editorial revisions,
+annotations, elaborations, or other modifications represent, as a whole, an
+original work of authorship. For the purposes of this License, Derivative
+Works shall not include works that remain separable from, or merely link (or
+bind by name) to the interfaces of, the Work and Derivative Works thereof.
+
+"Contribution" shall mean any work of authorship, including the original
+version of the Work and any modifications or additions to that Work or
+Derivative Works thereof, that is intentionally submitted to Licensor for
+inclusion in the Work by the copyright owner or by an individual or Legal
+Entity authorized to submit on behalf of the copyright owner. For the purposes
+of this definition, "submitted" means any form of electronic, verbal, or
+written communication sent to the Licensor or its representatives, including
+but not limited to communication on electronic mailing lists, source code
+control systems, and issue tracking systems that are managed by, or on behalf
+of, the Licensor for the purpose of discussing and improving the Work, but
+excluding communication that is conspicuously marked or otherwise designated
+in writing by the copyright owner as "Not a Contribution."
+
+"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
+of whom a Contribution has been received by Licensor and subsequently
+incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable copyright license to
+reproduce, prepare Derivative Works of, publicly display, publicly perform,
+sublicense, and distribute the Work and such Derivative Works in Source or
+Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of this
+License, each Contributor hereby grants to You a perpetual, worldwide,
+non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this
+section) patent license to make, have made, use, offer to sell, sell, import,
+and otherwise transfer the Work, where such license applies only to those
+patent claims licensable by such Contributor that are necessarily infringed by
+their Contribution(s) alone or by combination of their Contribution(s) with
+the Work to which such Contribution(s) was submitted. If You institute patent
+litigation against any entity (including a cross-claim or counterclaim in a
+lawsuit) alleging that the Work or a Contribution incorporated within the Work
+constitutes direct or contributory patent infringement, then any patent
+licenses granted to You under this License for that Work shall terminate as of
+the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the Work or
+Derivative Works thereof in any medium, with or without modifications, and in
+Source or Object form, provided that You meet the following conditions:
+
+(a) You must give any other recipients of the Work or Derivative Works a copy
+of this License; and
+
+(b) You must cause any modified files to carry prominent notices stating that
+You changed the files; and
+
+(c) You must retain, in the Source form of any Derivative Works that You
+distribute, all copyright, patent, trademark, and attribution notices from the
+Source form of the Work, excluding those notices that do not pertain to any
+part of the Derivative Works; and
+
+(d) If the Work includes a "NOTICE" text file as part of its distribution,
+then any Derivative Works that You distribute must include a readable copy of
+the attribution notices contained within such NOTICE file, excluding those
+notices that do not pertain to any part of the Derivative Works, in at least
+one of the following places: within a NOTICE text file distributed as part of
+the Derivative Works; within the Source form or documentation, if provided
+along with the Derivative Works; or, within a display generated by the
+Derivative Works, if and wherever such third-party notices normally appear.
+The contents of the NOTICE file are for informational purposes only and do not
+modify the License. You may add Your own attribution notices within Derivative
+Works that You distribute, alongside or as an addendum to the NOTICE text from
+the Work, provided that such additional attribution notices cannot be
+construed as modifying the License.
+
+You may add Your own copyright statement to Your modifications and may provide
+additional or different license terms and conditions for use, reproduction, or
+distribution of Your modifications, or for any such Derivative Works as a
+whole, provided Your use, reproduction, and distribution of the Work otherwise
+complies with the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise, any
+Contribution intentionally submitted for inclusion in the Work by You to the
+Licensor shall be under the terms and conditions of this License, without any
+additional terms or conditions. Notwithstanding the above, nothing herein
+shall supersede or modify the terms of any separate license agreement you may
+have executed with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade names,
+trademarks, service marks, or product names of the Licensor, except as
+required for reasonable and customary use in describing the origin of the Work
+and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or agreed to in
+writing, Licensor provides the Work (and each Contributor provides its
+Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied, including, without limitation, any warranties
+or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+PARTICULAR PURPOSE. You are solely responsible for determining the
+appropriateness of using or redistributing the Work and assume any risks
+associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory, whether in
+tort (including negligence), contract, or otherwise, unless required by
+applicable law (such as deliberate and grossly negligent acts) or agreed to in
+writing, shall any Contributor be liable to You for damages, including any
+direct, indirect, special, incidental, or consequential damages of any
+character arising as a result of this License or out of the use or inability
+to use the Work (including but not limited to damages for loss of goodwill,
+work stoppage, computer failure or malfunction, or any and all other
+commercial damages or losses), even if such Contributor has been advised of
+the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing the Work
+or Derivative Works thereof, You may choose to offer, and charge a fee for,
+acceptance of support, warranty, indemnity, or other liability obligations
+and/or rights consistent with this License. However, in accepting such
+obligations, You may act only on Your own behalf and on Your sole
+responsibility, not on behalf of any other Contributor, and only if You agree
+to indemnify, defend, and hold each Contributor harmless for any liability
+incurred by, or claims asserted against, such Contributor by reason of your
+accepting any such warranty or additional liability.
10 NOTICE.txt
@@ -0,0 +1,10 @@
+=======================================================================
+NOTICE file for use with, and corresponding to Section 4 of,
+the Apache License, Version 2.0,
+in this case for the ScalOps project. ==
+=========================================================================
+
+ This product includes software developed by
+ Yahoo! Inc. (www.yahoo.com)
+ Copyright (c) 2010 Yahoo! Inc. All rights reserved.
+
99 pom.xml
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ ~ Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ ~ Apache License, Version 2.0 (the "License"); you may not use this file except
+ ~ in compliance with the License. You may obtain a copy of the License at
+ ~ http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ ~ or agreed to in writing, software distributed under the License is distributed
+ ~ on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ ~ express or implied. See the License for the specific language governing
+ ~ permissions and limitations under the License. See accompanying LICENSE file.
+ -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>com.yahoo</groupId>
+ <artifactId>scalops-dsl</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ <packaging>jar</packaging>
+ <name>scalops-dsl</name>
+ <description>The ScalOps Domain Specific Language (DSL).</description>
+ <prerequisites>
+ <maven>2.2.1</maven>
+ </prerequisites>
+
+ <build>
+ <sourceDirectory>src/main/scala</sourceDirectory>
+ <testSourceDirectory>src/test/scala</testSourceDirectory>
+ <plugins>
+ <plugin>
+ <groupId>org.scala-tools</groupId>
+ <artifactId>maven-scala-plugin</artifactId>
+ <version>2.15.2</version>
+ <configuration>
+ <args>
+ <arg>-optimise</arg>
+ </args>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>compile</goal>
+ <goal>testCompile</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.5</version>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ </configuration>
+ </plugin>
+
+ <!--
+ This packages the scala runtime with the jar.
+ We might want a better solution than this
+ -->
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <configuration>
+ <descriptorRefs>
+ <descriptorRef>jar-with-dependencies</descriptorRef>
+ </descriptorRefs>
+ </configuration>
+ </plugin>
+
+ </plugins>
+ </build>
+
+
+ <repositories>
+ <repository>
+ <id>scala-tools.org</id>
+ <name>Scala-tools Maven2 Repository</name>
+ <url>http://scala-tools.org/repo-releases</url>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>scala-tools.org</id>
+ <name>Scala-tools Maven2 Repository</name>
+ <url>http://scala-tools.org/repo-releases</url>
+ </pluginRepository>
+ </pluginRepositories>
+
+
+ <dependencies>
+ <dependency>
+ <groupId>org.scala-lang</groupId>
+ <artifactId>scala-library</artifactId>
+ <version>2.9.1</version>
+ </dependency>
+ </dependencies>
+</project>
26 scalops-dsl.iml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
+ <component name="FacetManager">
+ <facet type="scala" name="Scala">
+ <configuration>
+ <option name="compilerLibraryLevel" value="Project" />
+ <option name="compilerLibraryName" value="Maven: org.scala-lang:scala-compiler-bundle:2.9.1" />
+ <option name="optimiseBytecode" value="true" />
+ <option name="vmOptions" value="" />
+ </configuration>
+ </facet>
+ </component>
+ <component name="NewModuleRootManager" inherit-compiler-output="false">
+ <output url="file://$MODULE_DIR$/target/classes" />
+ <output-test url="file://$MODULE_DIR$/target/test-classes" />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/src/main/scala" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/test/scala" isTestSource="true" />
+ <excludeFolder url="file://$MODULE_DIR$/target" />
+ </content>
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.9.1" level="project" />
+ </component>
+</module>
+
64 src/main/scala/com/yahoo/Scalops.scala
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo
+
+import com.yahoo.scalops.dsl.types.NumericTypeImplicits
+import com.yahoo.scalops.dsl.transformations.MapTransformationImplicits
+import com.yahoo.scalops.dsl.transformations.FilterTransformationImplicits
+import com.yahoo.scalops.dsl.transformations.LoadTransformationImplicits
+import com.yahoo.scalops.dsl.transformations.JoinTransformationImplicits
+import com.yahoo.scalops.dsl.transformations.OrderTransformationImplicits
+import com.yahoo.scalops.dsl.transformations.CoGroupTransformationImplicits
+import com.yahoo.scalops.dsl.types.NumericTypeArithmetics
+import com.yahoo.scalops.dsl.actions.ActionImplicits
+import com.yahoo.scalops.dsl.types.BooleanImplicits
+import com.yahoo.scalops.dsl.types.VectorImplicits
+import com.yahoo.scalops.dsl.types.ListTypeImplicits
+import com.yahoo.scalops.dsl.types.StringTypeImplicits
+import com.yahoo.scalops.dsl.transformations.EmptyTableTransformationImplicits
+import com.yahoo.scalops.dsl.oper.ScaloopImplicits
+
+object Scalops extends NumericTypeImplicits
+with NumericTypeArithmetics
+with BooleanImplicits
+with VectorImplicits
+with ListTypeImplicits
+with StringTypeImplicits
+with ActionImplicits
+with EmptyTableTransformationImplicits
+with LoadTransformationImplicits
+with MapTransformationImplicits
+with FilterTransformationImplicits
+with OrderTransformationImplicits
+with ScaloopImplicits
+with JoinTransformationImplicits
+with CoGroupTransformationImplicits {
+
+ // Types
+ type Queryable[T] = com.yahoo.scalops.dsl.Queryable[T]
+ type Type[T] = com.yahoo.scalops.dsl.types.Type[T]
+ type NumericType[T] = com.yahoo.scalops.dsl.types.NumericType[T]
+ type DoubleType = NumericType[Double]
+ type FloatType = NumericType[Float]
+ type IntType = NumericType[Int]
+ type LongType = NumericType[Long]
+ type StringType = com.yahoo.scalops.dsl.types.StringType
+ type VectorType = com.yahoo.scalops.dsl.types.VectorType
+ type ListType[T] = com.yahoo.scalops.dsl.types.ListType[T]
+ type CompositeType = com.yahoo.scalops.dsl.types.CompositeType
+ type BooleanType = com.yahoo.scalops.dsl.types.BooleanType
+
+ // Flatten
+ case class FunctionFlatten[T](t: Traversable[T]) extends Throwable
+
+ def flatten[T](t: Traversable[T]): T = throw new FunctionFlatten(t)
+}
78 src/main/scala/com/yahoo/scalops/dsl/Queryable.scala
@@ -0,0 +1,78 @@
+package com.yahoo.scalops.dsl
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import actions.{IsEmptyAction, ReduceAction, CollectAction, CountAction}
+import com.yahoo.scalops.dsl.transformations.MapTransformation
+import com.yahoo.scalops.dsl.transformations.FilterTransformation
+import com.yahoo.scalops.dsl.transformations.UpdateTransformation
+import com.yahoo.scalops.dsl.transformations.GroupTransformation
+import com.yahoo.scalops.dsl.transformations.OrderTransformation
+import com.yahoo.scalops.dsl.transformations.TableTransformation
+import com.yahoo.scalops.dsl.types.Type
+import com.yahoo.scalops.dsl.types.ListType
+
+trait Queryable[T] {
+
+ // Transformations
+ /**
+ * Materializes the Query into a table
+ */
+ def toTable = new TableTransformation(this)
+
+ /**
+ * Applies the given function to each element of the Queryable, resulting in a new Queryable of the results
+ */
+ def map[T2](f: T => T2): Queryable[T2] = new MapTransformation(this, f)
+
+ /**
+ * Filter the Queryable by the given predicate. Those elements where predicate evaluates to true will be preserved
+ */
+ def filter(predicate: T => Boolean): Queryable[T] = new FilterTransformation(this, predicate)
+
+ /**
+ * Overwrite the queryable with the elements found in the other queryable
+ */
+ def :=(that: Queryable[T]): Queryable[T] = new UpdateTransformation[T](this, that)
+
+ /**
+ * Group the records in the Queryable using the given grouping predicate
+ */
+ def group[K](predicate: T => K): Queryable[(K, Traversable[T])] = new GroupTransformation[K, T](this, predicate)
+
+ def sort[R <% Ordered[R]](predicate: T => R): Queryable[T] = new OrderTransformation[T, R](this, predicate)
+
+ // Actions
+
+ /**
+ * Count the number of elements in the Queryable
+ */
+ def count() = new CountAction(this)
+
+ def size = count()
+
+ /**
+ * Collect the elements of the Queryable into a local collection.
+ */
+ def collect(): ListType[T] = new CollectAction[T](this)
+
+ /**
+ * Apply the given function to reduce the Queryable to a scalar
+ */
+ def reduce(f: (T, T) => T): Type[T] = new ReduceAction(this, f)
+
+ /**
+ * Determine whether the Queryable is empty.
+ */
+ def isEmpty = new IsEmptyAction(this)
+
+}
30 src/main/scala/com/yahoo/scalops/dsl/actions/Action.scala
@@ -0,0 +1,30 @@
+package com.yahoo.scalops.dsl.actions
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import com.yahoo.scalops.dsl.Queryable
+import com.yahoo.scalops.dsl.types.Type
+import com.yahoo.scalops.dsl.types.BooleanType
+import com.yahoo.scalops.dsl.types.ListType
+
+trait Action[T] extends Queryable[T] with Type[T]
+
+/**
+ * Mixed into Scalops._
+ */
+trait ActionImplicits {
+
+ implicit def actionToBoolean(t: Action[Boolean]): BooleanType = BooleanType.typeToBooleanType(t)
+
+ implicit def actionToList[T](t: Action[List[T]]): ListType[T] = ListType.typeToListType[T](t)
+
+}
18 src/main/scala/com/yahoo/scalops/dsl/actions/CollectAction.scala
@@ -0,0 +1,18 @@
+package com.yahoo.scalops.dsl.actions
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import com.yahoo.scalops.dsl.Queryable
+
+class CollectAction[T](input: Queryable[T]) extends Action[List[T]] {
+ def name = input.toString() + ".collect()"
+}
18 src/main/scala/com/yahoo/scalops/dsl/actions/CountAction.scala
@@ -0,0 +1,18 @@
+package com.yahoo.scalops.dsl.actions
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class CountAction(input: Queryable[_]) extends Action[Long] {
+ override def toString = input.toString() + ".count()"
+}
20 src/main/scala/com/yahoo/scalops/dsl/actions/IsEmptyAction.scala
@@ -0,0 +1,20 @@
+package com.yahoo.scalops.dsl.actions
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class IsEmptyAction(val q: Queryable[_]) extends Action[Boolean] {
+
+ override def toString = q.toString() + ".isEmpty()"
+
+}
18 src/main/scala/com/yahoo/scalops/dsl/actions/ReduceAction.scala
@@ -0,0 +1,18 @@
+package com.yahoo.scalops.dsl.actions
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class ReduceAction[T](input: Queryable[T], f: (T, T) => T) extends Action[T] {
+ def name = input.toString() + ".reduce(" + f.toString() + ")"
+}
45 src/main/scala/com/yahoo/scalops/dsl/oper/Scaloop.scala
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.oper
+
+import com.yahoo.scalops.dsl.types.Type
+
+/**
+ * Looping operations for scalop
+ *
+ * @author Markus Weimer <weimer@yahoo-inc.com>
+ */
+
+/**
+ * A loop with a while condition
+ */
+case class ConditionedLoopOperator[T <: Type[_]](env: T, whileCondition: T => Type[Boolean], body: T => T) {
+ override def toString = "loop(" + env + ", " + whileCondition + "){\n\t" + body(env) + "\n}"
+}
+
+/**
+ * A loop over a range
+ */
+case class RangeLoopOperator[T <: Type[_]](env: T, range: Range, body: T => T) {
+ override def toString = "loop(" + env + "," + range + "){\n\t" + body(env).getClass().toString() + "\n}"
+}
+
+trait ScaloopImplicits {
+
+ implicit def scaloopConditionOperatorToEnvironment[T <: Type[_]](so: ConditionedLoopOperator[T]): T = so.env
+
+ def loop[T <: Type[_]](env: T, whileCondition: T => Type[Boolean])(body: T => T) = new ConditionedLoopOperator[T](env, whileCondition, body)
+
+ implicit def scaloopRangeOperatorToEnvironment[T <: Type[_]](so: RangeLoopOperator[T]): T = so.env
+
+ def loop[T <: Type[_]](env: T, range: Range)(body: T => T) = new RangeLoopOperator[T](env, range, body)
+}
65 src/main/scala/com/yahoo/scalops/dsl/transformations/CoGroupTransformation.scala
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+case class CoGroupTransformation[K, R](val input: JoinBranch[K, _]*) extends Transformation[R] {
+ override def toString = input.mkString("COGROUP ", ", ", "")
+
+ private[this] var parallelism: Option[scala.Int] = None
+
+ def parallel(level: scala.Int) = {
+ this.parallelism = Some(level)
+ this
+ }
+
+}
+
+trait CoGroupTransformationImplicits {
+
+ def cogroup[K, B1, B2](b1: JoinBranch[K, B1], b2: JoinBranch[K, B2]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2])](b1, b2)
+
+ def cogroup[K, B1, B2, B3](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2], Traversable[B3])](b1, b2, b3)
+
+ def cogroup[K, B1, B2, B3, B4](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2], Traversable[B3], Traversable[B4])](b1, b2, b3, b4)
+
+ def cogroup[K, B1, B2, B3, B4, B5](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2], Traversable[B3], Traversable[B4], Traversable[B5])](b1, b2, b3, b4, b5)
+
+ def cogroup[K, B1, B2, B3, B4, B5, B6](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5],
+ b6: JoinBranch[K, B6]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2], Traversable[B3], Traversable[B4], Traversable[B5], Traversable[B6])](b1, b2, b3, b4, b5, b6)
+
+ def cogroup[K, B1, B2, B3, B4, B5, B6, B7](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5],
+ b6: JoinBranch[K, B6],
+ b7: JoinBranch[K, B7]) = CoGroupTransformation[K, (K, Traversable[B1], Traversable[B2], Traversable[B3], Traversable[B4], Traversable[B5], Traversable[B6], Traversable[B7])](b1, b2, b3, b4, b5, b6, b7)
+
+}
20 src/main/scala/com/yahoo/scalops/dsl/transformations/EmptyTableTransformation.scala
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+case class EmptyTableTransformation[T]() extends Transformation[T] {
+ override val toString = "Table"
+}
+
+trait EmptyTableTransformationImplicits {
+ def table[T]() = EmptyTableTransformation[T]()
+}
27 src/main/scala/com/yahoo/scalops/dsl/transformations/FilterTransformation.scala
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class FilterTransformation[T](val input: Queryable[T], val predicate: T => Boolean) extends Queryable[T] {
+ override def toString = input.toString() + ".filter(" + predicate.toString() + ")"
+}
+
+trait FilterTransformationImplicits {
+
+ class FilterHelper[T](val input: Queryable[T]) {
+ def by(predicate: T => Boolean) = FilterTransformation(input, predicate)
+ }
+
+ def filter[T](input: Queryable[T]) = new FilterHelper(input)
+}
18 src/main/scala/com/yahoo/scalops/dsl/transformations/GroupTransformation.scala
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class GroupTransformation[K, V](val input: Queryable[V], val predicate: V => K) extends Transformation[(K, Traversable[V])] {
+ override def toString = input.toString() + ".group(" + predicate.toString() + ")"
+}
44 src/main/scala/com/yahoo/scalops/dsl/transformations/JoinBranch.scala
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class JoinBranch[K, T](val input: Queryable[T], val key: T => K) {
+ private[this] var _outer: Boolean = false
+
+ override def toString = input.toString() + ".by(" + key.toString() + ")"
+
+ def outer = {
+ _outer = true;
+ this
+ }
+}
+
+case class JoinBranchImplicit[T](val input: Queryable[T]) {
+ def by[K1](k1: T => K1) = new JoinBranch[K1, T](input, k1)
+
+ def by[K1, K2](k1: T => K1, k2: T => K2) =
+ new JoinBranch[Tuple2[K1, K2], T](input, (t: T) => new Tuple2(k1(t), k2(t)))
+
+ def by[K1, K2, K3](k1: T => K1, k2: T => K2, k3: T => K3) =
+ new JoinBranch[Tuple3[K1, K2, K3], T](input, (t: T) => new Tuple3(k1(t), k2(t), k3(t)))
+
+ def by[K1, K2, K3, K4](k1: T => K1, k2: T => K2, k3: T => K3, k4: T => K4) =
+ new JoinBranch[Tuple4[K1, K2, K3, K4], T](input, (t: T) => new Tuple4(k1(t), k2(t), k3(t), k4(t)))
+
+ def by[K1, K2, K3, K4, K5](k1: T => K1, k2: T => K2, k3: T => K3, k4: T => K4, k5: T => K5) =
+ new JoinBranch[Tuple5[K1, K2, K3, K4, K5], T](input, (t: T) => new Tuple5(k1(t), k2(t), k3(t), k4(t), k5(t)))
+
+ def by[K1, K2, K3, K4, K5, K6](k1: T => K1, k2: T => K2, k3: T => K3, k4: T => K4, k5: T => K5, k6: T => K6) =
+ new JoinBranch[Tuple6[K1, K2, K3, K4, K5, K6], T](input, (t: T) => new Tuple6(k1(t), k2(t), k3(t), k4(t), k5(t), k6(t)))
+}
82 src/main/scala/com/yahoo/scalops/dsl/transformations/JoinTransformation.scala
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class JoinTransformation[K, R](val input: JoinBranch[K, _]*) extends Transformation[R] {
+
+ object JoinMethod extends Enumeration {
+ val Replicated, Skewed, Merge = Value
+ }
+
+ type Method = JoinMethod.Value
+
+ override def toString = input.mkString("join(", ", ", ")")
+
+ private[this] var method: Option[Method] = None
+
+ private[this] var parallelism: Option[scala.Int] = None
+
+ def using(m: Method) {
+ this.method = Some(m)
+ this
+ }
+
+ def parallel(level: scala.Int) = {
+ this.parallelism = Some(level)
+ this
+ }
+}
+
+trait JoinTransformationImplicits {
+ implicit def queryableToBranch[T](input: Queryable[T]) = JoinBranchImplicit[T](input)
+
+ implicit def transformationToBranch[T](input: Transformation[T]) = JoinBranchImplicit[T](input)
+
+ def join[K, B1, B2](b1: JoinBranch[K, B1], b2: JoinBranch[K, B2]) = JoinTransformation[K, (B1, B2)](b1, b2)
+
+ def join[K, B1, B2, B3](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3]) = JoinTransformation[K, (B1, B2, B3)](b1, b2, b3)
+
+ def join[K, B1, B2, B3, B4](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4]) = JoinTransformation[K, (B1, B2, B3, B4)](b1, b2, b3, b4)
+
+ def join[K, B1, B2, B3, B4, B5](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5]) = JoinTransformation[K, (B1, B2, B3, B4, B5)](b1, b2, b3, b4, b5)
+
+ def join[K, B1, B2, B3, B4, B5, B6](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5],
+ b6: JoinBranch[K, B6]) = JoinTransformation[K, (B1, B2, B3, B4, B5, B6)](b1, b2, b3, b4, b5, b6)
+
+ def join[K, B1, B2, B3, B4, B5, B6, B7](
+ b1: JoinBranch[K, B1],
+ b2: JoinBranch[K, B2],
+ b3: JoinBranch[K, B3],
+ b4: JoinBranch[K, B4],
+ b5: JoinBranch[K, B5],
+ b6: JoinBranch[K, B6],
+ b7: JoinBranch[K, B7]) = JoinTransformation[K, (B1, B2, B3, B4, B5, B6, B7)](b1, b2, b3, b4, b5, b6, b7)
+}
20 src/main/scala/com/yahoo/scalops/dsl/transformations/LoadTransformation.scala
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+case class LoadTransformation[T](val url: String) extends Transformation[T] {
+ override def toString = "load(" + url + ")"
+}
+
+trait LoadTransformationImplicits {
+ def load[T](url: String) = LoadTransformation[T](url)
+}
24 src/main/scala/com/yahoo/scalops/dsl/transformations/MapTransformation.scala
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+/**
+ * Map transformation: Apply a given function to all records in the queryable, yielding a new queryable of the output type.
+ */
+case class MapTransformation[T1, T2](val input: Queryable[T1], val f: T1 => T2) extends Transformation[T2] {
+ override def toString = input.toString + ".map(" + f.toString + ")"
+}
+
+trait MapTransformationImplicits {
+}
27 src/main/scala/com/yahoo/scalops/dsl/transformations/OrderTransformation.scala
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class OrderTransformation[T, R <% Ordered[R]](val input: Queryable[T], val field: T => R) extends Transformation[T] {
+ override def toString = input.toString() + ".sort(" + field.toString() + ")"
+}
+
+trait OrderTransformationImplicits {
+
+ case class OrderByHelper[T](val input: Queryable[T]) {
+ def by[R <% Ordered[R]](field: T => R) = OrderTransformation[T, R](input, field)
+ }
+
+ def order[T](input: Queryable[T]) = OrderByHelper[T](input)
+}
18 src/main/scala/com/yahoo/scalops/dsl/transformations/TableTransformation.scala
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class TableTransformation[T](val input: Queryable[T]) extends Transformation[T] {
+ override lazy val toString = input.toString() + ".table"
+}
16 src/main/scala/com/yahoo/scalops/dsl/transformations/Transformation.scala
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+trait Transformation[T] extends Queryable[T]
18 src/main/scala/com/yahoo/scalops/dsl/transformations/UpdateTransformation.scala
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.transformations
+
+import com.yahoo.scalops.dsl.Queryable
+
+case class UpdateTransformation[T](val input: Queryable[T], val updates: Queryable[T]) extends Transformation[T] {
+
+}
36 src/main/scala/com/yahoo/scalops/dsl/types/BooleanType.scala
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+class BooleanType extends Type[Boolean] {
+
+ case class UnaryBooleanExpression(t: Type[Boolean], oper: String) extends BooleanType
+
+ case class BinaryBooleanExpression(b1: Type[Boolean], b2: Type[Boolean], oper: String) extends BooleanType
+
+ def unary_! : BooleanType = new UnaryBooleanExpression(this, "!")
+
+ def &&(that: Type[Boolean]) = new BinaryBooleanExpression(this, that, "&&")
+
+ def ||(that: Type[Boolean]) = new BinaryBooleanExpression(this, that, "||")
+}
+
+trait BooleanImplicits {
+
+ private[BooleanImplicits] case class BooleanContainer(override val value: Option[Boolean]) extends BooleanType
+
+ implicit def typeToBooleanType(t: Type[Boolean]) = new BooleanContainer(t.value)
+
+ implicit def boxBoolean(b: Boolean) = BooleanContainer(Some(b))
+}
+
+object BooleanType extends BooleanImplicits
17 src/main/scala/com/yahoo/scalops/dsl/types/CompositeType.scala
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+// Used for the loop environment
+trait CompositeType extends Type[Any] {
+
+}
28 src/main/scala/com/yahoo/scalops/dsl/types/ListType.scala
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+class ListType[T] extends Type[List[T]]
+
+trait ListTypeImplicits {
+
+ private[ListTypeImplicits] case class ListContainer[T](override val value: Option[List[T]]) extends ListType[T]
+
+ implicit def listToListType[T](l: List[T]): ListType[T] = new ListContainer[T](Some(l))
+
+ implicit def listTypeToList[T](lt: ListType[T]): Traversable[T] = lt.value.get
+
+ implicit def typeToListType[T](t: Type[List[T]]): ListType[T] = ListContainer[T](t.value)
+
+}
+
+object ListType extends ListTypeImplicits
55 src/main/scala/com/yahoo/scalops/dsl/types/NumericType.scala
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+case class NumericType[T](override val value: Option[T])(implicit numeric: Numeric[T]) extends Type[T] {
+
+ case class BinaryNumericExpression[T](x: NumericType[T], y: NumericType[T], oper: Operator) extends Type[T] {
+ override def toString() = x.toString() + oper.toString() + y.toString()
+ }
+
+ case class BinaryBooleanExpression[T](x: NumericType[T], y: NumericType[T], oper: BooleanOperator) extends Type[Boolean] {
+ override def toString() = x.toString() + oper.toString() + y.toString()
+
+ // TODO: This is a hack
+ implicit def asBoolean(): Boolean = false
+ }
+
+ def +(that: Type[T]) = BinaryNumericExpression(this, com.yahoo.Scalops.toNumeric(that), Plus)
+
+ def -(that: Type[T]) = BinaryNumericExpression(this, com.yahoo.Scalops.toNumeric(that), Minus)
+
+ def *(that: Type[T]) = BinaryNumericExpression(this, com.yahoo.Scalops.toNumeric(that), Mult)
+
+ def /(that: Type[T]) = BinaryNumericExpression(this, com.yahoo.Scalops.toNumeric(that), Div)
+
+ def <(that: Type[T]) = BinaryBooleanExpression(this, com.yahoo.Scalops.toNumeric(that), LessThan)
+
+ def ==(that: Type[T]) = BinaryBooleanExpression(this, com.yahoo.Scalops.toNumeric(that), Equals)
+
+ def >(that: Type[T]) = BinaryBooleanExpression(this, com.yahoo.Scalops.toNumeric(that), MoreThan)
+}
+
+trait NumericTypeImplicits {
+ implicit def toNumeric[T](t: Type[T])(implicit numeric: Numeric[T]) = new NumericType(t.value)
+
+ implicit def boxNumeric[T](t: T)(implicit numeric: Numeric[T]) = new NumericType(Some(t))(numeric)
+
+ implicit def unboxNumeric[T](t: NumericType[T]) = t.value.get
+
+ implicit def boxDouble(d: Double) = NumericType[Double](Some(d))
+}
+
+object NumericType extends NumericTypeImplicits {
+
+}
+
26 src/main/scala/com/yahoo/scalops/dsl/types/NumericTypeArithmetics.scala
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+trait NumericTypeArithmetics {
+
+ // to double
+ implicit def intToDouble(t: Type[Int]) = NumericType[Double](Some(t.value.get))
+
+ implicit def longToDouble(t: Type[Long]) = NumericType[Double](Some(t.value.get))
+
+ implicit def floatToDouble(t: Type[Float]) = NumericType[Double](Some(t.value.get))
+
+ // to Long
+ implicit def intToLong(t: Type[Int]) = NumericType[Long](Some(t.value.get))
+
+}
20 src/main/scala/com/yahoo/scalops/dsl/types/ObjectType.scala
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+case class ObjectType[T](override val value: Option[T]) extends Type[T] {
+ implicit def get = value.get
+}
+
+object ObjectType {
+ def apply[T](x: T) = new ObjectType[T](Some(x))
+}
48 src/main/scala/com/yahoo/scalops/dsl/types/Operator.scala
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+trait Operator
+
+// Numeric operators
+trait NumericOperator extends Operator
+
+object Plus extends NumericOperator {
+ override def toString = "+"
+}
+
+object Minus extends NumericOperator {
+ override def toString = "-"
+}
+
+object Mult extends NumericOperator {
+ override def toString = "*"
+}
+
+object Div extends NumericOperator {
+ override def toString = "/"
+}
+
+// Operators yielding a boolean
+trait BooleanOperator extends Operator
+
+object LessThan extends BooleanOperator {
+ override def toString() = "<"
+}
+
+object MoreThan extends BooleanOperator {
+ override def toString() = ">"
+}
+
+object Equals extends BooleanOperator {
+ override def toString() = "=="
+}
20 src/main/scala/com/yahoo/scalops/dsl/types/StringType.scala
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+case class StringType(override val value: Option[String]) extends Type[String]
+
+trait StringTypeImplicits {
+ implicit def box(s: String): StringType = new StringType(Some(s))
+}
+
+object StringType extends StringTypeImplicits
36 src/main/scala/com/yahoo/scalops/dsl/types/Type.scala
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+/**
+ * The base type for all scalops Types
+ *
+ * The type subclasses all consist of three things:
+ *
+ * - The Type class itself
+ * - A Trait with implicit conversions. This trait is woven into in Scalops
+ * - A companion object that extends the trait with the implicit conversions and maybe adds additional methods
+ */
+trait Type[T] {
+ def value: Option[T] = None
+}
+
+trait TypeImplicits {
+
+ private case class TypeContainer[T](override val value: Option[T]) extends Type[T]
+
+ implicit def box[T](x: T): Type[T] = new TypeContainer[T](Some(x))
+
+ implicit def unbox[T](x: Type[T]) = x.value
+}
+
+object Type extends TypeImplicits
58 src/main/scala/com/yahoo/scalops/dsl/types/VectorType.scala
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.dsl.types
+
+
+import com.yahoo.scalops.examples.Vector
+
+class VectorType extends Type[Vector] {
+
+ case class BinaryVectorExpression(x: VectorType, y: VectorType, oper: Operator) extends VectorType {
+ override def toString() = x.toString() + oper.toString() + y.toString()
+ }
+
+ case class BinaryVectorNumericExpression[T](x: VectorType, y: NumericType[T], oper: Operator) extends VectorType {
+ override def toString() = x.toString() + oper.toString() + y.toString()
+ }
+
+ def +(that: VectorType) = BinaryVectorExpression(this, that, Plus)
+
+ def -(that: VectorType) = BinaryVectorExpression(this, that, Minus)
+
+ def *(that: VectorType) = BinaryVectorExpression(this, that, Mult)
+
+ def *[T](that: NumericType[T]) = BinaryVectorNumericExpression[T](this, that, Mult)
+
+ def /[T](that: NumericType[T]) = BinaryVectorNumericExpression[T](this, that, Div)
+
+}
+
+trait VectorImplicits {
+
+ private[VectorImplicits] case class VectorContainer(override val value: Option[Vector]) extends VectorType
+
+ implicit def toVector(t: Type[Vector]): VectorType = new VectorContainer(t.value)
+
+ implicit def asVector(v: VectorType): Vector = v.value.get
+
+}
+
+object VectorType extends VectorImplicits {
+
+ class Zeros(val size: Int) extends VectorType
+
+ /**
+ * Creates an empty vector of the given size
+ */
+ def zeros(size: Int): VectorType = new Zeros(size)
+
+}
62 src/main/scala/com/yahoo/scalops/examples/App.scala
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+
+object Test {
+
+ class Book(val title: String, val author: String, val address: String) {
+ override def toString = "Book: " + title + ", " + author + ", " + address
+ }
+
+ class Author(val name: String, val address: String, val books: List[Book], val years: Double) {
+ override val toString = "Author: " + name + ", " + address + ", " + books + ", " + years
+ }
+
+ def run = {
+
+ val authors = load[Author]("hdfs://examples/authors.dat")
+
+ val books = load[Book]("hdfs://examples/book.dat")
+
+ val t1 = filter(books) by (_.author == "F")
+
+ val r1 = join(books by(_.author, _.address), authors by(_.name, _.address)).
+ filter(x => x._1.address == "Foobar" && x._2.name != "Baz").
+ group(_._2.name + "foobar").map(x => (x._1, x._2.size, flatten(x._2), x._2.map(_._2.years).reduce(_ + _)))
+
+ println("R1: " + r1.toString)
+
+ val r2 = filter(r1) by (_._2 < 100)
+ println("R2: " + r2.toString)
+
+ val r3 = books.map {
+ b =>
+ val x = "foobar"
+ if (x == "foo") null else new Book(x, b.address, b.author)
+ }
+ println("R3: " + r3)
+
+ val r4 = filter(r3) by (_.address == "foobar")
+ println("R4: " + r4)
+ }
+}
+
+object App {
+
+ def main(args: Array[String]) {
+ println("START TEST")
+ Test.run
+ }
+
+}
89 src/main/scala/com/yahoo/scalops/examples/BatchGradientDescent.scala
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+import com.yahoo.scalops.dsl.types.VectorType
+
+import com.yahoo.scalops.examples.MLTypes.Example
+
+/**
+ * A example batch gradient descent implementation for scalops
+ *
+ * @author Markus Weimer <weimer@yahoo-inc.com>
+ */
+object BatchGradientDescent {
+
+ /**
+ * Train a linear model using basic batch gradient descent
+ *
+ * The loss function to be optimized (e.g. for logistic regression, least squares, linear SVM, ...) is passed as a
+ * parameter to this train function in the form of computeGradient, which can compute the gradient of the loss with
+ * respect to w for a single example.
+ *
+ * @param trainingExamples The set of examples to train the system on
+ * @param computeGradient A function that computes the loss and gradient wrt. the weight vector
+ * @param eta the learning rate to apply
+ * @param lambda the regularization constant
+ * @param eps the minimum change between iterations to keep the algorithm running
+ */
+ def train(trainingExamples: Queryable[Example], computeGradient: (Example, Vector) => (Double, Vector), eta: Double, lambda: Double, eps: Double = 0.01): Vector = {
+
+ /**
+ * The Environment of the batch gradient descent loop: A weight vector and a sum of all losses.
+ */
+ case class LoopEnvironment(var w: VectorType, var objectiveFunctionValue: DoubleType, var eta: DoubleType) extends CompositeType
+
+ // Set the inital value for the environment: A empty vector and maximum loss
+ val initialValue = new LoopEnvironment(w = VectorType.zeros(1000), objectiveFunctionValue = Double.MaxValue, eta = eta)
+
+ // Loop until the objectiveFunctionvalue is below 0.01
+ val result = loop(initialValue, (env: LoopEnvironment) => (env.objectiveFunctionValue > eps)) {
+ env: LoopEnvironment => {
+ // Compute the loss and gradient in parallel for all data points
+ // computeGradient returns a tuple(Double, Vector) for each data point
+ val lossesAndGradients = trainingExamples.map(x => computeGradient(x, env.w))
+
+ // Sum up the loss and regularizer to update the objective function value
+ env.objectiveFunctionValue = lossesAndGradients.map(x => x._1).reduce(_ + _)
+ env.objectiveFunctionValue += lambda * env.w.get.norm2
+
+ // Sum up the gradients and update the weight vector
+ // Note that there are of course more clever ways to do this (e.g. a line search along the gradient for the best step size)
+ // However, we chose to use a simple step here for simplicity of the example
+ env.w -= lossesAndGradients.map(x => x._2).reduce(_ + _) * eta
+
+ // Apply the l2 regularizer
+ env.w *= (1.0 - env.eta * lambda)
+
+ // Update the learning rate. There are many ways to do this. Here for the sake of brevity: Just shrink it.
+ env.eta *= 0.9
+
+ // The loop body has to return the new environment
+ env
+ }
+ }
+ println(result)
+ return result.w
+ }
+
+ def bgd(Y: Queryable[Example], g: (Example, Vector) => Vector, e: Double, l: Double) =
+ loop(VectorType.zeros(1000), 0 until 100) {
+ w => (w - (Y.map(x => g(x, w)).reduce(_ + _) * e)) * (1.0 - e * l)
+ }
+
+ def main(args: Array[String]) {
+ val trainingData = load[Example]("hdfs://example/")
+ val w = train(trainingData, GradientFunctions.computeSquaredErrorLossAndGradient, eta = 0.01, lambda = 0.1)
+ println(w)
+ }
+}
84 src/main/scala/com/yahoo/scalops/examples/DenseVector.scala
@@ -0,0 +1,84 @@
+package com.yahoo.scalops.examples
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+class DenseVector(val a: Array[Double]) extends Vector {
+ def apply(index: Int) = a(index)
+
+ def update(index: Int, value: Double) = {
+ a(index) = value
+ }
+
+ def size() = a.size
+
+ def *(that: Double): DenseVector = new DenseVector(a.map(x => x * that))
+
+
+ def +(that: Vector): DenseVector = {
+ val result = new Array[Double](size)
+ for (i <- 0 until size) {
+ result(i) = this(i) + that(i)
+ }
+ return new DenseVector(result)
+ }
+
+ def -(that: Vector): DenseVector = {
+ val result = new Array[Double](size)
+ for (i <- 0 until size) {
+ result(i) = this(i) - that(i)
+ }
+ return new DenseVector(result)
+ }
+}
+
+
+object DenseVector {
+
+ def apply(v: Vector): DenseVector = {
+ val result = new Array[Double](v.size)
+ for (i <- 0 until v.size) {
+ result(i) = v(i)
+ }
+ new DenseVector(result)
+ }
+
+ def apply(a: Array[Double]): DenseVector = new DenseVector(a)
+
+
+ /**
+ * Creates a zero vector of the given size
+ */
+ def zeros(size: Int): DenseVector = new DenseVector(new Array[Double](size))
+
+ /**
+ * Creates a random vector of the given size
+ */
+ def random(size: Int): DenseVector = {
+ val a = new Array[Double](size)
+ for (i <- 0 until a.length) {
+ a(i) = math.random
+ }
+
+ DenseVector(a)
+ }
+
+ def apply(x: Double, xs: Double*): DenseVector = {
+ val array = Array[Double](xs.length + 1)
+ array(0) = x
+ var i = 1
+ for (x <- xs.iterator) {
+ array(i) = x
+ i += 1
+ }
+ DenseVector(array)
+ }
+}
28 src/main/scala/com/yahoo/scalops/examples/GradientFunctions.scala
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.scalops.examples.MLTypes.Example
+
+/**
+ * Gradient computations for a bunch of loss functions
+ * @author Markus Weimer <weimer@yahoo-inc.com>
+ */
+object GradientFunctions {
+ def computeSquaredErrorLossAndGradient(ex: Example, w: Vector): (Double, Vector) = computeSquaredErrorLossAndGradient(ex.label, ex.features, w)
+
+ def computeSquaredErrorLossAndGradient(y: Double, x: Vector, w: Vector): (Double, Vector) = {
+ val diff = (w.dot(x) - y)
+ return (diff * diff, x * 2.0 * diff)
+ }
+
+}
69 src/main/scala/com/yahoo/scalops/examples/KMeans.scala
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+
+/**
+ * An example implementation of KMeans
+ *
+ */
+object KMeans {
+
+ /**
+ * A cluster centroid
+ */
+ case class Centroid(v: Vector, id: Int)
+
+ /**
+ * Finds the closest of the centroids in the given list to the given vector
+ */
+ def computeClosestCentroid(v: Vector, centroids: Traversable[Centroid]): Centroid = {
+ centroids.map(x => (x, v.distance2(x.v))).minBy(_._2)._1
+ }
+
+ /**
+ * Creates a set of random centroids for initialization
+ */
+ def makeInitialCentroids(k: Int, d: Int) = {
+ (for {i <- 0 until k} yield new Centroid(DenseVector.random(d), i)).toList
+ }
+
+ def mean(vectors: Traversable[Vector]): Vector = {
+ vectors.reduce(_ + _) / vectors.size
+ }
+
+ def makeCentroid(id: Int, vectors: Traversable[Vector]) = new Centroid(mean(vectors), id)
+
+ def cluster(data: Queryable[Vector], k: Int, d: Int) = {
+ val initialValue: ListType[Centroid] = makeInitialCentroids(k, d)
+
+ val result = loop(initialValue, 0 until 10) {
+ centroids => {
+ // Assign all vectors to their closest centroid and group by that centroid
+ val groups = data.map(x => (x, computeClosestCentroid(x, centroids).id)).group(_._2)
+ // Compute new centroids
+ val newcentroids = groups.map {
+ g => makeCentroid(g._1, g._2.map(_._1))
+ }
+ // Assign to environment
+ newcentroids.collect()
+ }
+ }
+ result
+ }
+
+ def main(args: Array[String]) {
+ val data = load[Vector]("hdfs://example/")
+ println(cluster(data, 2, 2))
+ }
+}
24 src/main/scala/com/yahoo/scalops/examples/ML.scala
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.scalops.examples.MLTypes.Example
+import com.yahoo.scalops.dsl.Queryable
+
+object ML {
+
+ def evaluate(w: Vector, x: Example): Double = 0.0
+
+ def train(trainingExamples: Queryable[Example]) = {
+ BatchGradientDescent.train(trainingExamples, GradientFunctions.computeSquaredErrorLossAndGradient, eta = 0.01, lambda = 0.1)
+ }
+}
25 src/main/scala/com/yahoo/scalops/examples/MLTypes.scala
@@ -0,0 +1,25 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+
+/**
+ * Types useful in machine learning
+ */
+object MLTypes {
+
+ /**
+ * The Example class to be used
+ */
+ final case class Example(identifier: String, label: Double, features: Vector)
+
+}
140 src/main/scala/com/yahoo/scalops/examples/MatrixFactorization.scala
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+
+/**
+ * An sample scalops-implementation of
+ *
+ * "Large-Scale Matrix Factorization with Distributed Stochastic Gradient Descent"
+ *
+ * presented by Rainer Gemulla, Peter Haas, Erik Nijkamp and Yannis Sismanis at KDD 2011
+ *
+ * It computes F:= U*V such that (F-Y)^2 is minimized s.t. regularization
+ *
+ * @author Markus Weimer <weimer@yahoo-inc.com>
+ */
+object MatrixFactorization {
+
+ /**
+ * A factor vector U_i
+ */
+ case class RowVector(i: Int, v: Vector)
+
+ /**
+ * A factor vector V_j
+ */
+ case class ColVector(j: Int, v: Vector)
+
+ /**
+ * A entry in the data matrix Y
+ */
+ case class MatrixEntry(i: Int, j: Int, v: Double)
+
+ /**
+ * A naive partitioner.
+ */
+ class Partitioner(nBlocks: Int, maxRow: Int, maxCol: Int) {
+ lazy val rowBlockSize = maxRow / nBlocks
+ lazy val colBlockSize = maxCol / nBlocks
+
+ private def getRowPartition(i: Int, iteration: Int) = (i % rowBlockSize) + iteration
+
+ def apply(u: RowVector, iteration: Int) = getRowPartition(u.i, iteration)
+
+ private def getColPartition(j: Int, iteration: Int) = (j % colBlockSize) + iteration
+
+ def apply(v: ColVector, iteration: Int) = getColPartition(v.j, iteration)
+
+ def apply(y: MatrixEntry, iteration: Int): Int = {
+ val rowBlock = getRowPartition(y.i, iteration)
+ val colBlock = getColPartition(y.j, iteration)
+ assert(rowBlock == colBlock)
+ return rowBlock
+ }
+ }
+
+ /**
+ * A basic least squares matrix factorization update step with L2 regularization using SGD
+ *
+ * @param Y the input data
+ * @param U the set of row vectorts to update over
+ * @param V the set of column vectors
+ * @param eta the learning rate
+ * @param lambda the regularization constant
+ *
+ * @return new row and column vectors after one SGD pass over the data
+ *
+ */
+ def update(Y: Traversable[MatrixEntry], U: Traversable[RowVector], V: Traversable[ColVector], eta: Double, lambda: Double): (Traversable[RowVector], Traversable[ColVector]) = {
+ // Index the vectors contained in U and V in a map from their i, j to the vector
+ val uMap = Map(U.map(x => x.i -> x.v).toList: _*)
+ val vMap = Map(V.map(x => x.j -> x.v).toList: _*)
+
+ // Update the model
+ for (y <- Y) {
+ val u = uMap(y.i)
+ val v = vMap(y.j)
+ val gradient = 2.0 * eta * (u.dot(v) - y.v)
+ uMap(y.i) -= v * gradient
+ vMap(y.j) -= u * gradient
+ }
+
+ // Regularize the model
+ uMap.values.map(_ * (1.0 - eta * lambda))
+ vMap.values.map(_ * (1.0 - eta * lambda))
+
+ // Emit the final model
+ val u = uMap.map(e => new RowVector(e._1, e._2))
+ val v = vMap.map(e => new ColVector(e._1, e._2))
+ return (u, v)
+ }
+
+ /**
+ * Trains a Matrixfactorization model.
+ *
+ * @param Y the known training data
+ * @param nPartitions The number of partitions the training should happen in
+ * @param d The dimensionality of the factor model learned
+ * @param eta The learning rate to employ
+ * @param lbda The regularization constant
+ * @param iter The number of iterations to run
+ */
+ def train(Y: Queryable[MatrixEntry], nPartitions: Int, d: Int, eta: Double, lbda: Double, nIter: Int): (Queryable[RowVector], Queryable[ColVector]) = {
+
+ // Initialize the system ; WRONG: Make them unique
+ val U = Y.map(y => new RowVector(y.i, DenseVector.random(d))).toTable
+ val V = Y.map(y => new ColVector(y.j, DenseVector.random(d))).toTable
+
+ // Compute some needed stats
+ val maxRow: IntType = Y.map(y => y.i).reduce(scala.math.max(_, _))
+ val maxCol: IntType = Y.map(y => y.j).reduce(scala.math.max(_, _))
+ val part = new Partitioner(nPartitions, maxRow, maxCol)
+
+ val initialValue: IntType = 0
+ val result = loop(initialValue, (iter: IntType) => iter < nIter) {
+ iter => {
+ val groups = cogroup(Y by (part(_, iter)), U by (part(_, iter)), V by (part(_, iter)))
+ val updatedModel = groups.map(g => update(Y = g._2, U = g._3, V = g._4, eta = eta, lambda = lbda))
+ // Assign the new models to U and V
+ U := updatedModel.map(x => flatten(x._1))
+ V := updatedModel.map(x => flatten(x._2))
+ iter + 1
+ }
+ }
+
+ return (U, V)
+
+ }
+
+}
41 src/main/scala/com/yahoo/scalops/examples/PageRank.scala
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+
+/**
+ * Compare with: http://ofps.oreilly.com/titles/9781449302641/embedding.html
+ */
+class PageRank {
+
+ class Page(val url: String, val rank: Double, val links: List[String])
+
+ def run(delta: Float) = {
+ var pages = load[Page]("hdfs://example")
+
+ val initialValue: DoubleType = Double.MaxValue
+
+ // Loop until the objectiveFunctionvalue is below 0.01
+ val result = loop(initialValue, (delta: DoubleType) => delta < 0.01) {
+ delta: DoubleType => {
+ var outbound_pagerank = pages.map(page => (page.rank / page.links.size, flatten(page.links)))
+ var cogrp = cogroup(outbound_pagerank by (_._2), pages by (_.url))
+
+ var new_pagerank = cogrp.map(row => (new Page(row._1, (1 - delta) + row._2.map(_._1).reduce(_ + _) * delta, row._3.head.links), row._3.head.rank))
+ pages := new_pagerank.map(_._1)
+ var diff = new_pagerank.map(page => math.abs(page._2 - page._1.rank))
+ diff.reduce(math.max(_, _))
+ }
+ }
+ }
+}
76 src/main/scala/com/yahoo/scalops/examples/ParallelStochasticGradientDescent.scala
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+import com.yahoo.scalops.dsl.types.VectorType
+
+import com.yahoo.scalops.examples.MLTypes.Example
+
+/**
+ * An implementation of the Parallel Stochastic Gradient Descent for linear modeling presented in
+ * "Parallelized Stochastic Gradient Descent" by Martin Zinkevich, Markus Weimer, Alex Smola, Lihong Li at NIPS 2010
+ */
+
+object ParallelStochasticGradientDescent {
+
+ /**
+ * A sequential SGD implementation that employs L2 regularization and accepts the loss function
+ * (logistic regression, classification, ...) as a parameter.
+ *
+ *
+ * Note that this class is written in pure scala and makes no use of scalops types etc.
+ * Thus, it is an example of code written in pure scala (could even be java) that we can make use of in scalops.
+ */
+ class DIOSTrainer(lossFunction: (Vector, Example) => (Double, Double), eta: Double, lambda: Double) {
+
+ def train(w: Vector, examples: Traversable[Example]): (Vector, Double) = {
+ var result = w
+ var lossSum: Double = 0.0
+ for (x <- examples) {
+ val (loss, gradient) = lossFunction(result, x)
+ lossSum += loss
+ result -= x.features * gradient * eta
+ result *= (1.0 - lambda * eta)
+ }
+ return (result, lossSum)
+ }
+ }
+
+ /**
+ * The environment type for StochasticGradientDescent
+ */
+ case class SGDEnvironment(var w: VectorType, var objectiveFunctionValue: DoubleType) extends CompositeType
+
+ def train(trainingExamples: Queryable[Example], trainer: DIOSTrainer, lambda: Double, nBins: Int = 100): Vector = {
+ // Randomly group the data set into nBins
+ val groups = trainingExamples.group(x => x.identifier.hashCode() % nBins)
+
+ val initialValue = new SGDEnvironment(w = VectorType.zeros(1000), objectiveFunctionValue = Double.MaxValue)
+
+ val result = loop(initialValue, (e: SGDEnvironment) => e.objectiveFunctionValue > 0.01) {
+ env => {
+ // Compute the models for each bin
+ val models = groups.map {
+ g => trainer.train(env.w, g._2)
+ }
+ // Average the models
+ env.w = models.map(e => e._1).reduce(_ + _) / nBins
+ // Compute the current loss function value for convergence
+ env.objectiveFunctionValue = models.map(e => e._2).reduce(_ + _) / nBins + lambda * env.w.norm2()
+ env
+ }
+
+ }
+ result.w
+ }
+}
47 src/main/scala/com/yahoo/scalops/examples/Pregel.scala
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+package com.yahoo.scalops.examples
+
+import com.yahoo.Scalops._
+
+object Pregel {
+
+ case class Node(id: Int, neighbors: List[Int])
+
+ case class Message(to: Int)
+
+ /**
+ * An implementation of pregel in scalops
+ *
+ * @param nodes The set of nodes in the graph
+ * @param f A pregel-style update function
+ *
+ */
+ def run(nodes: Queryable[Node], f: (Node, Traversable[Message]) => (Node, Traversable[Message])) {
+
+ def nf(n: Node, m: Traversable[Message]) = if (m == null) (n, m) else f(n, m)
+
+ val messages = table[Message]()
+ val init: BooleanType = false
+
+ loop(init, (b: BooleanType) => !b) {
+ b => {
+ val x = cogroup((nodes by (_.id)).outer, messages by (_.to))
+ val msgAndNodes = x.map(e => nf(e._2.head, e._3))
+ messages := msgAndNodes.map(x => flatten(x._2))
+ nodes := msgAndNodes.map(_._1)
+ messages.isEmpty
+ }
+ }
+ }
+
+}
70 src/main/scala/com/yahoo/scalops/examples/Vector.scala
@@ -0,0 +1,70 @@
+package com.yahoo.scalops.examples
+
+/*
+ * Copyright (c) 2012 Yahoo! Inc. All rights reserved. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
+ * or agreed to in writing, software distributed under the License is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License. See accompanying LICENSE file.
+ */
+
+trait Vector {
+
+ def apply(index: Int): Double
+
+ def update(index: Int, value: Double)
+
+ def size(): Int
+
+ def *(that: Double): Vector
+
+ def +(that: Vector): Vector
+
+ def -(that: Vector): Vector
+
+ def -=(that: Vector) {
+ require(this.size == that.size)
+ for (i <- 0 until size) {
+ this(i) -= that(i)
+ }
+ }
+
+ def +=(that: Vector) {
+ require(this.size == that.size)
+ for (i <- 0 until size) {
+ this(i) += that(i)
+ }
+
+ }
+
+ /**
+ * (that-this).norm2()
+ */
+ def distance2(that: Vector): Double = math.sqrt((for (i <- 0 until this.size) yield (math.pow(that(i) - this(i), 2.0))).reduce(_ + _))
+
+ def unary_-(): Vector = this * (-1.0)