Skip to content
Browse files

Updated versions.

Removed the explicit (:distinct false) clauses from queries. Added an explicit (:distinct true) to the queries that previously relied on implicit distinct behavior.
  • Loading branch information...
1 parent f768ba2 commit 88682aec4f4b27a43236464f5e8c48350b7e20e4 @sritchie sritchie committed
View
10 project.clj
@@ -16,15 +16,15 @@
[org.clojure/tools.logging "0.2.3"]
[clojure-csv/clojure-csv "1.3.2"]
[org.clojure/math.numeric-tower "0.0.1"]
- [incanter/incanter-core "1.3.0-SNAPSHOT"]
+ [incanter/incanter-core "1.3.0"]
[clj-time "0.3.4"]
[forma/gdal "1.8.0"]
[forma/jblas "1.2.1"]
- [cascalog "1.9.0-wip12"]
- [cascalog-lzo "0.1.0-wip12"]
+ [cascalog "1.9.0"]
+ [cascalog-lzo "0.1.1"]
[cascalog-checkpoint "0.1.1"]
- [backtype/dfs-datastores "1.1.3-SNAPSHOT"]
- [backtype/dfs-datastores-cascading "1.1.4"]
+ [backtype/dfs-datastores "1.1.3"]
+ [backtype/dfs-datastores-cascading "1.2.0"]
[org.apache.thrift/libthrift "0.8.0"
:exclusions [org.slf4j/slf4j-api]]
[com.google.protobuf/protobuf-java "2.4.0a"]]
View
25 src/clj/forma/hadoop/jobs/forma.clj
@@ -26,7 +26,8 @@
(<- [?s-res ?mod-h ?mod-v ?sample ?line ?fire-series]
(fire-src ?chunk)
(get-loc ?chunk :> ?s-res ?mod-h ?mod-v ?sample ?line ?f-series)
- (schema/adjust-fires est-map ?f-series :> ?fire-series)))
+ (schema/adjust-fires est-map ?f-series :> ?fire-series)
+ (:distinct true)))
(defn filter-query
"Note that the ?ts here is an ArrayList of one of the items inside
@@ -39,8 +40,7 @@
(schema/unpack-timeseries ?series :> ?start _ ?ts)
(schema/get-vals-wrap ?ts :> ?vals)
(p/blossom-chunk ?vcf-chunk :> ?s-res ?mod-h ?mod-v ?sample ?line ?vcf)
- (>= ?vcf vcf-limit)
- (:distinct false)))
+ (>= ?vcf vcf-limit)))
(defn dynamic-filter
"Returns a new generator of ndvi and rain timeseries obtained by
@@ -52,8 +52,7 @@
(reli-src ?s-res ?mod-h ?mod-v ?sample ?line ?r-start ?reli)
(rain-src ?s-res ?mod-h ?mod-v ?sample ?line ?p-start ?precl)
(schema/adjust ?p-start ?precl ?n-start ?ndvi ?r-start ?reli
- :> ?start-idx ?precl-ts ?ndvi-ts ?reli-ts)
- (:distinct false)))
+ :> ?start-idx ?precl-ts ?ndvi-ts ?reli-ts)))
(defmapcatop tele-clean
"Return clean timeseries with telescoping window, nil if no (or not enough) good training data"
@@ -84,8 +83,7 @@
bad-set #{2 3 255}]
(<- [?s-res ?mod-h ?mod-v ?sample ?line ?start ?clean-ndvi]
(dynamic-src ?s-res ?mod-h ?mod-v ?sample ?line ?start ?ndvi _ ?reli)
- (tele-clean est-map good-set bad-set ?start ?ndvi ?reli :> ?clean-ndvi)
- (:distinct false))))
+ (tele-clean est-map good-set bad-set ?start ?ndvi ?reli :> ?clean-ndvi))))
(defn analyze-trends
"Accepts an est-map, and sources for ndvi and rain timeseries and
@@ -103,8 +101,7 @@
(clean-src ?s-res ?mod-h ?mod-v ?sample ?line ?start ?ndvi)
(a/short-stat long-block short-block ?ndvi :> ?short)
(a/long-stats ?ndvi ?short-precl :> ?long ?t-stat)
- (a/hansen-stat ?ndvi :> ?break)
- (:distinct false))))
+ (a/hansen-stat ?ndvi :> ?break))))
(defn forma-tap
"Accepts an est-map and sources for ndvi, rain, and fire timeseries,
@@ -116,8 +113,7 @@
(fire-src ?s-res ?mh ?mv ?s ?l !!fire)
(dynamic-src ?s-res ?mh ?mv ?s ?l ?start ?short ?break ?long ?t-stat)
(schema/forma-seq !!fire ?short ?break ?long ?t-stat :> ?forma-seq)
- (p/index ?forma-seq :zero-index ?start :> ?period ?forma-val)
- (:distinct false)))
+ (p/index ?forma-seq :zero-index ?start :> ?period ?forma-val)))
(defmapcatop [process-neighbors [num-neighbors]]
"Processes all neighbors... Returns the index within the chunk, the
@@ -144,8 +140,7 @@ value, and the aggregate of the neighbors."
(<- [?s-res ?period ?mod-h ?mod-v ?sample ?line ?val ?neighbor-val]
(src ?s-res ?period ?mod-h ?mod-v ?win-col ?win-row ?window)
(process-neighbors [neighbors] ?window :> ?win-idx ?val ?neighbor-val)
- (r/tile-position cols rows ?win-col ?win-row ?win-idx :> ?sample ?line)
- (:distinct false))))
+ (r/tile-position cols rows ?win-col ?win-row ?win-idx :> ?sample ?line))))
(defn beta-generator
"query to return the beta vector associated with each ecoregion"
@@ -157,8 +152,7 @@ value, and the aggregate of the neighbors."
(static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?eco ?hansen)
(= ?pd first-idx)
(log/logistic-beta-wrap [ridge-const convergence-thresh max-iterations]
- ?hansen ?val ?neighbor-val :> ?beta)
- (:distinct false))))
+ ?hansen ?val ?neighbor-val :> ?beta))))
(defmapop [apply-betas [betas]]
[eco val neighbor-val]
@@ -175,7 +169,6 @@ value, and the aggregate of the neighbors."
(static-src ?s-res ?mod-h ?mod-v ?s ?l _ _ ?eco _)
(apply-betas [betas] ?eco ?val ?neighbor-val :> ?prob)
(log/mk-timeseries ?pd ?prob :> ?prob-series)
- (:distinct false)
(:trap trap-tap))))
(comment
View
9 src/clj/forma/hadoop/jobs/scatter.clj
@@ -111,8 +111,7 @@
(stretch/ts-expander base-t-res t-res ?series :> ?new-series)
(assoc ?chunk
:value ?new-series
- :temporal-res t-res :> ?final-chunk)
- (:distinct false)))))
+ :temporal-res t-res :> ?final-chunk)))))
(defmapcatop expand-rain-pixel
[sample line]
@@ -135,8 +134,7 @@
(assoc ?chunk
:value ?new-series
:location ?location
- :temporal-res t-res :> ?final-chunk)
- (:distinct false))))
+ :temporal-res t-res :> ?final-chunk))))
(defn first-half-query
"Poorly named! Returns a query that generates a number of position
@@ -170,8 +168,7 @@
([:tmp-dirs vcf-path]
(?- (hfs-seqfile vcf-path)
(<- [?subpail ?chunk]
- ((constrained-tap pail-path "vcf" s-res "00") ?subpail ?chunk)
- (:distinct false))))
+ ((constrained-tap pail-path "vcf" s-res "00") ?subpail ?chunk))))
ndvi-step
([:tmp-dirs ndvi-path]
View
15 src/clj/forma/hadoop/jobs/timeseries.clj
@@ -50,20 +50,19 @@
data-src (<- [?name ?t-res ?date ?s-res ?mod-h ?mod-v ?chunk-idx ?size ?datachunk]
(chunk-source _ ?chunk)
(schema/unpack-chunk-val ?chunk :> ?name ?t-res ?date ?location ?datachunk)
- (schema/unpack-chunk-location ?location :> ?s-res ?mod-h ?mod-v ?chunk-idx ?size)
- (:distinct false))
+ (schema/unpack-chunk-location ?location :> ?s-res ?mod-h ?mod-v ?chunk-idx ?size))
series-src (<- [?name ?t-res ?s-res ?mod-h ?mod-v ?chunk-idx ?size ?pix-idx ?timeseries]
(data-src ?name ?t-res ?date ?s-res ?mod-h ?mod-v ?chunk-idx ?size ?datachunk)
(mk-tseries ?t-res ?date ?datachunk :> ?pix-idx ?start ?end ?tseries)
(schema/mk-array-value ?tseries :> ?array-val)
- (schema/timeseries-value ?start ?end ?array-val :> ?timeseries))]
+ (schema/timeseries-value ?start ?end ?array-val :> ?timeseries)
+ (:distinct true))]
(<- [?chunk]
(series-src ?name ?t-res ?s-res ?mod-h ?mod-v ?chunk-idx ?size ?pix-idx ?timeseries)
(r/tile-position ?s-res ?size ?chunk-idx ?pix-idx :> ?sample ?line)
(schema/pixel-location ?s-res ?mod-h ?mod-v ?sample ?line :> ?pix-location)
(schema/mk-data-value ?timeseries :> ?data-val)
- (schema/chunk-value ?name ?t-res nil ?pix-location ?data-val :> ?chunk)
- (:distinct false))))
+ (schema/chunk-value ?name ?t-res nil ?pix-location ?data-val :> ?chunk))))
(def ^:dynamic *missing-val*
-9999)
@@ -107,8 +106,7 @@
(schema/unpack-chunk-val ?chunk :> ?name _ ?date ?location ?val)
(merge-firevals ?val :> ?tuple)
(date/beginning t-res ?date :> ?datestring)
- (schema/unpack-pixel-location ?location :> ?s-res ?mod-h ?mod-v ?s ?l)
- (:distinct false)))
+ (schema/unpack-pixel-location ?location :> ?s-res ?mod-h ?mod-v ?s ?l)))
(defn create-fire-series
"Aggregates fires into timeseries."
@@ -126,8 +124,7 @@
(query ?name ?s-res ?mod-h ?mod-v ?s ?l ?fire-series)
(schema/pixel-location ?s-res ?mod-h ?mod-v ?s ?l :> ?location)
(schema/mk-data-value ?fire-series :> ?data-val)
- (schema/chunk-value ?name t-res nil ?location ?data-val :> ?chunk)
- (:distinct false))))
+ (schema/chunk-value ?name t-res nil ?location ?data-val :> ?chunk))))
(defn fire-query
"Returns a source of fire timeseries data chunk objects."
View
3 src/clj/forma/hadoop/predicate.clj
@@ -242,5 +242,4 @@
(fn [src]
(construct (swap-syms gen [inpos val] [outpos outval])
[[src :>> (get-out-fields gen)]
- [aggr inpos val :> outpos outval]
- [:distinct false]]))))))
+ [aggr inpos val :> outpos outval]]))))))
View
9 src/clj/forma/source/fire.clj
@@ -156,8 +156,7 @@
:> ?datestring _ _ ?s-lat ?s-lon ?s-kelvin _ _ _ ?s-conf)
(not= "YYYYMMDD" ?datestring)
(monthly-datestring ?datestring :> ?date)
- (fire-pred ?s-lat ?s-lon ?s-kelvin ?s-conf :> ?dataset ?t-res ?lat ?lon ?tuple)
- (:distinct false)))
+ (fire-pred ?s-lat ?s-lon ?s-kelvin ?s-conf :> ?dataset ?t-res ?lat ?lon ?tuple)))
(defn fire-source-daily
"Returns a Cascalog query that creates tuples for daily fires.
@@ -178,8 +177,7 @@
(p/mangle [#","] ?line
:> ?s-lat ?s-lon ?s-kelvin _ _ ?datestring _ _ ?s-conf _ _ _)
(daily-datestring ?datestring :> ?date)
- (fire-pred ?s-lat ?s-lon ?s-kelvin ?s-conf :> ?dataset ?t-res ?lat ?lon ?tuple)
- (:distinct false)))
+ (fire-pred ?s-lat ?s-lon ?s-kelvin ?s-conf :> ?dataset ?t-res ?lat ?lon ?tuple)))
(defn reproject-fires
"Returns a Cascalog query that creates DataChunk Thrift objects for fires."
@@ -190,5 +188,4 @@
(schema/mk-data-value ?tuple :> ?tuple-val)
(r/latlon->modis ?m-res ?lat ?lon :> ?mod-h ?mod-v ?sample ?line)
(schema/pixel-location ?m-res ?mod-h ?mod-v ?sample ?line :> ?location)
- (schema/chunk-value ?dataset ?t-res ?date ?location ?tuple-val :> ?datachunk)
- (:distinct false)))
+ (schema/chunk-value ?dataset ?t-res ?date ?location ?tuple-val :> ?datachunk)))
View
3 src/clj/forma/source/hdf.clj
@@ -259,5 +259,4 @@ as a 1-tuple."
(split-id ?tileid :> ?mod-h ?mod-v)
((c/juxt #'spatial-res #'temporal-res) ?productname :> ?s-res ?t-res)
(schema/mk-array-value ?chunk :> ?array)
- (chunkifier ?dataset ?date ?s-res ?t-res ?mod-h ?mod-v ?chunkid ?array :> ?datachunk)
- (:distinct false))))
+ (chunkifier ?dataset ?date ?s-res ?t-res ?mod-h ?mod-v ?chunkid ?array :> ?datachunk))))
View
3 src/clj/forma/source/rain.clj
@@ -185,8 +185,7 @@
(not= ?val nodata)
(pix-tap :>> mod-coords)
(p/add-fields "precl" "32" m-res :> ?dataset ?t-res ?m-res)
- (r/wgs84-indexer :<< (into [m-res ascii-map] mod-coords) :> ?row ?col)
- (:distinct false))))
+ (r/wgs84-indexer :<< (into [m-res ascii-map] mod-coords) :> ?row ?col))))
(defn rain-chunks
"Cascalog subquery to fully process a WGS84 float array at the
View
6 src/clj/forma/source/static.clj
@@ -94,8 +94,7 @@
((c/juxt #'mod #'quot) ?row span :> ?line ?mv)
(pixel-tap ?mod-h ?mod-v ?sample ?line :> true)
(+ 5 ?mv :> ?mod-v)
- (p/add-fields dataset m-res "00" nil :> ?dataset ?m-res ?t-res !date)
- (:distinct false))))
+ (p/add-fields dataset m-res "00" nil :> ?dataset ?m-res ?t-res !date))))
;; TODO: Make a note that gzipped files can't be unpacked well when
;; they exist on S3. They need to be moved over to HDFS for that. I
@@ -123,8 +122,7 @@
(count ?chunk :> ?count)
(= ?count chunk-size)
(chunkifier
- ?dataset !date ?s-res ?t-res ?mod-h ?mod-v ?chunkid ?arr :> ?datachunk)
- (:distinct false))))
+ ?dataset !date ?s-res ?t-res ?mod-h ?mod-v ?chunkid ?arr :> ?datachunk))))
(defn static-chunks
"TODO: DOCS!"
View
7 test/forma/hadoop/jobs/timeseries_test.clj
@@ -59,8 +59,7 @@ this query don't contain -9999."
(let [fires-src [[[(schema/fire-value 0 0 0 10)
(schema/fire-value 1 0 0 33)]]]
result [(schema/fire-series 0 [(schema/fire-value 0 0 0 10)
- (schema/fire-value 1 0 0 43)])]]
+ (schema/fire-value 1 0 0 43)])]]
(??<- [?vals]
- (fires-src ?fire-vals)
- (:distinct false)
- (running-fire-sum 0 ?fire-vals :> ?vals)) => [result]))
+ (fires-src ?fire-vals)
+ (running-fire-sum 0 ?fire-vals :> ?vals)) => [result]))
View
3 test/forma/source/hdf_test.clj
@@ -59,8 +59,7 @@ error."
subquery (<- [?dataset ?chunkid ?chunk]
(src ?filename ?hdf)
(unpack-modis [[:ndvi]] ?hdf :> ?dataset ?freetile)
- (raster-chunks [chunk-size] ?freetile :> ?chunkid ?chunk)
- (:distinct false))]
+ (raster-chunks [chunk-size] ?freetile :> ?chunkid ?chunk))]
(fact?<- [[?num-chunks]]
[?count]
(subquery ?dataset ?chunkid ?chunk)

0 comments on commit 88682ae

Please sign in to comment.
Something went wrong with that request. Please try again.