In [1]:
%classpath add jar /home/harrison/Documents/Dep2Rel/dep2rel.jar

In [2]:
(require 
    '[clojure.java.io :as io]
    '[clojure.set :as set]
    '[edu.ucdenver.ccp.nlp.readers :as rdr]
    '[edu.ucdenver.ccp.knowtator-clj :as k]
    '[edu.ucdenver.ccp.nlp.sentence :as sentence]
    '[edu.ucdenver.ccp.nlp.relation-extraction :as re]
    '[taoensso.timbre :as log]
    '[edu.ucdenver.ccp.nlp.evaluation :as evaluation])

null

In [3]:
;; Directory on the external drive under which all data for this notebook lives.
(def home-dir
  (apply io/file ["/" "media" "tuh8888" "Seagate Expansion Drive" "data"]))

;; <home-dir>/BioCreative/BCVI-2017/ChemProt_Corpus
(def biocreative-dir
  (-> home-dir
      (io/file "BioCreative")
      (io/file "BCVI-2017")
      (io/file "ChemProt_Corpus")))

;; <biocreative-dir>/chemprot_training
(def training-dir
  (-> biocreative-dir
      (io/file "chemprot_training")))

;; <home-dir>/WordVectors
(def word-vector-dir
  (-> home-dir
      (io/file "WordVectors")))

;; Absolute path (a String, not a File) of the word-vector file.
(def word2vec-db
  (-> word-vector-dir
      (io/file "bio-word-vectors-clj.vec")
      (.getAbsolutePath)))

#'beaker_clojure_shell_237661ea-74c9-44e6-9566-4b0a7c053b06/word2vec-db

Use the [UCDenver CCP SyntaxNet](https://github.com/UCDenver-ccp/syntaxnet-docker) model, trained on CRAFT, to process the sentences and obtain their dependency parses.

In [None]:
%%bash
# Clone and build the UCDenver CCP SyntaxNet image, then run it with the
# ChemProt training sentences directory mounted at /syntaxnet-input.
git clone https://github.com/UCDenver-ccp/syntaxnet-docker.git
# Stop here if the checkout is missing, so the build never runs against
# whatever directory the cell happened to start in.
cd syntaxnet-docker || exit 1
sudo docker build -t ucdenverccp/syntaxnet:latest .
# The host path contains spaces ("Seagate Expansion Drive"); the whole
# host:container bind-mount spec must be quoted or the shell splits it into
# several arguments and docker receives a truncated volume and a bogus image name.
sudo docker run --rm \
  -v "/media/tuh8888/Seagate Expansion Drive/data/BioCreative/BCVI-2017/ChemProt_Corpus/chemprot_training/chemprot_training_sentences:/syntaxnet-input" \
  ucdenverccp/syntaxnet:latest -m craft

In [7]:
;; Create `annotations` by calling k/model on the training directory
;; (second argument is nil).
;; NOTE(review): k/model is called with two arguments here and with a single
;; argument (its own result) in the reader calls below — confirm the
;; one-argument arity yields the object the rdr/biocreative-read-* functions expect.
(def annotations (k/model training-dir nil))

;; Each section below binds the TSV file for one part of the ChemProt training
;; set and passes it, together with the model, to the matching reader.
;; NOTE(review): presumably the readers populate the model in place and must
;; run in this order (abstracts, then entities, then relations) — verify.
(def abstracts-f (io/file training-dir "chemprot_training_abstracts.tsv"))
(rdr/biocreative-read-abstracts (k/model annotations) abstracts-f)

(def entities-f (io/file training-dir "chemprot_training_entities.tsv"))
(rdr/biocreative-read-entities (k/model annotations) entities-f)

(def relations-f (io/file training-dir "chemprot_training_relations.tsv"))
(rdr/biocreative-read-relations (k/model annotations) relations-f)

#'beaker_clojure_shell_8658733a-e891-4471-b1a5-338b8e4cc6a6/abstracts

In [None]:
;; Base model derived from the loaded annotations via k/simple-model.
(def model1
  (k/simple-model annotations))

;; Structure annotations with embeddings, computed inside with-word2vec
;; against the word-vector file at `word2vec-db`.
;; NOTE(review): the `word2vec` alias does not appear in the notebook's
;; require form — confirm the namespace is loaded before running this cell.
(def structures-annotations-with-embeddings
  (word2vec/with-word2vec word2vec-db
    (sentence/structures-annotations-with-embeddings model1)))

;; Concept annotations as returned by sentence/concept-annotations-with-toks.
(def concept-annotations-with-toks
  (sentence/concept-annotations-with-toks model1))

;; model1 with its :concept-annotations and :structure-annotations entries
;; replaced by the two values computed above.
(def model
  (-> model1
      (assoc :concept-annotations concept-annotations-with-toks)
      (assoc :structure-annotations structures-annotations-with-embeddings)))

;; Sentences built from the enriched model; their count is logged at info level.
(def sentences
  (-> model
      (sentence/concept-annotations->sentences)))
(log/info "Num sentences:" (count sentences))

In [None]:
;; Runs cluster-bootstrap relation extraction for the "INHIBITOR" property over
;; `sentences`, logs the metrics computed from the predictions, and binds the
;; extracted matches (each merged with the parameter map used) to `matches`.
;;
;; NOTE(review): the `math` alias used for calc-metrics does not appear in the
;; notebook's require form — confirm the namespace is loaded before running.
(def matches
  (let [property "INHIBITOR"

        ;sentences (filter #(<= (count (:context %)) 2) sentences)

        ;; Every :entities value seen in a sentence, indexed once so the
        ;; gold-triple filter below is a set lookup instead of a rescan of all
        ;; sentences per triple.
        ;; NOTE(review): assumes triples and :entities are ordinary Clojure
        ;; values, for which `=` and set membership agree.
        sentence-entities (set (map :entities sentences))

        ;; Gold triples for the property, restricted to those that occur as
        ;; the :entities of at least one sentence.
        actual-true (set (->> property
                              (k/edges-for-property model)
                              (map evaluation/edge->triple)
                              (filter #(contains? sentence-entities %))))
        all-triples (set (map evaluation/sent->triple sentences))

        ;; Seeds come from up to the first two gold triples. `take 2` keeps
        ;; the original first/second selection but no longer spreads nil into
        ;; make-seeds when fewer than two gold triples exist; with none the
        ;; result is the empty set.
        seeds (->> actual-true
                   (take 2)
                   (map #(apply evaluation/make-seeds sentences %))
                   (apply clojure.set/union))

        seed-thresh 0.85
        context-thresh 0.9
        cluster-thresh 0.95
        min-support 1
        params {:seed             (first seeds)
                :seed-thresh      seed-thresh
                :context-thresh   context-thresh
                ;; Seed match: concepts must match AND cosine similarity of the
                ;; context vectors must exceed seed-thresh.
                :seed-match-fn    #(and (re/concepts-match? %1 %2)
                                        (< seed-thresh (re/context-vector-cosine-sim %1 %2)))
                :context-match-fn #(< context-thresh (re/context-vector-cosine-sim %1 %2))
                :cluster-merge-fn re/add-to-pattern
                ;; Returns the similarity score when it exceeds the threshold
                ;; (third argument if non-nil, otherwise cluster-thresh), and
                ;; false when it does not.
                :cluster-match-fn #(let [score (re/context-vector-cosine-sim %1 %2)]
                                     (and (< (or %3 cluster-thresh) score)
                                          score))
                :min-support      min-support}

        ;; Tag every extracted relation with the parameters that produced it.
        matches (->> (re/cluster-bootstrap-extract-relations seeds sentences params)
                     (map #(merge % params)))]
    (log/info "Metrics:" (math/calc-metrics {:predicted-true (evaluation/predicted-true matches)
                                             :actual-true    actual-true
                                             :all            all-triples}))
    matches))