Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Moving revision text to the revision model. Sphinx association search …

…works, needs excerpt cleanup.
  • Loading branch information...
commit f87d0f29d59db252cfd8a4467ad2d8b2c5405415 1 parent 4bc21ee
Brian Michalski authored
5  app/controllers/revisions_controller.rb
@@ -63,13 +63,8 @@ def create
63 63
     @revision = Revision.new(params[:revision])
64 64
     @revision.document_id = params[:document_id]
65 65
     @revision.user_id = current_user.id
66  
-    logger.info @revision.upload_content_type
67 66
     respond_to do |format|
68 67
       if @revision.save
69  
-        #Find the revision we just made and index text from document
70  
-        @rev = Revision.find(:first, :order => 'created_at desc')
71  
-        #run the method which indexes the document
72  
-        @rev.rev_text
73 68
         expire_fragment(:controller => :documents, :action => :show, :id => @revision.document_id, :action_suffix => 'revisions')
74 69
         if @revision.document.category.is_featured
75 70
           expire_action :controller => :home, :action => :index
20  app/models/document.rb
@@ -10,16 +10,6 @@ class Document < ActiveRecord::Base
10 10
 
11 11
   accepts_nested_attributes_for :revisions, :allow_destroy => true
12 12
 
13  
-  #Indexing
14  
-
15  
-  define_index do
16  
-    indexes title
17  
-    indexes description
18  
-    indexes current_revision_text
19  
-    ##Indexing Revisions Text doesn't work yet, working on it.
20  
-    #indexes revisions(:text), :as => :revision_text
21  
-  end
22  
-  
23 13
   #Scoping
24 14
   default_scope :order => 'title ASC'
25 15
   named_scope :by_updated, :order => 'updated_at DESC'
@@ -29,6 +19,16 @@ def current_revision
29 19
     Revision.find(:first, :conditions => {:document_id => self.id})
30 20
   end
31 21
 
  22
+  #Indexing
  23
+  define_index do
  24
+    indexes title
  25
+    indexes description
  26
+    #indexes current_revision_text
  27
+    ##Indexing Revisions Text doesn't work yet, working on it.
  28
+    indexes revisions(:search_text), :as => :revision_text
  29
+  end
  30
+
  31
+
32 32
   #Try to extract some text from the current revision
33 33
   def current_revision_text
34 34
     revision = self.current_revision
50  app/models/revision.rb
... ...
@@ -1,4 +1,6 @@
1 1
 class Revision < ActiveRecord::Base
  2
+  before_create :set_text 
  3
+
2 4
   acts_as_list :scope => :document
3 5
 
4 6
   has_attached_file :upload, :storage => :database
@@ -45,41 +47,23 @@ def type
45 47
     result
46 48
   end
47 49
 
  50
+  # Attempt to extract text from a document.
  51
+  # TODO: Refactor this into a library module/class/something else.
  52
+  # We should not be maintaining this in a test-less model.
48 53
   def text
49 54
     tempfile = Tempfile.new(self.upload_file_name)
50 55
     tempfile.write(self.upload.file_contents)
  56
+    tempfile.close #If you don't close the file it might still be empty before the next command executes
51 57
     #find out what kinda file we're dealing with and run appropriate system calls
52 58
     result = case self.upload_content_type
53  
-      when "application/msword" then `catdoc -w #{tempfile.path}`
54  
-      when "application/pdf" then `pdftotext #{tempfile.path} -`
55  
-      when "application/vnd.ms-excel" then `xls2cvs #{tempfile.path}`
56  
-      when "application/vnd.ms-powerpoint" then `catppt #{tempfile.path}`
57  
-      when "image/jpeg" then `jhead -c #{tempfile.path}`
58  
-      when "image/png"  then `jhead -c #{tempfile.path}`
59  
-      else ""
60  
-    end
61  
-    result.gsub(tempfile.path,"")
62  
-  end
63  
-
64  
-  def rev_text
65  
-    #Current Revision Text 
66  
-    #create temporary files for use during indexing
67  
-    tempfile = Tempfile.new(self.upload_file_name)
68  
-    tempfile.write (self.upload.file_contents)
69  
-    #If it's a docx,xlsx, or pptx, fix the zip archive (paperclip bungles it up)
70  
-    if self.upload_content_type == ("application/vnd.openxmlformats-officedocument.wordprocessingml.document" or "application/vnd.openxmlformats-officedocument.presentationml.presentation" or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
71  
-      system("yes | zip -FF #{tempfile.path} --out /tmp/doc")
72  
-    end
73  
-    #find what kind of file we're dealing with and run appropriate system calls
74  
-    result = case self.upload_content_type
75 59
       when "text/plain" then `cat #{tempfile.path}`
76 60
       when "application/pdf" then `pdftotext #{tempfile.path} -`
77 61
       when "application/msword" then `catdoc -w #{tempfile.path}`
78 62
       when "application/vnd.ms-excel" then `xls2cvs #{tempfile.path}`
79 63
       when "application/vnd.ms-powerpoint" then `catppt #{tempfile.path}`
80  
-      when "application/vnd.openxmlformats-officedocument.wordprocessingml.document" then `doctotext "/tmp/doc.docx"`
81  
-      when "application/vnd.openxmlformats-officedocument.presentationml.presentation" then `doctotext "/tmp/doc.docx"`
82  
-      when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" then `doctotext "/tmp/doc.docx"`
  64
+      when "application/vnd.openxmlformats-officedocument.wordprocessingml.document" then `doctotext #{tempfile.path}`
  65
+      when "application/vnd.openxmlformats-officedocument.presentationml.presentation" then `doctotext  #{tempfile.path}`
  66
+      when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" then `doctotext #{tempfile.path}`
83 67
       when "image/jpeg" then `jhead -c #{tempfile.path}`
84 68
       when "image/png"  then `jhead -c #{tempfile.path}`
85 69
       when "application/vnd.oasis.opendocument.text" then `odt2txt #{tempfile.path}`
@@ -87,14 +71,14 @@ def rev_text
87 71
       when "application/vnd.oasis.opendocument.spreadsheet" then `odt2txt #{tempfile.path}`
88 72
       else ""
89 73
     end
90  
-
91  
-    result.gsub!(tempfile.path,"")
92  
-    #remove the temporary file we made when fixing zipped documents
93  
-    system('rm -rf "/tmp/doc"')
94  
-    #save the data
95  
-    document = Document.find(self.document_id)
96  
-    document.current_revision_text = result
97  
-    document.save
  74
+    result.gsub(tempfile.path,"")
  75
+    tempfile.unlink
  76
+    result
  77
+  end
  78
+  
  79
+  # Set the search_text field to the extracted text from a revision
  80
+  def set_text
  81
+    self.search_text = text
98 82
   end
99 83
 
100 84
   #Authenticates Access
9  db/migrate/20100607040352_revision_search_text.rb
... ...
@@ -1,9 +0,0 @@
1  
-class RevisionSearchText < ActiveRecord::Migration
2  
-  def self.up
3  
-    add_column :documents, :current_revision_text, :text, :default => ""
4  
-  end
5  
-
6  
-  def self.down
7  
-    remove_column :current_revisions_text
8  
-  end
9  
-end
9  db/migrate/20100823173636_add_text_to_revision.rb
... ...
@@ -0,0 +1,9 @@
  1
+class AddTextToRevision < ActiveRecord::Migration
  2
+  def self.up
  3
+    add_column :revisions, :search_text, :text
  4
+  end
  5
+
  6
+  def self.down
  7
+    remove_column :revisions, :search_text
  8
+  end
  9
+end
10  db/schema.rb
@@ -9,7 +9,7 @@
9 9
 #
10 10
 # It's strongly recommended to check this file into your version control system.
11 11
 
12  
-ActiveRecord::Schema.define(:version => 20100607040352) do
  12
+ActiveRecord::Schema.define(:version => 20100823173636) do
13 13
 
14 14
   create_table "backgrounds", :force => true do |t|
15 15
     t.string   "image_file_name"
@@ -46,14 +46,13 @@
46 46
   create_table "documents", :force => true do |t|
47 47
     t.string   "title"
48 48
     t.text     "description"
49  
-    t.boolean  "readable",              :default => true
50  
-    t.boolean  "writable",              :default => false
51  
-    t.integer  "downloaded",            :default => 0
  49
+    t.boolean  "readable",    :default => true
  50
+    t.boolean  "writable",    :default => false
  51
+    t.integer  "downloaded",  :default => 0
52 52
     t.integer  "category_id"
53 53
     t.integer  "user_id"
54 54
     t.datetime "created_at"
55 55
     t.datetime "updated_at"
56  
-    t.text     "current_revision_text"
57 56
   end
58 57
 
59 58
   add_index "documents", ["category_id"], :name => "category_id_index"
@@ -86,6 +85,7 @@
86 85
     t.integer  "upload_file_size"
87 86
     t.datetime "upload_updated_at"
88 87
     t.binary   "upload_file",         :limit => 2147483647
  88
+    t.text     "search_text"
89 89
   end
90 90
 
91 91
   add_index "revisions", ["document_id"], :name => "document_id_index"

0 notes on commit f87d0f2

Please sign in to comment.
Something went wrong with that request. Please try again.