Skip to content

Commit

Permalink
Add Sunspot Solr search for documents
Browse files Browse the repository at this point in the history
  • Loading branch information
gbprz committed Aug 21, 2014
1 parent a05f8d8 commit 0a26875
Show file tree
Hide file tree
Showing 42 changed files with 1,527 additions and 50 deletions.
6 changes: 5 additions & 1 deletion Gemfile
Expand Up @@ -36,4 +36,8 @@ gem 'sdoc', '~> 0.4.0', group: :doc
gem 'spring', group: :development

# Use textractor for parsing PDF and Word documents
gem 'textractor'
gem 'textractor'

# Use Sunspot Rails and Sunspot Solr for our search engine
gem 'sunspot_rails'
gem 'sunspot_solr'
13 changes: 13 additions & 0 deletions Gemfile.lock
Expand Up @@ -75,6 +75,7 @@ GEM
nokogiri (~> 1.6)
omniauth (~> 1.1.0)
polyglot (0.3.5)
pr_geohash (1.0.0)
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0)
Expand All @@ -97,6 +98,8 @@ GEM
rdoc (4.1.1)
json (~> 1.4)
ref (1.0.5)
rsolr (1.0.10)
builder (>= 2.1.2)
sass (3.2.19)
sass-rails (4.0.3)
railties (>= 4.0.0, < 5.0)
Expand All @@ -117,6 +120,14 @@ GEM
activesupport (>= 3.0)
sprockets (~> 2.8)
sqlite3 (1.3.9)
sunspot (2.1.1)
pr_geohash (~> 1.0)
rsolr (~> 1.0.7)
sunspot_rails (2.1.1)
nokogiri
rails (>= 3)
sunspot (= 2.1.1)
sunspot_solr (2.1.1)
textractor (0.2.0)
escape (>= 0.0.4)
therubyracer (0.12.1)
Expand Down Expand Up @@ -152,6 +163,8 @@ DEPENDENCIES
sdoc (~> 0.4.0)
spring
sqlite3
sunspot_rails
sunspot_solr
textractor
therubyracer
turbolinks
Expand Down
16 changes: 16 additions & 0 deletions app/controllers/documents_controller.rb
Expand Up @@ -46,7 +46,12 @@ def create
position: 0
)
if !@revision.save
@document.destroy
flash[:error] = "Unable to upload revision"
else
# Extract text from file to provide search engine with searchable content
@revision.extract_text
@revision.save
end
end

Expand All @@ -57,6 +62,17 @@ def create
end
end

def search
  # Full-text search over the indexed document fields using Sunspot Solr.
  #
  # Sets @documents to the Sunspot search object on success, or to nil when
  # the search could not be carried out (for example when Solr is not
  # reachable). The failure is logged instead of being silently discarded.

  # Read the query up front so the DSL block below only closes over a local
  query = params[:query]

  @documents = Document.search do
    fulltext query, highlight: true
  end
rescue StandardError => e
  logger.error("Document search failed for query #{query.inspect}: #{e.class}: #{e.message}")
  @documents = nil
end

private
def document_params
params.require(:document).permit(:title, :description,
Expand Down
57 changes: 31 additions & 26 deletions app/controllers/revisions_controller.rb
Expand Up @@ -19,34 +19,39 @@ def download
end

def create
  # Upload a new revision of an existing document and make it the current one.
  @document = Document.find(params[:document_id])
  upload = revision_params

  # Position 0 marks the current revision of a document
  @revision = Revision.new(file_name: upload.original_filename,
                           file_type: upload.content_type,
                           file_data: upload.read,
                           document_id: @document.id,
                           user_id: current_user.id,
                           position: 0)

  # Increase the position of all previous revisions
  # to move them down in the document's history
  @document.revisions.each do |revision|
    revision.position += 1
    revision.save
  end

  if @revision.save
    # Our revision has been saved; extract its contents for the search engine.
    # extract_text only assigns search_text, so a second save is needed to
    # persist it (same sequence as DocumentsController#create).
    # NOTE(review): the document itself is not re-saved here, so its search
    # index entry may need an explicit reindex -- confirm.
    @revision.extract_text
    @revision.save
  else
    flash[:error] = "Unable to upload revision"
  end

  # Both outcomes return to the document page
  redirect_to document_path(@document)
end

private
# The uploaded file object submitted under params[:revision][:file]
def revision_params
  params[:revision][:file]
end

end
14 changes: 11 additions & 3 deletions app/models/document.rb
Expand Up @@ -4,14 +4,22 @@ class Document < ActiveRecord::Base
belongs_to :category
has_many :revisions, dependent: :destroy

# Sunspot Solr search configuration for the document object
searchable do
  # Boost title matches so they outrank matches in the other fields
  text :title, default_boost: 2, stored: true
  text :description, stored: true
  # Index the extracted text of every revision along with the document
  text :revision_search_texts, stored: true do
    revisions.map(&:search_text)
  end
end

def current_revision
  # The revision with the lowest position is the current one, so sort
  # this document's revisions by ascending position and take the first
  ordered_revisions = Revision.where(document_id: id).order("position asc")
  ordered_revisions.first
end

def total_downloads
  # Total number of downloads across all revisions of this document.
  # Asking the association for the sum avoids instantiating every revision
  # just to add up its counter.
  revisions.sum(:download_count)
end

def self.latest_docs
Expand Down
22 changes: 18 additions & 4 deletions app/models/revision.rb
Expand Up @@ -3,9 +3,8 @@ class Revision < ActiveRecord::Base
belongs_to :document
belongs_to :user

validates :file_name, presence: true
validates :file_type, presence: true

validates_presence_of :file_name, :file_type, :file_data

def extension_type
ext = case file_type
# PDF Files
Expand All @@ -17,8 +16,23 @@ def extension_type
ext
end

# Extract the plain text of the stored file into search_text so the search
# engine has content to index.
#
# Only assigns the attribute; the caller is responsible for saving the
# record afterwards. When extraction fails or yields nothing, search_text is
# left untouched and the failure is logged. Returns the assigned text, or
# nil when nothing was assigned.
def extract_text
  contents = nil

  # Textractor reads from a path, so spill the stored bytes to a temporary file
  tempfile = Tempfile.new(file_name, encoding: 'ascii-8bit')
  begin
    tempfile.write(file_data)
    tempfile.close

    # Try extracting the contents of the file depending on the content type
    begin
      contents = Textractor.text_from_path(tempfile.path, content_type: file_type)
    rescue StandardError => e
      logger.error("Unable to extract text from file. Revision id = #{id}, " \
                   "File name = #{file_name} (#{e.class}: #{e.message})")
    end
  ensure
    # Always remove the temporary file, even if writing or extraction raised
    tempfile.close!
  end

  # Redundant line breaks are useless to us
  self.search_text = contents.gsub(/(\r?\n)+/, "\n") unless contents.blank?
end

end
13 changes: 13 additions & 0 deletions app/views/documents/search.html.erb
@@ -0,0 +1,13 @@
<%# Search results page. @documents is the Sunspot search object, or nil when the search could not be run. %>
<h3> <%= "Search results for #{params[:query]}" %> </h3>
<hr />
<div class="panel panel-default">
  <ol class="breadcrumb">
    <li> Matched Documents </li>
  </ol>
  <div class="panel-body" style="padding: 0px">
    <% if @documents.nil? %>
      <%# The controller leaves @documents nil when the search raised %>
      <p style="padding: 15px">Search is currently unavailable. Please try again later.</p>
    <% elsif @documents.hits.empty? %>
      <p style="padding: 15px">No documents matched your search.</p>
    <% else %>
      <% @documents.each_hit_with_result do |_hit, result| %>
        <%= render partial: "documents/document_list_item.html.erb", locals: {doc: result} %>
        <br />
      <% end %>
    <% end %>
  </div>
</div>
4 changes: 1 addition & 3 deletions app/views/layouts/_header.erb
Expand Up @@ -24,9 +24,7 @@
<% end %>

<!-- Search field -->
<%= text_field_tag :article_search,
nil, style: "width: 300px; margin-left: 25px" ,:class => "form-control",
placeholder: "Search for documents and categories ..." %>
<%= render partial: "layouts/search.html.erb" %>
</h3>

</div>
11 changes: 11 additions & 0 deletions app/views/layouts/_search.html.erb
@@ -0,0 +1,11 @@
<div style="display:inline-block">
<%= form_tag search_documents_path, method: :post do %>
<div class="input-group">
<%= search_field_tag :query, nil, style: "width: 400px; margin-left: 25px",
:class => "form-control", :placeholder => "Search for documents and categories ..." %>
<div class="input-group-btn">
<%= submit_tag "search", :class => "btn btn-default" %>
</div>
</div>
<% end %>
</div>
23 changes: 23 additions & 0 deletions config/sunspot.yml
@@ -0,0 +1,23 @@
production:
solr:
hostname: localhost
port: 8983
log_level: WARNING
path: /solr/production
# read_timeout: 2
# open_timeout: 0.5

development:
solr:
hostname: localhost
port: 8982
log_level: INFO
path: /solr/development

test:
solr:
hostname: localhost
port: 8981
log_level: WARNING
path: /solr/test

18 changes: 5 additions & 13 deletions lib/permissions.rb
Expand Up @@ -4,8 +4,8 @@ def current_user
@current_user = User.find_by_id(session[:user_id])
end

# Check if the current user can upload documents to the specified category
def can_upload_documents(category)
# Check if the current user can upload documents to the specified category
def can_upload_documents(category)
if !current_user.nil?
if category.nil?
# User will specify category upon uploading
Expand Down Expand Up @@ -38,17 +38,9 @@ def category_viewable?(category)
# List the categories the current user is allowed to upload documents to
def upload_permitted_categories
  # Return all categories the current user can upload to,
  # or nil when nobody is logged in.

  # Resolve the user once instead of calling current_user for every check
  user = current_user

  # user is not logged in, they can't upload anything
  return nil if user.nil?

  # an admin can upload to any category; everyone else only to the
  # categories they have write access to
  user.is_admin? ? Category.all : user.writable_categories
end

end
Empty file.
31 changes: 31 additions & 0 deletions solr/conf/admin-extra.html
@@ -0,0 +1,31 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!-- The content of this page will be statically included into the top
of the admin page. Uncomment this as an example to see where the content
will show up.
<hr>
<i>This line will appear before the first table</i>
<tr>
<td colspan="2">
This row will be appended to the end of the first table
</td>
</tr>
<hr>
-->
36 changes: 36 additions & 0 deletions solr/conf/elevate.xml
@@ -0,0 +1,36 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->

<elevate>
<query text="foo bar">
<doc id="1" />
<doc id="2" />
<doc id="3" />
</query>

<query text="ipod">
<doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
<doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
</query>

</elevate>

0 comments on commit 0a26875

Please sign in to comment.