Skip to content

Commit

Permalink
Ignore sim sets in workflow subjects count (#3144)
Browse files Browse the repository at this point in the history
* add sms counter cache to workflow

* add real subjects to the workflow counter

* add workflow subjects count worker

* call the subjects count worker when refreshing workflow data

* count the subjects before running the retired count worker

ensure we have an up to date count for the subjects if we need to compare the retired at counts

* use AR relation methods to get the workflow_ids

* count the workflow's subjects after counting a sets

ensure we recount the workflow subjects after counting a linked set's subjects.

* clarify comment text

* use the cached count value

* inline the shared helper to point of use

* avoid AR counter cache method names

clashing with a counter cache means this method proxies up to AR .empty? checks

* use real_set_member_subjects_count column name

* count and cache the number of real subjects for the workflow

* use the workflow counter cache for subjects_count

* remove unused live_subejcts_sets association

* build the associations correctly and cache the real subjects count

* use the real_set_member_subjects_count attribute

* correct the memoized instance variable name

* add workflow subjects_count migration task

ensure we backfill the workflow subjects_count cache values before deploying.
  • Loading branch information
camallen committed Jul 8, 2019
1 parent f039669 commit 5da1aba
Show file tree
Hide file tree
Showing 21 changed files with 181 additions and 88 deletions.
4 changes: 1 addition & 3 deletions app/controllers/api/v1/subject_sets_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,7 @@ def destroy_links
super do |subject_set|
notify_subject_selector(subject_set)
reset_subject_counts(subject_set.id)
reset_workflow_retired_counts(
subject_set.subject_sets_workflows.pluck(:workflow_id)
)
reset_workflow_retired_counts(subject_set.workflow_ids)
end
end

Expand Down
4 changes: 4 additions & 0 deletions app/counters/workflow_counter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ def retired_subjects
sws_query.where.not(retired_at: nil).count
end

def subjects
workflow.non_training_subject_sets.sum(:set_member_subjects_count)
end

def sws_query
SubjectWorkflowStatus
.where(workflow_id: workflow.id)
Expand Down
14 changes: 7 additions & 7 deletions app/models/project.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class Project < ActiveRecord::Base
-> { where(active: true, serialize_with_project: true).active },
class_name: "Workflow"
has_many :subject_sets, dependent: :destroy
has_many :live_subject_sets, through: :active_workflows, source: 'subject_sets'
has_many :classifications, dependent: :restrict_with_exception
has_many :subjects, dependent: :restrict_with_exception
has_many :acls, class_name: "AccessControlList", as: :resource, dependent: :destroy
Expand Down Expand Up @@ -130,13 +129,14 @@ def send_notifications
end

def subjects_count
@subject_count ||= if live_subject_sets.loaded?
live_subject_sets.inject(0) do |sum,set|
sum + set.set_member_subjects_count
@subjects_count ||=
if active_workflows.loaded?
active_workflows.inject(0) do |sum,workflow|
sum + workflow.real_set_member_subjects_count
end
else
active_workflows.sum :real_set_member_subjects_count
end
else
live_subject_sets.sum :set_member_subjects_count
end
end

def retired_subjects_count
Expand Down
8 changes: 1 addition & 7 deletions app/models/workflow.rb
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,7 @@ def subject_selector
end

def subjects_count
@subject_count ||= if subject_sets.loaded?
subject_sets.inject(0) do |sum,set|
sum + set.set_member_subjects_count
end
else
subject_sets.sum :set_member_subjects_count
end
real_set_member_subjects_count
end

def retired_subjects_count
Expand Down
1 change: 1 addition & 0 deletions app/workers/calculate_project_completeness_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def perform(project_id)
Project.transaction do
project_workflows = project.workflows.select(
:id,
:real_set_member_subjects_count,
:retired_set_member_subjects_count
)
project_workflows.each do |workflow|
Expand Down
4 changes: 3 additions & 1 deletion app/workers/refresh_workflow_status_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@ class RefreshWorkflowStatusWorker

def perform(workflow_id)
if Workflow.where(id: workflow_id).exists?
# run the first worker manually to ensure we don't have state race
# run the first two workers manually to ensure we don't have state race
# conditions between workers and/or db transactions
# and to ensure the retired count worker has the latest real subjects count
UnfinishWorkflowWorker.new.perform(workflow_id)
WorkflowSubjectsCountWorker.new.perform(workflow_id)
WorkflowRetiredCountWorker.perform_async(workflow_id)
end
end
Expand Down
6 changes: 6 additions & 0 deletions app/workers/subject_set_subject_counter_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,14 @@ class SubjectSetSubjectCounterWorker
lock: :until_executing

def perform(subject_set_id)
# recount this set's subjects
set = SubjectSet.find(subject_set_id)
set.update_column(:set_member_subjects_count, set.set_member_subjects.count)
set.touch

# recount the subjects for each workflow this set is in
set.workflow_ids.each do |workflow_id|
WorkflowSubjectsCountWorker.perform_async(workflow_id)
end
end
end
23 changes: 23 additions & 0 deletions app/workers/workflow_subjects_count_worker.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
class WorkflowSubjectsCountWorker
include Sidekiq::Worker

sidekiq_options queue: :data_low

sidekiq_options congestion: {
interval: 60,
max_in_interval: 1,
min_delay: 60,
reject_with: :reschedule,
key: ->(workflow_id) {
"workflow_#{workflow_id}_subjects_count_worker"
}
}

sidekiq_options lock: :until_executing

def perform(workflow_id)
workflow = Workflow.find_without_json_attrs(workflow_id)
counter = WorkflowCounter.new(workflow)
workflow.update_column(:real_set_member_subjects_count, counter.subjects)
end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class AddSubjectsCounterCacheToWorkflow < ActiveRecord::Migration
def change
add_column :workflows, :real_set_member_subjects_count, :integer, default: 0, null: false
end
end
5 changes: 4 additions & 1 deletion db/structure.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1973,7 +1973,8 @@ CREATE TABLE public.workflows (
minor_version integer DEFAULT 0 NOT NULL,
published_version_id integer,
steps jsonb DEFAULT '[]'::jsonb NOT NULL,
serialize_with_project boolean DEFAULT true
serialize_with_project boolean DEFAULT true,
real_set_member_subjects_count integer DEFAULT 0 NOT NULL
);


Expand Down Expand Up @@ -4774,3 +4775,5 @@ INSERT INTO schema_migrations (version) VALUES ('20190507103007');

INSERT INTO schema_migrations (version) VALUES ('20190524111214');

INSERT INTO schema_migrations (version) VALUES ('20190624094308');

9 changes: 9 additions & 0 deletions lib/tasks/migrate.rake
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,15 @@ namespace :migrate do
minor_version: workflow.primary_content.current_version_number.to_i
end
end

desc "Backfill new real_set_member_subjects_count values for cached subjects_count"
task :backfill_real_set_member_subjects => :environment do
non_backfilled_scope = Workflow.where(real_set_member_subjects_count: 0)
non_backfilled_scope.find_each do |workflow|
counter = WorkflowCounter.new(workflow)
workflow.update!(real_set_member_subjects_count: counter.subjects)
end
end
end

namespace :subject_workflow_status do
Expand Down
66 changes: 43 additions & 23 deletions spec/counters/workflow_counter_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
let(:workflow) { create(:workflow_with_subjects, num_sets: 1) }
let(:counter) { WorkflowCounter.new(workflow) }

shared_examples "it ignores training data" do
let(:workflow) { create(:workflow_with_subjects, num_sets: 2) }
let(:training_set) { workflow.subject_sets.first }
let(:real_set) { workflow.subject_sets.last }
before do
real_set_ar_collection_proxy = SubjectSet.where(id: real_set)
allow(workflow).to receive(:non_training_subject_sets).and_return(real_set_ar_collection_proxy)
end

it "should return non training data retired count only" do
expect(counted).to eq(expected_count)
end
end

describe 'classifications' do

it "should return 0 if there are none" do
Expand Down Expand Up @@ -42,18 +56,9 @@
end
end

context "with subject sets marked as training data" do
let(:workflow) { create(:workflow_with_subjects, num_sets: 2) }
let(:training_set) { workflow.subject_sets.first }
let(:real_set) { workflow.subject_sets.last }
before do
real_set_ar_collection_proxy = SubjectSet.where(id: real_set)
allow(workflow).to receive(:non_training_subject_sets).and_return(real_set_ar_collection_proxy)
end

it "should return non training data classification count only" do
expect(counter.classifications).to eq(2)
end
it_behaves_like "it ignores training data" do
let(:counted) { counter.classifications }
let(:expected_count) { 2 }
end
end
end
Expand Down Expand Up @@ -89,18 +94,33 @@
expect(counter.retired_subjects).to eq(0)
end

context "with subject sets marked as training data" do
let(:workflow) { create(:workflow_with_subjects, num_sets: 2) }
let(:training_set) { workflow.subject_sets.first }
let(:real_set) { workflow.subject_sets.last }
before do
real_set_ar_collection_proxy = SubjectSet.where(id: real_set)
allow(workflow).to receive(:non_training_subject_sets).and_return(real_set_ar_collection_proxy)
end
it_behaves_like "it ignores training data" do
let(:counted) { counter.retired_subjects }
let(:expected_count) { 2 }
end
end
end

it "should return non training data retired count only" do
expect(counter.retired_subjects).to eq(2)
end
describe 'subjects' do
it "should return 2" do
expect(counter.subjects).to eq(2)
end

it "should return 0 when a subject set was unlinked" do
workflow.subject_sets = []
expect(counter.subjects).to eq(0)
end

it_behaves_like "it ignores training data" do
let(:counted) { counter.subjects }
let(:expected_count) { 2 }
end

context "with no subjects" do
let(:workflow) { create(:workflow) }

it "should return 0 if there are none" do
expect(counter.subjects).to eq(0)
end
end
end
Expand Down
3 changes: 1 addition & 2 deletions spec/factories/projects.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@
after(:create) do |p|
p.avatar = create(:medium, type: "project_avatar", linked: p)
p.background = create(:medium, type: "project_background", linked: p)
workflow = create(:workflow, project: p)
create_list(:subject_set_with_subjects, 2, project: p, workflows: [workflow])
p.workflows = [ create(:workflow_with_subjects, project: p) ]
end
end

Expand Down
3 changes: 2 additions & 1 deletion spec/factories/workflows.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@
end

factory :workflow_with_subjects do
ignore do
transient do
num_sets 2
end

after(:create) do |w, evaluator|
create_list(:subject_set_with_subjects, evaluator.num_sets, workflows: [w], project: w.project)
w.update_column(:real_set_member_subjects_count, w.non_training_subject_sets.sum(:set_member_subjects_count))
end
end

Expand Down
44 changes: 17 additions & 27 deletions spec/models/project_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
let(:not_owned) { build(:project, owner: nil) }
end

it_behaves_like "has subject_count"

it_behaves_like "activatable" do
let(:activatable) { project }
end
Expand Down Expand Up @@ -166,22 +164,6 @@
end
end

describe "#live_subject_sets" do
let(:project) { full_project }
let!(:unlinked_subject_set) do
create(:subject_set, project: project, num_workflows: 0)
end

it "should have many subject_sets" do
expect(project.live_subject_sets).not_to include(unlinked_subject_set)
end

it "should only get subject_sets from active workflows" do
inactive_workflow = create(:workflow_with_subjects, num_sets: 1, active: false, project: project)
expect(project.live_subject_sets).not_to include(inactive_workflow.subject_sets.first)
end
end

describe "#classifications" do
let(:relation_instance) { project }

Expand Down Expand Up @@ -317,29 +299,37 @@

describe "#subjects_count" do
before do
subject_sets.map { |set| set.update_column(:set_member_subjects_count, 1) }
active_workflows.map do |workflow|
workflow.update_column(:real_set_member_subjects_count, 1)
end
end

context "without a loaded association" do
let(:subject_sets) { SubjectSet.where(project_id: full_project.id) }
let(:active_workflows) do
Workflow.active.where(
active: true,
serialize_with_project: true,
project_id: full_project.id
)
end

it "should hit the db with a sum query" do
expect(full_project.live_subject_sets)
it "should hit the db with a sum query across active workflows" do
expect(full_project.active_workflows)
.to receive(:sum)
.with(:set_member_subjects_count)
.with(:real_set_member_subjects_count)
.and_call_original
expect(full_project.subjects_count).to eq(2)
expect(full_project.subjects_count).to eq(1)
end
end

context "with a loaded assocation" do
let(:subject_sets) { full_project.live_subject_sets }
let(:active_workflows) { full_project.active_workflows }

it "should use the association values" do
expect(full_project.live_subject_sets)
expect(full_project.active_workflows)
.to receive(:inject)
.and_call_original
expect(full_project.subjects_count).to eq(2)
expect(full_project.subjects_count).to eq(1)
end
end
end
Expand Down
8 changes: 6 additions & 2 deletions spec/models/workflow_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
let(:locked_update) { {display_name: "A Different Name"} }
end

it_behaves_like "has subject_count"

it_behaves_like "is translatable" do
let(:model) { create :workflow }
end
Expand Down Expand Up @@ -349,6 +347,12 @@
end
end

describe "#subjects_count" do
it "should be an alias for real_set_member_subjects count" do
expect(subject_relation.subjects_count).to eq(subject_relation.real_set_member_subjects_count)
end
end

describe "#finished?" do
let(:workflow) { subject_relation }
let(:subjects_count) { workflow.subjects_count }
Expand Down
12 changes: 0 additions & 12 deletions spec/support/subject_counts.rb

This file was deleted.

Loading

0 comments on commit 5da1aba

Please sign in to comment.