Skip to content
Permalink
Browse files

normalize hostname and pathname columns

to save on disk storage
  • Loading branch information...
dannyvankooten committed Nov 12, 2018
1 parent 69986d9 commit dbcadcd73772258f2d2fd781673e559d5ed74dba
Showing with 415 additions and 43 deletions.
  1. BIN mc4wp.db.orig
  2. +35 −15 pkg/aggregator/store.go
  3. +6 −2 pkg/datastore/datastore.go
  4. +30 −0 pkg/datastore/sqlstore/hostnames.go
  5. +8 −0 pkg/datastore/sqlstore/migrations/mysql/12_create_hostnames_table.sql
  6. +5 −0 pkg/datastore/sqlstore/migrations/mysql/13_create_unique_hostname_index.sql
  7. +8 −0 pkg/datastore/sqlstore/migrations/mysql/14_create_pathnames_table.sql
  8. +5 −0 pkg/datastore/sqlstore/migrations/mysql/15_create_unique_pathname_index.sql
  9. +6 −0 pkg/datastore/sqlstore/migrations/mysql/16_fill_hostnames_table.sql
  10. +6 −0 pkg/datastore/sqlstore/migrations/mysql/17_fill_pathnames_table.sql
  11. +24 −0 pkg/datastore/sqlstore/migrations/mysql/18_alter_page_stats_table.sql
  12. +24 −0 pkg/datastore/sqlstore/migrations/mysql/19_alter_referrer_stats_table.sql
  13. +7 −0 pkg/datastore/sqlstore/migrations/mysql/20_recreate_stats_indices.sql
  14. +8 −0 pkg/datastore/sqlstore/migrations/postgres/13_create_hostnames_table.sql
  15. +5 −0 pkg/datastore/sqlstore/migrations/postgres/14_create_unique_hostname_index.sql
  16. +8 −0 pkg/datastore/sqlstore/migrations/postgres/15_create_pathnames_table.sql
  17. +5 −0 pkg/datastore/sqlstore/migrations/postgres/16_create_unique_pathname_index.sql
  18. +6 −0 pkg/datastore/sqlstore/migrations/postgres/17_fill_hostnames_table.sql
  19. +6 −0 pkg/datastore/sqlstore/migrations/postgres/18_fill_pathnames_table.sql
  20. +24 −0 pkg/datastore/sqlstore/migrations/postgres/19_alter_page_stats_table.sql
  21. +24 −0 pkg/datastore/sqlstore/migrations/postgres/20_alter_referrer_stats_table.sql
  22. +7 −0 pkg/datastore/sqlstore/migrations/postgres/21_recreate_stats_indices.sql
  23. +8 −0 pkg/datastore/sqlstore/migrations/sqlite3/12_create_hostnames_table.sql
  24. +5 −0 pkg/datastore/sqlstore/migrations/sqlite3/13_create_unique_hostname_index.sql
  25. +8 −0 pkg/datastore/sqlstore/migrations/sqlite3/14_create_pathnames_table.sql
  26. +5 −0 pkg/datastore/sqlstore/migrations/sqlite3/15_create_unique_pathname_index.sql
  27. +6 −0 pkg/datastore/sqlstore/migrations/sqlite3/16_fill_hostnames_table.sql
  28. +6 −0 pkg/datastore/sqlstore/migrations/sqlite3/17_fill_pathnames_table.sql
  29. +24 −0 pkg/datastore/sqlstore/migrations/sqlite3/18_alter_page_stats_table.sql
  30. +25 −0 pkg/datastore/sqlstore/migrations/sqlite3/19_alter_referrer_stats_table.sql
  31. +9 −0 pkg/datastore/sqlstore/migrations/sqlite3/20_recreate_stats_indices.sql
  32. +0 −1 pkg/datastore/sqlstore/migrations/sqlite3/7_add_site_id_to_site_stats_table.sql
  33. +0 −1 pkg/datastore/sqlstore/migrations/sqlite3/8_add_site_id_to_page_stats_table.sql
  34. +0 −1 pkg/datastore/sqlstore/migrations/sqlite3/9_add_site_id_to_referrer_stats_table.sql
  35. +14 −11 pkg/datastore/sqlstore/page_stats.go
  36. +30 −0 pkg/datastore/sqlstore/pathnames.go
  37. +14 −12 pkg/datastore/sqlstore/referrer_stats.go
  38. +2 −0 pkg/models/page_stats.go
  39. +2 −0 pkg/models/referrer_stats.go
BIN +10.3 MB mc4wp.db.orig
Binary file not shown.
@@ -40,18 +40,28 @@ func (agg *Aggregator) getPageStats(r *results, siteID int64, t time.Time, hostn
return stats, nil
}

stats, err := agg.database.GetPageStats(siteID, t, hostname, pathname)
hostnameID, err := agg.database.HostnameID(hostname)
if err != nil {
return nil, err
}

pathnameID, err := agg.database.PathnameID(pathname)
if err != nil {
return nil, err
}

stats, err := agg.database.GetPageStats(siteID, t, hostnameID, pathnameID)
if err != nil && err != datastore.ErrNoResults {
return nil, err
}

if stats == nil {
stats = &models.PageStats{
SiteID: siteID,
New: true,
Hostname: hostname,
Pathname: pathname,
Date: t,
SiteID: siteID,
New: true,
HostnameID: hostnameID,
PathnameID: pathnameID,
Date: t,
}

}
@@ -66,24 +76,34 @@ func (agg *Aggregator) getReferrerStats(r *results, siteID int64, t time.Time, h
return stats, nil
}

hostnameID, err := agg.database.HostnameID(hostname)
if err != nil {
return nil, err
}

pathnameID, err := agg.database.PathnameID(pathname)
if err != nil {
return nil, err
}

// get from db
stats, err := agg.database.GetReferrerStats(siteID, t, hostname, pathname)
stats, err := agg.database.GetReferrerStats(siteID, t, hostnameID, pathnameID)
if err != nil && err != datastore.ErrNoResults {
return nil, err
}

if stats == nil {
stats = &models.ReferrerStats{
SiteID: siteID,
New: true,
Hostname: hostname,
Pathname: pathname,
Date: t,
Group: "",
SiteID: siteID,
New: true,
HostnameID: hostnameID,
PathnameID: pathnameID,
Date: t,
Group: "",
}

// TODO: Abstract this
if strings.Contains(stats.Hostname, "www.google.") {
// TODO: Abstract this so we can add more groupings
if strings.Contains(hostname, "www.google.") {
stats.Group = "Google"
}
}
@@ -45,17 +45,21 @@ type Datastore interface {
DeletePageviews([]*models.Pageview) error

// page stats
GetPageStats(int64, time.Time, string, string) (*models.PageStats, error)
GetPageStats(int64, time.Time, int64, int64) (*models.PageStats, error)
SavePageStats(*models.PageStats) error
GetAggregatedPageStats(int64, time.Time, time.Time, int64) ([]*models.PageStats, error)
GetAggregatedPageStatsPageviews(int64, time.Time, time.Time) (int64, error)

// referrer stats
GetReferrerStats(int64, time.Time, string, string) (*models.ReferrerStats, error)
GetReferrerStats(int64, time.Time, int64, int64) (*models.ReferrerStats, error)
SaveReferrerStats(*models.ReferrerStats) error
GetAggregatedReferrerStats(int64, time.Time, time.Time, int64) ([]*models.ReferrerStats, error)
GetAggregatedReferrerStatsPageviews(int64, time.Time, time.Time) (int64, error)

// hostnames and pathnames
HostnameID(name string) (int64, error)
PathnameID(name string) (int64, error)

// misc
Health() error
Close() error
@@ -0,0 +1,30 @@
package sqlstore

import (
"database/sql"
)

// HostnameID returns the id of the row in the hostnames table whose name
// equals the given name. When no such row exists yet, a new row is inserted
// and the id of that new row is returned instead.
func (db *sqlstore) HostnameID(name string) (int64, error) {
	var id int64

	lookup := db.Rebind("SELECT id FROM hostnames WHERE name = ? LIMIT 1")
	err := db.Get(&id, lookup, name)
	if err != sql.ErrNoRows {
		// Either the row was found (err is nil) or the lookup itself failed.
		return id, err
	}

	insert := db.Rebind(`INSERT INTO hostnames(name) VALUES(?)`)

	// Postgres does not support LastInsertID, so read the new id back
	// through a "... RETURNING" clause.
	if db.Driver == POSTGRES {
		insertErr := db.Get(&id, insert+" RETURNING id", name)
		return id, insertErr
	}

	// MySQL and SQLite do support LastInsertID, so use that.
	result, execErr := db.Exec(insert, name)
	if execErr != nil {
		return 0, execErr
	}

	return result.LastInsertId()
}
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that gives every hostname string an auto-incrementing integer id.
CREATE TABLE hostnames(
id INTEGER AUTO_INCREMENT PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
) CHARACTER SET=utf8;

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS hostnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each hostname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
-- MySQL's DROP INDEX needs the owning table (ON tbl_name) and has no IF EXISTS clause.
DROP INDEX unique_hostnames_name ON hostnames;
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that gives every pathname string an auto-incrementing integer id.
CREATE TABLE pathnames(
id INTEGER AUTO_INCREMENT PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
) CHARACTER SET=utf8;

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS pathnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each pathname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
-- MySQL's DROP INDEX needs the owning table (ON tbl_name) and has no IF EXISTS clause.
DROP INDEX unique_pathnames_name ON pathnames;
@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the hostnames lookup table with every distinct hostname found in the
-- daily page stats and daily referrer stats tables. INSERT IGNORE is used so
-- that a name which is already present does not abort the statement.
-- The Down section of this migration is empty.
INSERT IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats;
INSERT IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats;

-- +migrate Down

@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the pathnames lookup table with every distinct pathname found in the
-- daily page stats and daily referrer stats tables. INSERT IGNORE is used so
-- that a name which is already present does not abort the statement.
-- The Down section of this migration is empty.
INSERT IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats;
INSERT IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats;

-- +migrate Down

@@ -0,0 +1,24 @@
-- +migrate Up
-- Rebuild daily_page_stats so that it stores hostname_id and pathname_id
-- instead of the hostname and pathname strings.
-- Steps: move the current table aside, create the new layout, copy every row
-- across while resolving names to ids through the hostnames and pathnames
-- lookup tables, then drop the old copy.
-- NOTE(review): the lookups are LEFT JOINs feeding NOT NULL columns, so a row
-- whose hostname or pathname has no match would presumably make the INSERT
-- fail - confirm the fill migrations cover every row.
-- NOTE(review): the INSERT has no column list and relies on the SELECT order
-- matching the column order of the CREATE TABLE above it.
-- The Down section of this migration is empty.
DROP TABLE IF EXISTS daily_page_stats_old;
RENAME TABLE daily_page_stats TO daily_page_stats_old;
CREATE TABLE daily_page_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
entries INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
) CHARACTER SET=utf8;
INSERT INTO daily_page_stats
SELECT site_id, h.id, p.id, pageviews, visitors, entries, bounce_rate, known_durations, avg_duration, date
FROM daily_page_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_page_stats_old;

-- +migrate Down

@@ -0,0 +1,24 @@
-- +migrate Up
-- Rebuild daily_referrer_stats so that it stores hostname_id and pathname_id
-- instead of the hostname and pathname strings.
-- Steps: move the current table aside, create the new layout, copy every row
-- across while resolving names to ids through the hostnames and pathnames
-- lookup tables, then drop the old copy.
-- NOTE(review): the lookups are LEFT JOINs feeding NOT NULL columns, so a row
-- whose hostname or pathname has no match would presumably make the INSERT
-- fail - confirm the fill migrations cover every row.
-- NOTE(review): the INSERT has no column list and relies on the SELECT order
-- matching the column order of the CREATE TABLE above it.
-- The Down section of this migration is empty.
DROP TABLE IF EXISTS daily_referrer_stats_old;
RENAME TABLE daily_referrer_stats TO daily_referrer_stats_old;
CREATE TABLE daily_referrer_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
groupname VARCHAR(255) NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
) CHARACTER SET=utf8;
INSERT INTO daily_referrer_stats
SELECT site_id, h.id, p.id, groupname, pageviews, visitors, bounce_rate, known_durations, avg_duration, date
FROM daily_referrer_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_referrer_stats_old;

-- +migrate Down

@@ -0,0 +1,7 @@
-- +migrate Up
-- Add a unique index over (site_id, hostname_id, pathname_id, date) to each of
-- the two daily stats tables.
CREATE UNIQUE INDEX unique_daily_page_stats ON daily_page_stats(site_id, hostname_id, pathname_id, date);
CREATE UNIQUE INDEX unique_daily_referrer_stats ON daily_referrer_stats(site_id, hostname_id, pathname_id, date);

-- +migrate Down
-- Drop both unique indexes again.
DROP INDEX unique_daily_page_stats ON daily_page_stats;
DROP INDEX unique_daily_referrer_stats ON daily_referrer_stats;
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that gives every hostname string a SERIAL integer id.
CREATE TABLE hostnames(
id SERIAL PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS hostnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each hostname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
-- Drop the unique index again, if it exists.
DROP INDEX IF EXISTS unique_hostnames_name;
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that gives every pathname string a SERIAL integer id.
CREATE TABLE pathnames(
id SERIAL PRIMARY KEY NOT NULL,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS pathnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each pathname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
-- Drop the unique index again, if it exists.
DROP INDEX IF EXISTS unique_pathnames_name;
@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the hostnames lookup table with every distinct hostname found in the
-- daily page stats and daily referrer stats tables.
-- Both statements use ON CONFLICT(name) DO NOTHING, which relies on the unique
-- index on hostnames(name), so a name that is already present is skipped
-- instead of aborting the migration.
-- The Down section of this migration is empty.
INSERT INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats ON CONFLICT(name) DO NOTHING;
INSERT INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats ON CONFLICT(name) DO NOTHING;

-- +migrate Down

@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the pathnames lookup table with every distinct pathname found in the
-- daily page stats and daily referrer stats tables.
-- ON CONFLICT(name) DO NOTHING relies on the unique index on pathnames(name),
-- so a name that is already present is skipped instead of aborting the
-- migration.
-- The Down section of this migration is empty.
INSERT INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats ON CONFLICT(name) DO NOTHING;
INSERT INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats ON CONFLICT(name) DO NOTHING;

-- +migrate Down

@@ -0,0 +1,24 @@
-- +migrate Up
-- Rebuild daily_page_stats so that it stores hostname_id and pathname_id
-- instead of the hostname and pathname strings.
-- Steps: move the current table aside, create the new layout, copy every row
-- across while resolving names to ids through the hostnames and pathnames
-- lookup tables, then drop the old copy.
-- NOTE(review): the lookups are LEFT JOINs feeding NOT NULL columns, so a row
-- whose hostname or pathname has no match would presumably make the INSERT
-- fail - confirm the fill migrations cover every row.
-- NOTE(review): the INSERT has no column list and relies on the SELECT order
-- matching the column order of the CREATE TABLE above it.
-- The Down section of this migration is empty.
DROP TABLE IF EXISTS daily_page_stats_old;
ALTER TABLE daily_page_stats RENAME TO daily_page_stats_old;
CREATE TABLE daily_page_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
entries INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
);
INSERT INTO daily_page_stats
SELECT site_id, h.id, p.id, pageviews, visitors, entries, bounce_rate, known_durations, avg_duration, date
FROM daily_page_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_page_stats_old;

-- +migrate Down

@@ -0,0 +1,24 @@
-- +migrate Up
-- Rebuild daily_referrer_stats so that it stores hostname_id and pathname_id
-- instead of the hostname and pathname strings.
-- Steps: move the current table aside, create the new layout, copy every row
-- across while resolving names to ids through the hostnames and pathnames
-- lookup tables, then drop the old copy.
-- NOTE(review): the lookups are LEFT JOINs feeding NOT NULL columns, so a row
-- whose hostname or pathname has no match would presumably make the INSERT
-- fail - confirm the fill migrations cover every row.
-- NOTE(review): the INSERT has no column list and relies on the SELECT order
-- matching the column order of the CREATE TABLE above it.
-- The Down section of this migration is empty.
DROP TABLE IF EXISTS daily_referrer_stats_old;
ALTER TABLE daily_referrer_stats RENAME TO daily_referrer_stats_old;
CREATE TABLE daily_referrer_stats(
site_id INTEGER NOT NULL DEFAULT 1,
hostname_id INTEGER NOT NULL,
pathname_id INTEGER NOT NULL,
groupname VARCHAR(255) NULL,
pageviews INTEGER NOT NULL,
visitors INTEGER NOT NULL,
bounce_rate FLOAT NOT NULL,
known_durations INTEGER NOT NULL DEFAULT 0,
avg_duration FLOAT NOT NULL,
date DATE NOT NULL
);
INSERT INTO daily_referrer_stats
SELECT site_id, h.id, p.id, groupname, pageviews, visitors, bounce_rate, known_durations, avg_duration, date
FROM daily_referrer_stats_old s
LEFT JOIN hostnames h ON h.name = s.hostname
LEFT JOIN pathnames p ON p.name = s.pathname;
DROP TABLE daily_referrer_stats_old;

-- +migrate Down

@@ -0,0 +1,7 @@
-- +migrate Up
-- Add a unique index over (site_id, hostname_id, pathname_id, date) to each of
-- the two daily stats tables.
CREATE UNIQUE INDEX unique_daily_page_stats ON daily_page_stats(site_id, hostname_id, pathname_id, date);
CREATE UNIQUE INDEX unique_daily_referrer_stats ON daily_referrer_stats(site_id, hostname_id, pathname_id, date);

-- +migrate Down
-- Drop both unique indexes again.
DROP INDEX unique_daily_page_stats;
DROP INDEX unique_daily_referrer_stats;
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that maps an integer id to each hostname string.
CREATE TABLE hostnames(
id INTEGER PRIMARY KEY,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS hostnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each hostname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_hostnames_name ON hostnames(name);

-- +migrate Down
-- Drop the unique index again, if it exists.
DROP INDEX IF EXISTS unique_hostnames_name;
@@ -0,0 +1,8 @@
-- +migrate Up
-- Lookup table that maps an integer id to each pathname string.
CREATE TABLE pathnames(
id INTEGER PRIMARY KEY,
name VARCHAR(255) NOT NULL
);

-- +migrate Down
-- Remove the lookup table again.
DROP TABLE IF EXISTS pathnames;
@@ -0,0 +1,5 @@
-- +migrate Up
-- Enforce that each pathname is stored only once in the lookup table.
CREATE UNIQUE INDEX unique_pathnames_name ON pathnames(name);

-- +migrate Down
-- Drop the unique index again, if it exists.
DROP INDEX IF EXISTS unique_pathnames_name;
@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the hostnames lookup table with every distinct hostname found in the
-- daily page stats and daily referrer stats tables. INSERT OR IGNORE is used
-- so that a name which is already present does not abort the statement.
-- The Down section of this migration is empty.
INSERT OR IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_page_stats;
INSERT OR IGNORE INTO hostnames(name) SELECT DISTINCT(hostname) FROM daily_referrer_stats;

-- +migrate Down

@@ -0,0 +1,6 @@
-- +migrate Up
-- Seed the pathnames lookup table with every distinct pathname found in the
-- daily page stats and daily referrer stats tables. INSERT OR IGNORE is used
-- so that a name which is already present does not abort the statement.
-- The Down section of this migration is empty.
INSERT OR IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_page_stats;
INSERT OR IGNORE INTO pathnames(name) SELECT DISTINCT(pathname) FROM daily_referrer_stats;

-- +migrate Down

Oops, something went wrong.

0 comments on commit dbcadcd

Please sign in to comment.
You can’t perform that action at this time.