Permalink
Browse files

Added initial library content.

  • Loading branch information...
1 parent ca99a08 commit e8121bde573e6154ab3a07907696f506124e4b2f @watsonian committed Jan 27, 2010
Showing with 158 additions and 18 deletions.
  1. +1 −1 LICENSE
  2. +44 −12 README.rdoc
  3. +2 −2 Rakefile
  4. +1 −0 VERSION
  5. +53 −0 apache_log_parser.gemspec
  6. +54 −0 lib/apache_log_parser.rb
  7. +3 −3 spec/apache_log_parser_spec.rb
View
@@ -1,4 +1,4 @@
-Copyright (c) 2009 watsonian
+Copyright (c) 2009 Joel Watson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
View
@@ -1,17 +1,49 @@
-= apache_log_parser
+= Getting Started
-Description goes here.
+This is a library designed to easily parse and access standard Apache log files.
-== Note on Patches/Pull Requests
-
-* Fork the project.
-* Make your feature addition or bug fix.
-* Add tests for it. This is important so I don't break it in a
- future version unintentionally.
-* Commit, do not mess with rakefile, version, or history.
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
-* Send me a pull request. Bonus points for topic branches.
+To get started, require the library:
+
+ require 'apache_log_parser'
+
+From there, you would use it as follows:
+
+ ApacheLogParser.parse(logfile, rules) do |parsed|
+ parsed[:ip] #=> "12.12.12.12"
+ parsed[:date] #=> "21/Jan/2010"
+ parsed[:day] #=> 21
+ parsed[:month] #=> "Jan"
+ parsed[:year] #=> 2010
+ parsed[:time] #=> "14:05:32"
+ parsed[:hour] #=> 14
+ parsed[:zone] #=> "-0800"
+ parsed[:method] #=> "GET"
+ parsed[:resource] #=> "/some/page.php"
+ parsed[:status] #=> 200
+ parsed[:size] #=> "7047"
+ parsed[:referer] #=> "-"
+ parsed[:user_agent] #=> "Mozilla/5.0 (Macintosh; U; Intel..."
+ end
+
+The logfile parameter is simply the path to the logfile in question and the rules
+parameter is a hash of rules to filter the logfile with.
+
+= Using Rules
+
+To use rules, simply build a hash with options you want to filter with as follows:
+
+ rules = {}
+ rules[:hour] = 11..13 # only accept hits whose hour is 11, 12 or 13 (i.e. 11:00 through 13:59)
+ rules[:day] = 21 # only accept hits where the day is 21
+ rules[:date] = "12/Jan/2010" # only accept hits on Jan 12, 2010
+ rules[:method] = "GET" # only accept hits where the request method is GET
+ rules[:status] = 404 # only accept hits where the status response is 404
+
+Rules are combined with AND: a hit is kept only when it satisfies ALL of the rules. Once
+the rules hash is built, simply pass it into the parse method as shown above. The logfile
+is read one line at a time, so the memory footprint stays quite small even for large
+logfiles (sizes as large as 6GB have been tested), although the larger the file, the
+longer the parse process will take.
== Copyright
-Copyright (c) 2010 watsonian. See LICENSE for details.
+Copyright (c) 2010 Joel Watson. See LICENSE for details.
View
@@ -5,8 +5,8 @@ begin
require 'jeweler'
Jeweler::Tasks.new do |gem|
gem.name = "apache_log_parser"
- gem.summary = %Q{TODO: one-line summary of your gem}
- gem.description = %Q{TODO: longer description of your gem}
+ gem.summary = %Q{Library to easily parse standard Apache log files.}
+ gem.description = %Q{Library to easily parse standard Apache log files.}
gem.email = "watsonian@gmail.com"
gem.homepage = "http://github.com/watsonian/apache_log_parser"
gem.authors = ["watsonian"]
View
@@ -0,0 +1 @@
+1.0.0
View
@@ -0,0 +1,53 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# -*- encoding: utf-8 -*-
+
# Gem specification for apache_log_parser 1.0.0.
#
# Declares the gem's metadata, the files it ships, and its single dependency
# (rspec), which is registered as a development dependency whenever the
# running RubyGems supports that distinction.
Gem::Specification.new do |s|
  s.name = %q{apache_log_parser}
  s.version = "1.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["watsonian"]
  s.date = %q{2010-01-27}
  s.description = %q{Library to easily parse standard Apache log files.}
  s.email = %q{watsonian@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "lib/apache_log_parser.rb",
    "spec/apache_log_parser_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/watsonian/apache_log_parser}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Library to easily parse standard Apache log files.}
  s.test_files = [
    "spec/apache_log_parser_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version
    s.specification_version = 3

    # Gem::RubyGemsVersion is not defined by later RubyGems releases, so
    # referencing it unconditionally raises NameError when this file is loaded.
    # Prefer Gem::VERSION and only fall back where that constant is missing.
    installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0')
      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
    else
      # RubyGems older than 1.2.0 only knows runtime dependencies.
      s.add_dependency(%q<rspec>, [">= 1.2.9"])
    end
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
+
View
@@ -0,0 +1,54 @@
# Reads an Apache access log one line at a time and yields every hit as a hash
# of fields, optionally keeping only the hits that satisfy a hash of rules.
class ApacheLogParser
  # Pattern applied to each log line. Capture groups, in order: ip, date, day,
  # month, year, time, hour, zone, method, resource, status, size, referer and
  # user agent.
  LINE_PATTERN = /^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}).*?(([0-9]{1,2})\/(.*?)\/([0-9]{4})):(([0-9]{2}):[0-9]{2}:[0-9]{2})\s*(.*?)\]\s*"([\w]*)\s(.*?)\s.*?"\s([0-9]{3})\s(.*?)\s*"(.*?)"\s*"(.*?)"/.freeze

  # Parses +filename+ and yields each hit that satisfies +rules+.
  #
  # @param filename [String] path of the logfile to read
  # @param rules [Hash] field name => accepted value (a single value, an Array
  #   or a Range); a hit is kept only when every rule accepts it. The hash is
  #   not modified.
  # @yield [Hash] the parsed fields of each accepted hit (see parse_line)
  def self.parse(filename, rules={}, &block)
    parse_file(filename, process_rules(rules), &block)
  end

  # The methods below are internal helpers. A bare `private` has no effect on
  # methods defined with `def self.`, so they have always been callable from
  # outside the class and are left that way for backward compatibility.

  # Splits one log line into its fields.
  #
  # @api private
  # @param line [String] a single line of the logfile
  # @return [Hash] the fields :ip, :date, :day, :month, :year, :time, :hour,
  #   :zone, :method, :resource, :status, :size, :referer and :user_agent
  #   (:day, :year, :hour and :status as Integers, the rest as Strings), or an
  #   empty hash when the line does not match LINE_PATTERN
  def self.parse_line(line)
    m = LINE_PATTERN.match(line)
    return {} unless m

    {:ip => m[1],
     :date => m[2],
     :day => m[3].to_i,
     :month => m[4],
     :year => m[5].to_i,
     :time => m[6],
     :hour => m[7].to_i,
     :zone => m[8],
     :method => m[9],
     :resource => m[10],
     :status => m[11].to_i,
     :size => m[12],
     :referer => m[13],
     :user_agent => m[14]}
  end

  # Streams +filename+ line by line and yields the hits accepted by +rules+.
  #
  # Without rules every line is yielded, including the empty hash produced for
  # a line that could not be parsed. With rules, unparseable lines are skipped
  # because they have no fields to compare against.
  #
  # NOTE: when an :hour rule is present, reading stops at the first parsed line
  # whose hour is greater than the largest accepted hour. That shortcut is only
  # valid for a log whose lines are in ascending hour order (e.g. one day).
  #
  # @api private
  # @param filename [String] path of the logfile to read
  # @param rules [Hash] rules as returned by process_rules
  # @yield [Hash] the parsed fields of each accepted hit
  def self.parse_file(filename, rules={}, &block)
    # Largest accepted hour, computed once instead of once per line. +max+
    # rather than +last+ so an unsorted list of hours does not stop too early.
    last_hour = Array(rules[:hour]).max

    File.foreach(filename) do |line|
      parsed = parse_line(line)
      if rules.any?
        # An unparseable line can never satisfy a rule; skipping it here also
        # keeps the hour comparison below from being run against nil.
        next if parsed.empty?

        # stop parsing the file if we're past the designated hour range
        break if last_hour && parsed[:hour] > last_hour

        # go to the next line unless every rule is matched by this line
        next unless rules.all? { |field, accepted| Array(accepted).include?(parsed[field]) }
      end
      yield parsed
    end
  end

  # Normalizes the rules hash before parsing: a :date rule ("dd/Mon/yyyy") is
  # additionally expanded into matching :day, :month and :year rules.
  #
  # @api private
  # @param rules [Hash, nil] rules supplied by the caller (left untouched)
  # @return [Hash] a new hash holding the normalized rules
  def self.process_rules(rules)
    # Work on a copy so the caller's hash is never mutated.
    rules = (rules || {}).dup
    if rules[:date]
      day, month, year = rules[:date].split("/")
      rules[:day] = day.to_i
      rules[:month] = month
      rules[:year] = year.to_i
    end
    rules
  end
end
@@ -1,7 +1,7 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
describe "ApacheLogParser" do
- it "fails" do
- fail "hey buddy, you should probably rename this file and start specing for real"
- end
+ # it "fails" do
+ # fail "hey buddy, you should probably rename this file and start specing for real"
+ # end
end

0 comments on commit e8121bd

Please sign in to comment.