Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ Style/EmptyElse:
Style/EmptyMethod:
EnforcedStyle: compact

Style/FileName:
Enabled: false

Style/FormatString:
EnforcedStyle: percent

Expand Down Expand Up @@ -110,3 +107,26 @@ RSpec/NestedGroups:

RSpec/ContextWording:
Enabled: false

### Security -----------------------------------------------------------

Security/Open:
Enabled: false

### Naming -------------------------------------------------------------

Naming/FileName:
Enabled: false

Naming/MemoizedInstanceVariableName:
Exclude:
- 'lib/daru/io/exporters/excel.rb'

Naming/UncommunicativeMethodParamName:
AllowedNames:
- 'db'

### Lint ---------------------------------------------------------------

Lint/SplatKeywordArguments:
Enabled: false
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ group :optional do
gem 'mongo'
gem 'nokogiri'
gem 'redis'
gem 'request-log-analyzer', '~> 1.13.4'
gem 'roo', '~> 2.7.0'
gem 'rsruby'
gem 'snappy'
Expand Down
2 changes: 1 addition & 1 deletion daru-io.gemspec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Put this gem's own lib/ directory on the load path (once) so the
# `daru/io/version` require that follows can be resolved.
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'daru/io/version'

Expand Down
84 changes: 84 additions & 0 deletions lib/daru/io/importers/log.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
require 'daru/io/importers/base'

module Daru
  module IO
    module Importers
      # Importer for log files parsed by the `request-log-analyzer` gem.
      # Registers itself with `Daru::DataFrame` under the name
      # `read_rails_log`.
      class Log < Base
        Daru::DataFrame.register_io_module :read_rails_log, self

        # Attributes extracted from every parsed request, per supported log
        # format. The position in each list is the column position in the
        # resulting dataframe.
        ORDERS = {
          rails3: %i[
            method path ip timestamp line_type lineno source
            controller action format params rendered_file
            partial_duration status duration view db
          ].freeze,

          apache: %i[
            remote_host remote_logname user timestamp http_method
            path http_version http_status bytes_sent referer
            user_agent line_type lineno source
          ].freeze,

          amazon_s3: %i[
            bucket_owner bucket timestamp remote_ip requester request_id operation
            key request_uri http_status error_code bytes_sent object_size total_time
            turnaround_time referer user_agent line_type lineno source
          ].freeze
        }.freeze

        # Attributes that are exposed under a different column name.
        RENAME_FIELDS = {
          path: :resource_path
        }.freeze

        # Declares the optional `request-log-analyzer` dependency.
        def initialize
          optional_gem 'request-log-analyzer', '~> 1.13.4', requires: 'request_log_analyzer'
        end

        # Parses a log file and keeps the extracted rows on the importer
        #
        # @!method self.read(path, format: :rails3)
        #
        # @param path [String] Path to the log file, where the dataframe is
        #   to be imported from.
        #
        # @param format [Symbol] Format of the log file: :rails3, :apache or
        #   :amazon_s3. Defaults to :rails3.
        #
        # @return [Daru::IO::Importers::Log] the importer itself
        #
        # @example Reading from rails log file
        #   instance = Daru::IO::Importers::Log.read("rails_test.log")
        #
        # @example Reading from apache log file
        #   instance = Daru::IO::Importers::Log.new.read("apache_test.log", format: :apache)
        #
        # @example Reading from amazon s3 log file
        #   instance = Daru::IO::Importers::Log.new.read("amazon_s3_test.log", format: :amazon_s3)
        def read(path, format: :rails3)
          @format = format
          file_format = RequestLogAnalyzer::FileFormat.load(@format)
          parser = RequestLogAnalyzer::Source::LogParser.new(file_format, source_files: path)

          # One row per parsed request; attributes the request does not
          # carry become nil so every row has the same width.
          @file_data = parser.map do |request|
            fields = ORDERS.fetch(@format)
            attributes = request.attributes
            fields.map { |field| attributes[field] if attributes.include?(field) }
          end

          self
        end

        # Builds a `Daru::DataFrame` from the rows collected by {#read}
        #
        # @return [Daru::DataFrame]
        #
        # @example Reading from a log file
        #   df = instance.call
        #
        #   => #<Daru::DataFrame(150x17)>
        #   #      method resource_path        ip  timestamp line_type lineno    source   contr...
        #   #    0    GET             / 127.0.0.1 2018022607 completed      5 /home/roh   Rails...
        #   #    1    GET             / 127.0.0.1 2018022716 completed     12 /home/roh   Rails...
        #   #  ...    ...           ...       ...        ...       ...    ...       ...     ...
        def call
          column_names = ORDERS.fetch(@format).map { |field| RENAME_FIELDS.fetch(field, field) }
          Daru::DataFrame.rows(@file_data, order: column_names)
        end
      end
    end
  end
end
10 changes: 5 additions & 5 deletions lib/daru/io/importers/plaintext.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,14 @@ def process_row(row,empty)
end
end

# Converts a string token to a number when it looks like one.
#
# INT_PATTERN and FLOAT_PATTERN are constants defined outside this method.
#
# @param str [String] token to convert
# @return [Integer, Float, String] an Integer when `str` matches
#   INT_PATTERN, a Float when it matches FLOAT_PATTERN, otherwise `str`
#   itself, unchanged
def try_string_to_number(str)
  case str
  when INT_PATTERN
    str.to_i
  when FLOAT_PATTERN
    # A comma is treated as the decimal separator before conversion.
    str.tr(',', '.').to_f
  else
    str
  end
end
end
Expand Down
1 change: 1 addition & 0 deletions lib/daru/io/link.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class << self
# | `Daru::DataFrame.read_json` | {Daru::IO::Importers::JSON#read} |
# | `Daru::DataFrame.from_mongo` | {Daru::IO::Importers::Mongo#from} |
# | `Daru::DataFrame.read_plaintext` | {Daru::IO::Importers::Plaintext#read} |
# | `Daru::DataFrame.read_rails_log` | {Daru::IO::Importers::Log#read} |
# | `Daru::DataFrame.read_rdata` | {Daru::IO::Importers::RData#read} |
# | `Daru::DataFrame.read_rds` | {Daru::IO::Importers::RDS#read} |
# | `Daru::DataFrame.from_redis` | {Daru::IO::Importers::Redis#from} |
Expand Down
46 changes: 46 additions & 0 deletions spec/daru/io/importers/log_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Runs the Log importer against one single-request fixture per supported
# format and checks the resulting dataframe's shape, column order and a
# couple of parsed values.
#
# NOTE(review): the described object is an importer *instance*, presumably so
# that `described_class.read` reaches the instance method that accepts
# `format:` -- confirm against RSpec's `described_class` semantics.
RSpec.describe Daru::IO::Importers::Log.new do
  context 'parsing rails log' do
    subject { described_class.read(path, format: :rails3).call }

    let(:path) { 'spec/fixtures/log/rails.log' }

    it_behaves_like 'exact daru dataframe',
      ncols: 17,
      nrows: 1,
      order: %i[
        method resource_path ip timestamp line_type lineno
        source controller action format params rendered_file
        partial_duration status duration view db
      ],
      :'timestamp.to_a' => [20_180_312_174_118],
      :'duration.to_a' => [0.097]
  end

  context 'parsing apache log' do
    subject { described_class.read(path, format: :apache).call }

    let(:path) { 'spec/fixtures/log/apache.log' }

    it_behaves_like 'exact daru dataframe',
      ncols: 14,
      nrows: 1,
      order: %i[
        remote_host remote_logname user timestamp http_method
        resource_path http_version http_status bytes_sent
        referer user_agent line_type lineno source
      ],
      :'timestamp.to_a' => [20_161_207_103_443],
      :'bytes_sent.to_a' => [571]
  end

  context 'parsing amazon_s3 log' do
    subject { described_class.read(path, format: :amazon_s3).call }

    let(:path) { 'spec/fixtures/log/s3.log' }

    it_behaves_like 'exact daru dataframe',
      ncols: 20,
      nrows: 1,
      order: %i[
        bucket_owner bucket timestamp remote_ip requester request_id operation
        key request_uri http_status error_code bytes_sent object_size total_time
        turnaround_time referer user_agent line_type lineno source
      ],
      :'timestamp.to_a' => [20_150_612_054_010],
      :'turnaround_time.to_a' => [0.019]
  end
end
1 change: 1 addition & 0 deletions spec/fixtures/log/apache.log
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
77.179.66.156 - - [07/Dec/2016:10:34:43 +0100] "GET /favicon.ico HTTP/1.1" 404 571 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36"
7 changes: 7 additions & 0 deletions spec/fixtures/log/rails.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Started GET "/articles/9" for 127.0.0.1 at 2018-03-12 17:41:18 +0530
Processing by ArticlesController#show as HTML
Parameters: {"id"=>"9"}
Article Load (1.4ms) SELECT "articles".* FROM "articles" WHERE "articles"."id" = ? LIMIT ? [["id", 9], ["LIMIT", 1]]
Rendering articles/show.html.erb within layouts/application
Rendered articles/show.html.erb within layouts/application (2.9ms)
Completed 200 OK in 97ms (Views: 50.6ms | ActiveRecord: 1.4ms)
1 change: 1 addition & 0 deletions spec/fixtures/log/s3.log
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
8aefdcbf18ef13fe9f82be73697a945f423e4299c995e9e96bb619975db40bd6 drat [12/Jun/2015:05:40:10 +0000] 208.54.5.245 - 40D2FE0D1C76A065 WEBSITE.GET.OBJECT file "GET /file HTTP/1.1" 200 - 5 5 20 19 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0" -