# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

#
# Log format plug-in for Amazon S3 server access logs.
#
# Each log line is a space-separated record; the bracketed timestamp and the
# double-quoted request-URI, referrer and user-agent columns are each read as
# one field.  Column numbers used by log.fields below follow the order
# documented at the LogFormat URL in the change history.
#
amazon_s3 = {

  plugin_version = "2.0"
  info.1.manufacturer = "Amazon"
  info.1.device = "S3"
  info.1.version.1 = ""

  # 2011-12-19 - 1.0 - GMF - Initial creation, based on apache_combined
  # 2012-01-09 - 2.0 - GMF - implemented fields as documented: http://docs.amazonwebservices.com/AmazonS3/latest/dev/LogFormat.html

  # The name of the log format
  log.format.format_label = "Amazon S3 Log Format"
  log.miscellaneous.log_data_type = "http_access"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^[a-z0-9]{64} [^ ]* \\[../.../....[: ]..:..:...*\\] [^ ]+ [^ ]+ [^ ]+ [^ ]+ [^ ]+ \"[A-Z]* [^ ]*(\"| HTTP.*\") [0-9]* [^ ]+ [^ ]+ [^ ]+ [^ ]+ [^ ]+ \"[^\"]*\" \"[^\"]*\" [^ ]+$"

  # Treat fields surrounded by square brackets (e.g. the date/time field) as a single quoted field.
  log.format.treat_brackets_as_quotes = "true"

  log.format.ignore_format_lines = "true"

  # The format of dates and times in this log
  log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
  log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"

  # Log fields.  "index" is the 1-based column in the log line; "subindex"
  # selects a space-separated piece inside a bracketed/quoted column.
  log.fields = {
    bucket_owner.index = 1
    bucket.index = 2
    # Column 3 is "[dd/mmm/yyyy:hh:mm:ss +zzzz]"; subindex 1 is the part before the zone offset.
    date_time = {
      type = "date_time"
      index = 3
      subindex = 1
    } # date_time
    remote_ip = {
      type = "host"
      index = 4
    } # remote_ip
    requester.index = 5
    # NOTE(review): column 6 appears to be the S3 "Request ID" column, not an
    # ID of the requester; the field name is kept for compatibility -- confirm.
    requester_id.index = 6
    operation.index = 7
    key.index = 8
#    request_uri.index = 9
#    operation = {
#      index = 10
#      subindex = 1
#    } # operation
    # Column 9 is the quoted request line ("METHOD uri HTTP/x.y"); subindex 2 is the URI.
    request_uri = {
      type = "page"
      index = 9
      subindex = 2
    } # request_uri
#    protocol = {
#      index = 10
#      subindex = 3
#    } # protocol
    http_status.index = 10
    error_code.index = 11
    bytes_sent.index = 12
    object_size.index = 13
    total_time.index = 14
    turnaround_time.index = 15
    hit_type = ""
    referrer = {
      type = "URL"
      index = 16
      hierarchy_dividers = "/?"
    } # referrer
    agent = {
      type = "agent"
      index = 17
    } # agent
    # Fields with no column of their own; they are set by the "categorize" log filter.
    spiders = ""
    worms = ""
    errors = ""
    broken_links = ""
    screen_info_hits = ""
  } # log.fields

  # Database fields
  database.fields = {
#    bucket_owner = ""
    bucket = ""
    date_time = ""
    day_of_week = ""
    hour_of_day = ""
    hit_type = ""
    remote_ip = ""
    location = ""
    organization = ""
    domain_description = ""
    isp = ""
    domain = ""
    requester = ""
#    requester_id = ""
    operation = ""
    key = ""
    request_uri = {
      suppress_bottom = 9
      display_format_type = "page"
    } # request_uri
    file_type = ""
    worm = ""
    screen_dimensions = ""
    screen_depth = ""
    http_status = ""
    error_code = ""
    referrer = {
      suppress_top = 1
      suppress_bottom = 3
    } # referrer
    search_engine = ""
    search_phrase = ""
    referrer_description = ""
    web_browser = ""
    operating_system = ""
    spider = ""
  } # database.fields

  # Get search engine and search phrase information from the referrer field (before it gets simplified).
  log.parsing_filters.compute_se_sp = `
if (get_search_engine_info(referrer)) then (
  search_engine = volatile.search_engine;
  search_phrase = volatile.search_phrase;
);
`

  # Get web browser, operating system, and spider information from the user-agent field.
  log.parsing_filters.derive_from_user_agent = `
get_user_agent_info(agent);
web_browser = volatile.web_browser;
operating_system = volatile.operating_system;
spider = volatile.spider;
`

  # Log Filters
  log.filters = {

    # Replace '-' with '(no referrer)'; otherwise keep only scheme://host/ of the referrer.
    simplify_referrer = {
      label = "$lang_admin.log_filters.simplify_referrer_label"
      comment = "$lang_admin.log_filters.simplify_referrer_comment"
      value = "if (referrer eq '-') then referrer = '(no referrer)' else if (matches_regular_expression(referrer, '^([^:]+://[^/]+/)')) then referrer = $1 . '(omitted)'"
    } # simplify_referrer

    # Disabled by default; 'mydomain.com/' is a placeholder to be edited per profile.
    internal_referrer = {
      label = "$lang_admin.log_filters.internal_referrer_label"
      comment = "$lang_admin.log_filters.internal_referrer_comment"
      value = "if (contains(referrer, 'mydomain.com/')) then referrer = '(internal referrer)';"
      disabled = true
    } # internal_referrer

#    not_authenticated = {
#      label = "$lang_admin.log_filters.not_authenticated_label"
#      comment = "$lang_admin.log_filters.not_authenticated_comment"
#      value = "if (authenticated_user eq '-') then authenticated_user = '(not authenticated)';"
#    } # not_authenticated

    # Collapse the query string of the request URI to '(parameters)'.
    # The field is named request_uri in this plug-in (it was "page" in apache_combined).
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(request_uri, '?')) then request_uri = substr(request_uri, 0, index(request_uri, '?') + 1) . '(parameters)';"
    } # remove_query

    # Classify each entry into exactly one hit_type and set the matching counter.
    # Order matters: 4xx status is checked first, then spider, worm, screen info,
    # then static file types; anything left is a page view.
    categorize = {
      label = "$lang_admin.log_filters.categorize_hits_label"
      comment = "$lang_admin.log_filters.categorize_hits_comment"
      value = `if (starts_with(http_status, '4')) then (
  errors = 1;
  if (http_status eq '404') then (
    broken_links = 1;
    hit_type = "broken link";
  )
  else (
    hit_type = "error";
  )
)
else if (!starts_with(spider, '(')) then (
  hit_type = "spider";
  spiders = 1;
)
else if (!starts_with(worm, '(')) then (
  hit_type = "worm";
  worms = 1;
)
else if (!starts_with(screen_dimensions, '(')) then (
  hit_type = "screen info";
  screen_info_hits = 1;
)
else if ((file_type eq 'JPEG') or
         (file_type eq 'JPG') or
         (file_type eq 'GIF') or
         (file_type eq 'ICO') or
         (file_type eq 'PNG') or
         (file_type eq 'CSS') or
         (file_type eq 'SWF') or
         (file_type eq 'JS')) then (
  hit_type = "hit";
)
else (
  hit_type = "page view";
  page_views = 1;
)`
    } # categorize

    # Count every accepted entry as one hit.
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'hits = 1;'
    } # mark_entry

    # Relies on "worms" having been set by the categorize filter above.
    set_page_for_worm = {
      label = "$lang_admin.log_filters.set_page_for_worm_label"
      comment = "$lang_admin.log_filters.set_page_for_worm_comment"
      value = "if (worms == 1) then request_uri = '(worm)';"
    } # set_page_for_worm

    # For entries that are not page views, keep only the directory part of the URI.
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then request_uri = substr(request_uri, 0, last_index(request_uri, '/') + 1) . '(nonpage)';"
    } # strip_non_page_views

  } # log.filters

  # Session computation options.  These name log fields defined in log.fields
  # above: request_uri is the page field and remote_ip identifies the visitor.
  log.field_options = {
    sessions_page_field = "request_uri"
    sessions_visitor_id_field = "remote_ip"
    sessions_event_field = "page_views"
  } # log.field_options

  database.numerical_fields = {

    hits = {
      label = "$lang_stats.field_labels.hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
      entries_field = true
    } # hits

    page_views = {
      label = "$lang_stats.field_labels.page_views"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # page_views

    spiders = {
      label = "$lang_stats.field_labels.spiders"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # spiders

    worms = {
      label = "$lang_stats.field_labels.worms"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # worms

    errors = {
      label = "$lang_stats.field_labels.errors"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # errors

    broken_links = {
      label = "$lang_stats.field_labels.broken_links"
      default = true
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # broken_links

    screen_info_hits = {
      label = "$lang_stats.field_labels.screen_info_hits"
      default = false
      requires_log_field = false
      type = "int"
      display_format_type = "integer"
    } # screen_info_hits

    # Unique count of the client address; remote_ip is the host-type log field in this plug-in.
    visitors = {
      label = "$lang_stats.field_labels.visitors"
      default = false
      requires_log_field = true
      log_field = "remote_ip"
      type = "unique"
      display_format_type = "integer"
    } # visitors

    bytes_sent = {
      default = true
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # bytes_sent

    total_time = {
      default = false
      type = "int"
      integer_bits = 64
      display_format_type = duration_milliseconds
    } # total_time

    turnaround_time = {
      default = false
      type = "int"
      integer_bits = 64
      display_format_type = duration_milliseconds
    } # turnaround_time

  } # database.numerical_fields

  create_profile_wizard_options = {

    date_time_tracking = true
    host_tracking = true

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      hit_type = ""
      content_group = {
        bucket = true
        request_uri = true
        file_type = true
        broken_links = {
          label = "$lang_stats.menu.reports.broken_links"
          filter = "http_status within '404'"
          columns = {
            0.field_name = "referrer"
            1.field_name = "request_uri"
          }
          subtable = false
        } # broken_links
      }
      visitor_demographics_group = {
        remote_ip = true
        requester = true
        domain_description = true
        location = true
        organization = true
        isp = true
        domain = true
        # No authenticated_user field is defined by this plug-in (the
        # not_authenticated filter is disabled above), so no report is listed for it.
#        authenticated_user = true
      }
      visitor_systems_group = {
        screen_dimensions = true
        screen_depth = true
        web_browser = true
        operating_system = true
      }
      referrer_group = {
        referrer = true
        referrer_description = true
        search_engine = true
        search_phrase = true
        search_phrase_by_search_engine = true
      }
      other_group = {
        http_status = true
        error_code = true
        worm = true
        spider = true
        key = true
        operation = true
      }
    } # report_groups

  } # create_profile_wizard_options

} # amazon_s3