# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved. generic_w3_cweb = { plugin_version = "1.1" # ????-??-?? - 1.0 - GMF - Initial creation # 2009-04-20 - 1.0.1 - KBB - Fixed bug where the absence of a cs_uri_stem field will cause an error. # Added an alternate strip_non_page_views filter to work with cs_uri. Moved some derived fields, and # some other fields that occur in one sample, into report groups. # 2009-10-28 - 1.1 - KBB - Switched to use log.filter_preprocessor for parsing to fix a problem # with errors when derived fields extracted by log filters did not exist as log fields, and also to # handle the case where the name of the referrer field is cs-referrer instead of cs-referer. The # field separator is tab or space based on #Fields line. Added numeric field filesize. # 2010-02-01 - GMF - GMF - Switched default separator from " " to "" # The name of the log format log.format.format_label = "W3C Web Server Log Format" log.miscellaneous.log_data_type = "generic_w3c" log.miscellaneous.log_format_type = "web_server" # The log is in this format if any of the first ten lines match this regular expression #log.format.autodetect_regular_expression = "^#Fields:( | )" # This version excludes vbrick_ethernetv_portal_server - todo: exclude other specific w3c formats. log.format.autodetect_expression = `matches_regular_expression(volatile.log_data_line, "^#Fields:( | )") and (volatile.log_data_line ne "#Fields: cs-username c-ip cs-uri-stem x-ip x-port c-max-bandwidth x-type s-ip time date x-duration c-os c-osversion")` # The format of dates and times in this log #log.format.date_format = "yyyy-mm-dd" #log.format.time_format = "hh:mm:ss" log.format.date_format = "auto" log.format.time_format = "auto" # Treat fields surrounded by square brackets (e.g. the date/time field) as a single quoted field. log.format.treat_brackets_as_quotes = "true" # This handles #Fields lines, and creates log and database fields from them log.filter_preprocessor = ` if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then ( string fields = $1; string fieldname; v.logfieldindex = 1; string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields"; # This subroutine creates a database field subroutine(create_database_field(string fieldname, int top, int bottom), ( #echo("create_database_field: " . fieldname); # debug debug_message("create_database_field(" . fieldname . ")\n"); string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname; (databasefieldpath . "") = ""; node databasefield = databasefieldpath; if (top ne 0) then set_subnode_value(databasefield, "suppress_top", top); if (bottom ne 0) then set_subnode_value(databasefield, "suppress_bottom", bottom); databasefield; )); subroutine(create_log_field(string fieldname, string type, bool withindex), ( debug_message("create_log_field(" . fieldname . "; type=" . type . ")\n"); string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname; (logfieldpath . "") = ""; node logfield = logfieldpath; if (withindex) then ( set_subnode_value(logfield, "index", v.logfieldindex); v.logfieldindex++; ); set_subnode_value(logfield, "subindex", 0); if (type ne '') then set_subnode_value(logfield, "type", type); logfield; )); # Assume there isn't a localtime field until we see one. v.parse_localtime = false; # Get the log format separator. Assume at least two fields. # 2010-02-01 - GMF - Changed the default from " " to "" (any whitespace). With " " as the default, data which # has a spaces in the header, but tabs in the data, doesn't parse, because this assumes space # separators. Truly generic W3C header parsing appears to be impossible, which is why we can do # per-plugin parsing now; but that's just moved the problem here, where we'll still have to # fight it forever. Hopefully, "" gives better coverage to the universe of W3C variants than " ". # log.format.field_separator = " "; log.format.field_separator = ""; if (matches_regular_expression(fields, '^[^ ]+ [^ ]+')) then ( log.format.field_separator = " "; ); # Extract the fields on at a time while (matches_regular_expression(fields, '^([^ ]+)[ ](.*)$')) ( string unconverted_fieldname = $1; fields = $2; # Clean up the field name fieldname = ''; for (int i = 0; i < length(unconverted_fieldname); i++) ( string c = lowercase(substr(unconverted_fieldname, i, 1)); if (!matches_regular_expression(c, '^[a-z0-9]$')) then c = '_'; fieldname .= c; ); while (matches_regular_expression(fieldname, '^(.*)_$')) fieldname = $1; # Get the log field type string log_field_type = ''; if (fieldname eq 'cs_referrer') then ( fieldname = 'cs_referer'; ); if (fieldname eq 'cs_uri') then ( fieldname = 'cs_uri_stem'; ); if (fieldname eq 'cs_referer') then ( log_field_type = 'url'; ); else if (fieldname eq 'cs_uri_stem') then ( log_field_type = 'page'; ); else if (fieldname eq 'cs_user_agent') then ( log_field_type = 'agent'; ); else if (fieldname eq 'c_ip') then ( log_field_type = 'host'; ); # Create the log field create_log_field(fieldname, log_field_type, true); if (fieldname eq "localtime") then v.parse_localtime = true; # If we're creating a profile, create the database fields too. if (node_exists("volatile.creating_profile")) then ( # Handle localtime by creating date_time and derived database fields if (fieldname eq "localtime") then ( create_log_field('date', '', false); create_log_field('time', '', false); create_database_field('date_time', 0, 0); create_database_field('day_of_week', 0, 0); create_database_field('hour_of_day', 0, 0); ); # if localtime # Handle date by creating date_time and derived database fields else if (fieldname eq "date") then ( create_log_field('localtime', '', false); # placeholder - 7/Nov/2006 - KBB create_database_field('date_time', 0, 0); create_database_field('day_of_week', 0, 0); create_database_field('hour_of_day', 0, 0); ); # if date else if (fieldname eq "time") then ( create_database_field('date_time', 0, 0); create_database_field('day_of_week', 0, 0); create_database_field('hour_of_day', 0, 0); ); # if time # Create derived fields for agent else if (fieldname eq "cs_user_agent") then ( create_database_field('operating_system', 0, 0); create_database_field('web_browser', 0, 0); create_database_field('spider', 0, 0); ); # Create database field cs_ip and derived field for client IP else if (fieldname eq "c_ip") then ( create_database_field('c_ip', 0, 0); create_database_field('location', 0, 0); ); # Create database field cs_referer and derived fields for referrer else if (fieldname eq "cs_referer") then ( create_database_field('cs_referer', 1, 9); create_database_field('search_engine', 0, 0); create_database_field('search_phrase', 0, 0); ); # Create derived file type field else if (fieldname eq "cs_uri_stem") then ( create_database_field('cs_uri_stem', 0, 9); create_database_field('file_type', 0, 0); ); # Don't add a database field for numerical fields else if (subnode_exists(numerical_fields, fieldname)) then ( debug_message("Not adding numerical field: " . fieldname . "\n"); ); # Create a normal database field else create_database_field(fieldname, 0, 0); ); # if creating profile ); # while another field # Don't parse the #Fields line as a data line 'reject'; ); # if #Fields # Don't parse any other # lines as data lines else if (starts_with(current_log_line(), '#')) then ( 'reject'; ); ` # Get search engine and search phrase information from the cs_referer field (before it gets simplified). log.parsing_filters.compute_se_sp = { value = `if (get_search_engine_info(cs_referer)) then ( search_engine = volatile.search_engine; search_phrase = volatile.search_phrase; ); ` requires_fields = { cs_referer = true } } # Get web browser, operating system, web browser, and spider information from the user-agent field. log.parsing_filters.derive_from_user_agent = { value = `get_user_agent_info(replace_all(cs_user_agent, '+', ' ')); web_browser = volatile.web_browser; operating_system = volatile.operating_system; spider = volatile.spider; ` requires_fields = { cs_user_agent = true } } # Log Filters log.filters = { simplify_referrer = { label = "$lang_admin.log_filters.simplify_referrer_label" comment = "$lang_admin.log_filters.simplify_referrer_comment" value = "if (cs_referer eq '-') then cs_referer = '(no referrer)' else if (matches_regular_expression(cs_referer, '^([^:]+://[^/]+/)')) then cs_referer = $1 . '(omitted)'" requires_fields = { cs_referer = true } } # simplify_referrer internal_referrer = { label = "$lang_admin.log_filters.internal_referrer_label" comment = "$lang_admin.log_filters.internal_referrer_comment" value = "if (contains(cs_referer, 'mydomain.com/')) then cs_referer = '(internal referrer)';" disabled = true requires_fields = { cs_referer = true } } # internal_referrer set_page_for_worm = { label = "$lang_admin.log_filters.set_page_for_worm_label" comment = "$lang_admin.log_filters.set_page_for_worm_comment" value = "if (starts_with(worm, '(')) then '' else cs_uri_stem = '(worm)';" requires_fields = { worm = true cs_uri_stem = true } } # set_page_for_worm # This filter tacks the page parameters ("URL query") onto the end of the page field empty_uri_query = { label = "$lang_admin.log_filters.empty_uri_query_label" comment = "$lang_admin.log_filters.empty_uri_query_comment" value = "if (cs_uri_query eq '-') then cs_uri_query = '(empty)';" disabled = true requires_fields = { cs_uri_query = true } } # empty_uri_query add_cs_uri_query = { label = "$lang_admin.log_filters.add_cs_uri_query_label" comment = "$lang_admin.log_filters.add_cs_uri_query_comment" value = "cs_uri_stem = cs_uri_stem . '?' . cs_uri_query" disabled = true requires_fields = { cs_uri_query = true cs_uri_stem = true } } # add_cs_uri_query detect_page_views = { label = '$lang_admin.log_filters.detect_page_views_label' comment = '$lang_admin.log_filters.detect_page_views_comment' value = "if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then page_views = 0; else page_views = 1;" requires_fields = { file_type = true } } # detect_page_views strip_non_page_views = { label = '$lang_admin.log_filters.strip_non_page_views_label' comment = '$lang_admin.log_filters.strip_non_page_views_comment' value = "if (page_views == 0) then cs_uri_stem = substr(cs_uri_stem, 0, last_index(cs_uri_stem, '/') + 1) . '(nonpage)';" requires_fields = { cs_uri_stem = true } } # strip_non_page_views # strip_non_page_views2 = { # label = '$lang_admin.log_filters.strip_non_page_views_label' # comment = '$lang_admin.log_filters.strip_non_page_views_comment' # value = "if (page_views == 0) then cs_uri = substr(cs_uri, 0, last_index(cs_uri, '/') + 1) . '(nonpage)';" # requires_fields = { # cs_uri = true # } # } # strip_non_page_views2 mark_entry = { label = '$lang_admin.log_filters.mark_entry_label' comment = '$lang_admin.log_filters.mark_entry_comment' value = 'hits = 1;' } # mark_entry } # log.filters database.numerical_fields = { hits = { label = "$lang_stats.field_labels.hits" default = false requires_log_field = false type = "int" display_format_type = "integer" entries_field = true } # hits page_views = { label = "$lang_stats.field_labels.page_views" default = true requires_log_field = false type = "int" display_format_type = "integer" } # page_views visitors = { label = "$lang_stats.field_labels.visitors" default = false requires_log_field = true log_field = "c_ip" type = "unique" display_format_type = "integer" } # visitors filesize = { label = "$lang_stats.field_labels.filesize" default = false requires_log_field = true log_field = "filesize" type = "float" display_format_type = "bandwidth" } # filesize bytes = { label = "$lang_stats.field_labels.bytes" default = false log_field = "bytes" requires_log_field = true type = "float" display_format_type = "bandwidth" } sc_bytes = { label = "$lang_stats.field_labels.sc_bytes" default = false log_field = "sc_bytes" requires_log_field = true type = "float" display_format_type = "bandwidth" } cs_bytes = { label = "$lang_stats.field_labels.cs_bytes" default = false log_field = "cs_bytes" requires_log_field = true type = "float" display_format_type = "bandwidth" } time_taken = { label = $lang_stats.field_labels.time_taken default = false requires_log_field = true type = float display_format_type = duration_milliseconds } # time_taken } # database.numerical_fields create_profile_wizard_options = { date_time_tracking = true host_tracking = true # How the reports should be grouped in the report menu report_groups = { date_time_group = "" content_group = { file_type = true #cs_uri = true cs_uri_stem = true cs_uri_query = true x_category = true } visitor_demographics_group = { c_ip = true cs_ip = true domain_description = true location = true cs_username = true organization = true isp = true domain = true x_user = true c_dns = true } visitor_systems_group = { screen_dimensions = true screen_depth = true web_browser = true operating_system = true } referrer_group = { cs_referer = true search_engine = true search_phrase = true referrer_description = true search_phrase_by_search_engine = true } server_group = { s_sitename = true s_computername = true s_ip = true s_port = true cs_host = true } other_group = { cs_version = true worm = true spider = true cs_method = true sc_status = true sc_substatus = true sc_win32_status = true cs_cookie = true x_req_size = true } } # report_groups final_step = ` include "templates.admin.profiles.setup_reports_util"; string profile = "profiles." . volatile.new_profile_name; # Remove cs_uri_query and cs_cookie from the database fields delete_database_field(profile, 'cs_uri_query'); delete_database_field(profile, 'cs_cookie'); # Create the standard reports add_standard_reports(profile); # Disable referrer filters if there's no referrer field if (!node_exists(profile . ".database.fields.cs_referer")) then ( (profile . ".log.filters.simplify_referrer.disabled") = true; (profile . ".log.filters.internal_referrer.disabled") = true; ); ` # end final_step } # create_profile_wizard_options log.field_options = { sessions_page_field = "{=if (node_exists('profiles.' . volatile.new_profile_name . '.database.fields.cs_uri')) then 'cs_uri' else 'cs_uri_stem'=}" # sessions_page_field = "cs_uri_stem" sessions_visitor_id_field = "{=if (node_exists('profiles.' . volatile.new_profile_name . '.database.fields.c_ip')) then 'c_ip' else 'cs_ip'=}" sessions_event_field = "page_views" } # log.field_options } # generic_w3_cweb