# Copyright (c) 2012 Flowerfire, Inc. All Rights Reserved.

# Log format plugin for Juniper Media Flow Controller access logs (W3C style).
# The field list is not fixed: it is read from the "# Format:" header line of
# the log, and the log fields, database fields, and parsing regular expression
# are all built from that line by log.filter_preprocessor below.
media_flow_controller_w3c = {

  plugin_version = "1.1"
  info.1.manufacturer = "Juniper"
  info.1.device = "Media Flow Controller Access (W3C)"
  info.1.version.1 = "mfc-11.B.3"

  # 2012-07-06 - 1.0 - GMF - Initial creation
  # 2012-07-17 - 1.0.1 - GMF - Fixed bug in naming of page_views instance (no spaces allowed); capitalized page_views field.
  # 2012-07-23 - 1.1 - GMF - Added support for Countries/Regions/Cities based on cs-host field (and c-ip as before). Fixed support for cs_user_agent derived fields, and cs_referrer-derived fields.

  # The name of the log format
  log.format.format_label = "Juniper Media Flow Controller Access (W3C) Log Format"
  log.miscellaneous.log_data_type = "http_access"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this regular expression
  log.format.autodetect_regular_expression = "^# Software: Media Flow Controller "

  # This handles "# Format" lines, and creates log and database fields from them.
  # Every other line starting with # is rejected so it is not parsed as data.
  log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^# Format: +([^ ].*)$')) then (

  # The space-separated field list that follows "# Format:"
  string fields = $1;
  string fieldname;
  v.logfieldindex = 1;

  # Path of this profile's numerical fields; used below to avoid creating
  # a non-numerical database field with the same name as a numerical one.
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # This subroutine creates a database field node under
  # profiles.<profile>.database.fields.<fieldname>.
  #   fieldname - name of the database field to create
  #   top       - stored as suppress_top when not 0
  #   bottom    - stored as suppress_bottom when not 0
  # Evaluates to the created node.
  subroutine(create_database_field(string fieldname, int top, int bottom), (
    #echo("create_database_field: " . fieldname); # debug
    debug_message("create_database_field(" . fieldname . ")\n");
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    # Assigning through the path creates the node
    (databasefieldpath . "") = "";
    node databasefield = databasefieldpath;
    if (top ne 0) then
      @databasefield{"suppress_top"} = top;
    if (bottom ne 0) then
      @databasefield{"suppress_bottom"} = bottom;

    # sc_age is an integer field, non-aggregating but with many values. Leave it unnormalized and unindexed.
    if (fieldname eq "sc_age") then (
      @databasefield{"type"} = "int";
      @databasefield{"aggregation_method"} = "none";
      @databasefield{"index"} = false;
    );
    databasefield;
  ));

  # This subroutine creates a log field node under
  # profiles.<profile>.log.fields.<fieldname>.
  #   fieldname - name of the log field to create
  #   type      - stored as the type subnode when not empty
  # Evaluates to the created node.
  subroutine(create_log_field(string fieldname, string type), (
    debug_message("create_log_field(" . fieldname . "; type=" . type . ")\n");
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    # Assigning through the path creates the node
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # The parsing expression is built up one field at a time, starting at the line anchor
  log.format.parsing_regular_expression = '^';

  # Derived log fields are collected here and created only after all the
  # fields listed in the header have been created (see the foreach below).
  node log_fields_at_end = new_node();

  # This subroutine records a log field (and its type) for creation after the main loop.
  subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (
    log_fields_at_end{fieldname}{"type"} = type;
  ));

  # Extract the fields one at a time
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$') or matches_regular_expression(fields, '^([^ ]+)$')) (

    string unconverted_fieldname = $1;
    fields = $2;

    # Clean up the field name: lowercase it, replace every character that is
    # not a-z or 0-9 with an underscore, then strip trailing underscores.
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # cs_uri is handled under the name cs_uri_stem
    if (fieldname eq 'cs_uri') then (
      fieldname = 'cs_uri_stem';
    );
    #echo("fieldname: " . fieldname);

    # Get the log field type
    string log_field_type = '';
    if (fieldname eq 'cs_referer') then (
      log_field_type = 'url';
    );
    else if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
    );
    else if (fieldname eq 'cs_user_agent') then (
      log_field_type = 'agent';
    );
    else if ((fieldname eq 'c_ip') or (fieldname eq "cs_host")) then (
      log_field_type = 'host';
    );

    # Create the log field (but not cs_request, which is really three log fields, created below).
    if (fieldname eq "cs_request") then (
      # The request is split into method, URI and protocol
      create_log_field('cs_method', '');
      create_log_field('cs_uri_stem', 'page');
      create_log_field('cs_protocol', '');
      create_log_field_at_end(log_fields_at_end, 'file_type', 'file');
      create_log_field_at_end(log_fields_at_end, 'screen_dimensions', '');
      create_log_field_at_end(log_fields_at_end, 'screen_depth', '');
      log.format.parsing_regular_expression .= '([^ ]+) ([^ ]+) ([^ ]+)';
    );
    else if (fieldname eq 'cs_referer') then (
      create_log_field(fieldname, log_field_type);
      create_log_field_at_end(log_fields_at_end, 'search_engine', '');
      create_log_field_at_end(log_fields_at_end, 'search_phrase', '');
      log.format.parsing_regular_expression .= '([^ ]+)';
    );
    else if (fieldname eq 'cs_user_agent') then (
      create_log_field(fieldname, log_field_type);
      create_log_field_at_end(log_fields_at_end, 'operating_system', '');
      create_log_field_at_end(log_fields_at_end, 'web_browser', '');
      create_log_field_at_end(log_fields_at_end, 'spider', '');
      # Unlike the other fields, this pattern also matches spaces
      log.format.parsing_regular_expression .= '(.*)';
    );
    else if (fieldname eq "time") then (
      # A bracketed timestamp: the text before the first colon is captured as
      # date, the text up to the next space as time; the rest inside the
      # brackets is matched but not captured.
      create_log_field('date', '');
      create_log_field('time', '');
      log.format.parsing_regular_expression .= '[[]([^:]+):([^ ]+) [^]]+[]]';
    );
    else (
      create_log_field(fieldname, log_field_type);
      log.format.parsing_regular_expression .= '([^ ]+)';
    );

    # If there's another field after this one, add a space to the regular expression
    if (length(fields) > 0) then
      log.format.parsing_regular_expression .= ' ';

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle localtime by creating date_time and derived database fields
      if (fieldname eq "time") then (
        create_database_field('date_time', 0, 0);
        create_database_field('day_of_week', 0, 0);
        create_database_field('hour_of_day', 0, 0);
      ); # if time

      # Create derived fields for agent (no database field is created for cs_user_agent itself)
      else if (fieldname eq "cs_user_agent") then (
        create_database_field('operating_system', 0, 0);
        create_database_field('web_browser', 0, 0);
        create_database_field('spider', 0, 0);
      );

      # Create the c_ip / cs_host database field and the derived location field
      else if ((fieldname eq "c_ip") or (fieldname eq "cs_host")) then (
        create_database_field(fieldname, 0, 0);
        create_database_field('location', 0, 0);
      );

      # Create database field cs_referer and derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('cs_referer', 1, 9);
        create_database_field('search_engine', 0, 0);
        create_database_field('search_phrase', 0, 0);
      );

      # Create the database fields for the three parts of cs_request, plus the derived fields
      else if (fieldname eq "cs_request") then (
        create_database_field('cs_method', 0, 0);
        create_database_field('cs_uri_stem', 0, 9);
        create_database_field('file_type', 0, 0);
        create_database_field('screen_dimensions', 0, 0);
        create_database_field('screen_depth', 0, 0);
        create_database_field('cs_protocol', 0, 0);
      );

      # Don't add a database field for numerical fields
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname, 0, 0);

    ); # if creating profile

  ); # while another field

  # Create any final log fields
  node lfae;
  foreach lfae log_fields_at_end (
    #echo("Final log field creation: " . node_name(lfae));
    create_log_field(node_name(lfae), @lfae{"type"});
  );

  # Don't parse the # Format line as a data line
  'reject';

); # if # Format

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

  # Log fields (left empty here; they are created by log.filter_preprocessor above)
  log.fields = {
  } # log.fields

  # Database fields (left empty here; they are created by log.filter_preprocessor above)
  database.fields = {
  } # database.fields

  # Get web browser, operating system, and spider information from the user-agent field.
  log.parsing_filters.derive_from_user_agent = {
    value = `
get_user_agent_info(cs_user_agent);
web_browser = volatile.web_browser;
operating_system = volatile.operating_system;
spider = volatile.spider;
`
    requires_fields = {
      cs_user_agent = true
    }
  } # derive_from_user_agent

  # Log Filters
  log.filters = {

    # Replace everything after the first ? in the page field with (parameters).
    # This plugin never creates a log field named page; its page-typed log field
    # is cs_uri_stem (see log.filter_preprocessor and sessions_page_field), so
    # the filter operates on cs_uri_stem.
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(cs_uri_stem, '?')) then cs_uri_stem = substr(cs_uri_stem, 0, index(cs_uri_stem, '?') + 1) . '(parameters)';"
    } # remove_query

    # Overwrite cs_range with a fixed placeholder value
    suppress_cs_range = {
      label = "$lang_admin.log_filters.suppress_field_label"
      comment = "$lang_admin.log_filters.suppress_field_comment"
      value = "cs_range = '[suppressed]'"
    } # suppress_cs_range

    # Overwrite sc_etag with a fixed placeholder value
    suppress_etag = {
      label = "$lang_admin.log_filters.suppress_field_label"
      comment = "$lang_admin.log_filters.suppress_field_comment"
      value = "sc_etag = '[suppressed]'"
    } # suppress_etag

    # Count each log entry as one access
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'accesses = 1;'
    } # mark_entry

  } # log.filters

  # Fields used for session analysis
  log.field_options = {
    sessions_page_field = "cs_uri_stem"
    sessions_visitor_id_field = "cs_host"
    sessions_event_field = "page_views"
  } # log.field_options

  # Numerical database fields. The preprocessor skips creating a normal
  # database field for any header field whose name appears here.
  database.numerical_fields = {

    accesses = {
      default = true
      requires_log_field = false
      entries_field = true
    } # accesses

    # Unique count based on the cs_host log field
    unique_client_ips = {
      log_field = "cs_host"
      type = "unique"
    } # unique_client_ips

    sc_bytes_content = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # sc_bytes_content

    sc_content_length = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # sc_content_length

    time_taken = {
      type = "int"
      integer_bits = 64
      display_format_type = duration_milliseconds
    } # time_taken

  } # database.numerical_fields

  create_profile_wizard_options = {

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
    } # report_groups

    snapons = {

      # Attach a page_views field
      page_views = {
        snapon = "page_views"
        name = "page_views"
        label = "$lang_admin.snapons.page_views.label"
        prompt_to_attach = true
        prompt_to_attach_default = true
        parameters = {
          file_type_field.parameter_value = "file_type"
          server_response_field.parameter_value = "sc_status"
          page_views_field_name.final_node_name = "page_views"
          page_views_field_name.parameter_value = "{=capitalize(lang_stats.field_labels.page_views)=}"
        } # parameters
        requires_log_fields = {
          cs_uri_stem = true
          sc_status = true
        }
      } # page_views

      # Add the standard reports
      add_standard_reports = {
        name = "add_standard_reports"
        label = "add_standard_reports"
        snapon = "add_standard_reports"
      } # add_standard_reports

    } # snapons

  } # create_profile_wizard_options

} # media_flow_controller_w3c