# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in definition for Juniper Media Flow Controller access logs (NCSA style).
juniper_mfc = {

plugin_version = "2.0.4"

# Device this plug-in applies to
info.1.manufacturer = "Juniper"
info.1.device = "Media Flow Controller (Access Logs) (NCSA)"
info.1.version.1 = ""

# Revision history
# 2011/10/27 - 1.0 - GMF - cloned apache_combined
# 2011/11/18 - 1.0.1 - GMF - improved autodetect to look for the final numerical field
# 2011-11-24 - 1.0.2 - GMF - Modified autodetection to look for this specific #Fields line, as requested ThreadID:1206750.
# 2011-12-03 - 1.0.3 - GMF - Changed name to "Media Flow Controller" at Juniper's suggestion.
# 2011-12-03 - 1.0.4 - GMF - Changed name to "Media Flow Controller Access Log" at Juniper's suggestion.
# 2012-07-30 - 2.0 - GMF - Enhanced plug-in to handle any #Fields line, and to set up log fields, database fields, parsing, and filters appropriately.
# 2012-08-01 - 2.0.1 - GMF - Added support for quotes around field values
# 2012-08-10 - 2.0.2 - GMF - Changed name to have "Access Logs" in parentheses
# 2012-09-18 - 2.0.3 - GMF - Added NCSA to label
# 2012-10-19 - 2.0.4 - GMF - Added plugin_description

# The name of the log format
log.format.format_label = "Juniper Media Flow Controller Access Log Format (NCSA)"

# Classification of the log data
log.miscellaneous.log_data_type = "http_access"
log.miscellaneous.log_format_type = "web_server"

# Description contributed by Juniper
create_profile_wizard_options.plugin_description = `

This plugin detects and analyzes Media Flow Controller Access Logs that are in the NCSA Combined Log Format, which is used by many popular web servers, including Apache. Juniper Media Flow Controller version 11.A and earlier releases generated access logs that conformed to the NCSA standard. All access logs recorded the following fields, which are collectively referred to as the Combined Log Format (CLF):

Please see the Media Flow Controller CLI Reference Guide for a description of these fields. Later releases of Juniper Media Flow Controller support the more flexible W3C access log format, which is supported by a different Sawmill plug-in.

The reports generated by this plugin can be categorized as:

`

# The log is in this format if any of the first ten lines match this regular expression
# log.format.autodetect_regular_expression = "^[^ ]* [^ ]* .* \\[../.../....[: ]..:..:...*\\] \"[A-Z]* [^ ]*(\"| HTTP.*\") [0-9]* [-0-9]* [^ ]* \"[^\"]*\" [0-9]+$"
# log.format.autodetect_regular_expression = `#Fields: %h %V %u %t "%r" %s %b "%[{]Referer[}]i" "%[{]User-Agent[}]i" %y`
log.format.autodetect_regular_expression = `^#Fields: .*%h`

# Treat fields surrounded by square brackets (e.g. the date/time field) as a single quoted field.
# log.format.treat_brackets_as_quotes = "true"

# log.format.common_log_format = "true"
log.format.ignore_format_lines = "true"

# The format of dates and times in this log
# log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
# log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"

# This handles #Fields lines, and creates log and database fields from them.
#
# For each "#Fields: ..." line the preprocessor:
#   1. splits the header into space-separated format specifiers,
#   2. maps each specifier to a field name and a capturing sub-expression,
#   3. appends that sub-expression to log.format.parsing_regular_expression,
#   4. creates the matching log field (and, while a profile is being created,
#      the matching database fields) under profiles.<profile name>,
#   5. rejects the header line so it is not parsed as data.
# Any other line starting with '#' is rejected as well.
log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (

  # Remaining, not yet consumed, part of the #Fields header
  string fields = $1;
  string fieldname;
  v.logfieldindex = 1;

  # Node path of this profile's numerical database fields; used below to avoid
  # creating a normal database field for a field that is already numerical.
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # The parsing expression is rebuilt from scratch for every #Fields line
  log.format.parsing_regular_expression = '^';

  # Derived log fields are collected here and created after all parsed fields
  node log_fields_at_end = new_node();

  # This subroutine records a log field (and its type) to be created after the parsed fields
  subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (
    log_fields_at_end{fieldname}{"type"} = type;
  ));

  # This subroutine creates a database field
  subroutine(create_database_field(string fieldname), (
    #echo("create_database_field: " . fieldname);
    debug_message("create_database_field(" . fieldname . ")\n");
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    # Assigning to the path creates the node
    (databasefieldpath . "") = "";
    node databasefield = databasefieldpath;
#    set_subnode_value(databasefield, "label", fieldname);
    databasefield;
  ));

  # This subroutine creates a log field, setting its type when one is given
  subroutine(create_log_field(string fieldname, string type), (
#    echo("create_log_field(" . fieldname . "; type=" . type . ")");
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    # Assigning to the path creates the node
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
#    set_subnode_value(logfield, "label", fieldname);
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # Extract the fields one at a time.
  # NOTE(review): this loop only consumes a token that is followed by a space, so a
  # final token without a trailing space is never processed. Confirm that #Fields
  # lines always end with a space, or that ignoring the last field is intended.
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$')) (

    string format_specifier = $1;
    fields = $2;

    # Strip surrounding double quotes from the specifier, remembering that the value is quoted
    bool quoted = false;
    if (matches_regular_expression(format_specifier, '^"([^"]+)"')) then (
      quoted = true;
      format_specifier = $1;
    );
    #echo("format_specifier=" . format_specifier);
    string fieldname;

    # Handle special case hard-coded fields
    string field_regexp = '([^ ]+)';
    if (matches_regular_expression(format_specifier, '^R[0-9]+')) then fieldname = 'store_id';
    # The alternation is grouped so that ^ and $ apply to every region name,
    # not just to the first and last alternatives.
    else if (matches_regular_expression(format_specifier, '^(US|EMEA|JAPAN|CHINA|AUS)$')) then fieldname = 'store_region';
    else if (matches_regular_expression(format_specifier, '^[A-Z][a-z]+-Profile$')) then fieldname = 'store_category';
    else if (format_specifier eq '%b') then fieldname = 'sc_bytes_content';
    else if (format_specifier eq '%c') then fieldname = 'x_cache_hit';
    else if (format_specifier eq '%d') then fieldname = 'date';
    else if (format_specifier eq '%f') then fieldname = 'cs_uri_stem';
    else if (format_specifier eq '%h') then fieldname = 'cs_host';
    else if (format_specifier eq '%m') then fieldname = 'cs_method';
    else if (format_specifier eq '%p') then fieldname = 'x_hotness';
    else if (format_specifier eq '%q') then fieldname = 'cs_uri_query';
    else if (format_specifier eq '%r') then (
      # The request line yields three captures: operation, cs_uri_stem and protocol
      field_regexp = '([^ ]+) ([^ ]+) ([^ ]+)';
      create_log_field('operation', 'flat');
      create_log_field('cs_uri_stem', 'page');
      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
      fieldname = 'protocol';
    ) # %r
    else if (format_specifier eq '%s') then fieldname = 'sc_status';
    else if (format_specifier eq '%t') then (
      # The bracketed timestamp yields two captures: date and time
      field_regexp = '[[]([^:]+):([^ ]+) [^]]+[]]';
      create_log_field('date', 'date');
      fieldname = 'time';
    );
    else if (format_specifier eq '%u') then fieldname = 'user';
    else if (format_specifier eq '%v') then fieldname = 'x_server';
    else if (format_specifier eq '%y') then fieldname = 'sc_substatus';
    else if (format_specifier eq '%A') then fieldname = 'x-request-time';
    else if (format_specifier eq '%B') then fieldname = 'x-first-byte-out-time';
    else if (format_specifier eq '%C') then fieldname = 'cs(Cookie)';
    else if (format_specifier eq '%D') then fieldname = 'time-taken';
    else if (format_specifier eq '%E') then fieldname = 'x-time-used-ms';
    else if (format_specifier eq '%F') then fieldname = 'x-last-byte-out-time';
    else if (format_specifier eq '%H') then fieldname = 'cs-proto';
    else if (format_specifier eq '%I') then fieldname = 'cs-bytes';
    else if (format_specifier eq '%L') then fieldname = 'x-latency';
    else if (format_specifier eq '%M') then fieldname = 'x-data-len-ms';
    else if (format_specifier eq '%N') then fieldname = 'x-namespace';
    else if (format_specifier eq '%O') then fieldname = 'sc-bytes';
    else if (format_specifier eq '%R') then fieldname = 'x-revalidate-cache';
    else if (format_specifier eq '%U') then fieldname = 'cs-uri';
    else if (format_specifier eq '%V') then fieldname = 'server_domain';
    else if (format_specifier eq '%X') then fieldname = 'c-ip';
    else if (format_specifier eq '%Y') then fieldname = 's-ip';
    else if (format_specifier eq '%Z') then fieldname = 's-port';
    else if (matches_regular_expression(format_specifier, '^%[{]([^}]+)[}]([io]?)$')) then (
      # %{Name}i / %{Name}o: use the name itself, suffixed with _in or _out
      fieldname = $1;
      if ($2 eq 'o') then
        fieldname .= '_out';
      else if ($2 eq 'i') then
        fieldname .= '_in';
    );
    else (
      error("Unknown format specified in #Fields header line: '" . format_specifier . "'");
    );

    # A quoted value may contain spaces (e.g. a User-Agent string), which the default
    # pattern cannot match; inside quotes, capture everything up to the closing quote.
    # Specifiers that set their own pattern above (%r, %t) are left untouched.
    if (quoted) then (
      if (field_regexp eq '([^ ]+)') then
        field_regexp = '([^"]*)';
    );

    # Append this field's sub-expression, separated from the previous one by a space
    if (log.format.parsing_regular_expression ne '^') then
      log.format.parsing_regular_expression .= ' ';
    if (quoted) then
      log.format.parsing_regular_expression .= '"';
    log.format.parsing_regular_expression .= field_regexp;
    if (quoted) then
      log.format.parsing_regular_expression .= '"';

    string unconverted_fieldname = fieldname;

    # Clean up the field name: lowercase it, replace every character other than
    # a-z and 0-9 with an underscore, then drop trailing underscores
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # Get the log field type
    # NOTE(review): the %{Name}i branch above produces names ending in _in (for
    # example user_agent_in), and the agent check here uses 'user_agent' while the
    # database section below uses "cs_user_agent". Confirm which header spellings
    # are expected to reach the 'user_agent', 'cs_user_agent' and 'cs_referer' cases.
    string log_field_type = '';
    if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
      ("profiles." . internal.profile_name . ".log.fields.url.type") = 'flat';
    );
    if (fieldname eq 'cs_host') then (
      log_field_type = 'host';
      create_log_field_at_end(log_fields_at_end, 'location', '');
    );
    if (fieldname eq 'user_agent') then (
      log_field_type = 'agent';
      create_log_field_at_end(log_fields_at_end, 'web_browser', '');
      create_log_field_at_end(log_fields_at_end, 'operating_system', '');
    );
    if (fieldname eq 'cs_referer') then (
      log_field_type = 'url';
      create_log_field_at_end(log_fields_at_end, 'search_engine', '');
      create_log_field_at_end(log_fields_at_end, 'search_phrase', '');
    );

    # Create the log field
    create_log_field(fieldname, log_field_type);

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle time by creating date_time and derived database fields
      if (fieldname eq "time") then (
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
      ); # if localtime

      # Create derived field for agent
      else if (fieldname eq "cs_user_agent") then (
        create_database_field('operating_system');
        create_database_field('web_browser');
      );

      # Create derived field for IP
      else if (fieldname eq "cs_host") then (
        create_database_field('cs_host');
        create_database_field('location');
      );

      # Create derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('search_engine');
        create_database_field('search_phrase');
      );

      # Create derived file type field
      else if (fieldname eq "cs_uri_stem") then (
        create_database_field('file_type');
      );

      # Create derived file type field, and other %r fields
      else if (fieldname eq "protocol") then (
        create_database_field('operation');
        create_database_field('cs_uri_stem');
        create_database_field('file_type');
      );

      # Don't add a database field for numerical fields
#      else if (subnode_exists('database.fields', fieldname)) then (
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname);

    ); # if creating profile

  ); # while another field

  # Create any final log fields
  node lfae;
  foreach lfae log_fields_at_end (
#    echo("Final log field creation: " . node_name(lfae));
    create_log_field(node_name(lfae), @lfae{"type"});
  );

  # Don't parse the #Fields line as a data line
  'reject';

); # if #Fields

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

# Get search engine and search phrase information from the referrer field (before it gets simplified).
log.parsing_filters.compute_se_sp = {
  # Copies the search engine and search phrase reported for cs_referer into the
  # search_engine and search_phrase fields; applies only when cs_referer exists.
  value = `
if (get_search_engine_info(cs_referer)) then (
  search_engine = volatile.search_engine;
  search_phrase = volatile.search_phrase;
);
`
  requires_fields = {
    cs_referer = true
#    search_engine = true
#    search_phrase = true
  }
}

# Get web browser and operating system information from the user-agent field
# (the spider assignment is commented out).
log.parsing_filters.derive_from_user_agent = {
  value = `
get_user_agent_info(cs_user_agent);
web_browser = volatile.web_browser;
operating_system = volatile.operating_system;
#spider = volatile.spider;
`
  requires_fields = {
    cs_user_agent = true
  }
}

# Log Filters
log.filters = {

  # Replaces an empty ('-') referrer with a placeholder, and otherwise truncates
  # the referrer after its scheme and host part
  simplify_referrer = {
    label = "$lang_admin.log_filters.simplify_referrer_label"
    comment = "$lang_admin.log_filters.simplify_referrer_comment"
    value = "if (cs_referer eq '-') then cs_referer = '(no referrer)' else if (matches_regular_expression(cs_referer, '^([^:]+://[^/]+/)')) then cs_referer = $1 . '(omitted)'"
    requires_fields = {
      cs_referer = true
    }
  } # simplify_referrer

  # Disabled by default. Uses cs_referer, the referrer field name used by
  # requires_fields here and by the other filters in this plug-in.
  internal_referrer = {
    label = "$lang_admin.log_filters.internal_referrer_label"
    comment = "$lang_admin.log_filters.internal_referrer_comment"
    value = "if (contains(cs_referer, 'mydomain.com/')) then cs_referer = '(internal referrer)';"
    requires_fields = {
      cs_referer = true
    }
    disabled = true
  } # internal_referrer

  # Replaces a '-' user with a readable placeholder
  not_authenticated = {
    label = "$lang_admin.log_filters.not_authenticated_label"
    comment = "$lang_admin.log_filters.not_authenticated_comment"
    value = "if (user eq '-') then user = '(not authenticated)';"
    requires_fields = {
      user = true
    }
  } # not_authenticated

  # Keeps cs_uri_stem up to and including the first '?', replacing the rest with '(parameters)'
  remove_query = {
    label = "$lang_admin.log_filters.remove_query_label"
    comment = "$lang_admin.log_filters.remove_query_comment"
    value = "if (contains(cs_uri_stem, '?')) then cs_uri_stem = substr(cs_uri_stem, 0, index(cs_uri_stem, '?') + 1) . '(parameters)';"
    requires_fields = {
      cs_uri_stem = true
    }
  } # remove_query

  # Counts a 4xx status as an error; counts anything else as a page view unless
  # its file_type is one of the listed image/style/script types
  categorize = {
    label = "$lang_admin.log_filters.categorize_hits_label"
    comment = "$lang_admin.log_filters.categorize_hits_comment"
    value = `if (starts_with(sc_status, '4')) then (
  errors = 1;
#  if (sc_status eq '404') then (
#    broken_links = 1;
#    hit_type = "broken link";
#  )
#  else (
#    hit_type = "error";
#  )
)
#else if (!starts_with(spider, '(')) then (
#  hit_type = "spider";
#  spiders = 1;
#)
#else if (!starts_with(worm, '(')) then (
#  hit_type = "worm";
#  worms = 1;
#)
#else if (!starts_with(screen_dimensions, '(')) then (
#  hit_type = "screen info";
#  screen_info_hits = 1;
#)
else if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then (
#  hit_type = "hit";
)
else (
#  hit_type = "page view";
  page_views = 1;
)`
    requires_fields = {
      sc_status = true
    }
  } # categorize

  # Counts every entry as one access
  mark_entry = {
    label = '$lang_admin.log_filters.mark_entry_label'
    comment = '$lang_admin.log_filters.mark_entry_comment'
    value = 'accesses = 1;'
  } # mark_entry

#  set_page_for_worm = {
#    label = "$lang_admin.log_filters.set_page_for_worm_label"
#    comment = "$lang_admin.log_filters.set_page_for_worm_comment"
#    value = "if (worms == 1) then page = '(worm)';"
#  } # set_page_for_worm

  # For entries that are not page views, keeps cs_uri_stem up to and including
  # its last '/', replacing the rest with '(nonpage)'
  strip_non_page_views = {
    label = '$lang_admin.log_filters.strip_non_page_views_label'
    comment = '$lang_admin.log_filters.strip_non_page_views_comment'
    value = "if (page_views == 0) then cs_uri_stem = substr(cs_uri_stem, 0, last_index(cs_uri_stem, '/') + 1) . '(nonpage)';"
    requires_fields = {
      cs_uri_stem = true
    }
  } # strip_non_page_views

} # log.filters

# Fields used for session computation
log.field_options = {
  sessions_page_field = "cs_uri_stem"
  sessions_visitor_id_field = "cs_host"
  sessions_event_field = "page_views"
} # log.field_options

# Numerical (aggregating) database fields
database.numerical_fields = {

  accesses = {
    default = false
    requires_log_field = false
    entries_field = true
  } # accesses

  page_views = {
    default = true
    requires_log_field = false
  } # page_views

  errors = {
    requires_log_field = false
  } # errors

#  broken_links = {
#    label = "$lang_stats.field_labels.broken_links"
#    default = true
#    requires_log_field = false
#    type = "int"
#    display_format_type = "integer"
#  } # broken_links

#  screen_info_hits = {
#    label = "$lang_stats.field_labels.screen_info_hits"
#    default = false
#    requires_log_field = false
#    type = "int"
#    display_format_type = "integer"
#  } # screen_info_hits

  unique_client_ips = {
    log_field = "cs_host"
    type = "unique"
  } # unique_client_ips

  sc_bytes_content = {
    type = "int"
    integer_bits = 64
    display_format_type = "bandwidth"
  } # sc_bytes_content

} # database.numerical_fields

create_profile_wizard_options = {

  # How the reports should be grouped in the report menu
  report_groups = {
    date_time_group = ""
    hit_type = ""
    content_group = {
      cs_uri_stem = true
      file_type = true
      x_namespace = true
      cs_host = true
    }
    users_group = {
      user = true
    }
    visitor_demographics_group = {
      hostname = true
      domain_description = true
      location = true
      organization = true
      isp = true
      domain = true
      authenticated_user = true
    }
    visitor_systems_group = {
      screen_dimensions = true
      screen_depth = true
      web_browser = true
      operating_system = true
    }
    referrer_group = {
      referrer = true
      referrer_description = true
      search_engine = true
      search_phrase = true
#      search_phrase_by_search_engine = true
    }
    other_group = {
      store_id = true
      store_region = true
      store_category = true
      sc_status = true
      worm = true
      spider = true
      server_domain = true
      server_response = true
      pragma_in = true
      pragma_out = true
      cache_control_in = true
      cache_control_out = true
      vary_out = true
      x_cache_hit = true
      protocol = true
      operation = true
    }
  } # report_groups

  snapons = {

    # Attach a top_level_domain snapon
    top_level_domain = {
      snapon = "top_level_domain"
      name = "top_level_domain"
      label = "$lang_admin.snapons.top_level_domain.label"
      parameters = {
        url_field.parameter_value = "cs_uri_stem"
        field_name = {
          parameter_value = "$lang_admin.field_labels.top_level_domain"
          final_node_name = "top_level_domain"
        }
      } # parameters
      requires_log_fields = {
        cs_uri_stem = true
      }
    } # top_level_domain

    # Attach a gateway_reports snapon
    gateway_reports = {
      snapon = "gateway_reports"
      name = "gateway_reports"
      label = "$lang_admin.snapons.gateway_reports.label"
      parameters = {
        user_field.parameter_value = "cs_host"
        have_category_field.parameter_value = false
#        category_field.parameter_value = "category"
        host_field.parameter_value = "top_level_domain"
        page_views_field.parameter_value = "page_views"
        bytes_in_field.parameter_value = "sc_bytes_content"
        sort_by_field.parameter_value = "page_views"
      } # parameters
    } # gateway_reports

    # 2013-02-06 - GMF - Now added in gateway_reports
#    # Add the standard reports
#    add_standard_reports = {
#      name = "add_standard_reports"
#      label = "add_standard_reports"
#      snapon = "add_standard_reports"
#    } # add_standard_reports

  } # snapons

} # create_profile_wizard_options

} # juniper_mfc