# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plug-in for Juniper Media Flow Controller access logs.
#
# The layout of the log is not fixed: each log carries a "#Fields:" header line listing
# printf-style format specifiers (%h, %t, "%r", %{Header}i, ...). The filter preprocessor
# below reads that header and, from it, builds the parsing regular expression, creates the
# log fields, and (while a profile is being created) creates the database fields.

juniper_mfc = {

  plugin_version = "2.0.1"
  info.1.manufacturer = "Juniper"
  info.1.device = "Media Flow Controller Access"
  info.1.version.1 = ""

  # 2011/10/27 - 1.0 - GMF - cloned apache_combined
  # 2011/11/18 - 1.0.1 - GMF - improved autodetect to look for the final numerical field
  # 2011-11-24 - 1.0.2 - GMF - Modified autodetection to look for this specific #Fields line, as requested ThreadID:1206750.
  # 2011-12-03 - 1.0.3 - GMF - Changed name to "Media Flow Controller" at Juniper's suggestion.
  # 2011-12-03 - 1.0.4 - GMF - Changed name to "Media Flow Controller Access Log" at Juniper's suggestion.
  # 2012-07-30 - 2.0 - GMF - Enhanced plug-in to handle any #Fields line, and to set up log fields, database fields, parsing, and filters appropriately.
  # 2012-08-01 - 2.0.1 - GMF - Added support for quotes around field values

  # The name of the log format
  log.format.format_label = "Juniper Media Flow Controller Access Log Format"
  log.miscellaneous.log_data_type = "http_access"
  log.miscellaneous.log_format_type = "web_server"

  # The log is in this format if any of the first ten lines match this regular expression
#  log.format.autodetect_regular_expression = "^[^ ]* [^ ]* .* \\[../.../....[: ]..:..:...*\\] \"[A-Z]* [^ ]*(\"| HTTP.*\") [0-9]* [-0-9]* [^ ]* \"[^\"]*\" [0-9]+$"
#  log.format.autodetect_regular_expression = `#Fields: %h %V %u %t "%r" %s %b "%[{]Referer[}]i" "%[{]User-Agent[}]i" %y`
  log.format.autodetect_regular_expression = `^#Fields: .*%h`

  # Treat fields surrounded by square brackets (e.g. the date/time field) as a single quoted field.
#  log.format.treat_brackets_as_quotes = "true"
#  log.format.common_log_format = "true"
  log.format.ignore_format_lines = "true"

  # The format of dates and times in this log
#  log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
#  log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"

  # This handles #Fields lines, and creates log and database fields from them
  log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (

  string fields = $1;
  string fieldname;
  v.logfieldindex = 1;
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";

  # The parsing expression is rebuilt from scratch for every #Fields line
  log.format.parsing_regular_expression = '^';

  # Derived log fields (file_type, location, ...) are collected here and created after
  # all of the fields which are actually present in the log line.
  node log_fields_at_end = new_node();

  # This subroutine remembers a log field (and its type) for creation after the main loop
  subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (
    log_fields_at_end{fieldname}{"type"} = type;
  ));

  # This subroutine creates a database field
  subroutine(create_database_field(string fieldname), (
    #echo("create_database_field: " . fieldname);
    debug_message("create_database_field(" . fieldname . ")\n");
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    (databasefieldpath . "") = "";
    node databasefield = databasefieldpath;
#    set_subnode_value(databasefield, "label", fieldname);
    databasefield;
  ));

  # This subroutine creates a log field, and sets its type if one is given
  subroutine(create_log_field(string fieldname, string type), (
#    echo("create_log_field(" . fieldname . "; type=" . type . ")");
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
#    set_subnode_value(logfield, "label", fieldname);
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));

  # The loop below consumes one space-terminated specifier per pass, so make sure the
  # final specifier is space-terminated too; otherwise it would be silently dropped.
  if (!matches_regular_expression(fields, ' $')) then
    fields .= ' ';

  # Extract the fields one at a time
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$')) (

    string format_specifier = $1;
    fields = $2;

    # A specifier wrapped in double quotes means the value is quoted in the log too
    bool quoted = false;
    if (matches_regular_expression(format_specifier, '^"([^"]+)"')) then (
      quoted = true;
      format_specifier = $1;
    );
    #echo("format_specifier=" . format_specifier);

    string fieldname;

    # Handle special case hard-coded fields
    string field_regexp = '([^ ]+)';
    if (matches_regular_expression(format_specifier, '^R[0-9]+')) then fieldname = 'store_id';
    # The alternation is parenthesized so that ^ and the end anchor apply to every alternative
    else if (matches_regular_expression(format_specifier, '^(US|EMEA|JAPAN|CHINA|AUS)$')) then fieldname = 'store_region';
    else if (matches_regular_expression(format_specifier, '^[A-Z][a-z]+-Profile$')) then fieldname = 'store_category';

    else if (format_specifier eq '%b') then fieldname = 'sc_bytes_content';
    else if (format_specifier eq '%c') then fieldname = 'x_cache_hit';
    else if (format_specifier eq '%d') then fieldname = 'date';
    else if (format_specifier eq '%f') then fieldname = 'cs_uri_stem';
    else if (format_specifier eq '%h') then fieldname = 'cs_host';
    else if (format_specifier eq '%m') then fieldname = 'cs_method';
    else if (format_specifier eq '%p') then fieldname = 'x_hotness';
    else if (format_specifier eq '%q') then fieldname = 'cs_uri_query';

    # %r is the request line; it is split into operation, cs_uri_stem and protocol
    else if (format_specifier eq '%r') then (
      field_regexp = '([^ ]+) ([^ ]+) ([^ ]+)';
      create_log_field('operation', 'flat');
      create_log_field('cs_uri_stem', 'page');
      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
      fieldname = 'protocol';
    ) # %r

    else if (format_specifier eq '%s') then fieldname = 'sc_status';

    # %t is a bracketed timestamp; it is split into date and time, and the rest
    # of the bracketed text after the space is matched but not captured
    else if (format_specifier eq '%t') then (
      field_regexp = '[[]([^:]+):([^ ]+) [^]]+[]]';
      create_log_field('date', 'date');
      fieldname = 'time';
    );

    else if (format_specifier eq '%u') then fieldname = 'user';
    else if (format_specifier eq '%v') then fieldname = 'x_server';
    else if (format_specifier eq '%y') then fieldname = 'sc_substatus';
    else if (format_specifier eq '%A') then fieldname = 'x-request-time';
    else if (format_specifier eq '%B') then fieldname = 'x-first-byte-out-time';
    else if (format_specifier eq '%C') then fieldname = 'cs(Cookie)';
    else if (format_specifier eq '%D') then fieldname = 'time-taken';
    else if (format_specifier eq '%E') then fieldname = 'x-time-used-ms';
    else if (format_specifier eq '%F') then fieldname = 'x-last-byte-out-time';
    else if (format_specifier eq '%H') then fieldname = 'cs-proto';
    else if (format_specifier eq '%I') then fieldname = 'cs-bytes';
    else if (format_specifier eq '%L') then fieldname = 'x-latency';
    else if (format_specifier eq '%M') then fieldname = 'x-data-len-ms';
    else if (format_specifier eq '%N') then fieldname = 'x-namespace';
    else if (format_specifier eq '%O') then fieldname = 'sc-bytes';
    else if (format_specifier eq '%R') then fieldname = 'x-revalidate-cache';
    else if (format_specifier eq '%U') then fieldname = 'cs-uri';
    else if (format_specifier eq '%V') then fieldname = 'server_domain';
    else if (format_specifier eq '%X') then fieldname = 'c-ip';
    else if (format_specifier eq '%Y') then fieldname = 's-ip';
    else if (format_specifier eq '%Z') then fieldname = 's-port';

    # %{Name}, %{Name}i and %{Name}o: the field is named after the header, with an
    # _in or _out suffix for the i and o forms.
    # NOTE(review): as written, %{User-Agent}i becomes user_agent_in and %{Referer}i becomes
    # referer_in after cleanup, so the cs_user_agent / cs_referer handling further down and
    # in the parsing filters does not apply to them - confirm whether a mapping is intended.
    else if (matches_regular_expression(format_specifier, '^%[{]([^}]+)[}]([io]?)$')) then (
      fieldname = $1;
      if ($2 eq 'o') then
        fieldname .= '_out';
      else if ($2 eq 'i') then
        fieldname .= '_in';
    );

    else (
      error("Unknown format specified in #Fields header line: '" . format_specifier . "'");
    );

    # A quoted value may itself contain spaces (e.g. a User-Agent string), so a quoted
    # single-value field runs up to the closing quote rather than up to the next space.
    # Multi-part expressions (%r, %t) keep their own pattern.
    if (quoted) then (
      if (field_regexp eq '([^ ]+)') then
        field_regexp = '([^"]*)';
    );

    # Append this field to the parsing expression, space-separated from the previous field
    if (log.format.parsing_regular_expression ne '^') then
      log.format.parsing_regular_expression .= ' ';
    if (quoted) then
      log.format.parsing_regular_expression .= '"';
    log.format.parsing_regular_expression .= field_regexp;
    if (quoted) then
      log.format.parsing_regular_expression .= '"';

    string unconverted_fieldname = fieldname;

    # Clean up the field name: lowercase it, replace anything other than a-z and 0-9
    # with an underscore, and strip trailing underscores
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;

    # Get the log field type
    string log_field_type = '';
    if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
      ("profiles." . internal.profile_name . ".log.fields.url.type") = 'flat';
    );
    if (fieldname eq 'cs_host') then (
      log_field_type = 'host';
      create_log_field_at_end(log_fields_at_end, 'location', '');
    );
    # cs_user_agent is accepted as well as user_agent, because that is the name used by
    # the database field creation below and by the derive_from_user_agent parsing filter
    if ((fieldname eq 'user_agent') or (fieldname eq 'cs_user_agent')) then (
      log_field_type = 'agent';
      create_log_field_at_end(log_fields_at_end, 'web_browser', '');
      create_log_field_at_end(log_fields_at_end, 'operating_system', '');
    );
    if (fieldname eq 'cs_referer') then (
      log_field_type = 'url';
      create_log_field_at_end(log_fields_at_end, 'search_engine', '');
      create_log_field_at_end(log_fields_at_end, 'search_phrase', '');
    );

    # Create the log field
    create_log_field(fieldname, log_field_type);

    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (

      # Handle time by creating date_time and derived database fields
      if (fieldname eq "time") then (
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
      ); # if time

      # Create derived field for agent
      else if (fieldname eq "cs_user_agent") then (
        create_database_field('operating_system');
        create_database_field('web_browser');
      );

      # Create derived field for IP
      else if (fieldname eq "cs_host") then (
        create_database_field('cs_host');
        create_database_field('location');
      );

      # Create derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('search_engine');
        create_database_field('search_phrase');
      );

      # Create derived file type field
      # NOTE(review): unlike the protocol branch below, this branch does not create a
      # cs_uri_stem database field - confirm whether that is intended.
      else if (fieldname eq "cs_uri_stem") then (
        create_database_field('file_type');
      );

      # Create derived file type field, and other %r fields
      else if (fieldname eq "protocol") then (
        create_database_field('operation');
        create_database_field('cs_uri_stem');
        create_database_field('file_type');
      );

      # Don't add a database field for numerical fields
#      else if (subnode_exists('database.fields', fieldname)) then (
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );

      # Create a normal database field
      else
        create_database_field(fieldname);

    ); # if creating profile

  ); # while another field

  # Create any final log fields
  node lfae;
  foreach lfae log_fields_at_end (
#    echo("Final log field creation: " . node_name(lfae));
    create_log_field(node_name(lfae), @lfae{"type"});
  );

  # Don't parse the #Fields line as a data line
  'reject';

); # if #Fields

# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`

  # Get search engine and search phrase information from the referrer field (before it gets simplified).
  log.parsing_filters.compute_se_sp = {
    value = `
if (get_search_engine_info(cs_referer)) then (
  search_engine = volatile.search_engine;
  search_phrase = volatile.search_phrase;
);
`
    requires_fields = {
      cs_referer = true
#      search_engine = true
#      search_phrase = true
    }
  }

  # Get web browser and operating system information from the user-agent field
  # (the spider assignment is currently commented out).
  log.parsing_filters.derive_from_user_agent = {
    value = `
get_user_agent_info(cs_user_agent);
web_browser = volatile.web_browser;
operating_system = volatile.operating_system;
#spider = volatile.spider;
`
    requires_fields = {
      cs_user_agent = true
    }
  }

  # Log Filters
  log.filters = {

    # Replace '-' with '(no referrer)', and otherwise keep only scheme://host/ of the referrer
    simplify_referrer = {
      label = "$lang_admin.log_filters.simplify_referrer_label"
      comment = "$lang_admin.log_filters.simplify_referrer_comment"
      value = "if (cs_referer eq '-') then cs_referer = '(no referrer)' else if (matches_regular_expression(cs_referer, '^([^:]+://[^/]+/)')) then cs_referer = $1 . '(omitted)'"
      requires_fields = {
        cs_referer = true
      }
    } # simplify_referrer

    # Disabled by default. Operates on cs_referer, the field it requires; this plug-in
    # creates no log field called referrer.
    internal_referrer = {
      label = "$lang_admin.log_filters.internal_referrer_label"
      comment = "$lang_admin.log_filters.internal_referrer_comment"
      value = "if (contains(cs_referer, 'mydomain.com/')) then cs_referer = '(internal referrer)';"
      requires_fields = {
        cs_referer = true
      }
      disabled = true
    } # internal_referrer

    not_authenticated = {
      label = "$lang_admin.log_filters.not_authenticated_label"
      comment = "$lang_admin.log_filters.not_authenticated_comment"
      value = "if (user eq '-') then user = '(not authenticated)';"
      requires_fields = {
        user = true
      }
    } # not_authenticated

    # Replace everything after the '?' in the page with '(parameters)'
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(cs_uri_stem, '?')) then cs_uri_stem = substr(cs_uri_stem, 0, index(cs_uri_stem, '?') + 1) . '(parameters)';"
      requires_fields = {
        cs_uri_stem = true
      }
    } # remove_query

    # Count 4xx statuses as errors; count everything else as a page view unless its
    # file type is one of the listed image/style/script types
    categorize = {
      label = "$lang_admin.log_filters.categorize_hits_label"
      comment = "$lang_admin.log_filters.categorize_hits_comment"
      value = `if (starts_with(sc_status, '4')) then (
  errors = 1;
#  if (sc_status eq '404') then (
#    broken_links = 1;
#    hit_type = "broken link";
#  )
#  else (
#    hit_type = "error";
#  )
)
#else if (!starts_with(spider, '(')) then (
#  hit_type = "spider";
#  spiders = 1;
#)
#else if (!starts_with(worm, '(')) then (
#  hit_type = "worm";
#  worms = 1;
#)
#else if (!starts_with(screen_dimensions, '(')) then (
#  hit_type = "screen info";
#  screen_info_hits = 1;
#)
else if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then (
#  hit_type = "hit";
)
else (
#  hit_type = "page view";
  page_views = 1;
)`
      requires_fields = {
        sc_status = true
      }
    } # categorize

    # Every accepted entry counts as one access
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'accesses = 1;'
    } # mark_entry

#    set_page_for_worm = {
#      label = "$lang_admin.log_filters.set_page_for_worm_label"
#      comment = "$lang_admin.log_filters.set_page_for_worm_comment"
#      value = "if (worms == 1) then page = '(worm)';"
#    } # set_page_for_worm

    # For entries which are not page views, keep only the directory part of the page
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then cs_uri_stem = substr(cs_uri_stem, 0, last_index(cs_uri_stem, '/') + 1) . '(nonpage)';"
      requires_fields = {
        cs_uri_stem = true
      }
    } # strip_non_page_views

  } # log.filters

  log.field_options = {
    sessions_page_field = "cs_uri_stem"
    sessions_visitor_id_field = "cs_host"
    sessions_event_field = "page_views"
  } # log.field_options

  database.numerical_fields = {

    accesses = {
      default = false
      requires_log_field = false
      entries_field = true
    } # accesses

    page_views = {
      default = true
      requires_log_field = false
    } # page_views

    errors = {
      requires_log_field = false
    } # errors

#    broken_links = {
#      label = "$lang_stats.field_labels.broken_links"
#      default = true
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # broken_links

#    screen_info_hits = {
#      label = "$lang_stats.field_labels.screen_info_hits"
#      default = false
#      requires_log_field = false
#      type = "int"
#      display_format_type = "integer"
#    } # screen_info_hits

    unique_client_ips = {
      log_field = "cs_host"
      type = "unique"
    } # unique_client_ips

    sc_bytes_content = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # sc_bytes_content

  } # database.numerical_fields

  create_profile_wizard_options = {

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
      hit_type = ""
      content_group = {
        cs_uri_stem = true
        file_type = true
        x_namespace = true
        cs_host = true
      }
      users_group = {
        user = true
      }
      visitor_demographics_group = {
        hostname = true
        domain_description = true
        location = true
        organization = true
        isp = true
        domain = true
        authenticated_user = true
      }
      visitor_systems_group = {
        screen_dimensions = true
        screen_depth = true
        web_browser = true
        operating_system = true
      }
      referrer_group = {
        referrer = true
        referrer_description = true
        search_engine = true
        search_phrase = true
#        search_phrase_by_search_engine = true
      }
      other_group = {
        store_id = true
        store_region = true
        store_category = true
        sc_status = true
        worm = true
        spider = true
        server_domain = true
        server_response = true
        pragma_in = true
        pragma_out = true
        cache_control_in = true
        cache_control_out = true
        vary_out = true
        x_cache_hit = true
        protocol = true
        operation = true
      }
    } # report_groups

    snapons = {

      # Attach a top_level_domain snapon
      top_level_domain = {
        snapon = "top_level_domain"
        name = "top_level_domain"
        label = "$lang_admin.snapons.top_level_domain.label"
        parameters = {
          url_field.parameter_value = "cs_uri_stem"
          top_level_domain_field.parameter_value = "top_level_domain"
          top_level_domain_field_name = {
            parameter_value = "$lang_admin.field_labels.top_level_domain"
            final_node_name = "top_level_domain"
          }
        } # parameters
        requires_log_fields = {
          cs_uri_stem = true
        }
      } # top_level_domain

      # Attach a gateway_reports snapon
      gateway_reports = {
        snapon = "gateway_reports"
        name = "gateway_reports"
        label = "$lang_admin.snapons.gateway_reports.label"
        parameters = {
          user_field.parameter_value = "cs_host"
          have_category_field.parameter_value = false
#          category_field.parameter_value = "category"
          host_field.parameter_value = "top_level_domain"
          page_views_field.parameter_value = "page_views"
          bytes_field.parameter_value = "sc_bytes_content"
          sort_by_field.parameter_value = "page_views"
        } # parameters
      } # gateway_reports

      # Add the standard reports
      add_standard_reports = {
        name = "add_standard_reports"
        label = "add_standard_reports"
        snapon = "add_standard_reports"
      } # add_standard_reports

    } # snapons

  } # create_profile_wizard_options

} # juniper_mfc