# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.
juniper_mfc = {
# Version of this plug-in definition (matches the latest entry in the change history below).
plugin_version = "2.0.4"
# Identification of the device that produces these logs; the version entry is left empty.
info.1.manufacturer = "Juniper"
info.1.device = "Media Flow Controller (Access Logs) (NCSA)"
info.1.version.1 = ""
# 2011/10/27 - 1.0 - GMF - cloned apache_combined
# 2011/11/18 - 1.0.1 - GMF - improved autodetect to look for the final numerical field
# 2011-11-24 - 1.0.2 - GMF - Modified autodetection to look for this specific #Fields line, as requested ThreadID:1206750.
# 2011-12-03 - 1.0.3 - GMF - Changed name to "Media Flow Controller" at Juniper's suggestion.
# 2011-12-03 - 1.0.4 - GMF - Changed name to "Media Flow Controller Access Log" at Juniper's suggestion.
# 2012-07-30 - 2.0 - GMF - Enhanced plug-in to handle any #Fields line, and to set up log fields, database fields, parsing, and filters appropriately.
# 2012-08-01 - 2.0.1 - GMF - Added support for quotes around field values
# 2012-08-10 - 2.0.2 - GMF - Changed name to have "Access Logs" in parentheses
# 2012-09-18 - 2.0.3 - GMF - Added NCSA to label
# 2012-10-19 - 2.0.4 - GMF - Added plugin_description
# The name of the log format
log.format.format_label = "Juniper Media Flow Controller Access Log Format (NCSA)"
# Classification of the log data: HTTP access data, in the web server family of formats.
log.miscellaneous.log_data_type = "http_access"
log.miscellaneous.log_format_type = "web_server"
# Description contributed by Juniper
create_profile_wizard_options.plugin_description = `
This plugin detects and analyzes Media Flow Controller Access Logs that are in the NCSA Combined Log Format followed by many popular web servers including Apache. Juniper Media Flow Controller version 11.A and earlier releases generated access logs that conformed to the NCSA standard. All access logs recorded the following fields, which are collectively referred to as the Combined Log Format (CLF):
%h %V %u %t "%r" %s %b "%{Referer}i" "%{User-Agent}i" %y
Please see Media Flow Controller CLI Reference Guide for a description of these fields. Later releases of Juniper Media Flow Controller support the more flexible W3C access log format which is supported by a different Sawmill plug-in.
The reports generated by this plugin can be categorized as:
- Date and Time based reports such as daily, weekly, monthly & yearly reports of user
- Content analysis reports based on content popularity, server domain(s), server response status and content referrers
- User demographic reports based on user's Country/State/City/Region, and ISP
`
# The log is in this format if any of the first ten lines match this regular expression
# The two commented-out patterns below are earlier, stricter versions kept for reference:
# the first matched a data line, the second matched one specific #Fields header.
# log.format.autodetect_regular_expression = "^[^ ]* [^ ]* .* \\[../.../....[: ]..:..:...*\\] \"[A-Z]* [^ ]*(\"| HTTP.*\") [0-9]* [-0-9]* [^ ]* \"[^\"]*\" [0-9]+$"
# log.format.autodetect_regular_expression = `#Fields: %h %V %u %t "%r" %s %b "%[{]Referer[}]i" "%[{]User-Agent[}]i" %y`
# Active pattern: a line that starts with "#Fields: " and contains a %h specifier.
log.format.autodetect_regular_expression = `^#Fields: .*%h`
# Treat fields surrounded by square brackets (e.g. the date/time field) as a single quoted field.
# log.format.treat_brackets_as_quotes = "true"
# log.format.common_log_format = "true"
# NOTE(review): presumably makes the parser skip format/header lines; confirm against the Sawmill documentation.
log.format.ignore_format_lines = "true"
# The format of dates and times in this log
# (both settings are commented out, so no explicit date/time format is configured here)
# log.format.date_format = "dd/mmm/yyyy:hh:mm:ss"
# log.format.time_format = "dd/mmm/yyyy:hh:mm:ss"
# This handles #Fields lines, and creates log and database fields from them.
# For a "#Fields: ..." header it walks the format specifiers one at a time, appends a matching
# sub-expression to log.format.parsing_regular_expression, creates a log field for each
# specifier and, while a profile is being created, the corresponding database fields.
# The header line and every other line starting with "#" evaluate to 'reject'.
log.filter_preprocessor = `
if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then (
  string fields = $1;
  # The tokenizing loop below only consumes a specifier that is followed by a space.
  # Make sure the list ends with one, otherwise the last specifier of the header is dropped.
  if (!matches_regular_expression(fields, ' $')) then
    fields .= ' ';
  string fieldname;
  v.logfieldindex = 1;
  string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields";
  # The parsing expression is rebuilt from scratch for every #Fields header
  log.format.parsing_regular_expression = '^';
  # Derived log fields are collected here and created after all header fields (see end of block)
  node log_fields_at_end = new_node();
  # This subroutine remembers a log field (and its type) for creation after the loop
  subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (
    log_fields_at_end{fieldname}{"type"} = type;
  ));
  # This subroutine creates a database field
  subroutine(create_database_field(string fieldname), (
    #echo("create_database_field: " . fieldname);
    debug_message("create_database_field(" . fieldname . ")\n");
    string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname;
    # Assigning an empty value creates the node at that path
    (databasefieldpath . "") = "";
    node databasefield = databasefieldpath;
    # set_subnode_value(databasefield, "label", fieldname);
    databasefield;
  ));
  # This subroutine creates a log field, and sets its type if one is given
  subroutine(create_log_field(string fieldname, string type), (
    # echo("create_log_field(" . fieldname . "; type=" . type . ")");
    string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname;
    (logfieldpath . "") = "";
    node logfield = logfieldpath;
    # set_subnode_value(logfield, "label", fieldname);
    if (type ne '') then
      set_subnode_value(logfield, "type", type);
    logfield;
  ));
  # Extract the fields one at a time
  while (matches_regular_expression(fields, '^([^ ]+) (.*)$')) (
    string format_specifier = $1;
    fields = $2;
    # A specifier wrapped in double quotes means the value is quoted in the data lines too
    bool quoted = false;
    if (matches_regular_expression(format_specifier, '^"([^"]+)"')) then (
      quoted = true;
      format_specifier = $1;
    );
    #echo("format_specifier=" . format_specifier);
    string fieldname;
    # Handle special case hard-coded fields
    # Default sub-expression: a single run of non-space characters
    string field_regexp = '([^ ]+)';
    if (matches_regular_expression(format_specifier, '^R[0-9]+')) then
      fieldname = 'store_id';
    # The alternation is grouped so that ^ and $ apply to every region name
    else if (matches_regular_expression(format_specifier, '^(US|EMEA|JAPAN|CHINA|AUS)$')) then
      fieldname = 'store_region';
    else if (matches_regular_expression(format_specifier, '^[A-Z][a-z]+-Profile$')) then
      fieldname = 'store_category';
    else if (format_specifier eq '%b') then
      fieldname = 'sc_bytes_content';
    else if (format_specifier eq '%c') then
      fieldname = 'x_cache_hit';
    else if (format_specifier eq '%d') then
      fieldname = 'date';
    else if (format_specifier eq '%f') then
      fieldname = 'cs_uri_stem';
    else if (format_specifier eq '%h') then
      fieldname = 'cs_host';
    else if (format_specifier eq '%m') then
      fieldname = 'cs_method';
    else if (format_specifier eq '%p') then
      fieldname = 'x_hotness';
    else if (format_specifier eq '%q') then
      fieldname = 'cs_uri_query';
    else if (format_specifier eq '%r') then (
      # The request line carries three values: operation, URI stem and protocol.
      # The first two log fields are created here; 'protocol' is created by the common code below.
      field_regexp = '([^ ]+) ([^ ]+) ([^ ]+)';
      create_log_field('operation', 'flat');
      create_log_field('cs_uri_stem', 'page');
      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
      fieldname = 'protocol';
    ) # %r
    else if (format_specifier eq '%s') then
      fieldname = 'sc_status';
    else if (format_specifier eq '%t') then (
      # Bracketed timestamp: captures the part before the first ':' as the date and the part
      # up to the next space as the time; the remainder inside the brackets is not captured.
      field_regexp = '[[]([^:]+):([^ ]+) [^]]+[]]';
      create_log_field('date', 'date');
      fieldname = 'time';
    );
    else if (format_specifier eq '%u') then
      fieldname = 'user';
    else if (format_specifier eq '%v') then
      fieldname = 'x_server';
    else if (format_specifier eq '%y') then
      fieldname = 'sc_substatus';
    else if (format_specifier eq '%A') then
      fieldname = 'x-request-time';
    else if (format_specifier eq '%B') then
      fieldname = 'x-first-byte-out-time';
    else if (format_specifier eq '%C') then
      fieldname = 'cs(Cookie)';
    else if (format_specifier eq '%D') then
      fieldname = 'time-taken';
    else if (format_specifier eq '%E') then
      fieldname = 'x-time-used-ms';
    else if (format_specifier eq '%F') then
      fieldname = 'x-last-byte-out-time';
    else if (format_specifier eq '%H') then
      fieldname = 'cs-proto';
    else if (format_specifier eq '%I') then
      fieldname = 'cs-bytes';
    else if (format_specifier eq '%L') then
      fieldname = 'x-latency';
    else if (format_specifier eq '%M') then
      fieldname = 'x-data-len-ms';
    else if (format_specifier eq '%N') then
      fieldname = 'x-namespace';
    else if (format_specifier eq '%O') then
      fieldname = 'sc-bytes';
    else if (format_specifier eq '%R') then
      fieldname = 'x-revalidate-cache';
    else if (format_specifier eq '%U') then
      fieldname = 'cs-uri';
    else if (format_specifier eq '%V') then
      fieldname = 'server_domain';
    else if (format_specifier eq '%X') then
      fieldname = 'c-ip';
    else if (format_specifier eq '%Y') then
      fieldname = 's-ip';
    else if (format_specifier eq '%Z') then
      fieldname = 's-port';
    # %{Name}i / %{Name}o / %{Name}: use the name itself, suffixed with _in or _out
    else if (matches_regular_expression(format_specifier, '^%[{]([^}]+)[}]([io]?)$')) then (
      fieldname = $1;
      if ($2 eq 'o') then
        fieldname .= '_out';
      else if ($2 eq 'i') then
        fieldname .= '_in';
    );
    else (
      error("Unknown format specified in #Fields header line: '" . format_specifier . "'");
    );
    # Append this field to the parsing expression, separated from the previous one by a space
    if (log.format.parsing_regular_expression ne '^') then
      log.format.parsing_regular_expression .= ' ';
    if (quoted) then
      log.format.parsing_regular_expression .= '"';
    log.format.parsing_regular_expression .= field_regexp;
    if (quoted) then
      log.format.parsing_regular_expression .= '"';
    string unconverted_fieldname = fieldname;
    # Clean up the field name: lowercase it, replace every character outside a-z0-9 with '_',
    # then strip trailing underscores
    fieldname = '';
    for (int i = 0; i < length(unconverted_fieldname); i++) (
      string c = lowercase(substr(unconverted_fieldname, i, 1));
      if (!matches_regular_expression(c, '^[a-z0-9]$')) then
        c = '_';
      fieldname .= c;
    );
    while (matches_regular_expression(fieldname, '^(.*)_$'))
      fieldname = $1;
    # NOTE(review): a header specifier such as %{User-Agent}i or %{Referer}i is converted to
    # user_agent_in / referer_in by the code above, so the user_agent, cs_user_agent and
    # cs_referer comparisons below are not hit for that spelling. Confirm the intended names.
    # Get the log field type
    string log_field_type = '';
    if (fieldname eq 'cs_uri_stem') then (
      log_field_type = 'page';
      ("profiles." . internal.profile_name . ".log.fields.url.type") = 'flat';
    );
    if (fieldname eq 'cs_host') then (
      log_field_type = 'host';
      create_log_field_at_end(log_fields_at_end, 'location', '');
    );
    if (fieldname eq 'user_agent') then (
      log_field_type = 'agent';
      create_log_field_at_end(log_fields_at_end, 'web_browser', '');
      create_log_field_at_end(log_fields_at_end, 'operating_system', '');
    );
    if (fieldname eq 'cs_referer') then (
      log_field_type = 'url';
      create_log_field_at_end(log_fields_at_end, 'search_engine', '');
      create_log_field_at_end(log_fields_at_end, 'search_phrase', '');
    );
    # Create the log field
    create_log_field(fieldname, log_field_type);
    # If we're creating a profile, create the database fields too.
    if (node_exists("volatile.creating_profile")) then (
      # Handle time by creating date_time and derived database fields
      if (fieldname eq "time") then (
        create_database_field('date_time');
        create_database_field('day_of_week');
        create_database_field('hour_of_day');
      ); # if time
      # Create derived field for agent
      else if (fieldname eq "cs_user_agent") then (
        create_database_field('operating_system');
        create_database_field('web_browser');
      );
      # Create derived field for IP
      else if (fieldname eq "cs_host") then (
        create_database_field('cs_host');
        create_database_field('location');
      );
      # Create derived fields for referrer
      else if (fieldname eq "cs_referer") then (
        create_database_field('search_engine');
        create_database_field('search_phrase');
      );
      # Create derived file type field
      else if (fieldname eq "cs_uri_stem") then (
        create_database_field('file_type');
      );
      # Create derived file type field, and other %r fields
      else if (fieldname eq "protocol") then (
        create_database_field('operation');
        create_database_field('cs_uri_stem');
        create_database_field('file_type');
      );
      # Don't add a database field for numerical fields
      # else if (subnode_exists('database.fields', fieldname)) then (
      else if (subnode_exists(numerical_fields, fieldname)) then (
        debug_message("Not adding numerical field: " . fieldname . "\n");
      );
      # Create a normal database field
      else
        create_database_field(fieldname);
    ); # if creating profile
  ); # while another field
  # Create any final log fields
  node lfae;
  foreach lfae log_fields_at_end (
    # echo("Final log field creation: " . node_name(lfae));
    create_log_field(node_name(lfae), @lfae{"type"});
  );
  # Don't parse the #Fields line as a data line
  'reject';
); # if #Fields
# Don't parse any other # lines as data lines
else if (starts_with(current_log_line(), '#')) then (
  'reject';
);
`
# Get search engine and search phrase information from the referrer field (before it gets simplified).
# When get_search_engine_info() succeeds on cs_referer, its results are copied from the
# volatile node into the search_engine and search_phrase fields.
log.parsing_filters.compute_se_sp = {
value = `
if (get_search_engine_info(cs_referer)) then (
search_engine = volatile.search_engine;
search_phrase = volatile.search_phrase;
);
`
# NOTE(review): presumably the filter is only used when the listed log fields exist; confirm.
requires_fields = {
cs_referer = true
# search_engine = true
# search_phrase = true
}
}
# Get web browser and operating system information from the user-agent field.
# get_user_agent_info() is called on cs_user_agent and its results are copied from the
# volatile node; the spider assignment is commented out.
log.parsing_filters.derive_from_user_agent = {
value = `
get_user_agent_info(cs_user_agent);
web_browser = volatile.web_browser;
operating_system = volatile.operating_system;
#spider = volatile.spider;
`
# NOTE(review): presumably the filter is only used when the listed log fields exist; confirm.
requires_fields = {
cs_user_agent = true
}
}
# Log Filters
log.filters = {
# Shortens the referrer: a value of '-' becomes '(no referrer)'; otherwise a referrer that
# starts with scheme://host/ is cut down to that prefix followed by '(omitted)'.
simplify_referrer = {
label = "$lang_admin.log_filters.simplify_referrer_label"
comment = "$lang_admin.log_filters.simplify_referrer_comment"
value = "if (cs_referer eq '-') then cs_referer = '(no referrer)' else if (matches_regular_expression(cs_referer, '^([^:]+://[^/]+/)')) then cs_referer = $1 . '(omitted)'"
requires_fields = {
cs_referer = true
}
} # simplify_referrer
# Disabled by default. Replaces any referrer containing 'mydomain.com/' with
# '(internal referrer)'; the placeholder domain must be edited before the filter is enabled.
# The expression uses cs_referer, the field named in requires_fields and used by the other
# referrer filters in this plug-in (there is no 'referrer' log field here).
internal_referrer = {
label = "$lang_admin.log_filters.internal_referrer_label"
comment = "$lang_admin.log_filters.internal_referrer_comment"
value = "if (contains(cs_referer, 'mydomain.com/')) then cs_referer = '(internal referrer)';"
requires_fields = {
cs_referer = true
}
disabled = true
} # internal_referrer
# Replaces a user value of '-' with the readable label '(not authenticated)'.
not_authenticated = {
label = "$lang_admin.log_filters.not_authenticated_label"
comment = "$lang_admin.log_filters.not_authenticated_comment"
value = "if (user eq '-') then user = '(not authenticated)';"
requires_fields = {
user = true
}
} # not_authenticated
# When cs_uri_stem contains a '?', keeps the leading part selected by
# substr(..., 0, index of '?' + 1) and appends '(parameters)' in place of the rest.
remove_query = {
label = "$lang_admin.log_filters.remove_query_label"
comment = "$lang_admin.log_filters.remove_query_comment"
value = "if (contains(cs_uri_stem, '?')) then cs_uri_stem = substr(cs_uri_stem, 0, index(cs_uri_stem, '?') + 1) . '(parameters)';"
requires_fields = {
cs_uri_stem = true
}
} # remove_query
# Classifies each entry: a status starting with '4' sets errors = 1; entries whose file_type is
# one of the listed image/CSS/SWF/JS types are left uncounted; everything else sets page_views = 1.
# The hit_type, spider, worm and screen-info branches are commented out.
categorize = {
label = "$lang_admin.log_filters.categorize_hits_label"
comment = "$lang_admin.log_filters.categorize_hits_comment"
value = `if (starts_with(sc_status, '4')) then (
errors = 1;
# if (sc_status eq '404') then (
# broken_links = 1;
# hit_type = "broken link";
# )
# else (
# hit_type = "error";
# )
)
#else if (!starts_with(spider, '(')) then (
# hit_type = "spider";
# spiders = 1;
#)
#else if (!starts_with(worm, '(')) then (
# hit_type = "worm";
# worms = 1;
#)
#else if (!starts_with(screen_dimensions, '(')) then (
# hit_type = "screen info";
# screen_info_hits = 1;
#)
else if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then (
# hit_type = "hit";
)
else (
# hit_type = "page view";
page_views = 1;
)`
# Only sc_status is listed here, although the expression also reads file_type.
requires_fields = {
sc_status = true
}
} # categorize
# Sets accesses = 1; it has no requires_fields entry.
mark_entry = {
label = '$lang_admin.log_filters.mark_entry_label'
comment = '$lang_admin.log_filters.mark_entry_comment'
value = 'accesses = 1;'
} # mark_entry
# set_page_for_worm = {
# label = "$lang_admin.log_filters.set_page_for_worm_label"
# comment = "$lang_admin.log_filters.set_page_for_worm_comment"
# value = "if (worms == 1) then page = '(worm)';"
# } # set_page_for_worm
# For entries with page_views == 0, keeps the part of cs_uri_stem selected by
# substr(..., 0, last index of '/' + 1) and appends '(nonpage)' in place of the rest.
strip_non_page_views = {
label = '$lang_admin.log_filters.strip_non_page_views_label'
comment = '$lang_admin.log_filters.strip_non_page_views_comment'
value = "if (page_views == 0) then cs_uri_stem = substr(cs_uri_stem, 0, last_index(cs_uri_stem, '/') + 1) . '(nonpage)';"
requires_fields = {
cs_uri_stem = true
}
} # strip_non_page_views
} # log.filters
# Session settings: cs_uri_stem is the page field, cs_host identifies the visitor,
# and page_views is the event field.
log.field_options = {
sessions_page_field = "cs_uri_stem"
sessions_visitor_id_field = "cs_host"
sessions_event_field = "page_views"
} # log.field_options
# Numerical database fields. The filter preprocessor checks this node and does not create
# an ordinary database field for any log field whose name appears here.
database.numerical_fields = {
# accesses, page_views and errors have no log field of their own (requires_log_field = false);
# the log filters assign them (accesses = 1, page_views = 1, errors = 1).
accesses = {
default = false
requires_log_field = false
entries_field = true
} # accesses
page_views = {
default = true
requires_log_field = false
} # page_views
errors = {
requires_log_field = false
} # errors
# broken_links = {
# label = "$lang_stats.field_labels.broken_links"
# default = true
# requires_log_field = false
# type = "int"
# display_format_type = "integer"
# } # broken_links
# screen_info_hits = {
# label = "$lang_stats.field_labels.screen_info_hits"
# default = false
# requires_log_field = false
# type = "int"
# display_format_type = "integer"
# } # screen_info_hits
# A field of type "unique" based on the cs_host log field
unique_client_ips = {
log_field = "cs_host"
type = "unique"
} # unique_client_ips
# 64-bit integer, displayed as bandwidth
sc_bytes_content = {
type = "int"
integer_bits = 64
display_format_type = "bandwidth"
} # sc_bytes_content
} # database.numerical_fields
create_profile_wizard_options = {
# How the reports should be grouped in the report menu
# Each group lists the fields whose reports belong under it.
# NOTE(review): several names here (e.g. referrer, hostname, worm, spider) are not created by
# the filter preprocessor; presumably entries for missing fields are ignored. Confirm.
report_groups = {
date_time_group = ""
hit_type = ""
content_group = {
cs_uri_stem = true
file_type = true
x_namespace = true
cs_host = true
}
users_group = {
user = true
}
visitor_demographics_group = {
hostname = true
domain_description = true
location = true
organization = true
isp = true
domain = true
authenticated_user = true
}
visitor_systems_group = {
screen_dimensions = true
screen_depth = true
web_browser = true
operating_system = true
}
referrer_group = {
referrer = true
referrer_description = true
search_engine = true
search_phrase = true
# search_phrase_by_search_engine = true
}
# Store fields, status, server and header-derived (_in/_out) fields, and request-line fields
other_group = {
store_id = true
store_region = true
store_category = true
sc_status = true
worm = true
spider = true
server_domain = true
server_response = true
pragma_in = true
pragma_out = true
cache_control_in = true
cache_control_out = true
vary_out = true
x_cache_hit = true
protocol = true
operation = true
}
} # report_groups
# Snapons attached by the create-profile wizard
snapons = {
# Attach a top_level_domain snapon
# It takes cs_uri_stem as its URL field and names its final node top_level_domain;
# it is listed as requiring the cs_uri_stem log field.
top_level_domain = {
snapon = "top_level_domain"
name = "top_level_domain"
label = "$lang_admin.snapons.top_level_domain.label"
parameters = {
url_field.parameter_value = "cs_uri_stem"
field_name = {
parameter_value = "$lang_admin.field_labels.top_level_domain"
final_node_name = "top_level_domain"
}
} # parameters
requires_log_fields = {
cs_uri_stem = true
}
} # top_level_domain
# Attach a gateway_reports snapon
# Parameters: user = cs_host, host = top_level_domain (the name used by the snapon above),
# page views = page_views, bytes = sc_bytes_content, sorted by page_views; no category field.
gateway_reports = {
snapon = "gateway_reports"
name = "gateway_reports"
label = "$lang_admin.snapons.gateway_reports.label"
parameters = {
user_field.parameter_value = "cs_host"
have_category_field.parameter_value = false
# category_field.parameter_value = "category"
host_field.parameter_value = "top_level_domain"
page_views_field.parameter_value = "page_views"
bytes_in_field.parameter_value = "sc_bytes_content"
sort_by_field.parameter_value = "page_views"
} # parameters
} # gateway_reports
# Add the standard reports
add_standard_reports = {
name = "add_standard_reports"
label = "add_standard_reports"
snapon = "add_standard_reports"
} # add_standard_reports
} # snapons
} # create_profile_wizard_options
} # juniper_mfc