# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved. cloudfront_download = { plugin_version = "1.0" info.1.manfacturer = "Amazon" info.1.device = "Cloudfront Download" info.1.version = "" # 2012-09-14 - MSG - 1.0 - Initial plug-in based on Cloudfront Streaming # The name of the log format log.format.format_label = "Amazon Cloudfront Download Log Format" log.miscellaneous.log_data_type = "generic_w3c" log.miscellaneous.log_format_type = "web_server" # The log is in this format if any of the first ten lines match this regular expression log.format.autodetect_regular_expression = "^#Fields: .*x-edge-location" # Literal apostrophes can appear in field values, and should not be treated as quotes log.format.treat_apostrophes_as_quotes = false # Expire entries after 100,000 lines have gone by (during database filtering) log.format.collected_entry_lifespan = "100000" # Use single-process builds for profiles based on this plug-in, because the filter code requires in-order log data to get correct results. # Not any more--it uses a database filter now, and pre-sorts the data chronologically. # log.processing.distributed.method = "1" # This handles #Fields lines, and creates log and database fields from them log.filter_preprocessor = ` if (matches_regular_expression(current_log_line(), '^#Fields: (.*)$')) then ( string fields = $1; string fieldname; v.logfieldindex = 1; string numerical_fields = "profiles." . internal.profile_name . ".database.numerical_fields"; # This subroutine creates a database field subroutine(create_database_field(string fieldname), ( #echo("create_database_field: " . fieldname); debug_message("create_database_field(" . fieldname . ")\n"); string databasefieldpath = "profiles." . internal.profile_name . ".database.fields." . fieldname; (databasefieldpath . "") = ""; node databasefield = databasefieldpath; # set_subnode_value(databasefield, "label", fieldname); databasefield; )); subroutine(create_log_field(string fieldname, string type, bool withindex), ( debug_message("create_log_field(" . fieldname . "; type=" . type . ")\n"); string logfieldpath = "profiles." . internal.profile_name . ".log.fields." . fieldname; (logfieldpath . "") = ""; node logfield = logfieldpath; # set_subnode_value(logfield, "label", fieldname); if (withindex) then ( set_subnode_value(logfield, "index", v.logfieldindex); v.logfieldindex++; ); set_subnode_value(logfield, "subindex", 0); if (type ne '') then set_subnode_value(logfield, "type", type); logfield; )); # Assume there isn't a localtime field until we see one. v.parse_localtime = false; # Extract the fields on at a time while (matches_regular_expression(fields, '^([^ ]+) (.*)$')) ( string unconverted_fieldname = $1; fields = $2; # Clean up the field name fieldname = ''; for (int i = 0; i < length(unconverted_fieldname); i++) ( string c = lowercase(substr(unconverted_fieldname, i, 1)); if (!matches_regular_expression(c, '^[a-z0-9]$')) then c = '_'; fieldname .= c; ); while (matches_regular_expression(fieldname, '^(.*)_$')) fieldname = $1; # Get the log field type string log_field_type = ''; if (fieldname eq 'cs_uri_path') then ( log_field_type = 'page'; ("profiles." . internal.profile_name . ".log.fields.url.type") = 'flat'; ); if (fieldname eq 'cs_user_agent') then log_field_type = 'agent'; if (fieldname eq 'cs_referer') then log_field_type = 'url'; # Create the log field create_log_field(fieldname, log_field_type, true); if (fieldname eq "localtime") then v.parse_localtime = true; # If we're creating a profile, create the database fields too. if (node_exists("volatile.creating_profile")) then ( # Handle localtime by creating date_time and derived database fields if (fieldname eq "localtime") then ( create_log_field('date', '', false); create_log_field('time', '', false); create_database_field('date_time'); create_database_field('day_of_week'); create_database_field('hour_of_day'); # ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = false; ); # if localtime # Handle date by creating date_time and derived database fields else if (fieldname eq "date") then ( create_log_field('localtime', '', false); # placeholder - 7/Nov/2006 - KBB create_database_field('date_time'); create_database_field('day_of_week'); create_database_field('hour_of_day'); # ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = true; ); # if date else if (fieldname eq "time") then ( create_database_field('date_time'); create_database_field('day_of_week'); create_database_field('hour_of_day'); # ("profiles." . internal.profile_name . ".log.parsing_filters.parse_localtime.disabled") = true; ); # if time # Create derived field for agent else if (fieldname eq "cs_user_agent") then ( create_database_field('operating_system'); create_database_field('web_browser'); ); # Create derived field for agent else if (fieldname eq "c_ip") then ( create_database_field('c_ip'); create_database_field('location'); ); # Create derived fields for referrer else if (fieldname eq "cs_referer") then ( create_database_field('search_engine'); create_database_field('search_phrase'); ); # Create derived file type field else if (fieldname eq "cs_uri_path") then ( create_database_field('file_type'); ); # Don't add a database field for numerical fields # else if (subnode_exists('database.fields', fieldname)) then ( else if (subnode_exists(numerical_fields, fieldname)) then ( debug_message("Not adding numerical field: " . fieldname . "\n"); ); # Create a normal database field else create_database_field(fieldname); ); # if creating profile ); # while another field # Don't parse the #Fields line as a data line 'reject'; ); # if #Fields # Don't parse any other # lines as data lines else if (starts_with(current_log_line(), '#')) then ( 'reject'; ); ` log.format.field_separator = " " log.fields = { saved_cs_uri_stem = "" saved_vhost = "" location = "" session_bytes = "" connections = "" plays = "" } # From http://livedocs.adobe.com/fms/2/docs/wwhelp/wwhimpl/common/html/wwhelp.htm?context=LiveDocs_Parts&file=00000181.html # cs-bytes - This field shows the number of bytes transferred from the client to the server. # This information can be used to bill customers per session. To calculate the bandwidth # usage per session, subtract the 'cs-bytes' in the 'connect' event from the 'cs-bytes' in # the 'disconnect' event. # sc-bytes - This field shows the number of bytes transferred from the server to the client. # This information can be used to bill customers per session. To calculate the bandwidth # usage per session, subtract the 'sc-bytes' in the 'connect' event by the 'sc-bytes' in # the 'disconnect' event # cs-stream-bytes - This field shows the number of bytes transferred from the client to the server # per stream. To calculate the bandwidth usage per stream, subtract the 'cs-stream-bytes' # in the 'publish' event by the 'cs-stream-bytes' in the 'unpublish' event. # sc-stream-bytes - This field shows the number of bytes transferred from the server to the client per # stream. To calculate the bandwidth usage per stream, subtract the 'sc-stream-bytes' # in the 'play' event by the 'sc-stream-bytes' in the 'stop' event. log.filter_initialization = ` #v.last_cs_bytes = ""; #v.last_sc_bytes = ""; #v.last_cs_stream_bytes = ""; #v.last_sc_stream_bytes = ""; #float cs_bytes_for_db; #float sc_bytes_for_db; #float cs_stream_bytes_for_db; #float sc_stream_bytes_for_db; string visitor_id; # For sessions int login_date_time_epoc; int logout_date_time_epoc; string logout_date_time; string session_id; #int session_id_counter = 0; int date_offset_seconds = log.processing.date_offset * (60*60); ` log.parsing_filters = { # Logs will have c_ip or c_client_id or both. Set visitor_id, used in other filters # based on whichever is available. c_client_id will be used if both exist. set_visitor_to_c_ip = { value = `visitor_id = replace_all(c_ip, '.', '_');` requires_fields = { c_ip = true } } # set_visitor_to_c_ip # Keep this filter 2nd. c_client_id is the more precise id. set_visitor_to_c_client_id = { value = `visitor_id = c_client_id;` requires_fields = { c_client_id = true } } # set_visitor_to_c_client_id # # Fix the cs_bytes field by subtracting this value from the previous one (it's a running total, # # which otherwise will be aggregated to give too-large numbers). # fix_cs_bytes = { # value = ` #if (visitor_id ne "(empty)" and visitor_id ne "-") then ( # # #session disconnect 2008-06-13 16:42:59 4882 222.22.222.2 -1746602884 73147 - - - - - # # # If there was a previous value, use the difference in the database entry # v.last_cs_bytes = get_collected_field(visitor_id, 'last_cs_bytes'); # #if (subnode_exists('v.last_cs_bytes', visitor_id)) then ( # if (v.last_cs_bytes ne '') then ( # # # If value is negative due to logging bug (above), then set it to the previous # # value as if there have been no bytes. The result is the same as setting it # # to zero if the x-event is disconnect as it is in all examples seen so far. # if (cs_bytes < 0) then cs_bytes = v.last_cs_bytes; # # cs_bytes_for_db = 0.0 + cs_bytes - v.last_cs_bytes; # if (cs_bytes_for_db < 0) then ( # cs_bytes_for_db = cs_bytes; # ); # ); # else ( # if (cs_bytes < 0) then cs_bytes = 0; # This compensates for a logging bug. # cs_bytes_for_db = cs_bytes; # ); # # # Remember the current cs_bytes value for a later event for this visitor # if (x_event eq "disconnect") then ( # #set_subnode_value('v.last_cs_bytes', visitor_id, 0); # set_collected_field(visitor_id, 'last_cs_bytes', 0); # ); # else ( # #set_subnode_value('v.last_cs_bytes', visitor_id, cs_bytes); # set_collected_field(visitor_id, 'last_cs_bytes', cs_bytes); # ); # # # In the database, the cs_bytes field should be the difference # cs_bytes = cs_bytes_for_db; # #); # if visitor_id #else ( # cs_bytes = 0; #); #` # requires_fields = { # cs_bytes = true # x_event = true # } # } # fix_cs_bytes # 2011-12-19 - GMF - Moved this to a snapon, so the input log data doesn't need to be sorted. # # Session bytes, intended to emulate Amazon's own byte reporting, is the bytes at disconnect, minus the bytes at connect # compute_session_bytes = { # value = ` #session_bytes = 0; #if (visitor_id ne "(empty)" and visitor_id ne "-") then ( # # if (x_event eq "connect") then ( # set_collected_field(visitor_id, 'connect_bytes', sc_bytes); # ); # # else if (x_event eq "disconnect") then # session_bytes = sc_bytes - get_collected_field(visitor_id, 'connect_bytes'); # #); ##echo("session_bytes=" . session_bytes); #` # requires_fields = { # sc_bytes = true # x_event = true # } # } # compute_session_bytes # Fix the sc_stream_bytes field by subtracting this value from the previous one (it's a running total, # which otherwise will be aggregated to give too-large numbers). # fix_sc_stream_bytes = { # value = ` #if (visitor_id ne "(empty)" and visitor_id ne "-") then ( # # # If there was a previous value, use the difference in the database entry # v.last_sc_stream_bytes = get_collected_field(visitor_id, 'last_sc_stream_bytes'); # #if (subnode_exists('v.last_sc_stream_bytes', visitor_id)) then ( # if (v.last_sc_stream_bytes ne '') then ( # # # Compensates for a logging bug - seen only with cs-bytes so far. # if (sc_stream_bytes < 0) then sc_stream_bytes = v.last_sc_stream_bytes; # # #sc_stream_bytes_for_db = 0.0 + sc_stream_bytes - node_value(subnode_by_name('v.last_sc_stream_bytes', visitor_id)); # sc_stream_bytes_for_db = 0.0 + sc_stream_bytes - v.last_sc_stream_bytes; # if (sc_stream_bytes_for_db < 0) then ( # sc_stream_bytes_for_db = sc_stream_bytes; # ); # ); # else ( # # Compensates for a logging bug - seen only with cs-bytes so far. # if (sc_stream_bytes < 0) then sc_stream_bytes = 0; # sc_stream_bytes_for_db = sc_stream_bytes; # ); # # # Remember the current sc_stream_bytes value for a later event for this visitor # if (x_event eq "stop") then ( # #set_subnode_value('v.last_sc_stream_bytes', visitor_id, 0); # set_collected_field(visitor_id, 'last_sc_stream_bytes', 0); # ); # else ( # #set_subnode_value('v.last_sc_stream_bytes', visitor_id, sc_stream_bytes); # set_collected_field(visitor_id, 'last_sc_stream_bytes', sc_stream_bytes); # ); # # # In the database, the sc_stream_bytes field should be the difference # sc_stream_bytes = sc_stream_bytes_for_db; # #); # if visitor_id #else ( # sc_stream_bytes = 0; #); #` # requires_fields = { # sc_stream_bytes = true # x_event = true # } # } # fix_sc_stream_bytes # Fix the cs_stream_bytes field by subtracting this value from the previous one (it's a running total, # which otherwise will be aggregated to give too-large numbers). # fix_cs_stream_bytes = { # value = ` #if (visitor_id ne "(empty)" and visitor_id ne "-") then ( # # # If there was a previous value, use the difference in the database entry # v.last_cs_stream_bytes = get_collected_field(visitor_id, 'last_cs_stream_bytes'); # #if (subnode_exists('v.last_cs_stream_bytes', visitor_id)) then ( # if (v.last_cs_stream_bytes ne '') then ( # # # Compensates for a logging bug - seen only with cs-bytes so far. # if (cs_stream_bytes < 0) then cs_stream_bytes = v.last_cs_stream_bytes; # # #cs_stream_bytes_for_db = 0.0 + cs_stream_bytes - node_value(subnode_by_name('v.last_cs_stream_bytes', visitor_id)); # cs_stream_bytes_for_db = 0.0 + cs_stream_bytes - v.last_cs_stream_bytes; # if (cs_stream_bytes_for_db < 0) then ( # cs_stream_bytes_for_db = cs_stream_bytes; # ); # ); # else ( # # Compensates for a logging bug - seen only with cs-bytes so far. # if (cs_stream_bytes < 0) then cs_stream_bytes = 0; # cs_stream_bytes_for_db = cs_stream_bytes; # ); # # # Remember the current cs_stream_bytes value for a later event for this visitor # if (x_event eq "unpublish") then ( # #set_subnode_value('v.last_cs_stream_bytes', visitor_id, 0); # set_collected_field(visitor_id, 'last_cs_stream_bytes', 0); # ); # else ( # #set_subnode_value('v.last_cs_stream_bytes', visitor_id, cs_stream_bytes); # set_collected_field(visitor_id, 'last_cs_stream_bytes', cs_stream_bytes); # ); # # # In the database, the cs_stream_bytes field should be the difference # cs_stream_bytes = cs_stream_bytes_for_db; # #); # if visitor_id #else ( # cs_stream_bytes = 0; #); #` # requires_fields = { # cs_stream_bytes = true # x_event = true # } # } # fix_cs_stream_bytes # set_duration = { # value = ` #if ((x_event eq "stop") and (x_category eq "stream")) then # stream_duration = x_duration; #else if ((x_event eq "disconnect") and (x_category eq "session")) then # session_duration = x_duration; #` # requires_fields = { # x_duration = true # x_event = true # x_category = true # stream_duration = true # } # } # set_duration # set x-sname to (empty) when '-' for session connect/disconnect lines # so it does not appear in the stream name report as it is only session traffic set_stream_name = { value = `if (x_sname eq '-') then x_sname = '(empty)';` requires_fields = { x_sname = true } # requires_fields } # set_stream_name # count_connections = { # value = "if (x_event eq 'connect') then connections = 1" # } # count_connection # # count_plays = { # value = "if (x_event eq 'play') then plays = 1" # } # count_connection } # Log.parsing_filters log.filters = { mark_entry = { label = '$lang_admin.log_filters.mark_entry_label' comment = '$lang_admin.log_filters.mark_entry_comment' value = 'accesses = 1;' } # mark_entry } # log.filters database.numerical_fields = { accesses = { default = true requires_log_field = false } unique_client_ips = { default = false requires_log_field = true log_field = "c_ip" type = "unique" } # visitors sc_bytes = { type = "int" integer_bits = 64 display_format_type = "bandwidth" } } # database.numerical_fields create_profile_wizard_options = { # How the reports should be grouped in the report menu report_groups = { date_time_group = "" content_group = { cs_uri_stem = true cs_uri_query = true file_type = true } # content_group client_group = { c_ip = true cs_referrer = true cs_user_agent = true domain = true isp = true location = true organization = true } # client_group other_group = { } # other_group } # report_groups } # create_profile_wizard_options } # cloudfront_download