#
# nginx_logformat_parser snapon
#
# This snapon implements nginx parsing functionality, based on the HttpLogModule log_format
# ( http://wiki.nginx.org/HttpLogModule#log_format ).
#
# The user pastes the nginx log_format directive (or just its quoted format string) into the
# log_format parameter.  When the snapon is attached, the configure_parsing operation walks
# that string from left to right and builds:
#   - profile.log.format.parsing_regular_expression  (one capture group per log field)
#   - profile.log.fields                              (in capture-group order)
#   - profile.database.fields                         (for the fields that are reported on)
#

nginx_logformat_parser = {

  label = "$lang_admin.snapons.nginx_parser.label"
  comment = "$lang_admin.snapons.nginx_parser.comment"
  config_snapon_category = ""
  version = "1.1"

  # 2013-03-23 - 1.0 - GMF - initial creation
  # 2013-06-12 - 1.1 - GMF - Fixed bug with parsing lines where URL is /; fixed bug with extracting referrer and agent fields

  parameters = {

    # The nginx log_format string to translate into a parsing regular expression
    log_format = {
      parameter_value = ""
      validation_type = "string"
      form_element_label = "$lang_admin.snapons.nginx_parser.parameters.log_format.form_element_label"
      form_element_type = "text"
      form_element_width = "500"
      description = ""
    } # log_format

  } # parameters

  parameters_form = {

    group_1 = {
      description = "$lang_admin.snapons.nginx_parser.parameters_form.group_1.description"
      parameters = {
        log_format = true
      } # parameters
    } # group 1

  } # parameters_form

  attach_operations = {

    configure_parsing = {

      type = "execute_expression"
      expression = `

string log_format = @profile{"create_profile_wizard_info"}{"snapons"}{"parser"}{"parameters"}{"log_format"}{"parameter_value"};

# This subroutine creates a database field.
# Referencing the node is what creates it; no attributes are set on it here.
subroutine(create_database_field(node profile, string fieldname), (
  node database_field = profile{"database"}{"fields"}{fieldname};
)); # create_database_field()

# This subroutine creates an aggregating (summed, 64-bit) database field.
#   type                 - value stored in the "type" subnode of the database field
#   display_format_type  - value stored in the "display_format_type" subnode
subroutine(create_aggregating_database_field(node profile, string fieldname, string type, string display_format_type), (
  node database_field = profile{"database"}{"fields"}{fieldname};
  @database_field{"type"} = type;
  @database_field{"integer_bits"} = 64;
#  if ('lang_stats.field_labels_by_log_format.nginx_pattern'?{fieldname}) then
#    @database_field{"label"} = "{" . "=capitalize(expand(lang_stats.field_labels_by_log_format.nginx_pattern." . fieldname . "))=" . "}";
  # Use the translated label when one exists; otherwise fall back to the raw field name.
  # The label text is assembled from pieces so that it is stored as an embedded expression
  # rather than written as one literal here.
  if ('lang_stats.field_labels'?{fieldname}) then
    @database_field{"label"} = "{" . "=capitalize(expand(lang_stats.field_labels." . fieldname . "))=" . "}";
  else
    @database_field{"label"} = fieldname;
  @database_field{"display_format_type"} = display_format_type;
  @database_field{"aggregation_method"} = "sum";
)); # create_aggregating_database_field()

# This subroutine creates a log field of the given type
subroutine(create_log_field(node profile, string fieldname, string type), (
  node logfield = profile{"log"}{"fields"}{fieldname};
  @logfield{"type"} = type;
)); ##=== create_log_field() ===##

# Log fields which must come after all the fields captured by the regular expression are
# collected here and created once parsing of the format is complete.
node log_fields_at_end = new_node();
subroutine(create_log_field_at_end(node log_fields_at_end, string fieldname, string type), (
  # NOTE(review): unlike the other helpers, this assignment has no leading @ -- confirm that
  # the type value is really stored in the node.
  log_fields_at_end{fieldname}{"type"} = type;
)); # create_log_field_at_end()

# Delete any existing log fields
delete_node(profile{"log"}{"fields"});

# If the pattern is the whole line, remove the log_format and name and quotes
if (matches_regular_expression(log_format, "^log_format [^ ]+ '(.*)'; *$")) then
  log_format = $1;

# If the log_format is in single quotes, remove them
else if (matches_regular_expression(log_format, "^'(.*)' *$")) then
  log_format = $1;

# Parse the string, building the regular expression from it, and the fields.
# Each pass through the loop consumes a prefix of log_format, so the loop ends when the
# whole format has been consumed.
string regexp = "^";
string field_regexp;
string field_name;
string log_field_type;
while (length(log_format) != 0) (

  #echo('log_format="' . log_format . '"');

  # Handle '', which is a no-op
  if (matches_regular_expression(log_format, "^''(.*)$")) then
    log_format = $1;

  # Handle $ variables.  The name may contain digits (e.g. time_iso8601), and the variable
  # may be the very last thing in the format, so the remainder is allowed to be empty.
  else if (matches_regular_expression(log_format, '^[$]([a-z_][a-z0-9_]*)(.*)$')) then (

    field_name = $1;
    log_format = $2;

    # By default a variable is one run of characters containing no space or double quote
    field_regexp = '([^ "]+)';
    log_field_type = "flat";

    #echo('field_name: "' . field_name . '"');
    #echo('log_format remainder: "' . log_format . '"');

    # For time_local, call it date_time and ignore the timezone
    if (field_name eq 'time_local') then (
      field_regexp = '([^ ]+) [^ ]+';
      field_name = 'date_time';
      log_field_type = 'date_time';
    );

    # Handle a "request", which is something like "GET /index.html HTTP/1.1" (three fields).
    # The first two captures get their log fields here; the third (protocol) is created by
    # the common code below.
    else if (field_name eq "request") then (
      field_regexp = '([A-Z]+) (/[^ ]*) ([^ ]+)';
      create_log_field(profile, 'method', 'flat');
      create_log_field(profile, 'cs_uri_stem', 'flat');
#      create_log_field('protocol', 'flat');
      field_name = 'protocol';
    );

    # Allow spaces in the referrer and user agent.  nginx spells the variable http_referer
    # (one r, like the HTTP header); the two-r spelling is still accepted for compatibility.
    if ((field_name eq 'http_referer') or (field_name eq 'http_referrer')) then
      field_regexp = '([^"]+)';
    if (field_name eq 'http_user_agent') then
      field_regexp = '([^"]+)';

    # Add this field to the regular expression
    regexp .= field_regexp;
    #echo("adding '" . field_regexp . "' to regexp: now regexp=" . regexp);

    # Make sure the field name is unique, by appending 2, 3, ... to a repeated name
    if (profile{'log'}{'fields'}?{field_name}) then (
      int field_name_id = 2;
      while (profile{'log'}{'fields'}?{field_name . field_name_id})
        field_name_id++;
      field_name .= field_name_id;
      #echo("Chose unique field name: " . field_name);
    ); # if field name exists

    create_log_field(profile, field_name, log_field_type);

    # date_time also gets derived day_of_week and hour_of_day fields
    if (field_name eq "date_time") then (
#      create_log_field_at_end(log_fields_at_end, 'date_time', 'date_time');
      create_log_field_at_end(log_fields_at_end, 'day_of_week', 'flat');
      create_log_field_at_end(log_fields_at_end, 'hour_of_day', 'flat');
      create_database_field(profile, 'date_time');
      create_database_field(profile, 'day_of_week');
      create_database_field(profile, 'hour_of_day');
    );

    # Create derived field for user_agent
#    else if (field_name eq "user_agent") then (
#      create_log_field(profile, 'operating_system', 'flat');
#      create_database_field(profile, 'operating_system');
#      create_log_field_at_end(log_fields_at_end, 'web_browser', 'flat');
#      create_database_field(profile, 'web_browser');
#    );

    # Create derived field for remote_host
#    else if (field_name eq "remote_host") then (
#      create_database_field(profile, 'remote_host');
#      create_log_field_at_end(log_fields_at_end, 'location', 'flat');
#      create_log_field_at_end(log_fields_at_end, 'country', 'flat');
#      create_log_field_at_end(log_fields_at_end, 'region', 'flat');
#      create_log_field_at_end(log_fields_at_end, 'city', 'flat');
#      create_database_field(profile, 'location');
#    );

    # Create derived fields for referrer
#    else if (field_name eq "referrer") then (
#      create_database_field(profile, 'referrer');
#      create_log_field_at_end(log_fields_at_end, 'search_engine', 'flat');
#      create_database_field(profile, 'search_engine');
#      create_log_field_at_end(log_fields_at_end, 'search_phrase', 'flat');
#      create_database_field(profile, 'search_phrase');
#    );

    # Create derived file type field
#    else if (field_name eq "url") then (
#      create_database_field(profile, 'url');
#      create_log_field_at_end(log_fields_at_end, 'file_type', 'flat');
#      create_database_field(profile, 'file_type');
#    );

    # Handle aggregating fields (summed numerical fields)
    else if (field_name eq "body_bytes_sent") then
      create_aggregating_database_field(profile, field_name, "int", "bandwidth");

    # This is created by the web_server_package snapon
    else if (field_name eq "bytes_sent") then
      ();
#      create_aggregating_database_field(profile, field_name, "int", "bandwidth");

    # NOTE(review): nginx documents request_time as seconds with millisecond resolution
    # (e.g. 0.123), while this stores it as an int shown as milliseconds -- confirm.
    else if (field_name eq "request_time") then
      create_aggregating_database_field(profile, field_name, "int", "duration_millis");

    # Don't put the full agent in the database
    else if (field_name eq "http_user_agent") then
      ();

    # Create a normal database field
    else
      create_database_field(profile, field_name);

  ); # if $

  # If it's not a variable, add it literally to the regular expression.
  # Characters which are special in a regular expression are wrapped in a bracket expression
  # so they match themselves; otherwise a separator such as | or a parenthesis in the format
  # would change the meaning of the expression or the numbering of its capture groups.
  # (Caret, backslash and a stray dollar sign are still copied as-is.)
  else if (matches_regular_expression(log_format, '^(.)(.*)$')) then (

    if (($1 eq '[') or ($1 eq ']') or
        ($1 eq '(') or ($1 eq ')') or
        ($1 eq '{') or ($1 eq '}') or
        ($1 eq '.') or ($1 eq '|') or
        ($1 eq '*') or ($1 eq '+') or ($1 eq '?')) then
      regexp .= ('[' . $1 . ']');
    else
      regexp .= $1;
    #echo("adding literal to regexp: now regexp=" . regexp);

    log_format = $2;

  );

); # while log_format is not empty

#regexp .= "$";

# Install the parsing_regular_expression
#echo("Final regular expression: " . regexp);
@profile{"log"}{"format"}{"parsing_regular_expression"} = regexp;

#echo("numerical fields: " . node_as_string(profile{"database"}{"numerical_fields"}));

# Create any final log fields
node lfae;
foreach lfae log_fields_at_end (
  #echo("Final log field creation: " . node_name(lfae));
  create_log_field(profile, node_name(lfae), @lfae{"type"});
);
create_log_field(profile, "events", "flat");

#echo("Final log fields: " . node_as_string(profile{"log"}{"fields"}));
#echo("Final database fields: " . node_as_string(profile{"database"}{"fields"}));

`

    } # configure_parsing

  } # attach_operations

} # nginx_logformat_parser