# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

iissmtpw3_c = {

plugin_version = "1.4.2"
info.1.manufacturer = "Microsoft"
info.1.device = "IIS SMTP W3C"
info.1.version.1 = "6.0"

# 2006-10-02 - GMF - 1.0beta - Initial implementation
# 2006-11-09 - KBB - 1.1beta - Added support for a format variant. Added operation and
#  server_response fields and connect/disconnect counts. An entry is now accepted on server
#  response for new format and on TO/FROM for old format.
# 2007-04-26 - KBB - 1.2beta - Added support for bytes (cs+sc+total). Server response is now
#  taken from the sc_status field if it is non-zero, since no server response line follows.
#  Added separate accept for each TO operation that precedes a server response.
#  Maintained compatibility with format with no sc_status.
# 2007-09-11 - KBB - 1.2 - Renumbered per new beta policy.
# 2008-01-31 - GMF - 1.3 - Added support for combined-format logs, without sc-status field.
# 2010-05-14 - GMF - 1.3.1 - Improved recovery for status_wait, when end of multi-line DATA is not found
# 2010-10-06 - 1.3.2 - MSG - Edited info lines.
# 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code
# 2010-12-06 - 1.3.4 - gas - added support for log without cs_uri_query by changing the
#  volatile.log_data_line regex and the required_fields values for
#  parse_method and parse_from_to
# 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
# 2011-03-02 - 1.3.6 - GMF - Added field associations for recipient, to clean up pointless fields (and "(empty)")
# 2011-03-28 - 1.4 - GMF - Improved tracking of response codes; added messages_failed.
# 2011-04-05 - 1.4.1 - GMF - Added extraction of response codes on lines where the status code is on the same line as MAIL FROM or RCPT TO.
# 2011-08-30 - 1.4.2 - GMF - Fixed reporting of connect/disconnect in logs with response lines; removed per-line-byte fields

# The name of the log format
log.format.format_label = "IIS SMTP W3C Log Format"
log.miscellaneous.log_data_type = "mail_server"
log.miscellaneous.log_format_type = "mail_server"

# This is IIS SMTP format if it has an IIS Software line, and a line containing
# "+FROM:", " FROM:" or " RCPT ". The expression is evaluated per line: v.iis
# records that the Software header has been seen, and the result is true only
# on a line that matches the FROM/RCPT pattern once v.iis exists.
log.format.autodetect_lines = 100
log.format.autodetect_expression = `
if (matches_regular_expression(volatile.log_data_line, '^#Software: Microsoft Internet Information Services')) then (
  v.iis = true;
);
if (node_exists("v.iis") and matches_regular_expression(volatile.log_data_line, '([+ ]FROM:| RCPT )')) then
  true;
else
  false;
`

# W3C uses double quotes only, but apostrophes may occur in email addresses.
log.format.treat_apostrophes_as_quotes = false

# Accept collected entries after they're not used for 1000 lines
log.format.collected_entry_lifespan = 1000

# 2008-01-31 - GMF - This has to be true, because otherwise all collected entries
#                    will be accepted at the end, so a file containing only
#                    MAIL FROM and RCPT TO lines will accept additional entries at
#                    the end of processing.
log.format.discard_expired_entries = true

# Log fields
# These are the fields the parsing filters below populate with
# set_collected_field(); c_ip is always passed as the first argument
# (presumably the key of the collected entry -- confirm against the Sawmill docs).
log.fields = {
  sender = {
    # 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
    # type = "hierarchical"
    # hierarchy_dividers = "@"
    # left_to_right = false
    # leading_divider = "false"
  } # sender
  recipient = {
    # 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
    # type = "hierarchical"
    # hierarchy_dividers = "@"
    # left_to_right = false
    # leading_divider = "false"
  } # recipient
  operation = ""
  server_response = ""
  error_message = ""
  # numeric
  messages_queued = ""
  messages_delivered = ""
  messages_failed = ""
  bytes_queued = ""
  bytes_delivered = ""
  connections = ""
  disconnections = ""
  events = ""
  # total_bytes = ""
  # Flags set by parse_from_to when a "+FROM:" / "+TO:" line has already been accepted.
  already_accepted_messages_queued = ""
  already_accepted_messages_delivered = ""
  last_request = ""
  resets = ""
} # log.fields

# Variables shared by all parsing filters:
#   status      - copy of sc_status, written by check_status when that field exists
#   status_wait - guard tested by most filters; it is never set to true in the
#                 part of the file visible here (NOTE(review): confirm where it is set)
#   v.size      - message size remembered from the last MAIL FROM line
log.filter_initialization = `
string status = ""; # For use in filter where sc_status not required
bool status_wait = false;
v.size = "";
`

# Log Parsing Filters
# Please note that filters are separate because of the need to use requires_field,
# but the order of the filters is important, so be careful in changing it.
log.parsing_filters = {

  # This is done for all lines first because some lines are broken
  # by email messages and we need to accept on a line without a date.
  set_date = {
    value = `
if (!status_wait) then ( # Bad datetime values in this section.
  set_collected_field(c_ip, 'date', date);
  set_collected_field(c_ip, 'time', time);
  set_collected_field(c_ip, 'c_ip', c_ip);
);
`
  } # set_date

  # Capture the fact that the format has the sc_status field.
  check_status = {
    requires_fields = {
      sc_status = true
    }
    value = `
status = sc_status;
`
  } # check_status

  # Handles server response lines and the HELO/EHLO, QUIT, RSET and
  # unrecognized verbs; MAIL and RCPT are deliberately left to parse_from_to.
  parse_method = {
    requires_fields = {
      cs_method = true
      cs_uri_query = true
    }
    value = `
if (!status_wait) then ( # The regexps wouldn't match anyway, so this is to save time.

# This is a server response line.
# 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
# 2010-10-26 - GMF - Modified this to accept only on status code 2xx
# A line whose cs_method is "-" and whose cs_uri_query starts with three digits
# is handled as the reply to the operation last recorded for this c_ip.
if ((cs_method eq '-') and matches_regular_expression(cs_uri_query, '^([0-9][0-9][0-9])(.*)$')) then (
  v.server_response = $1;
  v.remainder = $2;
  set_collected_field(c_ip, 'server_response', v.server_response);

  # If this is in response to a MAIL FROM line, we're queuing the message now
  if (get_collected_field(c_ip, 'operation') eq "MAIL FROM") then (
    set_collected_field(c_ip, 'messages_failed', 0);
    set_collected_field(c_ip, 'messages_queued', 1);
    set_collected_field(c_ip, 'messages_delivered', 0);
    set_collected_field(c_ip, 'recipient', "");
    set_collected_field(c_ip, 'events', 1);
    accept_collected_entry(c_ip, true);
  ); # If response to MAIL FROM

  # If this is in response to a RCPT TO line, we're sending the message now.
  else if (get_collected_field(c_ip, 'operation') eq "RCPT TO") then (
    if (matches_regular_expression(v.server_response, "^2")) then (
      set_collected_field(c_ip, 'messages_failed', 0);
      set_collected_field(c_ip, 'messages_queued', 0);
      set_collected_field(c_ip, 'messages_delivered', 1);
      # parse_from_to joins pending recipients with "*"; peel them off one at a
      # time and accept one entry per recipient. $1 is read after v.recipients
      # is reassigned, so the order of these statements matters.
      v.recipients = get_collected_field(c_ip, 'recipient');
      while (matches_regular_expression(v.recipients, '^([^*]+)\\\\*(.*)$')) (
        v.recipients = $2;
        set_collected_field(c_ip, 'recipient', $1);
        set_collected_field(c_ip, 'events', 1);
        accept_collected_entry(c_ip, true);
      );

      # 2008-01-31 - GMF - Don't allow more than one connection per HELO (or disconnection per QUIT), even if there are multiple server response lines.
      set_collected_field(c_ip, 'connections', 0);
      set_collected_field(c_ip, 'disconnections', 0);
      set_collected_field(c_ip, 'recipient', v.recipients); # last one or only one
      set_collected_field(c_ip, 'events', 1);
      accept_collected_entry(c_ip, true);
      # set_collected_field(c_ip, 'last_request', "");
    );
    else if (matches_regular_expression(v.server_response, "^[45]")) then (
      set_collected_field(c_ip, 'messages_delivered', 0);
      set_collected_field(c_ip, 'messages_failed', 1);
      set_collected_field(c_ip, 'server_response', v.server_response);
      set_collected_field(c_ip, 'error_message', v.remainder);
      accept_collected_entry(c_ip, false);
      # set_collected_field(c_ip, 'last_request', "");
    );

    # If it's not 2xx or 4xx or 5xx, maybe it's something like 354 "go ahead"; ignore it
    else (
    );

  ); # if this is response to RCPT TO

  # If this is in response to a QUIT, just accept a disconnect event
  if (get_collected_field(c_ip, 'operation') eq "QUIT") then (
    accept_collected_entry(c_ip, false);
  );

); # if server response line

# Handle QUIT lines
else if (matches_regular_expression(cs_method, '[Qq][Uu][Ii][tT]')) then (
  set_collected_field(c_ip, 'messages_failed', 0);
  set_collected_field(c_ip, 'messages_queued', 0);
  set_collected_field(c_ip, 'messages_delivered', 0);
  set_collected_field(c_ip, 'disconnections', 1);
  set_collected_field(c_ip, 'sender', '');
  set_collected_field(c_ip, 'recipient', '');
  set_collected_field(c_ip, 'operation', 'QUIT');
); # if QUIT

# Handle RSET lines
# The 'resets' flag set here is tested later by accept_if_status.
else if (matches_regular_expression(cs_method, '[Rr][Ss][Ee][Tt]')) then (
  set_collected_field(c_ip, 'resets', 1);
  set_collected_field(c_ip, 'bytes_delivered', "");
  set_collected_field(c_ip, 'bytes_queued', "");
  set_collected_field(c_ip, 'messages_delivered', "");
  set_collected_field(c_ip, 'messages_queued', "");
  set_collected_field(c_ip, 'messages_failed', "");
  set_collected_field(c_ip, 'operation', 'RSET');
); # if RSET

# Handle EHLO/HELO lines
else if (matches_regular_expression(cs_method, '([hH][eE][lL][oO]|[eE][hH][lL][oO])')) then (
  set_collected_field(c_ip, 'connections', 1);
  set_collected_field(c_ip, 'operation', uppercase($1));

  # This is the beginning of a connection, so clear all accumulated fields.
  set_collected_field(c_ip, 'bytes_delivered', '');
  set_collected_field(c_ip, 'bytes_queued', '');
  set_collected_field(c_ip, 'cs_bytes', '');
  set_collected_field(c_ip, 'disconnections', '');
  set_collected_field(c_ip, 'messages_delivered', '');
  set_collected_field(c_ip, 'messages_queued', '');
  set_collected_field(c_ip, 'recipient', '');
  set_collected_field(c_ip, 'sc_bytes', '');
  set_collected_field(c_ip, 'sender', '');
  set_collected_field(c_ip, 'server_response', '');
  set_collected_field(c_ip, 'size', '');
  # set_collected_field(c_ip, 'total_bytes', '');
); # if HELO/EHLO

# 2008-01-31 - GMF - Do nothing for RCPT (to) or MAIL (from) lines; in particular, don't clear all the fields like we do for unknown cs_methods, below.
else if (matches_regular_expression(cs_method, '^[Rr][Cc][Pp][Tt]$')) then (
);
else if (matches_regular_expression(cs_method, '^[Mm][Aa][Ii][Ll]$')) then (
);

# 2008-01-31 - GMF - If it's some other operation, remember what it is, so we can associate the server responses with it.
else (
  set_collected_field(c_ip, 'operation', cs_method);

  # 2008-01-31 - GMF - This isn't a standard event (RCPT TO, EHLO, MAIL FROM, etc.), so none of the numerical fields apply--clear them.
# Catch-all branch of parse_method, continued: reset the per-event fields.
# The sender and recipient resets are intentionally commented out so they survive.
  set_collected_field(c_ip, 'bytes_delivered', '');
  set_collected_field(c_ip, 'bytes_queued', '');
  set_collected_field(c_ip, 'cs_bytes', '');
  set_collected_field(c_ip, 'disconnections', '');
  set_collected_field(c_ip, 'messages_delivered', '');
  set_collected_field(c_ip, 'messages_queued', '');
  # set_collected_field(c_ip, 'recipient', '');
  set_collected_field(c_ip, 'sc_bytes', '');
  # set_collected_field(c_ip, 'sender', '');
  set_collected_field(c_ip, 'server_response', '');
  set_collected_field(c_ip, 'size', '');
  # set_collected_field(c_ip, 'total_bytes', '');
); # else

); # if !status_wait
`
  } # parse_method

  # Do these methods separately since more fields are required.
  # Records the operation (DATA or BDAT, upper-cased) and the cs_bytes/sc_bytes
  # of the line on the collected entry for this c_ip. Nothing is accepted here.
  parse_data = {
    requires_fields = {
      cs_method = true
      cs_bytes = true
      sc_bytes = true
    }
    value = `
# Don't check for status_wait, since these lines will match.

# Handle DATA lines
#2007-03-24 03:37:13 199.9.99.99 abc.xyz.com.do SMTPSVC1 MAILER1 199.99.199.79 0 DATA - +<001601c76da4$1d75ec00$07755854@xyz> 250 0 124 13549 31359 SMTP - - - -
# The verb is matched case-insensitively, like HELO/QUIT/RSET in parse_method.
# The DATA alternative used to start with [aD], which missed a lower-case "d"
# (and matched "aATA"); it now starts with [dD].
if (matches_regular_expression(cs_method, '([bB][dD][aA][tT]|[dD][aA][tT][aA])')) then (
  set_collected_field(c_ip, 'operation', uppercase($1));
  set_collected_field(c_ip, 'cs_bytes', cs_bytes);
  set_collected_field(c_ip, 'sc_bytes', sc_bytes);
  # set_collected_field(c_ip, 'total_bytes', 0.0 + cs_bytes + sc_bytes);
); # if DATA/BDAT
`
  } # parse_data

  # Handles the "FROM:" and "TO:" arguments carried in cs_uri_query
  # (MAIL FROM / RCPT TO), including the "+"-prefixed variants.
  parse_from_to = {
    requires_fields = {
      cs_uri_query = true
    }
    value = `
if (!status_wait) then ( # The regexps wouldn't match anyway, so this is to save time.
# Extract server response
# Only a three-digit sc_status is recorded; other values leave server_response untouched.
if (matches_regular_expression(sc_status, '^([0-9][0-9][0-9])$')) then
  set_collected_field(c_ip, 'server_response', $1);

# Handle FROM lines
if (matches_regular_expression(cs_uri_query, '^([+]*)[Ff][Rr][Oo][Mm]: *(.*)$')) then (
  set_collected_field(c_ip, 'operation', 'MAIL FROM');

  # Get the size, if any
  # $1/$2 must be saved before the next regular expression overwrites them.
  v.optional_plus = $1;
  v.sender = $2;
  v.size = 0;
  if (matches_regular_expression(v.sender, '^(.*).[Ss][Ii][Zz][Ee]=([0-9]+)')) then (
    v.sender = $1;
    v.size = $2;
  );

  # Remove <> from the address
  #if (matches_regular_expression(v.sender, '^<(.*)>$')) then
  # There may be a + before < and perhaps info after >
  if (matches_regular_expression(v.sender, '<(.*)>')) then
    v.sender = $1;

  set_collected_field(c_ip, 'recipient', ''); # Probably only needed for no sc_status case.
  set_collected_field(c_ip, 'sender', v.sender);
  set_collected_field(c_ip, 'size', v.size);
  set_collected_field(c_ip, 'messages_queued', 1);
  set_collected_field(c_ip, 'messages_delivered', 0);
  set_collected_field(c_ip, 'bytes_queued', v.size);
  set_collected_field(c_ip, 'bytes_delivered', 0);

  # 2011-08-30 - GMF - Setting connections=0 here overwrites the connections=1 we set in the EHLO line, if there's a response line to the EHLO. If we do that, we'll never count any connections on logs with separate response lines for EHLO.
  # set_collected_field(c_ip, 'connections', 0);

  # 2008-01-31 - GMF - Oddly, in all examples we see, a leading + on the cs_uri_query field indicates that there
  #  will *not* be a following status code line, so we have to accept it here.
  if (v.optional_plus eq "+") then (
    set_collected_field(c_ip, 'events', 1);
    time = get_collected_field(c_ip, 'time');
    accept_collected_entry(c_ip, true);

    # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
    # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code
    # See already_accepted_messages_queued comment
    set_collected_field(c_ip, 'already_accepted_messages_queued', 1);
  );

); # if FROM

# Handle TO lines
if (matches_regular_expression(cs_uri_query, '^([+]*)[Tt][Oo]: *(.*)$')) then (
  set_collected_field(c_ip, 'operation', 'RCPT TO');
  # Remember the recipients gathered so far before this line's recipient replaces them.
  v.recipients = get_collected_field(c_ip, 'recipient');
  v.optional_plus = $1;
  v.recipient = $2;

  # Remove <> from the address
  #if (matches_regular_expression(v.recipient, '^<(.*)>$')) then
  # There may be a + before < and a + and status info after >
  if (matches_regular_expression(v.recipient, '<(.*)>')) then
    v.recipient = $1;

  # Extract server response
  if (matches_regular_expression(v.recipient, '^[^ ]+ ([0-9][0-9][0-9]) ')) then
    set_collected_field(c_ip, 'server_response', $1);

  set_collected_field(c_ip, 'recipient', v.recipient);
  set_collected_field(c_ip, 'messages_queued', 0);
  set_collected_field(c_ip, 'messages_delivered', 1);
  set_collected_field(c_ip, 'bytes_queued', 0);
  # v.size is whatever the most recent FROM line stored (filter-wide variable).
  set_collected_field(c_ip, 'bytes_delivered', v.size);
  set_collected_field(c_ip, 'connections', 0);

  # 2008-01-31 - GMF - Oddly, in all examples we see, a leading + on the cs_uri_query field indicates that there
  #  will *not* be a following status code line, so we have to accept it here.
  if (v.optional_plus eq "+") then (
    set_collected_field(c_ip, 'events', 1);
    accept_collected_entry(c_ip, true);

    # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
    # 2010-10-26 - GMF - But sometimes, we do get a response code in the same line. So, remember that we already accepted this.
# 2010-09-30 05:14:28 1.2.3.4 ABC01 SMTPSVC1 SPAMFILTER1 9.8.7.6 0 RCPT - +TO: 250 0 37 34 0 SMTP - - - -
# 2010-09-30 05:14:28 1.2.3.4 ABC01 SMTPSVC1 SPAMFILTER1 9.8.7.6 0 DATA - +<930201080723PM55db5f2948f24e73b4c92a01487eb75d@somewhere.com+> 250 0 157 3507 15 SMTP - - - -
    set_collected_field(c_ip, 'already_accepted_messages_delivered', 1);
  );

  # If this won't be accepted based on sc_status below, gather
  # until server_response found.
  # The pending recipients are joined with "*"; parse_method splits on the same
  # character when the server response arrives.
  else if (status eq '0' and v.recipients ne '' and v.recipients ne '(empty)') then ( # This is why | isn't a good separator!
    #2007-03-24 03:47:23 18.188.8.88 OutboundConnectionCommand SMTPSVC1 MAILER1 - 25 RCPT - TO: 0 0 4 0 0 SMTP - - - -
    set_collected_field(c_ip, 'recipient', v.recipients . "*" . v.recipient);
  );

); # if TO

); # if !status_wait
`
  } # parse_from_to

  # Accepts the collected entry when the line itself carries a three-digit
  # sc_status, unless parse_from_to already accepted it on a "+FROM:" or "+TO:" line.
  accept_if_status = {
    requires_fields = {
      sc_status = true
    }
    value = `
if (!status_wait) then (

# Assume that if sc_status is zero that a server response line will follow.
# It might also be '(empty)' if the line is broken by a mail message (bug?).
if (matches_regular_expression(sc_status, '^[0-9][0-9][0-9]$')) then (
  if (contains(get_collected_field(c_ip, 'recipient'), '*')) then (
    # Skip the accept if either flag is set: "queued" is set after a "+FROM:"
    # accept and "delivered" after a "+TO:" accept. The second test used to
    # repeat the "queued" flag, so the "delivered" flag was never consulted.
    if ((get_collected_field(c_ip, 'already_accepted_messages_queued') != 1) and (get_collected_field(c_ip, 'already_accepted_messages_delivered') != 1)) then (
      # Nothing is accepted for an entry that has seen a RSET.
      if (get_collected_field(c_ip, 'resets') == 1) then (
      );

      # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code
      # 2010-10-26 - GMF - If it starts with 2, that's a successful result, and we should accept. Otherwise, don't accept.
else if (matches_regular_expression(sc_status, '^[2]')) then ( set_collected_field(c_ip, 'server_response', sc_status); set_collected_field(c_ip, 'events', 1); set_collected_field(c_ip, 'connections', 0); accept_collected_entry(c_ip, true); ); ); # if not already_accepted_messages_queued ); # recipient * ); # Example of email message breaking entry: #2007-03-24 03:37:45 188.48.88.228 friend SMTPSVC1 MAILER1 188.68.168.88 0 DATA - +<000001c76dc5$c554c380$0100007f@perez-d6fec328d> #From:+"Edward Scissorhands"+ #(....) #--------------ms070109000304010705030302 #Content-Type:+text/html;+charset="koi8-r" #Content-Transfer-Encoding:+quoted-printable # #