# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

iissmtpw3_c = {

  # Plug-in identification
  plugin_version = "1.6"
  info.1.manufacturer = "Microsoft"
  info.1.device = "IIS SMTP W3C"
  info.1.version.1 = "6.0"

  # Change history
  # 2006-10-02 - GMF - 1.0beta - Initial implementation
  # 2006-11-09 - KBB - 1.1beta - Added support for a format variant. Added operation and
  #   server_response fields and connect/disconnect counts. An entry is now accepted on server
  #   response for new format and on TO/FROM for old format.
  # 2007-04-26 - KBB - 1.2beta - Added support for bytes (cs+sc+total). Server response is now
  #   taken from the sc_status field if it is non-zero, since no server response line follows.
  #   Added separate accept for each TO operation that precedes a server response.
  #   Maintained compatibility with format with no sc_status.
  # 2007-09-11 - KBB - 1.2 - Renumbered per new beta policy.
  # 2008-01-31 - GMF - 1.3 - Added support for combined-format logs, without sc-status field.
  # 2010-05-14 - GMF - 1.3.1 - Improved recovery for status_wait, when end of multi-line DATA is not found
  # 2010-10-06 - 1.3.2 - MSG - Edited info lines.
  # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered
  #   to keep from accepting twice on + lines with response code
  # 2010-12-06 - 1.3.4 - gas - Added support for logs without cs_uri_query by changing the
  #   volatile.log_data_line regex and the required_fields values for
  #   parse_method and parse_from_to
  # 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
  # 2011-03-02 - 1.3.6 - GMF - Added field associations for recipient, to clean up pointless fields (and "(empty)")
  # 2011-03-28 - 1.4 - GMF - Improved tracking of response codes; added messages_failed.
  # 2011-04-05 - 1.4.1 - GMF - Added extraction of response codes on lines where the status code is on
  #   the same line as MAIL FROM or RCPT TO.
  # 2011-08-30 - 1.4.2 - GMF - Fixed reporting of connect/disconnect in logs with response lines;
  #   removed per-line-byte fields
  # 2011-11-26 - 1.5 - GMF - Added support for OutboundConnection lines; split fields into
  #   messages_received (inbound messages received by server, i.e. MAIL FROM lines inbound),
  #   messages_queued (RCPT TO lines inbound), and messages_delivered (MAIL FROM lines for
  #   outbound connections)
  # 2013-02-25 - 1.6 - GMF - Added extraction and reporting of s-computername and s-sitename fields

  # Human-readable name of the log format, and the kind of data it carries
  log.format.format_label = "IIS SMTP W3C Log Format"
  log.miscellaneous.log_data_type = "mail_server"
  log.miscellaneous.log_format_type = "mail_server"

  # Autodetection: the expression below is given each candidate line in
  # volatile.log_data_line.  Seeing the IIS "#Software:" header is remembered
  # in v.iis; the expression yields true only on a line that contains
  # "+FROM:", " FROM:" or " RCPT " once that header has been seen.
  log.format.autodetect_lines = 100
  log.format.autodetect_expression = `
    if (matches_regular_expression(volatile.log_data_line, '^#Software: Microsoft Internet Information Services')) then
      v.iis = true;

    if (!node_exists("v.iis")) then
      false;
    else if (matches_regular_expression(volatile.log_data_line, '([+ ]FROM:| RCPT )')) then
      true;
    else
      false;
  `

  # Only double quotes act as quotes in W3C logs; an apostrophe can be a
  # legitimate character of an email address, so it must not be treated as one.
  log.format.treat_apostrophes_as_quotes = false

  # A collected entry that has gone unused for 1000 lines expires
  log.format.collected_entry_lifespan = 1000

  # 2008-01-31 - GMF - This has to be true, because otherwise all collected entries
  #                    will be accepted at the end, so a file containing only
  #                    MAIL FROM and RCPT TO lines will accept additional entries at
  #                    the end of processing.
log.format.discard_expired_entries = true

  # Log fields.
  # These are the fields the parsing filters below populate through
  # set_collected_field(); the collected entry is keyed by c_ip.
  log.fields = {

    sender = {
      # 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
      # type = "hierarchical"
      # hierarchy_dividers = "@"
      # left_to_right = false
      # leading_divider = "false"
    } # sender

    recipient = {
      # 2011-03-02 - 1.3.5 - GMF - Made email address fields non-hierarchical by default
      # type = "hierarchical"
      # hierarchy_dividers = "@"
      # left_to_right = false
      # leading_divider = "false"
    } # recipient

    operation = ""
    server_response = ""
    error_message = ""

    # numeric
    messages_received = ""
    messages_queued = ""
    messages_relayed = ""
    messages_failed = ""
    bytes_received = ""
    bytes_queued = ""
    bytes_relayed = ""
    connections = ""
    disconnections = ""
    events = ""
    # total_bytes = ""

    # Bookkeeping fields used by the filters to avoid accepting an entry twice
    already_accepted_messages_queued = ""
    already_accepted_messages_delivered = ""
    last_request = ""
    resets = ""

  } # log.fields

  # State shared by the parsing filters.
  #   status      - copy of sc_status, set by check_status when the log has that field
  #   status_wait - guards most filters; it is initialised to false here and is not
  #                 set anywhere in the filters visible in this part of the file
  #   v.size      - message size taken from MAIL FROM, reused for RCPT TO byte counts
  log.filter_initialization = `
    string status = ""; # For use in filter where sc_status not required
    bool status_wait = false;
    v.size = "";
  `

  # Log Parsing Filters
  # Please note that filters are separate because of the need to use requires_fields,
  # but the order of the filters is important, so be careful in changing it.
  log.parsing_filters = {

    # This is done for all lines first because some lines are broken
    # by email messages and we need to accept on a line without a date.
    set_date = {
      value = `
        if (!status_wait) then ( # Bad datetime values in this section.
          set_collected_field(c_ip, 'date', date);
          set_collected_field(c_ip, 'time', time);
          set_collected_field(c_ip, 'c_ip', c_ip);
        );
      `
    } # set_date

    # Capture the fact that the format has the sc_status field.
    check_status = {
      requires_fields = {
        sc_status = true
      }
      value = `
        status = sc_status;
      `
    } # check_status

    # Dispatch on cs_method:
    #   '-' with a 3-digit cs_uri_query -> server response line; accepts the collected
    #                                      entry according to the pending operation
    #   QUIT / RSET / HELO / EHLO       -> update counters and the pending operation
    #   RCPT / MAIL                     -> nothing here (handled by parse_from_to)
    #   anything else                   -> remember the operation, clear numeric fields
    parse_method = {
      requires_fields = {
        cs_method = true
        cs_uri_query = true
      }
      value = `
        if (!status_wait) then ( # The regexps wouldn't match anyway, so this is to save time.

          # This is a server response line.
          # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
          # 2010-10-26 - GMF - Modified this to accept only on status code 2xx
          if ((cs_method eq '-') and matches_regular_expression(cs_uri_query, '^([0-9][0-9][0-9])(.*)$')) then (

            v.server_response = $1;
            v.remainder = $2;
            set_collected_field(c_ip, 'server_response', v.server_response);
            set_collected_field(c_ip, 's_computername', s_computername);
            set_collected_field(c_ip, 's_sitename', s_sitename);

            # If this is in response to a MAIL FROM line, we're queuing the message now
            if (get_collected_field(c_ip, 'operation') eq "MAIL FROM") then (

              set_collected_field(c_ip, 'messages_failed', 0);

              # If this is OutboundConnectionCommand, it's a relayed message sent to another SMTP server
              if (matches_regular_expression(cs_username, "^OutboundConnection")) then (
                set_collected_field(c_ip, 'messages_received', 0);
                set_collected_field(c_ip, 'messages_queued', 0);
                set_collected_field(c_ip, 'messages_relayed', 1);
              );

              # If this is not OutboundConnectionCommand, it's an inbound connection to this server
              else (
                set_collected_field(c_ip, 'messages_received', 1);
                set_collected_field(c_ip, 'messages_queued', 0);
                set_collected_field(c_ip, 'messages_relayed', 0);
              );

              set_collected_field(c_ip, 'recipient', "");
              set_collected_field(c_ip, 'events', 1);
              accept_collected_entry(c_ip, true);

            ); # If response to MAIL FROM

            # If this is in response to a RCPT TO line, we're sending the message now.
            else if (get_collected_field(c_ip, 'operation') eq "RCPT TO") then (

              if (matches_regular_expression(v.server_response, "^2")) then (

                set_collected_field(c_ip, 'messages_failed', 0);

                # If this is OutboundConnectionCommand, it's the recipient for an outbound connection to another SMTP server. We don't care about recipients in that case, only about messages relayed, which was set on MAIL FROM.
                if (matches_regular_expression(cs_username, "^OutboundConnection")) then (
                  set_collected_field(c_ip, 'messages_received', 0);
                  set_collected_field(c_ip, 'messages_queued', 0);
                  set_collected_field(c_ip, 'messages_relayed', 0);
                );

                # If this is not OutboundConnectionCommand, it's an inbound connection to this server
                else (
                  set_collected_field(c_ip, 'messages_received', 0);
                  set_collected_field(c_ip, 'messages_queued', 1);
                  set_collected_field(c_ip, 'messages_relayed', 0);
                );

                # Recipients gathered by parse_from_to are joined with "*";
                # accept one entry per gathered recipient.
                v.recipients = get_collected_field(c_ip, 'recipient');
                while (matches_regular_expression(v.recipients, '^([^*]+)\\\\*(.*)$')) (
                  v.recipients = $2;
                  set_collected_field(c_ip, 'recipient', $1);
                  set_collected_field(c_ip, 'events', 1);
                  accept_collected_entry(c_ip, true);
                );

                # 2008-01-31 - GMF - Don't allow more than one connection per HELO (or disconnection per QUIT), even if there are multiple server response lines.
                set_collected_field(c_ip, 'connections', 0);
                set_collected_field(c_ip, 'disconnections', 0);

                set_collected_field(c_ip, 'recipient', v.recipients); # last one or only one
                set_collected_field(c_ip, 'events', 1);
                accept_collected_entry(c_ip, true);
                # set_collected_field(c_ip, 'last_request', "");

              );

              else if (matches_regular_expression(v.server_response, "^[45]")) then (
                set_collected_field(c_ip, 'messages_received', 0);
                set_collected_field(c_ip, 'messages_queued', 0);
                set_collected_field(c_ip, 'messages_relayed', 0);
                set_collected_field(c_ip, 'messages_failed', 1);
                set_collected_field(c_ip, 'server_response', v.server_response);
                set_collected_field(c_ip, 'error_message', v.remainder);
                accept_collected_entry(c_ip, false);
                # set_collected_field(c_ip, 'last_request', "");
              );

              # If it's not 2xx or 4xx or 5xx, maybe it's something like 354 "go ahead"; ignore it
              else (
              );

            ); # if this is response to RCPT TO

            # If this is in response to a QUIT, just accept a disconnect event
            if (get_collected_field(c_ip, 'operation') eq "QUIT") then (
              accept_collected_entry(c_ip, false);
            );

          ); # if server response line

          # Handle QUIT lines
          else if (matches_regular_expression(cs_method, '[Qq][Uu][Ii][tT]')) then (
            set_collected_field(c_ip, 'messages_failed', 0);
            set_collected_field(c_ip, 'messages_received', 0);
            set_collected_field(c_ip, 'messages_queued', 0);
            set_collected_field(c_ip, 'messages_relayed', 0);
            set_collected_field(c_ip, 'disconnections', 1);
            set_collected_field(c_ip, 'sender', '');
            set_collected_field(c_ip, 'recipient', '');
            set_collected_field(c_ip, 'operation', 'QUIT');
          ); # if QUIT

          # Handle RSET lines
          else if (matches_regular_expression(cs_method, '[Rr][Ss][Ee][Tt]')) then (
            set_collected_field(c_ip, 'resets', 1);
            set_collected_field(c_ip, 'bytes_received', "");
            set_collected_field(c_ip, 'bytes_queued', "");
            set_collected_field(c_ip, 'bytes_relayed', "");
            set_collected_field(c_ip, 'messages_received', "");
            set_collected_field(c_ip, 'messages_queued', "");
            set_collected_field(c_ip, 'messages_relayed', "");
            set_collected_field(c_ip, 'messages_failed', "");
            set_collected_field(c_ip, 'operation', 'RSET');
          ); # if RSET

          # Handle EHLO/HELO lines
          else if (matches_regular_expression(cs_method, '([hH][eE][lL][oO]|[eE][hH][lL][oO])')) then (

            set_collected_field(c_ip, 'connections', 1);
            set_collected_field(c_ip, 'operation', uppercase($1));

            # This is the beginning of a connection, so clear all accumulated fields.
            set_collected_field(c_ip, 'bytes_received', "");
            set_collected_field(c_ip, 'bytes_queued', "");
            set_collected_field(c_ip, 'bytes_relayed', "");
            set_collected_field(c_ip, 'cs_bytes', '');
            set_collected_field(c_ip, 'disconnections', '');
            set_collected_field(c_ip, 'messages_received', "");
            set_collected_field(c_ip, 'messages_queued', "");
            set_collected_field(c_ip, 'messages_relayed', "");
            set_collected_field(c_ip, 'recipient', '');
            set_collected_field(c_ip, 'sc_bytes', '');
            set_collected_field(c_ip, 'sender', '');
            set_collected_field(c_ip, 'server_response', '');
            set_collected_field(c_ip, 'size', '');
            # set_collected_field(c_ip, 'total_bytes', '');

          ); # if HELO/EHLO

          # 2008-01-31 - GMF - Do nothing for RCPT (to) or MAIL (from) lines; in particular, don't clear all the fields like we do for unknown cs_methods, below.
          else if (matches_regular_expression(cs_method, '^[Rr][Cc][Pp][Tt]$')) then (
          );
          else if (matches_regular_expression(cs_method, '^[Mm][Aa][Ii][Ll]$')) then (
          );

          # 2008-01-31 - GMF - If it's some other operation, remember what it is, so we can associate the server responses with it.
          else (

            set_collected_field(c_ip, 'operation', cs_method);
            set_collected_field(c_ip, 's_computername', s_computername);
            set_collected_field(c_ip, 's_sitename', s_sitename);

            # 2008-01-31 - GMF - This isn't a standard event (RCPT TO, EHLO, MAIL FROM, etc.), so none of the numerical fields apply--clear them.
            set_collected_field(c_ip, 'bytes_received', "");
            set_collected_field(c_ip, 'bytes_queued', "");
            set_collected_field(c_ip, 'bytes_relayed', "");
            set_collected_field(c_ip, 'cs_bytes', '');
            set_collected_field(c_ip, 'disconnections', '');
            set_collected_field(c_ip, 'messages_received', "");
            set_collected_field(c_ip, 'messages_queued', "");
            set_collected_field(c_ip, 'messages_relayed', "");
            # set_collected_field(c_ip, 'recipient', '');
            set_collected_field(c_ip, 'sc_bytes', '');
            # set_collected_field(c_ip, 'sender', '');
            set_collected_field(c_ip, 'server_response', '');
            set_collected_field(c_ip, 'size', '');
            # set_collected_field(c_ip, 'total_bytes', '');

          ); # else

        ); # if !status_wait
      `
    } # parse_method

    # Do these methods separately since more fields are required.
    # Records the operation (DATA or BDAT) and the byte counters of the line
    # in the collected entry.
    parse_data = {
      requires_fields = {
        cs_method = true
        cs_bytes = true
        sc_bytes = true
      }
      value = `
        # Don't check for status_wait, since these lines will match.

        # Handle DATA lines
        #2007-03-24 03:37:13 199.9.99.99 abc.xyz.com.do SMTPSVC1 MAILER1 199.99.199.79 0 DATA - +<001601c76da4$1d75ec00$07755854@xyz> 250 0 124 13549 31359 SMTP - - - -
        # Both verbs are matched in any letter case.  The DATA alternative must start
        # with [dD]; it used to read [aD], which failed to match a lowercase "data".
        if (matches_regular_expression(cs_method, '([bB][dD][aA][tT]|[dD][aA][tT][aA])')) then (
          set_collected_field(c_ip, 'operation', uppercase($1));
          set_collected_field(c_ip, 'cs_bytes', cs_bytes);
          set_collected_field(c_ip, 'sc_bytes', sc_bytes);
          # set_collected_field(c_ip, 'total_bytes', 0.0 + cs_bytes + sc_bytes);
        ); # if DATA/BDAT
      `
    } # parse_data

    # Handle the "FROM:" and "TO:" payloads found in cs_uri_query: extract sender,
    # size and recipient, set the per-message counters, and accept immediately when
    # the payload has a leading "+" (no separate server response line will follow).
    parse_from_to = {
      requires_fields = {
        cs_uri_query = true
      }
      value = `
        if (!status_wait) then ( # The regexps wouldn't match anyway, so this is to save time.

          set_collected_field(c_ip, 's_computername', s_computername);
          set_collected_field(c_ip, 's_sitename', s_sitename);

          # Extract server response
          if (matches_regular_expression(sc_status, '^([0-9][0-9][0-9])$')) then
            set_collected_field(c_ip, 'server_response', $1);

          # Handle FROM lines
          if (matches_regular_expression(cs_uri_query, '^([+]*)[Ff][Rr][Oo][Mm]: *(.*)$')) then (

            set_collected_field(c_ip, 'operation', 'MAIL FROM');

            # Get the size, if any
            v.optional_plus = $1;
            v.sender = $2;
            v.size = 0;
            if (matches_regular_expression(v.sender, '^(.*).[Ss][Ii][Zz][Ee]=([0-9]+)')) then (
              v.sender = $1;
              v.size = $2;
            );

            # Remove <> from the address
            #if (matches_regular_expression(v.sender, '^<(.*)>$')) then
            # There may be a + before < and perhaps info after >
            if (matches_regular_expression(v.sender, '<(.*)>')) then
              v.sender = $1;

            set_collected_field(c_ip, 'recipient', ''); # Probably only needed for no sc_status case.
            set_collected_field(c_ip, 'sender', v.sender);
            set_collected_field(c_ip, 'size', v.size);

            # Outbound connection: this server is relaying the message elsewhere
            if (matches_regular_expression(cs_username, "^OutboundConnection")) then (
              set_collected_field(c_ip, 'messages_received', 0);
              set_collected_field(c_ip, 'messages_queued', 0);
              set_collected_field(c_ip, 'messages_relayed', 1);
              set_collected_field(c_ip, 'bytes_received', 0);
              set_collected_field(c_ip, 'bytes_queued', 0);
              set_collected_field(c_ip, 'bytes_relayed', v.size);
            );
            # Otherwise the message is being received by this server
            else (
              set_collected_field(c_ip, 'messages_received', 1);
              set_collected_field(c_ip, 'messages_queued', 0);
              set_collected_field(c_ip, 'messages_relayed', 0);
              set_collected_field(c_ip, 'bytes_received', v.size);
              set_collected_field(c_ip, 'bytes_queued', 0);
              set_collected_field(c_ip, 'bytes_relayed', 0);
            );

            # NOTE(review): 'messages_delivered' is not declared in log.fields in this
            # file; this assignment looks like a leftover -- confirm before removing.
            set_collected_field(c_ip, 'messages_delivered', 0);

            # 2011-08-30 - GMF - Setting connections=0 here overwrites the connections=1 we set in the EHLO line, if there's a response line to the EHLO. If we do that, we'll never count any connections on logs with separate response lines for EHLO.
            # set_collected_field(c_ip, 'connections', 0);

            # 2008-01-31 - GMF - Oddly, in all examples we see, a leading + on the cs_uri_query field indicates that there
            #                    will *not* be a following status code line, so we have to accept it here.
            if (v.optional_plus eq "+") then (

              set_collected_field(c_ip, 'events', 1);
              time = get_collected_field(c_ip, 'time');
              accept_collected_entry(c_ip, true);

              # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
              # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code
              # See already_accepted_messages_queued comment
              set_collected_field(c_ip, 'already_accepted_messages_queued', 1);

            );

          ); # if FROM

          # Handle TO lines
          if (matches_regular_expression(cs_uri_query, '^([+]*)[Tt][Oo]: *(.*)$')) then (

            set_collected_field(c_ip, 'operation', 'RCPT TO');
            v.recipients = get_collected_field(c_ip, 'recipient');
            v.optional_plus = $1;
            v.recipient = $2;

            # Remove <> from the address
            #if (matches_regular_expression(v.recipient, '^<(.*)>$')) then
            # There may be a + before < and a + and status info after >
            if (matches_regular_expression(v.recipient, '<(.*)>')) then
              v.recipient = $1;

            # Extract server response
            if (matches_regular_expression(v.recipient, '^[^ ]+ ([0-9][0-9][0-9]) ')) then
              set_collected_field(c_ip, 'server_response', $1);

            set_collected_field(c_ip, 'recipient', v.recipient);

            # Outbound connection: recipients are not counted, only the relay on MAIL FROM
            if (matches_regular_expression(cs_username, "^OutboundConnection")) then (
              set_collected_field(c_ip, 'messages_received', 0);
              set_collected_field(c_ip, 'messages_queued', 0);
              set_collected_field(c_ip, 'messages_relayed', 0);
              set_collected_field(c_ip, 'bytes_received', 0);
              set_collected_field(c_ip, 'bytes_queued', 0);
              set_collected_field(c_ip, 'bytes_relayed', 0);
            );
            # Inbound connection: one queued message (of v.size bytes) per recipient
            else (
              set_collected_field(c_ip, 'messages_received', 0);
              set_collected_field(c_ip, 'messages_queued', 1);
              set_collected_field(c_ip, 'messages_relayed', 0);
              set_collected_field(c_ip, 'bytes_received', 0);
              set_collected_field(c_ip, 'bytes_queued', v.size);
              set_collected_field(c_ip, 'bytes_relayed', 0);
            );

            set_collected_field(c_ip, 'connections', 0);

            # 2008-01-31 - GMF - Oddly, in all examples we see, a leading + on the cs_uri_query field indicates that there
            #                    will *not* be a following status code line, so we have to accept it here.
            if (v.optional_plus eq "+") then (

              set_collected_field(c_ip, 'events', 1);
              accept_collected_entry(c_ip, true);

              # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code. Added detection of RSET. Accepting only on 2xx code.
              # 2010-10-26 - GMF - But sometimes, we do get a response code in the same line. So, remember that we already accepted this.
              # 2010-09-30 05:14:28 1.2.3.4 ABC01 SMTPSVC1 SPAMFILTER1 9.8.7.6 0 RCPT - +TO: 250 0 37 34 0 SMTP - - - -
              # 2010-09-30 05:14:28 1.2.3.4 ABC01 SMTPSVC1 SPAMFILTER1 9.8.7.6 0 DATA - +<930201080723PM55db5f2948f24e73b4c92a01487eb75d@somewhere.com+> 250 0 157 3507 15 SMTP - - - -
              set_collected_field(c_ip, 'already_accepted_messages_delivered', 1);

            );

            # If this won't be accepted based on sc_status below, gather
            # until server_response found.
            else if (status eq '0' and v.recipients ne '' and v.recipients ne '(empty)') then (
              # This is why | isn't a good separator!
              #2007-03-24 03:47:23 18.188.8.88 OutboundConnectionCommand SMTPSVC1 MAILER1 - 25 RCPT - TO: 0 0 4 0 0 SMTP - - - -
              set_collected_field(c_ip, 'recipient', v.recipients . "*" . v.recipient);
            );

          ); # if TO

        ); # if !status_wait
      `
    } # parse_from_to

    accept_if_status = {
      requires_fields = {
        sc_status = true
      }
      value = `
        if (!status_wait) then (

          # Assume that if sc_status is zero that a server response line will follow.
# It might also be '(empty)' if the line is broken by a mail message (bug?).
          #
          # Accept on the line's own status code: only when sc_status is a 3-digit
          # code, the collected recipient contains '*', and the entry has not already
          # been accepted by a "+FROM:" or "+TO:" line.
          if (matches_regular_expression(sc_status, '^[0-9][0-9][0-9]$')) then (

            if (contains(get_collected_field(c_ip, 'recipient'), '*')) then (

              # Skip entries that a "+" line already accepted.  Both flags must be
              # checked: the "+FROM:" handler sets already_accepted_messages_queued and
              # the "+TO:" handler sets already_accepted_messages_delivered.  (The
              # second test used to repeat the queued flag, so the delivered flag was
              # never consulted.)
              if ((get_collected_field(c_ip, 'already_accepted_messages_queued') != 1) and
                  (get_collected_field(c_ip, 'already_accepted_messages_delivered') != 1)) then (

                # Nothing is accepted once a RSET has been seen for this entry
                if (get_collected_field(c_ip, 'resets') == 1) then (
                );

                # 2010-10-26 - GMF - 1.3.3 - Added already_accepted_messages_queued and already_accepted_messages_delivered to keep from accepting twice on + lines with response code
                # 2010-10-26 - GMF - If it starts with 2, that's a successful result, and we should accept. Otherwise, don't accept.
                else if (matches_regular_expression(sc_status, '^[2]')) then (
                  set_collected_field(c_ip, 'server_response', sc_status);
                  set_collected_field(c_ip, 'events', 1);
                  set_collected_field(c_ip, 'connections', 0);
                  accept_collected_entry(c_ip, true);
                );

              ); # if not already accepted by a + line

            ); # recipient *

          );

          # Example of email message breaking entry:
          #2007-03-24 03:37:45 188.48.88.228 friend SMTPSVC1 MAILER1 188.68.168.88 0 DATA - +<000001c76dc5$c554c380$0100007f@perez-d6fec328d>
          #From:+"Edward Scissorhands"+
          #(....)
          #--------------ms070109000304010705030302
          #Content-Type:+text/html;+charset="koi8-r"
          #Content-Transfer-Encoding:+quoted-printable
          #
          #