# Copyright (c) 2010 Flowerfire, Inc. All Rights Reserved.

# Log format plugin for DansGuardian 2.9+ content-filter logs.
# Three line layouts are recognized by the parsing filter below:
#   1. the space-delimited 2.9.8 / 2.9.9.8 / 2.10.1.1 layout,
#   2. the CSV (double-quoted, comma-separated) layout,
#   3. the older pre-2.9.8 space-delimited layout (which carries a filter_name field).
# Each may optionally be preceded by a syslog header (log_data_type is "syslog_optional").
dans_guardian29 = {

  plugin_version = "1.5"

  # 2006-10-26: 1.0beta: KBB - based on dans_guardian29.cfg - adds support for 2.9.8, plus
  #   search_engine and search_phrase fields derived from the url field
  # 2007-09-11 - 1.0 - KBB - renumbered per new beta policy and changed
  #   file name from beta_dans_guardian29.cfg
  # 2007-12-24 - 1.1 - KBB - increased flexibility of autodetect regular expression and parsing
  # 2009-10-29 - 1.2 - KBB & SAA - Fixed a bug in autodetection. Added support for 2.9.9.8
  #   and 2.10.1.1 (which may be the same). Added support for a tab delimited variant which is
  #   available for both versions, and a new field, user_agent, which may contain a "-".
  # 2012-07-16 - 1.3 - GMF - Added support for syslog header; changed to syslog_optional
  # 2012-08-30 - 1.4 - GMF - Added support for CSV variant
  # 2013-01-11 - 1.5 - GMF - Improved parsing of categories containing spaces.

  info.1.manufacturer = "DansGuardian"
  info.1.device = "Content Filter 2.9"
  info.1.version.1 = "2.9.x alpha"
  info.1.version.2 = "2.9.8 beta"
  info.1.version.3 = "2.9.9.8"
  info.1.version.4 = "2.10.1.1"

  # The name of the log format
  log.format.format_label = "DansGuardian 2.9+ Log Format"
  log.miscellaneous.log_data_type = "syslog_optional"
  log.miscellaneous.log_format_type = "internet_device"

  # The log is in this format if any of the first ten lines match this regular expression
  #2007.11.15 11:37:28 - 192.168.99.99 http://www.motorvationmotorcars.com/browse.phtml GET 2675 0 1 200 text/html -
  #2009.9.23 13:02:04 jproudfoot 192.168.55.55 http://www.shire.com/images?q=tbn:2DqiV9r3CdeoTM:http://img147.imageshack.us/img147/4250/closeupdarkhl1.jpg *DENIED* Banned site: imageshack.us GET 0 0 2 403 - l2-15.school.edu.au MiddleEarth -
  #2008.12.28 4:02:52 bbaggins 144.44.4.244 http://www.middleearth.fm/www/delivery/lg.php?bannerid=4&campaignid=2&zoneid=3&loc=http%3A%2F%2Fwww.shire.com%2F%3Fstation%3D0lr-top40%26description%3DTop%252040%26bitrate%3Dlow&cb=c63ab08b4b GET 63 0 ADs 5 200 image/gif MiddleEarth Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)
  # log.format.autodetect_regular_expression = "[0-9][0-9][0-9][0-9]\\.[0-9]{1,2}\\.[0-9]{1,2}[ ][0-9]{1,2}:[0-9]{1,2}:[0-9][0-9][ ][^ ]+[ ][^ ]*[ ]*[0-9.]*[ ]http://[^ ]+[ ].*(GET|POST|PUT|DELETE|TRACE|CONNECT|HEAD)[ ][0-9]*[ ][0-9]+[ ]+[^ ]+[ ][0-9]+"
  # log.format.autodetect_regular_expression = "^[0-9][0-9][0-9][0-9]\\.[0-9]{1,2}\\.[0-9]{1,2}[ ][0-9]{1,2}:[0-9]{1,2}:[0-9][0-9][ ][^ ]+[ ][^ ]*[ ]*[0-9.]*[ ]http://[^ ]+[ ].*(GET|POST|PUT|DELETE|TRACE|CONNECT|HEAD)[ ][0-9]*[ ][0-9]+[ ]+[^ ]+[ ][0-9]+"

  # Autodetection accepts a line if it matches either the space-delimited
  # pattern or the CSV pattern.
  log.format.autodetect_expression = `
    matches_regular_expression(volatile.log_data_line, "[0-9][0-9][0-9][0-9]\\.[0-9]{1,2}\\.[0-9]{1,2}[ ][0-9]{1,2}:[0-9]{1,2}:[0-9][0-9][ ][^ ]+[ ][^ ]*[ ]*[0-9.]*[ ]http://[^ ]+[ ].*(GET|POST|PUT|DELETE|TRACE|CONNECT|HEAD)[ ][0-9]*[ ][0-9]+[ ]+[^ ]+[ ][0-9]+")
    or
    # Aug 27 22:40:46 abcdef dansguardian[27311]: "2012.8.27 22:40:46","-","12.34.56.78","http://somewhere.com/dir1/something.html","*EXCEPTION* Exception site match.","GET","718","0","","1","200","-","","ISAC","Software%20Update/359 CFNetwork/520.4.3 Darwin/11.4.0 (x86_64) (MacBookPro6%2C2)","","-","-",""
    matches_regular_expression(volatile.log_data_line, '"[0-9][0-9][0-9][0-9][.][0-9]{1,2}[.][0-9]{1,2} [0-9]{1,2}:[0-9]{1,2}:[0-9][0-9]","[^"]*","[^"]+","[^"]+","[^"]+","(GET|POST|PUT|DELETE|TRACE|CONNECT|HEAD)"')
  `

  # Setting this to false allows setting field variables without using collect/accept
  log.format.parse_only_with_filters = "true"

  # The format of dates and times in this log
  log.format.date_format = "yyyy/m/d"
  log.format.time_format = "h:mm:ss"

  # Log fields
  log.fields = {
    date.type = "date"
    time.type = "time"
    authenticated_user = ""
    filter_name = ""
    browsing_host.type = "host"
    url.type = "page"
    referrer.type = "URL"
    message = ""
    operation = ""
    server_response = ""
    size.type = "size"
    phrase_score = ""
    filter_category = ""
    filter_group_number = ""
    mime_type = ""
    client_hostname = ""
    group_name = ""
    user_agent.type = "agent"
    web_browser = ""
    operating_system = ""
  } # log.fields

  # Parsing filter: strips an optional "dansguardian[pid]: " prefix from
  # v.syslog_message, tries the three layouts in order, stores the captured
  # values in the collected entry keyed by '', derives search-engine and
  # user-agent information, then accepts the collected entry.
  log.parsing_filters.parse = `

    #v.line = current_log_line();

    # Drop the "dansguardian[pid]: " program tag left over from a syslog header, if present.
    if (matches_regular_expression(v.syslog_message, '^dansguardian[[][0-9]+[]]: (.*)$')) then
      v.syslog_message = $1;

    # 2.9.8 example: 2006.10.18 9:30:13 username 10.0.0.1 http://www.hsbc.com.br/common/css/comum.css *EXCEPTION* Sítio está na lista de exceções. GET 26822 0 1 200 -
    # 2.9.8 example: 2006.10.18 9:30:14 username 10.0.0.1 http://shared.live.com/~live.themes/~11.5.1926/~/~live.search/~/~pt-BR/Base/search.css *DENIED* Sítio proibido: live.com GET 0 0 Chat 1 403 text/css
    # 2.9.8 example: 2006.10.18 9:31:56 username 10.0.0.1 http://baym-wm11.webmessenger.msn.com/session/driver.ashx?s=f8a37d7e120a92cba4ed5f40636f7da70000000b00037ee5 *EXCEPTION* Sítio está na lista de exceções. GET 395 0 3 200 - gr_libera_googlemaps
    # 2.9.8 example: 2006.10.18 9:32:30 username 10.0.0.1 http://br.f328.mail.yahoo.com/ym/Compose?box=Inbox&Mid=9106_22600964_399825_1728_1174_0_8177_3053_685739490&inc=&Search=&YY=36402&y5beta=yes&y5beta=yes&order=down&sort=date&pos=0&view=a&head=b POST 52818 -70 1 200 text/html
    # 2.9.9.8 example: 2008.12.28 4:02:52 bbaggins 144.44.4.244 http://www.middleearth.fm/www/delivery/lg.php?bannerid=4&campaignid=2&zoneid=3&loc=http%3A%2F%2Fwww.shire.com%2F%3Fstation%3D0lr-top40%26description%3DTop%252040%26bitrate%3Dlow&cb=c63ab08b4b GET 63 0 ADs 5 200 image/gif MiddleEarth Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)
    # 2.10.1.1 example: 2009.9.23 13:02:04 jproudfoot 192.168.55.55 http://www.shire.com/images?q=tbn:2DqiV9r3CdeoTM:http://img147.imageshack.us/img147/4250/closeupdarkhl1.jpg *DENIED* Banned site: imageshack.us GET 0 0 2 403 - l2-15.school.edu.au MiddleEarth -
    #if (matches_regular_expression(v.line, '^([0-9][0-9][0-9][0-9]\.[0-9]{1,2}\.[0-9]{1,2}) ([0-9]{1,2}:[0-9]{1,2}:[0-9][0-9]) ([^ ]*) ([^ ]*) ([^ ]*) (.*) ([A-Z]*) ([0-9]*) ([0-9-]*) (.*) ([0-9]*) +([0-9]*) ([^ ]*) ([^ ]*) +(.*)$')) then (

    # 2013-01-11 - GMF - Oh, for crying out loud, look at this one:
    #2012.12.5 16:54:52 - 192.168.100.2 http://www.galeriexxx.net *DENIED* Weighted phrase limit of 50 : 2375 GET 82703 2375 Pornography, Bad words (French), Pornography (French), Pornography (German), Pornografia, Pornography (Portuguese), Pornography (Spanish), Pornography (Norwegian), Weapons, Chat (Italian) 1 403 text/html - -
    # Not only does it have spaces in the filter_category field (in a space-separated format),
    # but it even has additional spaces after mime_type, which makes it *seem* like
    # "1 403 text/html" really is part of the filter category, and those fields are blank.
    # What to do about *this* one? OK, I'll change filter_category from (.*) to ([^/0-9]*)
    # so it can't contain /s or digits; that will work as long as there isn't a real category
    # containing those.... And, I'll change the client_hostname field to be + instead of *,
    # so it accepts that - instead of "". All motivated by ThreadID:1280538.
    if (matches_regular_expression(v.syslog_message, '^([0-9][0-9][0-9][0-9]\.[0-9]{1,2}\.[0-9]{1,2})[ ]([0-9]{1,2}:[0-9]{1,2}:[0-9][0-9])[ ]([^ ]*)[ ]([^ ]*)[ ]([^ ]*)[ ](.*)[ ]([A-Z]*)[ ]([0-9]*)[ ]([0-9-]*)[ ]([^/0-9]*)[ ]([0-9]*)[ ]+([0-9]*)[ ]([^ ]*)[ ]+([^ ]+)[ ]+([^ ].*)?$')) then (

      set_collected_field('', 'date', $1);
      set_collected_field('', 'time', $2);
      set_collected_field('', 'authenticated_user', $3);
      set_collected_field('', 'browsing_host', $4);
      set_collected_field('', 'url', $5);
      set_collected_field('', 'message', $6);
      set_collected_field('', 'operation', $7);
      set_collected_field('', 'size', $8);
      set_collected_field('', 'phrase_score', $9);
      set_collected_field('', 'filter_category', $10);
      set_collected_field('', 'filter_group_number', $11);
      set_collected_field('', 'server_response', $12);
      set_collected_field('', 'mime_type', $13);
      set_collected_field('', 'client_hostname', $14);
      set_collected_field('', 'group_name', $15);

      # The last capture may hold "<group_name> <user_agent>"; split it at the first space.
      if (matches_regular_expression(get_collected_field('', 'group_name'), '^([^ ]+)[ ](-|.*)')) then (
        set_collected_field('', 'group_name', $1);
        set_collected_field('', 'user_agent', $2);

        # Read the value back from the collected entry, where it was just stored,
        # rather than from the bare user_agent variable; a leading "-" is reported as "(empty)".
        if (starts_with(get_collected_field('', 'user_agent'), '-')) then (
          set_collected_field('', 'user_agent', '(empty)');
        );
      );

    );

    # CSV
    # Aug 27 22:40:46 abcdef dansguardian[27311]: "2012.8.27 22:40:46","-","12.34.56.78","http://somewhere.com/dir1/something.html","*EXCEPTION* Exception site match.","GET","718","0","","1","200","-","","ISAC","Software%20Update/359 CFNetwork/520.4.3 Darwin/11.4.0 (x86_64) (MacBookPro6%2C2)","","-","-",""
    else if (matches_regular_expression(v.syslog_message, '"([0-9][0-9][0-9][0-9][.][0-9]{1,2}[.][0-9]{1,2}) ([0-9]{1,2}:[0-9]{1,2}:[0-9][0-9])","([^"]*)","([^"]+)","([^"]+)","([^"]*)","(GET|POST|PUT|DELETE|TRACE|CONNECT|HEAD)","([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)","([^"]*)"')) then (

      set_collected_field('', 'date', $1);
      set_collected_field('', 'time', $2);
      set_collected_field('', 'authenticated_user', $3);
      set_collected_field('', 'browsing_host', $4);
      set_collected_field('', 'url', $5);
      set_collected_field('', 'message', $6);
      set_collected_field('', 'operation', $7);
      set_collected_field('', 'size', $8);
      set_collected_field('', 'phrase_score', $9);
      set_collected_field('', 'filter_category', $10);
      set_collected_field('', 'filter_group_number', $11);
      set_collected_field('', 'server_response', $12);
      set_collected_field('', 'mime_type', $13);
      set_collected_field('', 'client_hostname', $14);
      set_collected_field('', 'group_name', $15);
      set_collected_field('', 'user_agent', $16);
      # four unknown fields not extracted here

    ); # if CSV

    # 2.9
    # 2.9.? example (pre 2.9.8): 2006.1.25 14:31:04 - filter1 10.10.10.10 http://10.10.10.1:8987/?dp+templates.admin.new_profile_wizard.index GET 200 35149 text/html -20 N/A
    else if (matches_regular_expression(v.syslog_message, '^([0-9][0-9][0-9][0-9]\.[0-9]{1,2}\.[0-9]{1,2}) ([0-9]{1,2}:[0-9]{1,2}:[0-9][0-9]) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) (.*) ([^ ]*) ([0-9]{3}) ([0-9]+) ([^ ]*)')) then (

      set_collected_field('', 'date', $1);
      set_collected_field('', 'time', $2);
      set_collected_field('', 'authenticated_user', $3);
      set_collected_field('', 'filter_name', $4);
      set_collected_field('', 'browsing_host', $5);
      set_collected_field('', 'url', $6);
      set_collected_field('', 'message', $7);
      set_collected_field('', 'operation', $8);
      set_collected_field('', 'server_response', $9);
      set_collected_field('', 'size', $10);
      set_collected_field('', 'mime_type', $11);

    );

    # Get search engine and search phrase information from the url.
    # Note that log field "referrer" only exists to automatically
    # create derived database fields "search_engine" and "search_phrase",
    # but the value of the field "url" is passed to the function.
    # Note that this happens before url is simplified.
    if (get_search_engine_info(get_collected_field('', 'url'))) then (
      set_collected_field('', 'search_engine', volatile.search_engine);
      set_collected_field('', 'search_phrase', volatile.search_phrase);
    );

    # Derive browser and operating system from the user agent, with "+" turned into spaces first.
    get_user_agent_info(replace_all(get_collected_field('', 'user_agent'), '+', ' '));
    set_collected_field('', 'web_browser', volatile.web_browser);
    set_collected_field('', 'operating_system', volatile.operating_system);

    accept_collected_entry('', false);

  `

  # Database fields
  database.fields = {
    date_time = ""
    day_of_week = ""
    hour_of_day = ""
    authenticated_user = ""
    filter_name = ""
    browsing_host = ""
    url.suppress_top = "1"
    url.suppress_bottom = "9"
    search_engine = ""
    search_phrase = ""
    web_browser = ""
    operating_system = ""
    message = ""
    operation = ""
    server_response = ""
    phrase_score = ""
    filter_category = ""
    filter_group_number = ""
    mime_type = ""
    client_hostname = ""
    group_name = ""
  } # database.fields

  # Log Filters
  log.filters = {

    # Replaces everything after the first "?" in url with "(parameters)".
    remove_query = {
      label = "$lang_admin.log_filters.remove_query_label"
      comment = "$lang_admin.log_filters.remove_query_comment"
      value = "if (contains(url, '?')) then url = substr(url, 0, index(url, '?') + 1) . '(parameters)';"
    } # remove_query

    # Sets page_views to 0 for the listed image/style/script file types, 1 otherwise.
    detect_page_views = {
      label = '$lang_admin.log_filters.detect_page_views_label'
      comment = '$lang_admin.log_filters.detect_page_views_comment'
      value = "if ((file_type eq 'JPEG') or (file_type eq 'JPG') or (file_type eq 'GIF') or (file_type eq 'ICO') or (file_type eq 'PNG') or (file_type eq 'CSS') or (file_type eq 'SWF') or (file_type eq 'JS')) then page_views = 0; else page_views = 1;"
    } # detect_page_views

    # Keeps only "scheme://host/" of url and appends "(omitted)".
    simplify_url = {
      label = "$lang_admin.log_filters.simplify_url_label"
      comment = "$lang_admin.log_filters.simplify_url_comment"
      value = "if (matches_regular_expression(url, '^([^:]+://[^/]+/)')) then url = $1 . '(omitted)'"
    } # simplify_url

    # For non-page-view hits, replaces the part of url after the last "/" with "(nonpage)".
    strip_non_page_views = {
      label = '$lang_admin.log_filters.strip_non_page_views_label'
      comment = '$lang_admin.log_filters.strip_non_page_views_comment'
      value = "if (page_views == 0) then url = substr(url, 0, last_index(url, '/') + 1) . '(nonpage)';"
    } # strip_non_page_views

    # Counts every accepted entry as one hit.
    mark_entry = {
      label = '$lang_admin.log_filters.mark_entry_label'
      comment = '$lang_admin.log_filters.mark_entry_comment'
      value = 'hits = 1;'
    } # mark_entry

  } # log.filters

  log.field_options = {
    sessions_page_field = "url"
    sessions_visitor_id_field = "browsing_host"
    sessions_event_field = "page_views"
  } # log.field_options

  database.numerical_fields = {

    hits = {
      default = false
      requires_log_field = false
      entries_field = true
    } # hits

    page_views = {
      default = true
      requires_log_field = false
    } # page_views

    visitors = {
      log_field = "browsing_host"
      type = "unique"
    } # visitors

    size = {
      type = "int"
      integer_bits = 64
      display_format_type = "bandwidth"
    } # size

  } # database.numerical_fields

  create_profile_wizard_options = {

    final_step = `
      include "templates.admin.profiles.setup_reports_util";

      # Start with the standard reports
      string profile = "profiles." . volatile.new_profile_name;
      add_standard_reports(profile);

      # 2012-07-16 - GMF - Disabled this because set_report_element_variable() no longer exists.
      # If this needs to be done, it should be done in a wizard node.
      #node report_element = profile . ".statistics.reports.phrase_score.report_elements.phrase_score";
      #set_report_element_variable(profile, report_element, "sort_type", "integer");
      #set_report_element_variable(profile, report_element, "sort_by", "phrase_score");
    `

    # How the reports should be grouped in the report menu
    report_groups = {
      date_time_group = ""
    } # report_groups

  } # create_profile_wizard_options

} # dans_guardian29