#
# This performs an HTTP POST operation
#
# Parameters: hostname: the name of the HTTP server, e.g. "www.sawmill.net"; an explicit port may be given as "host:port"
#             uri: the URI to request, e.g., /index.html
#             https: true if we should connect with HTTPS; false to connect with HTTP
#             headers: A node containing HTTP headers, in name=value pairs in subnodes
#             parameters: A node containing HTTP parameters, with name=value pairs in subnodes
#
# Returns: a node with a "body" subnode holding the response body (de-chunked if the server used
#          chunked transfer encoding), plus "response_code" and "response" subnodes when the
#          status line of the response could be parsed.
#

#
# escape_form_parameter()
#
# Percent-encodes the reserved/unsafe characters of a form parameter value and returns the result.
# Values made up only of letters, digits, "_" and "*" are returned unchanged.
# Line feeds are dropped; carriage returns become %0D.
#
subroutine(escape_form_parameter(string s), (

  if (matches_regular_expression(s, "[^a-zA-Z0-9_*]")) then (

    # "%" has to be escaped first: every replacement below inserts a "%", and escaping "%"
    # afterwards would re-escape those (e.g. "&" -> "%26" -> "%2526").
    s = replace_all(s, "%", "%25");

    s = replace_all(s, "$", "%24");
    s = replace_all(s, "&", "%26");
    s = replace_all(s, "+", "%2B");
    s = replace_all(s, ",", "%2C");
    s = replace_all(s, "/", "%2F");
    s = replace_all(s, ":", "%3A");
    s = replace_all(s, ";", "%3B");
    s = replace_all(s, "=", "%3D");
    s = replace_all(s, "?", "%3F");
    s = replace_all(s, "@", "%40");
    s = replace_all(s, '"', "%22");
    s = replace_all(s, "'", "%27");
    s = replace_all(s, " ", "%20");
    s = replace_all(s, "<", "%3C");
    s = replace_all(s, ">", "%3E");
    s = replace_all(s, "#", "%23");
    s = replace_all(s, "(", "%28");
    s = replace_all(s, ")", "%29");
    s = replace_all(s, '{', "%7B");
    s = replace_all(s, '}', "%7D");
    s = replace_all(s, '[', "%5B");
    s = replace_all(s, ']', "%5D");
    s = replace_all(s, "\\", "%5C");
    s = replace_all(s, "^", "%5E");
    s = replace_all(s, "~", "%7E");
    s = replace_all(s, "`", "%60");
    s = replace_all(s, ascii_to_char(10), ""); # strip out LF
    s = replace_all(s, ascii_to_char(13), "%0D");

  );

  s;

)); # subroutine escape_form_parameter()


#
# post_http()
#
# Sends the parameters as the body of an HTTP/1.1 POST request and reads the response.
# See the top of this file for the parameters and the layout of the returned node.
#
subroutine(post_http(string hostname, string uri, bool https, node headers, node parameters), (

  # Connect to the HTTP server. The port defaults to 443/80 unless hostname is "host:port".
  int port = if (https) then 443 else 80;
  if (matches_regular_expression(hostname, '^([^:]+):([0-9]+)$')) then (
    hostname = $1;
    port = $2;
  );
  string socket = connect(hostname, port, https);

  node response = new_node();

  # Build the parameters string: name=escaped_value pairs, each followed by "&"
  string parameters_string = "";
  node parameter;
  foreach parameter parameters (
    parameters_string .= node_name(parameter) . "=" . escape_form_parameter(@parameter) . "&";
  );

  # Send a POST request for the specified URI: request line, Host, caller-supplied headers,
  # Content-length, a blank line, and then the parameters as the request body.
  string dstr = "POST " . uri . " HTTP/1.1\r\nHost: " . hostname . "\r\n";
  node header;
  foreach header headers (
    dstr .= node_name(header) . ": " . @header . "\r\n";
  );
  dstr .= "Content-length: " . length(parameters_string) . "\r\n";
  dstr .= "\r\n";
  dstr .= parameters_string;
  data d = dstr;
  write_to_socket(socket, d, length(d));

  # Read the result in the following loop
  string totalResponse;    # everything received while still looking for the end of the headers
  string responseHeaders;  # status line + response headers (named so as not to clash with the "headers" parameter)
  string body;             # decoded response body
  string incoming;         # body bytes made available by the current read
  bool done = false;
  bool inHeaders = true;
  bool chunked = false;
  int chunkLength;
  string chunkSizeText;    # the hexadecimal chunk size exactly as it appeared on the wire
  string chunkedBody;      # received chunk-encoded data that has not been decoded yet
  string chunk;
  while (!done) (

    # A zero-length read ends the loop after this pass.
    read_from_socket(socket, d, 10000);
    if (length(d) == 0) then
      done = true;

    # Append the data to a string, the same way the data is accumulated everywhere else here.
    incoming = "";
    incoming .= d;

    # If we haven't found the end of the headers yet, look again
    if (inHeaders) then (

      # Build the total response by adding this piece; nothing counts as body until the
      # blank line ending the headers has been seen.
      totalResponse .= incoming;
      incoming = "";

      int blankLinePos = index(totalResponse, '\r\n\r\n');
      if (blankLinePos != -1) then (

        responseHeaders = substr(totalResponse, 0, blankLinePos);

        # Whatever followed the blank line is the start of the body; it is handled below
        # in this same pass instead of waiting for another read.
        incoming = substr(totalResponse, blankLinePos + 4);

        # Parse the first line of the headers to get the response and response code.
        # The pattern is not anchored at the end, because the header lines follow the status line.
        if (matches_regular_expression(responseHeaders, '^HTTP/[0-9.]+ ([0-9]+) ([A-Za-z ]+)')) then (
          @response{"response_code"} = $1;
          @response{"response"} = $2;
        );

        if (matches_regular_expression(responseHeaders, '\r\nTransfer-Encoding: chunked')) then
          chunked = true;

        inHeaders = false;

      );

    ); # if in headers

    # Reading the body
    if (!inHeaders) then (

      # if chunked
      if (chunked) then (

        # Add this block to the chunked body
        chunkedBody .= incoming;

        # Extract all the chunks we can from what we have
        bool extractedAllChunksSoFar = false;
        while (!extractedAllChunksSoFar) (

          # Extract the chunk length and the remainder of the chunk
          if (matches_regular_expression(chunkedBody, '^([0-9A-Fa-f]+)\r\n')) then (

            chunkSizeText = $1;
            chunkLength = convert_base(chunkSizeText, 16, 10);

            # If the chunk length is 0, we're done. The converted value is tested, not the
            # hexadecimal text.
            if (chunkLength == 0) then (
              done = true;
              extractedAllChunksSoFar = true;
            );

            # If we don't have a full chunk, we need to read more. A full chunk is the size
            # line, its CRLF, chunkLength bytes of data, and the CRLF that follows the data.
            else if (length(chunkedBody) < length(chunkSizeText) + 2 + chunkLength + 2) then (
              extractedAllChunksSoFar = true;
            );

            # If we now have a full chunk of data in the chunked body, add that chunk to the body
            else (
              chunk = substr(chunkedBody, length(chunkSizeText) + 2, chunkLength);
              chunkedBody = substr(chunkedBody, length(chunkSizeText) + chunkLength + 2 + 2);
              body .= chunk;
            );

          ); # if chunk length matched

          # Nothing buffered, only whitespace, or no complete chunk-size line yet:
          # stop extracting and read some more rather than looping forever.
          else
            extractedAllChunksSoFar = true;

        ); # while !extractedAllChunksSoFar

      ); # if chunked

      # if not chunked
      # NOTE: a non-chunked body ends only when a read returns no data.
      else (

        # Add this piece to the body
        body .= incoming;

      ); # if not chunked

    ); # if reading body

  ); # while not done

  disconnect(socket);

  @response{"body"} = body;

  response;

)); # subroutine post_http()