123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- /*
- * Copyright (C) 2010-2015 Mamadou Diop.
- *
- * This file is part of Open Source Doubango Framework.
- *
- * DOUBANGO is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * DOUBANGO is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with DOUBANGO.
- *
- */
- /**@file thttp_parser_url.c
- * @brief HTTP/HTTPS URL parser.
- */
- #include "tinyhttp/parsers/thttp_parser_url.h"
- #include "tsk_string.h"
- #include "tsk_memory.h"
- #include "tsk_debug.h"
- /***********************************
- * Ragel state machine.
- */
- %%{
- machine thttp_machine_parser_url;
- # Includes
- include thttp_machine_utils "./ragel/thttp_machine_utils.rl";
-
- action tag{
- tag_start = p;
- }
- #/* Sets URL type */
- action is_http { url->scheme = tsk_strdup("http"), url->type = thttp_url_http; }
- action is_https { url->scheme = tsk_strdup("https"), url->type = thttp_url_https; }
- #/* Sets HOST type */
- action is_ipv4 { url->host_type = url->host_type = thttp_host_ipv4; }
- action is_ipv6 { url->host_type = url->host_type = thttp_host_ipv6; }
- action is_hostname { url->host_type = url->host_type = thttp_host_hostname; }
- action parse_host{
- TSK_PARSER_SET_STRING(url->host);
- }
- action parse_port{
- have_port = 1;
- TSK_PARSER_SET_INT(url->port);
- }
- action parse_hpath{
- TSK_PARSER_SET_STRING(url->hpath);
- }
- action parse_search{
- TSK_PARSER_SET_STRING(url->search);
- }
- action eob{
- }
- #// RFC 1738: "http://" hostport [ "/" hpath [ "?" search ]]
- #// FIXME: hpath is no optional (see above) but in my def. I use it as opt (any*).
- search = any* >tag %parse_search;
- hpath = any* >tag %parse_hpath;
- port = DIGIT+ >tag %parse_port;
- myhost = ((IPv6reference >is_ipv6) | (IPv4address >is_ipv4) | (hostname >is_hostname)) >tag %parse_host;
- hostport = myhost ( ":" port )?;
- main := ( (("http:"i>tag %is_http | "https:"i>tag %is_https) "//")? hostport? :>("/" hpath :>("?" search)?)? ) @eob;
- #main := ( hostport? :>("/" hpath :>("?" search)?)? ) @eob;
-
- }%%
- /**@ingroup thttp_url_group
- * Parses a HTTP/HTTPS url.
- * @param urlstring A pointer to a valid url string. If the port is missing, then it's default value will be 443 if
- * the scheme is 'https' and 80 otherwise.<br>
- * @param length The length of the url string.
- * @retval A well-defined object representing the url string.
- *
- * @code
- * thttp_url_t* url = thttp_url_parse("http://www.google.com", tsk_strlen("http://www.google.com"));
- * @endcode
- *
- * @sa @ref thttp_url_tostring<br>@ref thttp_url_serialize
- **/
- thttp_url_t *thttp_url_parse(const char *urlstring, tsk_size_t length)
- {
- tsk_bool_t have_port = tsk_false;
- int cs = 0;
- const char *p = urlstring;
- const char *pe = p + length;
- const char *eof = pe;
- const char *ts = 0, *te = 0;
- int act =0;
- thttp_url_t *url = thttp_url_create(thttp_url_unknown);
-
- const char *tag_start = 0;
-
- TSK_RAGEL_DISABLE_WARNINGS_BEGIN()
- %%write data;
- (void)(ts);
- (void)(te);
- (void)(act);
- (void)(eof);
- (void)(thttp_machine_parser_url_first_final);
- (void)(thttp_machine_parser_url_error);
- (void)(thttp_machine_parser_url_en_main);
- %%write init;
- %%write exec;
- TSK_RAGEL_DISABLE_WARNINGS_END()
-
- if( cs < %%{ write first_final; }%% ){
- TSK_DEBUG_ERROR("Failed to parse HTTP/HTTPS URL: '%.*s'", length, urlstring);
- TSK_OBJECT_SAFE_FREE(url);
- }
- else if(!have_port){
- if(url->type == thttp_url_https){
- url->port = 443;
- }
- else{
- url->port = 80;
- }
- }
-
- return url;
- }
|