thttp_parser_url.rl 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /*
  2. * Copyright (C) 2010-2015 Mamadou Diop.
  3. *
  4. * This file is part of Open Source Doubango Framework.
  5. *
  6. * DOUBANGO is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * DOUBANGO is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with DOUBANGO.
  18. *
  19. */
  20. /**@file thttp_parser_url.c
  21. * @brief HTTP/HTTPS URL parser.
  22. */
  23. #include "tinyhttp/parsers/thttp_parser_url.h"
  24. #include "tsk_string.h"
  25. #include "tsk_memory.h"
  26. #include "tsk_debug.h"
  27. /***********************************
  28. * Ragel state machine.
  29. */
  %%{
      machine thttp_machine_parser_url;

      # Includes
      include thttp_machine_utils "./ragel/thttp_machine_utils.rl";

      #/* Remembers where the token that is about to be scanned starts. */
      #/* The parse_* actions below hand the span starting at tag_start to the */
      #/* TSK_PARSER_SET_* macros (defined elsewhere). */
      action tag {
          tag_start = p;
      }

      #/* Sets URL type */
      action is_http {
          url->scheme = tsk_strdup("http");
          url->type = thttp_url_http;
      }
      action is_https {
          url->scheme = tsk_strdup("https");
          url->type = thttp_url_https;
      }

      #/* Sets HOST type */
      #/* NOTE(review): these are attached below as ENTERING actions of an alternation. */
      #/* When the first character can start more than one alternative, every matching */
      #/* action runs on that character and the last one executed wins - please confirm */
      #/* that host_type ends up correct for IPv4 literals. */
      action is_ipv4 { url->host_type = thttp_host_ipv4; }
      action is_ipv6 { url->host_type = thttp_host_ipv6; }
      action is_hostname { url->host_type = thttp_host_hostname; }

      #/* Stores the host token into url->host */
      action parse_host {
          TSK_PARSER_SET_STRING(url->host);
      }

      #/* Stores the port token into url->port and records that an explicit port was seen, */
      #/* so that thttp_url_parse() does not overwrite it with the scheme default. */
      action parse_port {
          have_port = 1;
          TSK_PARSER_SET_INT(url->port);
      }

      #/* Stores the path token (text after the first "/") into url->hpath */
      action parse_hpath {
          TSK_PARSER_SET_STRING(url->hpath);
      }

      #/* Stores the query token (text after the "?") into url->search */
      action parse_search {
          TSK_PARSER_SET_STRING(url->search);
      }

      #/* Intentionally empty: hook attached to the main machine, nothing to do. */
      action eob {
      }

      #// RFC 1738: "http://" hostport [ "/" hpath [ "?" search ]]
      #// FIXME: hpath is not optional (see above) but in my def. I use it as opt (any*).
      search = any* >tag %parse_search;
      hpath = any* >tag %parse_hpath;
      port = DIGIT+ >tag %parse_port;
      myhost = ((IPv6reference >is_ipv6) | (IPv4address >is_ipv4) | (hostname >is_hostname)) >tag %parse_host;
      hostport = myhost ( ":" port )?;

      #// The scheme and the host/port are both optional, so relative inputs such as "/path?query" are accepted.
      main := ( (("http:"i>tag %is_http | "https:"i>tag %is_https) "//")? hostport? :>("/" hpath :>("?" search)?)? ) @eob;
      #// Alternative definition without scheme support (kept for reference):
      #main := ( hostport? :>("/" hpath :>("?" search)?)? ) @eob;
  }%%
  69. /**@ingroup thttp_url_group
  70. * Parses a HTTP/HTTPS url.
  71. * @param urlstring A pointer to a valid url string. If the port is missing, then it's default value will be 443 if
  72. * the scheme is 'https' and 80 otherwise.<br>
  73. * @param length The length of the url string.
  74. * @retval A well-defined object representing the url string.
  75. *
  76. * @code
  77. * thttp_url_t* url = thttp_url_parse("http://www.google.com", tsk_strlen("http://www.google.com"));
  78. * @endcode
  79. *
  80. * @sa @ref thttp_url_tostring<br>@ref thttp_url_serialize
  81. **/
  82. thttp_url_t *thttp_url_parse(const char *urlstring, tsk_size_t length)
  83. {
  84. tsk_bool_t have_port = tsk_false;
  85. int cs = 0;
  86. const char *p = urlstring;
  87. const char *pe = p + length;
  88. const char *eof = pe;
  89. const char *ts = 0, *te = 0;
  90. int act =0;
  91. thttp_url_t *url = thttp_url_create(thttp_url_unknown);
  92. const char *tag_start = 0;
  93. TSK_RAGEL_DISABLE_WARNINGS_BEGIN()
  94. %%write data;
  95. (void)(ts);
  96. (void)(te);
  97. (void)(act);
  98. (void)(eof);
  99. (void)(thttp_machine_parser_url_first_final);
  100. (void)(thttp_machine_parser_url_error);
  101. (void)(thttp_machine_parser_url_en_main);
  102. %%write init;
  103. %%write exec;
  104. TSK_RAGEL_DISABLE_WARNINGS_END()
  105. if( cs < %%{ write first_final; }%% ){
  106. TSK_DEBUG_ERROR("Failed to parse HTTP/HTTPS URL: '%.*s'", length, urlstring);
  107. TSK_OBJECT_SAFE_FREE(url);
  108. }
  109. else if(!have_port){
  110. if(url->type == thttp_url_https){
  111. url->port = 443;
  112. }
  113. else{
  114. url->port = 80;
  115. }
  116. }
  117. return url;
  118. }