summaryrefslogtreecommitdiffhomepage
path: root/src/uri.c
blob: af1ec7de024bb6b37e7d05d5911eeadbe77e7854 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/* $Id: uri.c,v 1.4 2001-05-27 02:37:18 rjkaes Exp $
 *
 * This borrows the REGEX from RFC2396 to split a URI string into the five
 * primary components. The components are:
 *	scheme		the uri method (like "http", "ftp", "gopher")
 *	authority	the domain and optional ":" port
 *	path		path to the document/resource
 *	query		an optional query (separated with a "?")
 *	fragment	an optional fragment (separated with a "#")
 *
 * Copyright (C) 1999  Robert James Kaes (rjkaes@flarenet.com)
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

#include "tinyproxy.h"

#include "log.h"
#include "regexp.h"
#include "uri.h"
#include "utils.h"

/*
 * Number of regmatch_t slots handed to regexec(): one for the whole match
 * plus one for each of the nine parenthesised groups in URIPATTERN.
 */
#define NMATCH 10

/*
 * POSIX extended regex that splits a URI reference into its components.
 * Every part is optional, so groups that did not take part in the match
 * are reported with rm_so == -1.
 */
#define URIPATTERN "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"

/*
 * Indexes into the pmatch[] array for the groups of URIPATTERN.
 * The *_MARK groups include the leading "?" / "#" delimiter and are used
 * to test for presence; QUERY and FRAGMENT are the text after the delimiter.
 */
#define SCHEME        2
#define AUTHORITY     4
#define PATH          5
#define QUERY_MARK    6
#define QUERY         7
#define FRAGMENT_MARK 8
#define FRAGMENT      9

/*
 * Copy the text matched by group `substring` out of `buffer` into a freshly
 * allocated, NUL-terminated string stored in *section.
 *
 * Returns 0 on success, or -1 if the allocation fails (in which case
 * *section is set to NULL). The caller owns the returned string.
 */
static int extract_uri(regmatch_t pmatch[], const char *buffer, char **section,
		       int substring)
{
	const regmatch_t *match = &pmatch[substring];
	size_t length = match->rm_eo - match->rm_so;
	char *copy = malloc(length + 1);

	*section = copy;
	if (copy == NULL)
		return -1;

	memcpy(copy, buffer + match->rm_so, length);
	copy[length] = '\0';

	return 0;
}

/*
 * Release a URI structure together with each of its component strings.
 * Passing NULL is accepted and does nothing, mirroring free(NULL).
 */
void free_uri(URI * uri)
{
	/* Nothing to release; avoid dereferencing a NULL pointer below. */
	if (uri == NULL)
		return;

	safefree(uri->scheme);
	safefree(uri->authority);
	safefree(uri->path);
	safefree(uri->query);
	safefree(uri->fragment);
	safefree(uri);
}

/*
 * Split a URI string into its components using URIPATTERN.
 *
 * Returns a newly allocated URI that the caller releases with free_uri(),
 * or NULL if an allocation fails or the pattern cannot be compiled or
 * matched. Components whose group did not take part in the match are left
 * as NULL; the query and fragment are stored without their leading "?"
 * and "#".
 */
URI *explode_uri(const char *string)
{
	URI *uri;
	regmatch_t pmatch[NMATCH];
	regex_t preg;
	int status;

	if (!(uri = malloc(sizeof(URI))))
		return NULL;
	/* Start with every component NULL so free_uri() is safe at any point. */
	memset(uri, 0, sizeof(URI));

	if (regcomp(&preg, URIPATTERN, REG_EXTENDED) != 0) {
		log_message(LOG_ERR, "explode_uri: regcomp");
		goto ERROR_EXIT;
	}

	/*
	 * The compiled pattern is only needed for this single match, so it is
	 * released before the result is examined. Freeing it only on the
	 * success path would leak it whenever the match fails.
	 */
	status = regexec(&preg, string, NMATCH, pmatch, 0);
	regfree(&preg);

	if (status != 0) {
		log_message(LOG_ERR, "explode_uri: regexec");
		goto ERROR_EXIT;
	}

	if (pmatch[SCHEME].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->scheme, SCHEME) < 0)
			goto ERROR_EXIT;
	}

	if (pmatch[AUTHORITY].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->authority, AUTHORITY) < 0)
			goto ERROR_EXIT;
	}

	if (pmatch[PATH].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->path, PATH) < 0)
			goto ERROR_EXIT;
	}

	/* Presence is tested on the group including "?"; the text after it is kept. */
	if (pmatch[QUERY_MARK].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->query, QUERY) < 0)
			goto ERROR_EXIT;
	}

	/* Likewise for "#": test the marker group, store the text after it. */
	if (pmatch[FRAGMENT_MARK].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->fragment, FRAGMENT) < 0)
			goto ERROR_EXIT;
	}

	return uri;

      ERROR_EXIT:
	/* Frees the structure and whichever components were already copied. */
	free_uri(uri);
	return NULL;
}