//
// Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
//
// Official repository: https://github.com/boostorg/url
//
9  

9  

10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
10  
#ifndef BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
11  
#define BOOST_URL_IMPL_ENCODE_HPP
12  

12  

13  
#include <boost/url/grammar/token_rule.hpp>
13  
#include <boost/url/grammar/token_rule.hpp>
14  
#include <boost/assert.hpp>
14  
#include <boost/assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
15  
#include <boost/core/detail/static_assert.hpp>
16  
#include <boost/url/detail/encode.hpp>
16  
#include <boost/url/detail/encode.hpp>
17  
#include <boost/url/detail/except.hpp>
17  
#include <boost/url/detail/except.hpp>
18  
#include <boost/url/encoding_opts.hpp>
18  
#include <boost/url/encoding_opts.hpp>
19  
#include <boost/url/grammar/charset.hpp>
19  
#include <boost/url/grammar/charset.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
20  
#include <boost/url/grammar/hexdig_chars.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
21  
#include <boost/url/grammar/string_token.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
22  
#include <boost/url/grammar/type_traits.hpp>
23  

23  

24  
namespace boost {
24  
namespace boost {
25  
namespace urls {
25  
namespace urls {
26  

26  

27  
//------------------------------------------------
27  
//------------------------------------------------
28  

28  

29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
29  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
30  
std::size_t
30  
std::size_t
31  
encoded_size(
31  
encoded_size(
32  
    core::string_view s,
32  
    core::string_view s,
33  
    CS const& allowed,
33  
    CS const& allowed,
34  
    encoding_opts opt) noexcept
34  
    encoding_opts opt) noexcept
35  
{
35  
{
36  
    /*
36  
    /*
37  
        If you get a compilation error here, it
37  
        If you get a compilation error here, it
38  
        means that the value you passed does
38  
        means that the value you passed does
39  
        not meet the requirements stated in
39  
        not meet the requirements stated in
40  
        the documentation.
40  
        the documentation.
41  
    */
41  
    */
42  
    BOOST_CORE_STATIC_ASSERT(
42  
    BOOST_CORE_STATIC_ASSERT(
43  
        grammar::is_charset<CS>::value);
43  
        grammar::is_charset<CS>::value);
44  

44  

45  
    std::size_t n = 0;
45  
    std::size_t n = 0;
46  
    auto it = s.data();
46  
    auto it = s.data();
47  
    auto const last = it + s.size();
47  
    auto const last = it + s.size();
48  

48  

49  
    if (!opt.space_as_plus)
49  
    if (!opt.space_as_plus)
50  
    {
50  
    {
51  
        while (it != last)
51  
        while (it != last)
52  
        {
52  
        {
53  
            char const c = *it;
53  
            char const c = *it;
54  
            if (allowed(c))
54  
            if (allowed(c))
55  
            {
55  
            {
56  
                ++n;
56  
                ++n;
57  
            }
57  
            }
58  
            else
58  
            else
59  
            {
59  
            {
60  
                n += 3;
60  
                n += 3;
61  
            }
61  
            }
62  
            ++it;
62  
            ++it;
63  
        }
63  
        }
64  
    }
64  
    }
65  
    else
65  
    else
66  
    {
66  
    {
67  
        // '+' is always encoded (thus
67  
        // '+' is always encoded (thus
68  
        // spending 3 chars) even if
68  
        // spending 3 chars) even if
69  
        // allowed because "%2B" and
69  
        // allowed because "%2B" and
70  
        // "+" have different meanings
70  
        // "+" have different meanings
71  
        // when space as plus is enabled
71  
        // when space as plus is enabled
72  
        using FNT = bool (*)(CS const& allowed, char);
72  
        using FNT = bool (*)(CS const& allowed, char);
73  
        FNT takes_one_char =
73  
        FNT takes_one_char =
74  
            allowed('+') ?
74  
            allowed('+') ?
75  
                (allowed(' ') ?
75  
                (allowed(' ') ?
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
76  
                     FNT([](CS const& allowed, char c){ return allowed(c) && c != '+'; }) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
77  
                     FNT([](CS const& allowed, char c){ return (allowed(c) || c == ' ') && c != '+'; })) :
78  
                (allowed(' ') ?
78  
                (allowed(' ') ?
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
79  
                     FNT([](CS const& allowed, char c){ return allowed(c); }) :
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
80  
                     FNT([](CS const& allowed, char c){ return allowed(c) || c == ' '; }));
81  
        while (it != last)
81  
        while (it != last)
82  
        {
82  
        {
83  
            char const c = *it;
83  
            char const c = *it;
84  
            if (takes_one_char(allowed, c))
84  
            if (takes_one_char(allowed, c))
85  
            {
85  
            {
86  
                ++n;
86  
                ++n;
87  
            }
87  
            }
88  
            else
88  
            else
89  
            {
89  
            {
90  
                n += 3;
90  
                n += 3;
91  
            }
91  
            }
92  
            ++it;
92  
            ++it;
93  
        }
93  
        }
94  
    }
94  
    }
95  
    return n;
95  
    return n;
96  
}
96  
}
97  

97  

98  
//------------------------------------------------
98  
//------------------------------------------------
99  

99  

100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
100  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
101  
std::size_t
101  
std::size_t
102  
encode(
102  
encode(
103  
    char* dest,
103  
    char* dest,
104  
    std::size_t size,
104  
    std::size_t size,
105  
    core::string_view s,
105  
    core::string_view s,
106  
    CS const& allowed,
106  
    CS const& allowed,
107  
    encoding_opts opt)
107  
    encoding_opts opt)
108  
{
108  
{
109  
/*  If you get a compilation error here, it
109  
/*  If you get a compilation error here, it
110  
    means that the value you passed does
110  
    means that the value you passed does
111  
    not meet the requirements stated in
111  
    not meet the requirements stated in
112  
    the documentation.
112  
    the documentation.
113  
*/
113  
*/
114  
    BOOST_CORE_STATIC_ASSERT(
114  
    BOOST_CORE_STATIC_ASSERT(
115  
        grammar::is_charset<CS>::value);
115  
        grammar::is_charset<CS>::value);
116  

116  

117  
    // '%' must be reserved
117  
    // '%' must be reserved
118  
    BOOST_ASSERT(!allowed('%'));
118  
    BOOST_ASSERT(!allowed('%'));
119  

119  

120  
    char const* const hex =
120  
    char const* const hex =
121  
        detail::hexdigs[opt.lower_case];
121  
        detail::hexdigs[opt.lower_case];
122  
    auto const encode = [hex](
122  
    auto const encode = [hex](
123  
        char*& dest,
123  
        char*& dest,
124  
        unsigned char c) noexcept
124  
        unsigned char c) noexcept
125  
    {
125  
    {
126  
        *dest++ = '%';
126  
        *dest++ = '%';
127  
        *dest++ = hex[c>>4];
127  
        *dest++ = hex[c>>4];
128  
        *dest++ = hex[c&0xf];
128  
        *dest++ = hex[c&0xf];
129  
    };
129  
    };
130  

130  

131  
    auto it = s.data();
131  
    auto it = s.data();
132  
    auto const end = dest + size;
132  
    auto const end = dest + size;
133  
    auto const last = it + s.size();
133  
    auto const last = it + s.size();
134  
    auto const dest0 = dest;
134  
    auto const dest0 = dest;
135  

135  

136  
    if (!opt.space_as_plus)
136  
    if (!opt.space_as_plus)
137  
    {
137  
    {
138  
        while(it != last)
138  
        while(it != last)
139  
        {
139  
        {
140  
            char const c = *it;
140  
            char const c = *it;
141  
            if (allowed(c))
141  
            if (allowed(c))
142  
            {
142  
            {
143  
                if(dest == end)
143  
                if(dest == end)
144  
                    return dest - dest0;
144  
                    return dest - dest0;
145  
                *dest++ = c;
145  
                *dest++ = c;
146  
                ++it;
146  
                ++it;
147  
                continue;
147  
                continue;
148  
            }
148  
            }
149  
            if (end - dest < 3)
149  
            if (end - dest < 3)
150  
                return dest - dest0;
150  
                return dest - dest0;
151  
            encode(dest, c);
151  
            encode(dest, c);
152  
            ++it;
152  
            ++it;
153  
        }
153  
        }
154  
        return dest - dest0;
154  
        return dest - dest0;
155  
    }
155  
    }
156  
    else
156  
    else
157  
    {
157  
    {
158  
        while (it != last)
158  
        while (it != last)
159  
        {
159  
        {
160  
            char const c = *it;
160  
            char const c = *it;
161  
            if (c == ' ')
161  
            if (c == ' ')
162  
            {
162  
            {
163  
                if(dest == end)
163  
                if(dest == end)
164  
                    return dest - dest0;
164  
                    return dest - dest0;
165  
                *dest++ = '+';
165  
                *dest++ = '+';
166  
                ++it;
166  
                ++it;
167  
                continue;
167  
                continue;
168  
            }
168  
            }
169  
            else if (
169  
            else if (
170  
                allowed(c) &&
170  
                allowed(c) &&
171  
                c != '+')
171  
                c != '+')
172  
            {
172  
            {
173  
                if(dest == end)
173  
                if(dest == end)
174  
                    return dest - dest0;
174  
                    return dest - dest0;
175  
                *dest++ = c;
175  
                *dest++ = c;
176  
                ++it;
176  
                ++it;
177  
                continue;
177  
                continue;
178  
            }
178  
            }
179  
            if(end - dest < 3)
179  
            if(end - dest < 3)
180  
                return dest - dest0;
180  
                return dest - dest0;
181  
            encode(dest, c);
181  
            encode(dest, c);
182  
            ++it;
182  
            ++it;
183  
        }
183  
        }
184  
    }
184  
    }
185  
    return dest - dest0;
185  
    return dest - dest0;
186  
}
186  
}
187  

187  

188  
//------------------------------------------------
188  
//------------------------------------------------
189  

189  

190  
// unsafe encode just
190  
// unsafe encode just
191  
// asserts on the output buffer
191  
// asserts on the output buffer
192  
//
192  
//
193  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
193  
template<BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
194  
std::size_t
194  
std::size_t
195  
encode_unsafe(
195  
encode_unsafe(
196  
    char* dest,
196  
    char* dest,
197  
    std::size_t size,
197  
    std::size_t size,
198  
    core::string_view s,
198  
    core::string_view s,
199  
    CS const& allowed,
199  
    CS const& allowed,
200  
    encoding_opts opt)
200  
    encoding_opts opt)
201  
{
201  
{
202  
    BOOST_CORE_STATIC_ASSERT(
202  
    BOOST_CORE_STATIC_ASSERT(
203  
        grammar::is_charset<CS>::value);
203  
        grammar::is_charset<CS>::value);
204  

204  

205  
    // '%' must be reserved
205  
    // '%' must be reserved
206  
    BOOST_ASSERT(!allowed('%'));
206  
    BOOST_ASSERT(!allowed('%'));
207  

207  

208  
    auto it = s.data();
208  
    auto it = s.data();
209  
    auto const last = it + s.size();
209  
    auto const last = it + s.size();
210  
    auto const end = dest + size;
210  
    auto const end = dest + size;
211  
    ignore_unused(end);
211  
    ignore_unused(end);
212  

212  

213  
    char const* const hex =
213  
    char const* const hex =
214  
        detail::hexdigs[opt.lower_case];
214  
        detail::hexdigs[opt.lower_case];
215  
    auto const encode = [end, hex](
215  
    auto const encode = [end, hex](
216  
        char*& dest,
216  
        char*& dest,
217  
        unsigned char c) noexcept
217  
        unsigned char c) noexcept
218  
    {
218  
    {
219  
        ignore_unused(end);
219  
        ignore_unused(end);
220  
        *dest++ = '%';
220  
        *dest++ = '%';
221  
        BOOST_ASSERT(dest != end);
221  
        BOOST_ASSERT(dest != end);
222  
        *dest++ = hex[c>>4];
222  
        *dest++ = hex[c>>4];
223  
        BOOST_ASSERT(dest != end);
223  
        BOOST_ASSERT(dest != end);
224  
        *dest++ = hex[c&0xf];
224  
        *dest++ = hex[c&0xf];
225  
    };
225  
    };
226  

226  

227  
    auto const dest0 = dest;
227  
    auto const dest0 = dest;
228  
    if (!opt.space_as_plus)
228  
    if (!opt.space_as_plus)
229  
    {
229  
    {
230  
        while(it != last)
230  
        while(it != last)
231  
        {
231  
        {
232  
            BOOST_ASSERT(dest != end);
232  
            BOOST_ASSERT(dest != end);
233  
            char const c = *it;
233  
            char const c = *it;
234  
            if(allowed(c))
234  
            if(allowed(c))
235  
            {
235  
            {
236  
                *dest++ = c;
236  
                *dest++ = c;
237  
            }
237  
            }
238  
            else
238  
            else
239  
            {
239  
            {
240  
                encode(dest, c);
240  
                encode(dest, c);
241  
            }
241  
            }
242  
            ++it;
242  
            ++it;
243  
        }
243  
        }
244  
    }
244  
    }
245  
    else
245  
    else
246  
    {
246  
    {
247  
        while(it != last)
247  
        while(it != last)
248  
        {
248  
        {
249  
            BOOST_ASSERT(dest != end);
249  
            BOOST_ASSERT(dest != end);
250  
            char const c = *it;
250  
            char const c = *it;
251  
            if (c == ' ')
251  
            if (c == ' ')
252  
            {
252  
            {
253  
                *dest++ = '+';
253  
                *dest++ = '+';
254  
            }
254  
            }
255  
            else if (
255  
            else if (
256  
                allowed(c) &&
256  
                allowed(c) &&
257  
                c != '+')
257  
                c != '+')
258  
            {
258  
            {
259  
                *dest++ = c;
259  
                *dest++ = c;
260  
            }
260  
            }
261  
            else
261  
            else
262  
            {
262  
            {
263  
                encode(dest, c);
263  
                encode(dest, c);
264  
            }
264  
            }
265  
            ++it;
265  
            ++it;
266  
        }
266  
        }
267  
    }
267  
    }
268  
    return dest - dest0;
268  
    return dest - dest0;
269  
}
269  
}
270  

270  

271  
//------------------------------------------------
271  
//------------------------------------------------
272  

272  

273  
template<
273  
template<
274  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
274  
    BOOST_URL_CONSTRAINT(string_token::StringToken) StringToken,
275  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
275  
    BOOST_URL_CONSTRAINT(grammar::CharSet) CS>
276  
BOOST_URL_STRTOK_RETURN
276  
BOOST_URL_STRTOK_RETURN
277  
encode(
277  
encode(
278  
    core::string_view s,
278  
    core::string_view s,
279  
    CS const& allowed,
279  
    CS const& allowed,
280  
    encoding_opts opt,
280  
    encoding_opts opt,
281  
    StringToken&& token)
281  
    StringToken&& token)
282  
{
282  
{
283  
    BOOST_CORE_STATIC_ASSERT(
283  
    BOOST_CORE_STATIC_ASSERT(
284  
        grammar::is_charset<CS>::value);
284  
        grammar::is_charset<CS>::value);
285  

285  

286  
    auto const n = encoded_size(
286  
    auto const n = encoded_size(
287  
        s, allowed, opt);
287  
        s, allowed, opt);
288  
    auto p = token.prepare(n);
288  
    auto p = token.prepare(n);
289  
    if(n > 0)
289  
    if(n > 0)
290  
        encode_unsafe(
290  
        encode_unsafe(
291  
            p, n, s, allowed, opt);
291  
            p, n, s, allowed, opt);
292  
    return token.result();
292  
    return token.result();
293  
}
293  
}
294  

294  

295  
} // urls
295  
} // urls
296  
} // boost
296  
} // boost
297  

297  

298  
#endif
298  
#endif