| /* |
| * Copyright (C) 2019 Apple Inc. All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * |
| * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
| * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS |
| * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF |
| * THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| #include "config.h" |
| #include "UserAgentStringParser.h" |
| |
| #include "RFC7230.h" |
| #include "UserAgentStringData.h" |
| #include <optional> |
| #include <wtf/ASCIICType.h> |
| #include <wtf/StdLibExtras.h> |
| #include <wtf/WeakPtr.h> |
| #include <wtf/text/MakeString.h> |
| #include <wtf/text/StringCommon.h> |
| #include <wtf/text/StringImpl.h> |
| #include <wtf/text/WTFString.h> |
| |
| /* |
| * GRAMMAR: |
| * https://www.rfc-editor.org/rfc/rfc9110#name-user-agent |
| * User-Agent = product *( RWS ( product / comment ) ) |
| * product = token ["/" product-version] |
| * product-version = token |
| * token = 1*tchar |
| * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA ; any VCHAR, except delimiters |
| * RWS = 1*( SP / HTAB ); required whitespace |
| * comment = "(" *( ctext / quoted-pair / comment ) ")" |
| * ctext = HTAB / SP / %x21-27 / %x2A-5B / %x5D-7E / obs-text |
| * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text ) |
| * obs-text = %x80-FF |
| * HTAB = <ASCII horizontal tab %x09, aka '\t'> |
| * SP = <ASCII space, i.e. " "> |
| * VCHAR = <any visible US-ASCII character> |
| * |
| * REFERENCE: |
| * https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/User-Agent#syntax |
| * |
| * NOTE: |
| * User agent strings come in many different forms, but most browsers conform to a common pattern. |
| * This class is attempting to determine attributes about the user agent by expecting common forms |
| * of user agent strings. There is a link below that contains a list of these strings grouped |
| * by platform, browser, layout engine, etc. I tried to pick the most frequent ones to parse out |
| * viable information. |
| * |
| * https://explore.whatismybrowser.com/useragents/explore/ |
| * |
| * Some user agent strings, while valid grammatically, list their info in odd locations. |
| * This parser will not be able to pick out the correct information from those. |
| */ |
| |
| namespace WebCore { |
| UserAgentStringParser::UserAgentStringParser(const String& userAgentString) |
| : m_userAgentString(userAgentString) |
| , data(UserAgentStringData::create()) { }; |
| |
| Ref<UserAgentStringParser> UserAgentStringParser::create(const String& userAgentString) |
| { |
| return adoptRef(*new UserAgentStringParser(userAgentString)); |
| } |
| |
| std::optional<Ref<UserAgentStringData>> UserAgentStringParser::parse() |
| { |
| data = UserAgentStringData::create(); |
| |
| if (atEnd()) |
| return { }; |
| |
| consumeProduct(); |
| |
| while (!atEnd()) { |
| if (!isTabOrSpace(peek())) |
| return { }; |
| |
| consumeRWS(); |
| start = pos; |
| if (peek() == '(') |
| consumeComment(); |
| else |
| consumeProduct(); |
| |
| if (malformed) |
| return { }; |
| } |
| |
| populateUserAgentData(); |
| return data; |
| } |
| |
| void UserAgentStringParser::consumeProduct() |
| { |
| consumeToken(); |
| if (malformed) |
| return; |
| |
| auto product = Product { .name = getSubstring(), .version = { } }; |
| if (!atEnd() && peek() == '/') { |
| increment(); |
| start = pos; |
| consumeToken(); |
| if (malformed) |
| return; |
| product.version = getSubstring(); |
| } |
| start = pos; |
| segments.append(product); |
| } |
| |
| void UserAgentStringParser::consumeRWS() |
| { |
| while (!atEnd() && isTabOrSpace(peek())) |
| increment(); |
| } |
| |
| void UserAgentStringParser::consumeComment() |
| { |
| ASSERT(peek() == '('); |
| increment(); // pass first '(' |
| start = pos; |
| |
| if (atEnd()) { |
| malformed = true; |
| return; |
| } |
| |
| auto c = peek(); |
| while (!atEnd() && c != ')') { |
| if (c == '(') |
| consumeComment(); |
| else if (c == '\\') |
| consumeQuotedPair(); |
| else if (RFC7230::isCommentText(c)) |
| increment(); |
| |
| if (malformed) |
| return; |
| |
| c = peek(); |
| } |
| |
| if (atEnd()) { |
| malformed = true; |
| return; |
| } |
| |
| auto s = getSubstring(); |
| if (!s.isEmpty()) { |
| auto comment = Comment { .parts = s.split(';') }; |
| segments.append(comment); |
| } |
| increment(); |
| start = pos; |
| // malformed user agent string |
| } |
| |
| void UserAgentStringParser::consumeToken() |
| { |
| if (!RFC7230::isTokenCharacter(peek())) { |
| malformed = true; |
| return; |
| } |
| |
| do { |
| increment(); |
| } while (!atEnd() && RFC7230::isTokenCharacter(peek())); |
| } |
| |
| void UserAgentStringParser::consumeQuotedPair() |
| { |
| ASSERT(peek() == '\\'); |
| increment(); // pass '\' |
| |
| if (RFC7230::isQuotedPairSecondOctet(peek())) { |
| increment(); |
| return; |
| } |
| |
| malformed = true; |
| } |
| |
| inline char16_t UserAgentStringParser::peek() |
| { |
| return m_userAgentString[this->pos]; |
| } |
| |
| inline void UserAgentStringParser::increment() |
| { |
| this->pos++; |
| } |
| |
| inline bool UserAgentStringParser::atEnd() |
| { |
| return this->pos >= this->m_userAgentString.length(); |
| } |
| |
| inline String UserAgentStringParser::getSubstring() |
| { |
| return m_userAgentString.substring(start, pos - start); |
| } |
| |
| struct BrowsersSeen { |
| bool brave : 1 { false }; |
| bool firefox : 1 { false }; |
| bool chrome : 1 { false }; |
| bool safari : 1 { false }; |
| bool opera : 1 { false }; |
| bool edge : 1 { false }; |
| String braveVersion; |
| String firefoxVersion; |
| String chromeVersion; |
| String safariVersion; |
| String operaVersion; |
| String edgeVersion; |
| }; |
| |
| void UserAgentStringParser::populateUserAgentData() |
| { |
| BrowsersSeen browsersSeen; |
| auto weakThis = WeakPtr { *this }; |
| bool linuxSeen { false }; |
| for (const auto& segment : segments) { |
| WTF::switchOn(segment, [&browsersSeen, weakThis](const Product& p) { |
| if (p.name == "Mobile") { |
| weakThis->data->mobile = true; |
| return; |
| } |
| if (p.name == "Brave") { |
| browsersSeen.braveVersion = p.version; |
| browsersSeen.brave = true; |
| return; |
| } |
| if (p.name == "Firefox" || p.name == "fxiOS") { |
| browsersSeen.firefoxVersion = p.version; |
| browsersSeen.firefox = true; |
| return; |
| } |
| if (p.name == "Chrome") { |
| browsersSeen.chromeVersion = p.version; |
| browsersSeen.chrome = true; |
| return; |
| } |
| if (p.name == "Safari") { |
| browsersSeen.firefoxVersion = p.version; |
| browsersSeen.safari = true; |
| return; |
| } |
| if (p.name == "OPR") { |
| browsersSeen.operaVersion = p.version; |
| browsersSeen.opera = true; |
| return; |
| } |
| if (p.name.contains("Edg")) { |
| browsersSeen.edgeVersion = p.version; |
| browsersSeen.edge = true; |
| return; |
| } }, [weakThis, &linuxSeen](const Comment& c) { |
| for (const auto& part : c.parts) { |
| if (part.contains("Windows")) { |
| weakThis->data->platform = "Windows"_s; |
| return; |
| } |
| if (part == "Macintosh") { |
| weakThis->data->platform = "macOS"_s; |
| return; |
| } |
| if (part == "iPhone") { |
| weakThis->data->platform = "iOS"_s; |
| return; |
| } |
| if (part == "iPad") { |
| weakThis->data->platform = "iOS"_s; |
| return; |
| } |
| if (part.contains("Android")) { |
| weakThis->data->platform = "Android"_s; |
| return; |
| } |
| if (part.contains("Linux")) { |
| linuxSeen = true; |
| return; |
| } |
| if (part.contains("CrOS")) { |
| weakThis->data->platform = "ChromeOS"_s; |
| return; |
| } |
| } }); |
| } |
| |
| // android user agents sometimes list linux and android, but linux user agents don't list androids |
| if (linuxSeen && data->platform.isEmpty()) |
| data->platform = "Linux"_s; |
| |
| // both chrome and firefox sometimes list safari in their user agent strings |
| if (browsersSeen.safari && !browsersSeen.chrome && !browsersSeen.firefox) { |
| data->browserName = "Safari"_s; |
| data->browserVersion = browsersSeen.safariVersion; |
| return; |
| } |
| |
| // no other browser typically list firefox |
| if (browsersSeen.firefox) { |
| data->browserName = "Firefox"_s; |
| data->browserVersion = browsersSeen.firefoxVersion; |
| } |
| |
| // chrome based browsers typically list chrome |
| if (browsersSeen.chrome) { |
| if (browsersSeen.edge) { |
| data->browserName = "Microsoft Edge"_s; |
| data->browserVersion = browsersSeen.edgeVersion; |
| } else if (browsersSeen.brave) { |
| data->browserName = "Brave"_s; |
| data->browserVersion = browsersSeen.braveVersion; |
| } else if (browsersSeen.opera) { |
| data->browserName = "Opera"_s; |
| data->browserVersion = browsersSeen.operaVersion; |
| } else { |
| data->browserName = "Google Chrome"_s; |
| data->browserVersion = browsersSeen.chromeVersion; |
| } |
| } |
| } |
| }; |