mas_data_model/
user_agent.rs

1// Copyright 2024 New Vector Ltd.
2// Copyright 2024 The Matrix.org Foundation C.I.C.
3//
4// SPDX-License-Identifier: AGPL-3.0-only
5// Please see LICENSE in the repository root for full details.
6
7use std::sync::LazyLock;
8
9use serde::Serialize;
10use woothee::{parser::Parser, woothee::VALUE_UNKNOWN};
11
12static CUSTOM_USER_AGENT_REGEX: LazyLock<regex::Regex> = LazyLock::new(|| {
13    regex::Regex::new(r"^(?P<name>[^/]+)/(?P<version>[^ ]+) \((?P<segments>.+)\)$").unwrap()
14});
15
16static ELECTRON_USER_AGENT_REGEX: LazyLock<regex::Regex> =
17    LazyLock::new(|| regex::Regex::new(r"(?m)\w+/[\w.]+").unwrap());
18
19#[derive(Debug, Serialize, Clone, PartialEq, Eq)]
20#[serde(rename_all = "snake_case")]
21pub enum DeviceType {
22    Pc,
23    Mobile,
24    Tablet,
25    Unknown,
26}
27
28#[derive(Debug, Serialize, Clone, PartialEq, Eq)]
29pub struct UserAgent {
30    pub name: Option<String>,
31    pub version: Option<String>,
32    pub os: Option<String>,
33    pub os_version: Option<String>,
34    pub model: Option<String>,
35    pub device_type: DeviceType,
36    pub raw: String,
37}
38
39impl std::ops::Deref for UserAgent {
40    type Target = str;
41
42    fn deref(&self) -> &Self::Target {
43        &self.raw
44    }
45}
46
47impl UserAgent {
48    fn parse_custom(user_agent: &str) -> Option<(&str, &str, &str, &str, Option<&str>)> {
49        let captures = CUSTOM_USER_AGENT_REGEX.captures(user_agent)?;
50        let name = captures.name("name")?.as_str();
51        let version = captures.name("version")?.as_str();
52        let segments: Vec<&str> = captures
53            .name("segments")?
54            .as_str()
55            .split(';')
56            .map(str::trim)
57            .collect();
58
59        match segments[..] {
60            ["Linux", "U", os, model, ..] | [model, os, ..] => {
61                // Most android model have a `/[build version]` suffix we don't care about
62                let model = model.split_once('/').map_or(model, |(model, _)| model);
63                // Some android version also have `Build/[build version]` suffix we don't care
64                // about
65                let model = model.strip_suffix("Build").unwrap_or(model);
66                // And let's trim any leftovers
67                let model = model.trim();
68
69                let (os, os_version) = if let Some((os, version)) = os.split_once(' ') {
70                    (os, Some(version))
71                } else {
72                    (os, None)
73                };
74
75                Some((name, version, model, os, os_version))
76            }
77            _ => None,
78        }
79    }
80
81    fn parse_electron(user_agent: &str) -> Option<(&str, &str)> {
82        let omit_keys = ["Mozilla", "AppleWebKit", "Chrome", "Electron", "Safari"];
83        return ELECTRON_USER_AGENT_REGEX
84            .find_iter(user_agent)
85            .map(|caps| caps.as_str().split_once('/').unwrap())
86            .find(|pair| !omit_keys.contains(&pair.0));
87    }
88
89    #[must_use]
90    pub fn parse(user_agent: String) -> Self {
91        if !user_agent.contains("Mozilla/") {
92            if let Some((name, version, model, os, os_version)) =
93                UserAgent::parse_custom(&user_agent)
94            {
95                let mut device_type = DeviceType::Unknown;
96
97                // Handle mobile simple mobile devices
98                if os == "Android" || os == "iOS" {
99                    device_type = DeviceType::Mobile;
100                }
101
102                // Handle iPads
103                if model.contains("iPad") {
104                    device_type = DeviceType::Tablet;
105                }
106
107                return Self {
108                    name: Some(name.to_owned()),
109                    version: Some(version.to_owned()),
110                    os: Some(os.to_owned()),
111                    os_version: os_version.map(std::borrow::ToOwned::to_owned),
112                    model: Some(model.to_owned()),
113                    device_type,
114                    raw: user_agent,
115                };
116            }
117        }
118
119        let mut model = None;
120        let Some(mut result) = Parser::new().parse(&user_agent) else {
121            return Self {
122                raw: user_agent,
123                name: None,
124                version: None,
125                os: None,
126                os_version: None,
127                model: None,
128                device_type: DeviceType::Unknown,
129            };
130        };
131
132        let mut device_type = match result.category {
133            "pc" => DeviceType::Pc,
134            "smartphone" | "mobilephone" => DeviceType::Mobile,
135            _ => DeviceType::Unknown,
136        };
137
138        // Special handling for Chrome user-agent reduction cases
139        // https://www.chromium.org/updates/ua-reduction/
140        match (result.os, &*result.os_version) {
141            // Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/533.88 (KHTML, like Gecko)
142            // Chrome/109.1.2342.76 Safari/533.88
143            ("Windows 10", "NT 10.0") if user_agent.contains("Windows NT 10.0; Win64; x64") => {
144                result.os = "Windows";
145                result.os_version = VALUE_UNKNOWN.into();
146            }
147
148            // Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)
149            // Chrome/100.0.0.0 Safari/537.36
150            ("Linux", _) if user_agent.contains("X11; Linux x86_64") => {
151                result.os = "Linux";
152                result.os_version = VALUE_UNKNOWN.into();
153            }
154
155            // Mozilla/5.0 (X11; CrOS x86_64 14541.0.0) AppleWebKit/537.36 (KHTML, like Gecko)
156            // Chrome/107.0.0.0 Safari/537.36
157            ("ChromeOS", _) if user_agent.contains("X11; CrOS x86_64 14541.0.0") => {
158                result.os = "Chrome OS";
159                result.os_version = VALUE_UNKNOWN.into();
160            }
161
162            // Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko)
163            // Chrome/100.0.0.0 Mobile Safari/537.36
164            ("Android", "10") if user_agent.contains("Linux; Android 10; K") => {
165                result.os = "Android";
166                result.os_version = VALUE_UNKNOWN.into();
167            }
168
169            // Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like
170            // Gecko) Chrome/100.0.4896.133 Safari/537.36
171            // Safari also freezes the OS version
172            // Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like
173            // Gecko) Version/17.3.1 Safari/605.1.15
174            ("Mac OSX", "10.15.7") if user_agent.contains("Macintosh; Intel Mac OS X 10_15_7") => {
175                result.os = "macOS";
176                result.os_version = VALUE_UNKNOWN.into();
177            }
178
179            // Woothee identifies iPhone and iPod in the OS, but we want to map them to iOS and use
180            // them as model
181            ("iPhone" | "iPod", _) => {
182                model = Some(result.os.to_owned());
183                result.os = "iOS";
184            }
185
186            ("iPad", _) => {
187                model = Some(result.os.to_owned());
188                device_type = DeviceType::Tablet;
189                result.os = "iPadOS";
190            }
191
192            // Also map `Mac OSX` to `macOS`
193            ("Mac OSX", _) => {
194                result.os = "macOS";
195            }
196
197            _ => {}
198        }
199
200        // For some reason, the version on Windows is on the OS field
201        // This transforms `Windows 10` into `Windows` and `10`
202        if let Some(version) = result.os.strip_prefix("Windows ") {
203            result.os = "Windows";
204            result.os_version = version.into();
205        }
206
207        // Special handling for Electron applications e.g. Element Desktop
208        if user_agent.contains("Electron/") {
209            if let Some(app) = UserAgent::parse_electron(&user_agent) {
210                result.name = app.0;
211                result.version = app.1;
212            }
213        }
214
215        Self {
216            name: (result.name != VALUE_UNKNOWN).then(|| result.name.to_owned()),
217            version: (result.version != VALUE_UNKNOWN).then(|| result.version.to_owned()),
218            os: (result.os != VALUE_UNKNOWN).then(|| result.os.to_owned()),
219            os_version: (result.os_version != VALUE_UNKNOWN)
220                .then(|| result.os_version.into_owned()),
221            device_type,
222            model,
223            raw: user_agent,
224        }
225    }
226}