miniscript/expression/
mod.rs

1// SPDX-License-Identifier: CC0-1.0
2
3//! # Function-like Expression Language
4//!
5
6mod error;
7
8use core::fmt;
9use core::str::FromStr;
10
11pub use self::error::ParseThresholdError;
12use crate::prelude::*;
13use crate::{errstr, Error, Threshold, MAX_RECURSION_DEPTH};
14
15/// Allowed characters are descriptor strings.
16pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
17
18/// Map of valid characters in descriptor strings.
19#[rustfmt::skip]
20pub const VALID_CHARS: [Option<u8>; 128] = [
21    None, None, None, None, None, None, None, None, None, None, None, None, None,
22    None, None, None, None, None, None, None, None, None, None, None, None, None,
23    None, None, None, None, None, None, Some(94), Some(59), Some(92), Some(91),
24    Some(28), Some(29), Some(50), Some(15), Some(10), Some(11), Some(17), Some(51),
25    Some(14), Some(52), Some(53), Some(16), Some(0), Some(1), Some(2), Some(3),
26    Some(4), Some(5), Some(6), Some(7), Some(8), Some(9), Some(27), Some(54),
27    Some(55), Some(56), Some(57), Some(58), Some(26), Some(82), Some(83),
28    Some(84), Some(85), Some(86), Some(87), Some(88), Some(89), Some(32), Some(33),
29    Some(34), Some(35), Some(36), Some(37), Some(38), Some(39), Some(40), Some(41),
30    Some(42), Some(43), Some(44), Some(45), Some(46), Some(47), Some(48), Some(49),
31    Some(12), Some(93), Some(13), Some(60), Some(61), Some(90), Some(18), Some(19),
32    Some(20), Some(21), Some(22), Some(23), Some(24), Some(25), Some(64), Some(65),
33    Some(66), Some(67), Some(68), Some(69), Some(70), Some(71), Some(72), Some(73),
34    Some(74), Some(75), Some(76), Some(77), Some(78), Some(79), Some(80), Some(81),
35    Some(30), Some(62), Some(31), Some(63), None,
36];
37
38#[derive(Debug)]
39/// A token of the form `x(...)` or `x`
40pub struct Tree<'a> {
41    /// The name `x`
42    pub name: &'a str,
43    /// The comma-separated contents of the `(...)`, if any
44    pub args: Vec<Tree<'a>>,
45}
46// or_b(pk(A),pk(B))
47//
48// A = musig(musig(B,C),D,E)
49// or_b()
50// pk(A), pk(B)
51
52/// A trait for extracting a structure from a Tree representation in token form
53pub trait FromTree: Sized {
54    /// Extract a structure from Tree representation
55    fn from_tree(top: &Tree) -> Result<Self, Error>;
56}
57
58enum Found {
59    Nothing,
60    LBracket(usize), // Either a left ( or {
61    Comma(usize),
62    RBracket(usize), // Either a right ) or }
63}
64
65fn next_expr(sl: &str, delim: char) -> Found {
66    let mut found = Found::Nothing;
67    if delim == '(' {
68        for (n, ch) in sl.char_indices() {
69            match ch {
70                '(' => {
71                    found = Found::LBracket(n);
72                    break;
73                }
74                ',' => {
75                    found = Found::Comma(n);
76                    break;
77                }
78                ')' => {
79                    found = Found::RBracket(n);
80                    break;
81                }
82                _ => {}
83            }
84        }
85    } else if delim == '{' {
86        let mut new_count = 0;
87        for (n, ch) in sl.char_indices() {
88            match ch {
89                '{' => {
90                    found = Found::LBracket(n);
91                    break;
92                }
93                '(' => {
94                    new_count += 1;
95                }
96                ',' => {
97                    if new_count == 0 {
98                        found = Found::Comma(n);
99                        break;
100                    }
101                }
102                ')' => {
103                    new_count -= 1;
104                }
105                '}' => {
106                    found = Found::RBracket(n);
107                    break;
108                }
109                _ => {}
110            }
111        }
112    } else {
113        unreachable!("{}", "Internal: delimiters in parsing must be '(' or '{'");
114    }
115    found
116}
117
118// Get the corresponding delim
119fn closing_delim(delim: char) -> char {
120    match delim {
121        '(' => ')',
122        '{' => '}',
123        _ => unreachable!("Unknown delimiter"),
124    }
125}
126
127impl<'a> Tree<'a> {
128    /// Parse an expression with round brackets
129    pub fn from_slice(sl: &'a str) -> Result<(Tree<'a>, &'a str), Error> {
130        // Parsing TapTree or just miniscript
131        Self::from_slice_delim(sl, 0u32, '(')
132    }
133
134    pub(crate) fn from_slice_delim(
135        mut sl: &'a str,
136        depth: u32,
137        delim: char,
138    ) -> Result<(Tree<'a>, &'a str), Error> {
139        if depth >= MAX_RECURSION_DEPTH {
140            return Err(Error::MaxRecursiveDepthExceeded);
141        }
142
143        match next_expr(sl, delim) {
144            // String-ending terminal
145            Found::Nothing => Ok((Tree { name: sl, args: vec![] }, "")),
146            // Terminal
147            Found::Comma(n) | Found::RBracket(n) => {
148                Ok((Tree { name: &sl[..n], args: vec![] }, &sl[n..]))
149            }
150            // Function call
151            Found::LBracket(n) => {
152                let mut ret = Tree { name: &sl[..n], args: vec![] };
153
154                sl = &sl[n + 1..];
155                loop {
156                    let (arg, new_sl) = Tree::from_slice_delim(sl, depth + 1, delim)?;
157                    ret.args.push(arg);
158
159                    if new_sl.is_empty() {
160                        return Err(Error::ExpectedChar(closing_delim(delim)));
161                    }
162
163                    sl = &new_sl[1..];
164                    match new_sl.as_bytes()[0] {
165                        b',' => {}
166                        last_byte => {
167                            if last_byte == closing_delim(delim) as u8 {
168                                break;
169                            } else {
170                                return Err(Error::ExpectedChar(closing_delim(delim)));
171                            }
172                        }
173                    }
174                }
175                Ok((ret, sl))
176            }
177        }
178    }
179
180    /// Parses a tree from a string
181    #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes.
182    pub fn from_str(s: &'a str) -> Result<Tree<'a>, Error> {
183        check_valid_chars(s)?;
184
185        let (top, rem) = Tree::from_slice(s)?;
186        if rem.is_empty() {
187            Ok(top)
188        } else {
189            Err(errstr(rem))
190        }
191    }
192
193    /// Parses an expression tree as a threshold (a term with at least one child,
194    /// the first of which is a positive integer k).
195    ///
196    /// This sanity-checks that the threshold is well-formed (begins with a valid
197    /// threshold value, etc.) but does not parse the children of the threshold.
198    /// Instead it returns a threshold holding the empty type `()`, which is
199    /// constructed without any allocations, and expects the caller to convert
200    /// this to the "real" threshold type by calling [`Threshold::translate`].
201    ///
202    /// (An alternate API which does the conversion inline turned out to be
203    /// too messy; it needs to take a closure, have multiple generic parameters,
204    /// and be able to return multiple error types.)
205    pub fn to_null_threshold<const MAX: usize>(
206        &self,
207    ) -> Result<Threshold<(), MAX>, ParseThresholdError> {
208        // First, special case "no arguments" so we can index the first argument without panics.
209        if self.args.is_empty() {
210            return Err(ParseThresholdError::NoChildren);
211        }
212
213        if !self.args[0].args.is_empty() {
214            return Err(ParseThresholdError::KNotTerminal);
215        }
216
217        let k = parse_num(self.args[0].name)
218            .map_err(|e| ParseThresholdError::ParseK(e.to_string()))? as usize;
219        Threshold::new(k, vec![(); self.args.len() - 1]).map_err(ParseThresholdError::Threshold)
220    }
221}
222
223/// Filter out non-ASCII because we byte-index strings all over the
224/// place and Rust gets very upset when you splinch a string.
225pub fn check_valid_chars(s: &str) -> Result<(), Error> {
226    for ch in s.bytes() {
227        if !ch.is_ascii() {
228            return Err(Error::Unprintable(ch));
229        }
230        // Index bounds: We know that ch is ASCII, so it is <= 127.
231        if VALID_CHARS[ch as usize].is_none() {
232            return Err(Error::Unexpected(
233                "Only characters in INPUT_CHARSET are allowed".to_string(),
234            ));
235        }
236    }
237    Ok(())
238}
239
240/// Parse a string as a u32, for timelocks or thresholds
241pub fn parse_num(s: &str) -> Result<u32, Error> {
242    if s.len() > 1 {
243        let ch = s.chars().next().unwrap();
244        if !('1'..='9').contains(&ch) {
245            return Err(Error::Unexpected("Number must start with a digit 1-9".to_string()));
246        }
247    }
248    u32::from_str(s).map_err(|_| errstr(s))
249}
250
251/// Attempts to parse a terminal expression
252pub fn terminal<T, F, Err>(term: &Tree, convert: F) -> Result<T, Error>
253where
254    F: FnOnce(&str) -> Result<T, Err>,
255    Err: fmt::Display,
256{
257    if term.args.is_empty() {
258        convert(term.name).map_err(|e| Error::Unexpected(e.to_string()))
259    } else {
260        Err(errstr(term.name))
261    }
262}
263
264/// Attempts to parse an expression with exactly one child
265pub fn unary<L, T, F>(term: &Tree, convert: F) -> Result<T, Error>
266where
267    L: FromTree,
268    F: FnOnce(L) -> T,
269{
270    if term.args.len() == 1 {
271        let left = FromTree::from_tree(&term.args[0])?;
272        Ok(convert(left))
273    } else {
274        Err(errstr(term.name))
275    }
276}
277
278/// Attempts to parse an expression with exactly two children
279pub fn binary<L, R, T, F>(term: &Tree, convert: F) -> Result<T, Error>
280where
281    L: FromTree,
282    R: FromTree,
283    F: FnOnce(L, R) -> T,
284{
285    if term.args.len() == 2 {
286        let left = FromTree::from_tree(&term.args[0])?;
287        let right = FromTree::from_tree(&term.args[1])?;
288        Ok(convert(left, right))
289    } else {
290        Err(errstr(term.name))
291    }
292}
293
294#[cfg(test)]
295mod tests {
296    use super::parse_num;
297
298    #[test]
299    fn test_parse_num() {
300        assert!(parse_num("0").is_ok());
301        assert!(parse_num("00").is_err());
302        assert!(parse_num("0000").is_err());
303        assert!(parse_num("06").is_err());
304        assert!(parse_num("+6").is_err());
305        assert!(parse_num("-6").is_err());
306    }
307
308    #[test]
309    fn test_valid_char_map() {
310        let mut valid_chars = [None; 128];
311        for (i, ch) in super::INPUT_CHARSET.chars().enumerate() {
312            valid_chars[ch as usize] = Some(i as u8);
313        }
314        assert_eq!(valid_chars, super::VALID_CHARS);
315    }
316}
317
318#[cfg(bench)]
319mod benches {
320    use test::{black_box, Bencher};
321
322    use super::*;
323
324    #[bench]
325    pub fn parse_tree(bh: &mut Bencher) {
326        bh.iter(|| {
327            let tree = Tree::from_str(
328                "and(thresh(2,and(sha256(H),or(sha256(H),pk(A))),pk(B),pk(C),pk(D),sha256(H)),pk(E))",
329            ).unwrap();
330            black_box(tree);
331        });
332    }
333
334    #[bench]
335    pub fn parse_tree_deep(bh: &mut Bencher) {
336        bh.iter(|| {
337            let tree = Tree::from_str(
338                "and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(and(1,2),3),4),5),6),7),8),9),10),11),12),13),14),15),16),17),18),19),20),21)"
339            ).unwrap();
340            black_box(tree);
341        });
342    }
343}