1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
use num::bigint;
use rand::{thread_rng, Rng};
use sha3::{Digest, Sha3_512};
#[cfg(not(target_arch = "wasm32"))]
use std::{
    collections::hash_map::DefaultHasher,
    hash::{Hash, Hasher},
};

use crate::{error::CuidError, BASE};

const BIG_LENGTH: u8 = 4;

// =============================================================================
// THREAD LOCALS
// =============================================================================
// Each thread generating CUIDs gets its own:
// - 64-bit counter, randomly initialized to some value between 0 and 2056, inclusive
// - fingerprint, a hash with added entropy, derived from a random number between
//   2063 and 4125, inclusive, the process ID, and the thread ID

thread_local! {
    /// Fingerprint! The original implementation is a hash of:
    /// - stringified keys of the global object
    /// - added entropy
    ///
    /// For us, we'll use
    /// - A few random numbers
    /// - the process ID
    /// - the thread ID (which also ensures our CUIDs will be different per thread)
    ///
    /// This is pretty non-language, non-system dependent, so it allows us to
    /// compile to wasm and so on.
    static FINGERPRINT: String = hash(
        [
            thread_rng().gen::<u128>().to_be_bytes(),
            thread_rng().gen::<u128>().to_be_bytes(),
            #[cfg(not(target_arch="wasm32"))]
            u128::from(std::process::id()).to_be_bytes(),
            #[cfg(not(target_arch="wasm32"))]
            u128::from(get_thread_id()).to_be_bytes(),
        ],
        BIG_LENGTH.into(),
    );
}

#[cfg(not(target_arch = "wasm32"))]
/// Retrieves the current thread's ID.
fn get_thread_id() -> u64 {
    // ThreadId doesn't implement debug or display, but it does implement Hash,
    // so we can get the hash value to use in our fingerprint.
    let mut hasher = DefaultHasher::new();
    std::thread::current().id().hash(&mut hasher);
    hasher.finish()
}

/// Retrieves the thread-local fingerprint.
fn get_fingerprint() -> String {
    FINGERPRINT.with(|x| x.clone())
}

// Hashing
// =======

/// Hash a value, including an additional salt of randomly generated data.
//
// Updated 2023-08-08 to match the updated JS implementation, which is:
//
// ```js
// const hash = (input = "") => {
//   // Drop the first character because it will bias the histogram
//   // to the left.
//   return bufToBigInt(sha3(input)).toString(36).slice(1);
// };
// ```
//
// We don't drop the first character, because it doesn't actually affect the
// histogram (the comment in the reference implementation is incorrect).
fn hash<S: AsRef<[u8]>, T: IntoIterator<Item = S>>(input: T, length: u16) -> String {
    let mut hasher = Sha3_512::new();

    for block in input {
        hasher.update(block.as_ref());
    }

    // 512 bits (64 bytes) of data ([u8; 64])
    let hash = hasher.finalize();

    // We'll convert the bytes directly to a big, unsigned int and then use
    // its builtin radix conversion.
    //
    // We don't use bigint for the rest of our base conversions, because it's
    // significantly slower, but we use it here since we need to deal with the
    // 512-bit integer from the hash function.
    let mut res = bigint::BigUint::from_bytes_be(&hash).to_str_radix(BASE.into());

    // Note that truncate panics if the length does not fall on a char boundary,
    // but we don't need to worry about that since all the chars will be ASCII.
    res.truncate(length.into());

    res
}

pub fn fingerprint() -> Result<String, CuidError> {
    let fingerprint = get_fingerprint();
    Ok(fingerprint)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fingerprint_length() {
        assert_eq!(fingerprint().unwrap().len(), BIG_LENGTH as usize)
    }
}

#[cfg(nightly)]
#[cfg(test)]
mod benchmarks {
    use super::*;
    use test::Bencher;

    #[bench]
    fn bench_fingerprint(b: &mut Bencher) {
        b.iter(|| {
            fingerprint().unwrap();
        })
    }
}