diff --git a/.gitignore b/.gitignore index ea8c4bf..f587ab9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ /target +.history +.env +graph.postcard + diff --git a/Cargo.lock b/Cargo.lock index 4ab64f0..9788af9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -155,6 +155,29 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216" +[[package]] +name = "crossbeam-deque" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset", +] + [[package]] name = "crossbeam-utils" version = "0.8.17" @@ -276,6 +299,12 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.14.3" @@ -284,6 +313,7 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ "ahash", "allocator-api2", + "rayon", ] [[package]] @@ -312,6 +342,16 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" 
version = "2.1.0" @@ -319,7 +359,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.14.3", + "rayon", ] [[package]] @@ -328,6 +369,15 @@ version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.11.0" @@ -337,6 +387,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -371,7 +430,7 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7" dependencies = [ - "hashbrown", + "hashbrown 0.14.3", ] [[package]] @@ -389,6 +448,15 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -420,6 +488,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -447,12 +524,15 @@ dependencies = [ "async-channel", "crossterm", "error-stack", + "itertools 0.12.0", "pathdiff", "petgraph", "postcard", "ratatui", "regex", + "rustworkx-core", "serde", + "simple-pagerank", "thiserror", "tokio", "tracing", @@ -520,7 +600,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap", + "indexmap 2.1.0", "serde", "serde_derive", ] @@ -543,6 +623,22 @@ dependencies = [ "serde", ] +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "priority-queue" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fff39edfcaec0d64e8d0da38564fad195d2d51b680940295fcc307366e101e61" +dependencies = [ + "autocfg", + "indexmap 1.9.3", +] + [[package]] name = "proc-macro2" version = "1.0.70" @@ -561,6 +657,45 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + 
"getrandom", +] + +[[package]] +name = "rand_pcg" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e" +dependencies = [ + "rand_core", +] + [[package]] name = "ratatui" version = "0.24.0" @@ -571,7 +706,7 @@ dependencies = [ "cassowary", "crossterm", "indoc", - "itertools", + "itertools 0.11.0", "lru", "paste", "strum", @@ -579,6 +714,37 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "rayon" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac2a28c5317e6d26ac87a8629c0eb362690ed1d739f4040e21cfaafdf04e6f8" +dependencies = [ + "either", + "itertools 0.10.5", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -653,6 +819,25 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +[[package]] +name = "rustworkx-core" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72abf7976bc09a30391248b3c6509338b235c02b0e9b0bf8af686c289cad3f45" +dependencies = [ + "ahash", + "fixedbitset", + "hashbrown 0.14.3", + "indexmap 2.1.0", + "num-traits", + "petgraph", + "priority-queue", + "rand", + "rand_pcg", + "rayon", + "rayon-cond", +] + [[package]] name = "same-file" version = "1.0.6" @@ -733,6 +918,12 @@ dependencies = [ "libc", ] 
+[[package]] +name = "simple-pagerank" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d2b6d15dfae728ed12a9d722c0b494cf85d2ba27f6e93443c7b3e6a614870f" + [[package]] name = "smallvec" version = "1.11.2" diff --git a/Cargo.toml b/Cargo.toml index 391d348..6dae960 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,12 +10,15 @@ ahash = { version = "0.8.6", features = ["serde"] } async-channel = "2.1.1" crossterm = "0.27.0" error-stack = "0.4.1" +itertools = "0.12.0" pathdiff = "0.2.1" petgraph = { version = "0.6.4", features = ["serde-1"] } postcard = { version = "1.0.8", features = ["alloc", "use-std"] } ratatui = "0.24.0" regex = "1.10.2" +rustworkx-core = "0.13.2" serde = { version = "1.0.193", features = ["derive"] } +simple-pagerank = "0.2.0" thiserror = "1.0.51" tokio = { version = "1.35.1", features = ["rt-multi-thread", "macros", "fs"] } tracing = "0.1.40" diff --git a/src/bin/build-index.rs b/src/bin/build-index.rs index 17ab4f5..4e0aee4 100644 --- a/src/bin/build-index.rs +++ b/src/bin/build-index.rs @@ -4,7 +4,7 @@ use std::{ thread, }; -use ahash::AHashMap; +use ahash::{AHashMap, AHashSet}; use regex::Regex; use walkdir::WalkDir; @@ -92,7 +92,7 @@ async fn main() { let index = note_name_unique_to_index .entry(note_name_unique.clone()) - .or_insert_with(|| graph.add_node(note_name_unique)); + .or_insert_with(|| graph.add_node(note_name_unique.clone())); let index = *index; @@ -103,15 +103,21 @@ async fn main() { let wikilinks = wikilink_regex.captures_iter(&contents); - for wikilink in wikilinks { - tracing::info!(?wikilink); + let mut outgoing = AHashSet::new(); + for wikilink in wikilinks { let link = wikilink.name("link").unwrap(); - tracing::info!(?wikilink); + let link_as_string = link.as_str().to_string(); + + outgoing.insert(link_as_string); + } + + for link in outgoing { + tracing::info!(note_name_unique, link, "linking"); let outgoing_index = note_name_unique_to_index - 
.entry(link.as_str().to_string()) + .entry(link) .or_insert_with_key(|k| graph.add_node(k.to_owned())); graph.add_edge(index, *outgoing_index, ());
diff --git a/src/bin/fundamentals.rs b/src/bin/fundamentals.rs
new file mode 100644
index 0000000..c689bf6
--- /dev/null
+++ b/src/bin/fundamentals.rs
@@ -0,0 +1,93 @@
+//! Ranks the notes of the link graph by centrality.
+//!
+//! Loads the serialized graph from `graph.postcard`, runs PageRank over its
+//! edges and logs the leading entries. Passing `--katz` on the command line
+//! additionally logs the Katz centrality of every node.
+
+use std::iter::zip;
+
+use petgraph::graph::DiGraph;
+use petgraph::visit::EdgeRef;
+use rustworkx_core::centrality::katz_centrality;
+
+use simple_pagerank::Pagerank;
+
+use itertools::Itertools;
+
+/// Path of the serialized graph, relative to the working directory.
+const GRAPH_PATH: &str = "graph.postcard";
+
+/// Upper bound on the number of PageRank entries that get logged.
+const PAGERANK_ENTRIES_LOGGED: usize = 50;
+
+/// Command-line flag that switches on the Katz centrality report.
+const KATZ_FLAG: &str = "--katz";
+
+/// Link graph type: `String` node weights and unit edge weights, matching the
+/// `add_node`/`add_edge` calls in `build-index`.
+type NoteGraph = DiGraph<String, ()>;
+
+#[tokio::main]
+async fn main() {
+    tracing_subscriber::fmt::init();
+
+    let file = tokio::fs::read(GRAPH_PATH)
+        .await
+        .unwrap_or_else(|error| panic!("failed to read {GRAPH_PATH}: {error}"));
+
+    let graph: NoteGraph = postcard::from_bytes(&file)
+        .unwrap_or_else(|error| panic!("failed to decode {GRAPH_PATH}: {error}"));
+
+    log_pagerank(&graph);
+
+    // The Katz report is opt-in so the default output stays PageRank only.
+    if std::env::args().skip(1).any(|arg| arg == KATZ_FLAG) {
+        log_katz_centrality(&graph);
+    }
+}
+
+/// Runs PageRank over the edges of `graph` and logs up to
+/// [`PAGERANK_ENTRIES_LOGGED`] of the entries returned by `Pagerank::nodes`.
+fn log_pagerank(graph: &NoteGraph) {
+    let mut pagerank = Pagerank::new();
+
+    // Only nodes that take part in at least one edge reach the ranking.
+    for edge in graph.edge_references() {
+        pagerank.add_edge(edge.source(), edge.target());
+    }
+
+    // `calculate_with_convergence` is the variant with an explicit threshold.
+    pagerank.calculate();
+
+    // NOTE(review): relies on `nodes()` yielding best-ranked entries first — confirm.
+    for (node_index, rank) in pagerank.nodes().into_iter().take(PAGERANK_ENTRIES_LOGGED) {
+        let node_weight = graph.node_weight(*node_index);
+
+        tracing::info!(?node_weight, rank);
+    }
+}
+
+/// Computes the Katz centrality of every node and logs the nodes in ascending
+/// order of centrality.
+///
+/// # Panics
+///
+/// Panics if `katz_centrality` yields no centrality vector.
+fn log_katz_centrality(graph: &NoteGraph) {
+    // Every edge weighs 1.0; all tuning parameters are left to the library.
+    let katz_centralities =
+        katz_centrality(graph, |_| Ok::<_, ()>(1.0), None, None, None, None, None)
+            .expect("the constant weight function never returns an error")
+            .expect("katz_centrality returned no centralities");
+
+    // NOTE(review): assumes the centralities come back in node-index order — confirm.
+    let mut nodes_and_centralities = zip(graph.node_indices(), katz_centralities).collect_vec();
+    // `total_cmp` gives a total order, so a NaN cannot panic the sort.
+    nodes_and_centralities
+        .sort_by(|(_, centrality_a), (_, centrality_b)| centrality_a.total_cmp(centrality_b));
+
+    for (node_index, centrality) in nodes_and_centralities {
+        let node = graph.node_weight(node_index);
+        tracing::info!(?node_index, ?node, centrality);
+    }
+}