find the most "fundamental" notes in the graph with PageRank (also with Katz centrality but I commented that out)

This commit is contained in:
2023-12-23 16:02:05 -05:00
parent a6e5cc9c6e
commit a2e13914f0
5 changed files with 265 additions and 10 deletions

4
.gitignore vendored
View File

@@ -1 +1,5 @@
/target
.history
.env
graph.postcard

199
Cargo.lock generated
View File

@@ -155,6 +155,29 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216"
[[package]]
name = "crossbeam-deque"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.17"
@@ -276,6 +299,12 @@ dependencies = [
"byteorder",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hashbrown"
version = "0.14.3"
@@ -284,6 +313,7 @@ checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash",
"allocator-api2",
"rayon",
]
[[package]]
@@ -312,6 +342,16 @@ version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "indexmap"
version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
dependencies = [
"autocfg",
"hashbrown 0.12.3",
]
[[package]]
name = "indexmap"
version = "2.1.0"
@@ -319,7 +359,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
dependencies = [
"equivalent",
"hashbrown",
"hashbrown 0.14.3",
"rayon",
]
[[package]]
@@ -328,6 +369,15 @@ version = "2.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
@@ -337,6 +387,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
dependencies = [
"either",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -371,7 +430,7 @@ version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7"
dependencies = [
"hashbrown",
"hashbrown 0.14.3",
]
[[package]]
@@ -389,6 +448,15 @@ version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@@ -420,6 +488,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
@@ -447,12 +524,15 @@ dependencies = [
"async-channel",
"crossterm",
"error-stack",
"itertools 0.12.0",
"pathdiff",
"petgraph",
"postcard",
"ratatui",
"regex",
"rustworkx-core",
"serde",
"simple-pagerank",
"thiserror",
"tokio",
"tracing",
@@ -520,7 +600,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
dependencies = [
"fixedbitset",
"indexmap",
"indexmap 2.1.0",
"serde",
"serde_derive",
]
@@ -543,6 +623,22 @@ dependencies = [
"serde",
]
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "priority-queue"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fff39edfcaec0d64e8d0da38564fad195d2d51b680940295fcc307366e101e61"
dependencies = [
"autocfg",
"indexmap 1.9.3",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
@@ -561,6 +657,45 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_pcg"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59cad018caf63deb318e5a4586d99a24424a364f40f1e5778c29aca23f4fc73e"
dependencies = [
"rand_core",
]
[[package]]
name = "ratatui"
version = "0.24.0"
@@ -571,7 +706,7 @@ dependencies = [
"cassowary",
"crossterm",
"indoc",
"itertools",
"itertools 0.11.0",
"lru",
"paste",
"strum",
@@ -579,6 +714,37 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "rayon"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-cond"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ac2a28c5317e6d26ac87a8629c0eb362690ed1d739f4040e21cfaafdf04e6f8"
dependencies = [
"either",
"itertools 0.10.5",
"rayon",
]
[[package]]
name = "rayon-core"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
@@ -653,6 +819,25 @@ version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "rustworkx-core"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72abf7976bc09a30391248b3c6509338b235c02b0e9b0bf8af686c289cad3f45"
dependencies = [
"ahash",
"fixedbitset",
"hashbrown 0.14.3",
"indexmap 2.1.0",
"num-traits",
"petgraph",
"priority-queue",
"rand",
"rand_pcg",
"rayon",
"rayon-cond",
]
[[package]]
name = "same-file"
version = "1.0.6"
@@ -733,6 +918,12 @@ dependencies = [
"libc",
]
[[package]]
name = "simple-pagerank"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47d2b6d15dfae728ed12a9d722c0b494cf85d2ba27f6e93443c7b3e6a614870f"
[[package]]
name = "smallvec"
version = "1.11.2"

View File

@@ -10,12 +10,15 @@ ahash = { version = "0.8.6", features = ["serde"] }
async-channel = "2.1.1"
crossterm = "0.27.0"
error-stack = "0.4.1"
itertools = "0.12.0"
pathdiff = "0.2.1"
petgraph = { version = "0.6.4", features = ["serde-1"] }
postcard = { version = "1.0.8", features = ["alloc", "use-std"] }
ratatui = "0.24.0"
regex = "1.10.2"
rustworkx-core = "0.13.2"
serde = { version = "1.0.193", features = ["derive"] }
simple-pagerank = "0.2.0"
thiserror = "1.0.51"
tokio = { version = "1.35.1", features = ["rt-multi-thread", "macros", "fs"] }
tracing = "0.1.40"

View File

@@ -4,7 +4,7 @@ use std::{
thread,
};
use ahash::AHashMap;
use ahash::{AHashMap, AHashSet};
use regex::Regex;
use walkdir::WalkDir;
@@ -92,7 +92,7 @@ async fn main() {
let index = note_name_unique_to_index
.entry(note_name_unique.clone())
.or_insert_with(|| graph.add_node(note_name_unique));
.or_insert_with(|| graph.add_node(note_name_unique.clone()));
let index = *index;
@@ -103,15 +103,21 @@ async fn main() {
let wikilinks = wikilink_regex.captures_iter(&contents);
for wikilink in wikilinks {
tracing::info!(?wikilink);
let mut outgoing = AHashSet::new();
for wikilink in wikilinks {
let link = wikilink.name("link").unwrap();
tracing::info!(?wikilink);
let link_as_string = link.as_str().to_string();
outgoing.insert(link_as_string);
}
for link in outgoing {
tracing::info!(note_name_unique, link, "linking");
let outgoing_index = note_name_unique_to_index
.entry(link.as_str().to_string())
.entry(link)
.or_insert_with_key(|k| graph.add_node(k.to_owned()));
graph.add_edge(index, *outgoing_index, ());

51
src/bin/fundamentals.rs Normal file
View File

@@ -0,0 +1,51 @@
use std::iter::zip;
use petgraph::visit::EdgeRef;
use rustworkx_core::centrality::katz_centrality;
use simple_pagerank::Pagerank;
use itertools::Itertools;
/// Ranks the notes of the serialized link graph and logs the results.
///
/// Reads `graph.postcard` from the current working directory, deserializes it
/// into a directed graph with `String` node weights, feeds every edge into
/// PageRank and logs the first `TOP_N` entries that `Pagerank::nodes` returns.
/// A Katz-centrality ranking of the same graph is kept behind a disabled branch.
///
/// # Panics
///
/// Panics if `graph.postcard` cannot be read or cannot be decoded as the
/// expected graph type.
#[tokio::main]
async fn main() {
    // Number of PageRank entries to log.
    const TOP_N: usize = 50;

    tracing_subscriber::fmt::init();

    let file = std::fs::read("graph.postcard")
        .expect("failed to read graph.postcard from the current directory");
    let graph: petgraph::graph::DiGraph<String, ()> = postcard::from_bytes(&file)
        .expect("graph.postcard is not a valid serialized DiGraph<String, ()>");

    // PageRank is only told about nodes through the edges added here, so a
    // node without any edge is never handed to it.
    let mut pagerank = Pagerank::new();
    for edge in graph.edge_references() {
        pagerank.add_edge(edge.source(), edge.target());
    }

    pagerank.calculate();
    // Alternative with an explicit convergence threshold:
    // pagerank.calculate_with_convergence(0.00000003);

    // NOTE(review): this relies on `nodes()` yielding the highest-scored
    // entries first — confirm against the simple-pagerank documentation.
    for (node_index, rank) in pagerank.nodes().into_iter().take(TOP_N) {
        let node_weight = graph.node_weight(*node_index);
        tracing::info!(?node_weight, rank);
    }

    // Katz centrality is deliberately switched off but kept compiling; flip
    // the condition to compare its ranking with PageRank's.
    if false {
        // Every edge gets weight 1.0; the callback itself can never fail.
        let katz_centralities =
            katz_centrality(&graph, |_| Ok::<_, ()>(1.0), None, None, None, None, None)
                .expect("the edge-weight callback never returns an error")
                .expect("katz_centrality produced no scores (presumably it did not converge)");

        let mut nodes_and_indices = zip(graph.node_indices(), katz_centralities).collect_vec();
        // `total_cmp` is a total order over f64, so a NaN score cannot panic
        // the sort the way `partial_cmp(..).unwrap()` would. Ascending order:
        // the largest centrality is logged last.
        nodes_and_indices
            .sort_by(|(_, centrality_a), (_, centrality_b)| centrality_a.total_cmp(centrality_b));

        for (node_index, centrality) in nodes_and_indices {
            let node = graph.node_weight(node_index);
            tracing::info!(?node_index, ?node, centrality);
        }
    }
}