Find the most "fundamental" notes in the graph with PageRank (a Katz centrality pass is also included, but it is currently disabled)

This commit is contained in:
2023-12-23 16:02:05 -05:00
parent a6e5cc9c6e
commit a2e13914f0
5 changed files with 265 additions and 10 deletions

View File

@@ -4,7 +4,7 @@ use std::{
thread,
};
use ahash::AHashMap;
use ahash::{AHashMap, AHashSet};
use regex::Regex;
use walkdir::WalkDir;
@@ -92,7 +92,7 @@ async fn main() {
let index = note_name_unique_to_index
.entry(note_name_unique.clone())
.or_insert_with(|| graph.add_node(note_name_unique));
.or_insert_with(|| graph.add_node(note_name_unique.clone()));
let index = *index;
@@ -103,15 +103,21 @@ async fn main() {
let wikilinks = wikilink_regex.captures_iter(&contents);
for wikilink in wikilinks {
tracing::info!(?wikilink);
let mut outgoing = AHashSet::new();
for wikilink in wikilinks {
let link = wikilink.name("link").unwrap();
tracing::info!(?wikilink);
let link_as_string = link.as_str().to_string();
outgoing.insert(link_as_string);
}
for link in outgoing {
tracing::info!(note_name_unique, link, "linking");
let outgoing_index = note_name_unique_to_index
.entry(link.as_str().to_string())
.entry(link)
.or_insert_with_key(|k| graph.add_node(k.to_owned()));
graph.add_edge(index, *outgoing_index, ());

51
src/bin/fundamentals.rs Normal file
View File

@@ -0,0 +1,51 @@
use std::iter::zip;
use petgraph::visit::EdgeRef;
use rustworkx_core::centrality::katz_centrality;
use simple_pagerank::Pagerank;
use itertools::Itertools;
/// Ranks the notes stored in `graph.postcard` and logs the highest-ranked ones.
///
/// Reads the postcard-serialized `DiGraph<String, ()>` from the current working
/// directory, feeds every edge into a `Pagerank` instance, and emits one
/// `tracing::info!` event for each of the first `TOP_N` entries returned by
/// `Pagerank::nodes()`. A Katz centrality pass is kept behind `RUN_KATZ`,
/// which is off.
///
/// # Panics
///
/// Panics if `graph.postcard` cannot be read or deserialized and, when the
/// Katz pass is enabled, if `katz_centrality` yields an error or no result.
#[tokio::main]
async fn main() {
    /// How many PageRank entries are logged.
    const TOP_N: usize = 50;
    /// Flip to `true` to also log Katz centrality for every node.
    const RUN_KATZ: bool = false;

    tracing_subscriber::fmt::init();

    let file = std::fs::read("graph.postcard").expect("failed to read graph.postcard");
    let graph: petgraph::graph::DiGraph<String, ()> =
        postcard::from_bytes(&file).expect("failed to deserialize graph.postcard");

    // Only edges are handed to PageRank, so a node that has no edge at all
    // never enters the ranking.
    let mut pagerank = Pagerank::new();
    for edge in graph.edge_references() {
        pagerank.add_edge(edge.source(), edge.target());
    }
    pagerank.calculate();
    // Alternative left by the author: pagerank.calculate_with_convergence(0.00000003);

    // NOTE(review): this relies on `nodes()` returning the best-ranked entries
    // first; confirm against the simple_pagerank documentation.
    for (node_index, rank) in pagerank.nodes().into_iter().take(TOP_N) {
        let node_weight = graph.node_weight(*node_index);
        tracing::info!(?node_weight, rank);
    }

    if RUN_KATZ {
        // Every edge gets weight 1.0; all tuning parameters are left at the
        // library defaults (`None`).
        let katz_centralities =
            katz_centrality(&graph, |_| Ok::<_, ()>(1.0), None, None, None, None, None)
                .expect("katz_centrality returned an error")
                // NOTE(review): presumably `None` means the iteration did not
                // converge; confirm against the rustworkx_core documentation.
                .expect("katz_centrality returned no result");

        let mut nodes_and_indices = zip(graph.node_indices(), katz_centralities).collect_vec();
        // Ascending order, so the most central nodes are logged last.
        // `total_cmp` is a total order on f64, so a NaN score cannot panic the sort.
        nodes_and_indices.sort_by(|(_, centrality_a), (_, centrality_b)| {
            centrality_a.total_cmp(centrality_b)
        });

        for (node_index, centrality) in nodes_and_indices {
            let node = graph.node_weight(node_index);
            tracing::info!(?node_index, ?node, centrality);
        }
    }
}