Graph cluster example
Because the relationships among a set of UMIs can be complex, the UMI deduplication process is sometimes easier to understand visually. Starting from a 6-node UMI graph, we deduplicate the UMIs and plot the final graph.
build a graph with data from UMI-tools
Python
import pandas as pd

# Adjacency list of the 6-node example graph: each key is a node and its
# value lists the nodes it shares an edge with. Every edge is listed in both
# directions (e.g. 'A' lists 'D' and 'D' lists 'A').
graph_adj = {
    'A': ['B', 'C', 'D'],
    'B': ['A', 'C'],
    'C': ['A', 'B'],
    'D': ['A', 'E', 'F'],
    'E': ['D'],
    'F': ['D'],
}
print("An adjacency list of a graph: \n {} ".format(graph_adj))

# Count attached to each node, listed in descending order of count.
node_val_sorted = pd.Series({
    'A': 456,
    'E': 90,
    'D': 72,
    'B': 2,
    'C': 2,
    'F': 1,
})
print("Counts sorted: \n {} ".format(node_val_sorted))
deduplicate UMIs with 6 methods
Python
# Each deduplication class is imported once here; `graph_adj` and
# `node_val_sorted` come from the graph-building snippet.
from umiche.deduplicate.method.Adjacency import Adjacency as umiadj
from umiche.deduplicate.method.Cluster import Cluster as umiclust
from umiche.deduplicate.method.Directional import Directional as umidirec
from umiche.deduplicate.method.MarkovClustering import MarkovClustering as umimcl

### @@@ ******Connected components******
# The connected components feed every method below.
ccs = umiclust().cc(graph_adj=graph_adj)
print("Connected components: \n {} ".format(ccs))

### @@@ ******UMI-tools Adjacency******
dedup_res_adj = umiadj().umi_tools(
    connected_components=ccs,
    df_umi_uniq_val_cnt=node_val_sorted,
    graph_adj=graph_adj,
)
dedup_res_adj_dc = umiadj().decompose(dedup_res_adj['clusters'])
print("deduplicated clusters (UMI-tools Adjacency): \n {} ".format(dedup_res_adj_dc))

### @@@ ******UMI-tools Directional******
dedup_res_direc = umidirec().umi_tools(
    connected_components=ccs,
    df_umi_uniq_val_cnt=node_val_sorted,
    graph_adj=graph_adj,
)
dedup_res_direc_dc = umidirec().decompose(dedup_res_direc['clusters'])
print("deduplicated clusters (UMI-tools Directional): \n {} ".format(dedup_res_direc_dc))

### @@@ ******MCL******
# One configured MarkovClustering instance is reused by the MCL, mcl_val and
# mcl_ed steps.
mcl = umimcl(
    inflat_val=1.6,
    exp_val=2,
    iter_num=100,
)
df_mcl = mcl.dfclusters(
    connected_components=ccs,
    graph_adj=graph_adj,
)
dedup_res_mcl_dc = mcl.decompose(list_nd=df_mcl['clusters'].values)
print("deduplicated clusters (MCL): \n {} ".format(dedup_res_mcl_dc))

### @@@ ******MCL mcl_val******
# Starts from the MCL clusters in `df_mcl` and uses the node counts.
df_mcl_val = mcl.maxval_val(
    df_mcl_ccs=df_mcl,
    df_umi_uniq_val_cnt=node_val_sorted,
    thres_fold=2,
)
dedup_res_mcl_val_dc = mcl.decompose(list_nd=df_mcl_val['clusters'].values)
print("deduplicated clusters decomposed (mcl_val): \n {} ".format(dedup_res_mcl_val_dc))
dedup_res_mcl_val_dc_full = mcl.get_full_subcc(
    ccs_dict=dedup_res_mcl_val_dc,
    mcl_ccs_dict=dedup_res_mcl_dc,
)
print("deduplicated clusters decomposed full list(mcl_val): \n {} ".format(dedup_res_mcl_val_dc_full))

### @@@ ******MCL mcl_ed******
# Node label -> 4-letter sequence. The sequences of any two nodes joined by
# an edge in `graph_adj` differ at exactly one position.
# NOTE(review): presumably these are the UMI sequences maxval_ed compares by
# edit distance -- confirm against the umiche API.
int_to_umi_dict = {
    'A': 'ACGT',
    'B': 'TCGT',
    'C': 'CCGT',
    'D': 'ACAT',
    'E': 'ACAG',
    'F': 'AAAT',
}
df_mcl_ed = mcl.maxval_ed(
    df_mcl_ccs=df_mcl,
    df_umi_uniq_val_cnt=node_val_sorted,
    thres_fold=1,
    int_to_umi_dict=int_to_umi_dict,
)
dedup_res_mcl_ed_dc = mcl.decompose(list_nd=df_mcl_ed['clusters'].values)
print("deduplicated clusters decomposed (mcl_ed): \n {} ".format(dedup_res_mcl_ed_dc))
dedup_res_mcl_ed_dc_full = mcl.get_full_subcc(
    ccs_dict=dedup_res_mcl_ed_dc,
    mcl_ccs_dict=dedup_res_mcl_dc,
)
print("deduplicated clusters decomposed full list(mcl_ed): \n {} ".format(dedup_res_mcl_ed_dc_full))
draw UMI nodes with deduplicated information
Python
# NOTE(review): `plt` and `Graph` are not imported in this snippet; presumably
# `plt` is matplotlib.pyplot and `Graph` is a plotting class from umiche --
# confirm and import both before running.
# The 2 x 3 grid gives one panel per deduplication result.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(14, 9.5))

p = Graph(graph=graph_adj)
p.color_list = ['cornflowerblue', 'lightcoral', 'mediumseagreen']

# Top row: connected components, Adjacency, Directional.
p.draw(ccs, ax=ax[0, 0], title='Cluster')
p.draw(dedup_res_adj_dc, ax=ax[0, 1], title='Adjacent')
p.draw(dedup_res_direc_dc, ax=ax[0, 2], title='Directional')
# Bottom row: the three MCL-based results.
p.draw(dedup_res_mcl_dc, ax=ax[1, 0], title='MCL')
p.draw(dedup_res_mcl_val_dc_full, ax=ax[1, 1], title='MCL-val')
p.draw(dedup_res_mcl_ed_dc_full, ax=ax[1, 2], title='MCL-ed')

plt.subplots_adjust(
    top=0.92,
    bottom=0.04,
    left=0.04,
    right=0.98,
    hspace=0.10,
    wspace=0.15,
)
plt.show()
Fig 1. Graph-based identification of true molecules by collapsing UMIs using the cluster, adjacency, directional, mcl, mcl-ed, and mcl-val algorithms.