Skip to content

YAML configuration

In UMIche, all calculation methods rely on a YAML file for parameter initialisation.

It defines 5 sections

  • work_dir - working directory
  • trimmed - trimming FastQ reads and extracting barcodes and UMIs
  • fixed - single-valued simulation parameters
  • varied - varying-valued simulation parameters
  • dedup - UMI deduplication parameters

Monomer UMI pipeline settings

work_dir: /mnt/d/Document/Programming/Python/umiche/umiche/data/simu/mclumi/
#work_dir: D:/Document/Programming/Python/umiche/umiche/data/simu/general/seq_errs/
# work_dir data/simu/tree/trimer/
# work_dir data/simu/monomer/pcr8/
# work_dir data/simu/trimer/pcr8/
# work_dir data/simu/dimer/pcr8/
# work_dir data/simu/dimer/treepcr22_250/
# work_dir data/simu/dimer/pcr8_mono24/

trimmed:
  fastq:
    fpn: None
    trimmed_fpn: None

  umi_1:
    len: 10

  seq:
    len: 100

  read_struct: 'umi_1'


fixed:
  pcr_num: 8
  pcr_err: 0.00001
  seq_err: 0.001
  ampl_rate: 0.85
  seq_dep: 400
  umi_num: 50
  permutation_num: 2
  umi_unit_pattern: 1
  umi_unit_len: 10
  seq_sub_spl_rate: 0.333
  sim_thres: 3


varied:
  pcr_nums: [ # pcr_nums_err_2d_spl0.33
    1, 2, 3,
    4, 5, 6, 7,
    8, 9, 10, 11, 12,
    13, 14, 15, 16,
#    17, 18
#    17, 18, 19, 20,
  ]
  pcr_errs: [
    0.00001,
    0.000025,
    0.00005,
    0.000075,
    0.0001,
    0.00025,
    0.0005,
    0.00075,
    0.001,
    0.0025,
    0.005,
    0.0075,
    0.01,
#    0.025,
    0.05,
#    0.075,
#    0.1,
#    0.2,
#    0.3,
  ]
  seq_errs: [
    0.00001,
    0.000025,
    0.00005,
    0.000075,
    0.0001,
    0.00025,
    0.0005,
    0.00075,
    0.001,
    0.0025,
    0.005,
    0.0075,
    0.01,
    0.025,
    0.05,
    0.075,
    0.1,
#    0.2,
#    0.3,
  ]
  ampl_rates: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
  umi_lens: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
#  umi_lens: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]
#  umi_nums: [50, 250, 450, 650, 850, 1050]
  umi_nums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]
  seq_deps: [100, 200, 500, 600, 800, 1000, 2000, 3000, 5000 ]
#  seq_deps: [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]


dedup:
  dbscan_eps: 1.5 # 1.5
  dbscan_min_spl: 1
  birch_thres: 1.8 # 1.8
  birch_n_clusters: None
  hdbscan_min_spl: 3
  aprop_preference: None
  aprop_random_state: 0

  ed_thres: 1
  mcl_fold_thres: 1.6 # 1.6
  iter_num: 100

#  inflat_val: 2.7 # 1.1 2.7
#  exp_val: 2 # 2 3
#
  inflat_val: [1.1, 2.7, 3.6]
  exp_val: 2

#  exp_val: [2, 3, 4]

  # mcl_ed trace!!!
#  ed_thres: 1
#  mcl_fold_thres: 2 # 1.6
#  inflat_val: 2.7 # 1.1 2.7
#  exp_val: 2 # 2 3
#  iter_num: 100

  # pcr_nums
  # mcl_inflat: 2.3
  # mcl_exp: 2
  # mcl_fold_thres: 1

Homotrimer UMI pipeline settings

#work_dir: d:/Document/Programming/Python/umiche/umiche/data/simu/umiche/trimer/
work_dir: /mnt/d/Document/Programming/Python/umiche/umiche/data/simu/umiche/trimer/

trimmed:
  fastq:
    fpn: None
    trimmed_fpn: None

  umi_1:
    len: 36

  seq:
    len: 100

  read_struct: 'umi_1'


fixed:
  pcr_num: 8
  pcr_err: 0.00001
  seq_err: 0.001
  ampl_rate: 0.85
  seq_dep: 400
  umi_num: 50
  permutation_num: 10
  umi_unit_pattern: 3
  umi_unit_len: 12
  seq_sub_spl_rate: 0.333
  sim_thres: 3


varied:
  pcr_nums: [ # pcr_nums_err_2d_spl0.33
    1, 2, 3,
    4, 5, 6, 7,
    8, 9, 10, 11, 12,
    13, 14, 15, 16,
#    17, 18
#    17, 18, 19, 20,
  ]
  pcr_errs: [
    0.00001,
    0.000025,
    0.00005,
    0.000075,
    0.0001,
    0.00025,
    0.0005,
    0.00075,
    0.001,
    0.0025,
    0.005,
    0.0075,
    0.01,
#    0.025,
    0.05,
#    0.075,
#    0.1,
#    0.2,
#    0.3,
  ]
  seq_errs: [
    0.00001,
    0.000025,
    0.00005,
    0.000075,
    0.0001,
    0.00025,
    0.0005,
    0.00075,
    0.001,
    0.0025,
    0.005,
    0.0075,
    0.01,
    0.025,
    0.05,
    0.075,
    0.1,
    0.2,
#    0.3,
  ]
  ampl_rates: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
  umi_lens: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
#  umi_lens: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]
#  umi_nums: [50, 250, 450, 650, 850, 1050]
  umi_nums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45]
  seq_deps: [100, 200, 500, 600, 800, 1000, 2000, 3000, 5000 ]
#  seq_deps: [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]


dedup:
  dbscan_eps: 1.5 # 1.5
  dbscan_min_spl: 1
  birch_thres: 1.8 # 1.8
  birch_n_clusters: None
  hdbscan_min_spl: 3
  aprop_preference: None
  aprop_random_state: 0

  ed_thres: 1
  mcl_fold_thres: 1.6 # 1.6
  inflat_val: 2.7 # 1.1 2.7
  exp_val: 2 # 2 3
  iter_num: 100

  # mcl_ed trace!!!
#  ed_thres: 1
#  mcl_fold_thres: 2 # 1.6
#  inflat_val: 2.7 # 1.1 2.7
#  exp_val: 2 # 2 3
#  iter_num: 100

  # pcr_nums
  # mcl_inflat: 2.3
  # mcl_exp: 2
  # mcl_fold_thres: 1