From Genbank-ID to Phylogenetic Tree Visualization using roary, raxml and ete3

gene_x 0 like s 16 view s

Tags: processing

http://xgenes.com/article/article-content/371/identify-all-occurrences-of-phage-hh1-mt880870-in-s-epidermidis-st2-genomes-from-public-and-clinical-isolates/

  1. Downloads gff

    #for id in CP009257.1 CP000521.1 CP059040.1 CU459141.1 CU468230.2 CP141284.1 CP131470.1 CP002080.1 CP002177.1 CP041365.1; do
    #  efetch -db nuccore -id ${id} -format gff3 > ${id}.gff;
    #  efetch -db nuccore -id ${id} -format fasta > ${id}.fasta;
    #done
    
    echo -e "CP009257\nCP000521\nCP059040\nCU459141\nCU468230\nCP141284\nCP131470\nCP002080\nCP002177\nCP041365\nCP191205" > ids.txt
    
    cat ids.txt | while read id; do
        efetch -db nuccore -id $id -format gff3 > "${id}.gff"
        efetch -db nuccore -id $id -format fasta > "${id}.fasta"
        # Append the FASTA sequence to the GFF3 file with ##FASTA header
        echo "##FASTA" >> "${id}.gff"
        cat "${id}.fasta" >> "${id}.gff"
        rm "${id}.fasta"  # Optionally remove the separate FASTA file
    
  2. Roary

    mv *.gff roary_input
    (bacto) roary -f roary_out -e --mafft -p 100 roary_input/*.gff
    
  3. Generate phylogenetic tree using FastTree or raxml

    #iqtree -s core_gene_alignment.aln -m GTR+G -bb 1000 -nt AUTO
    
    (bacto) FastTree -nt roary_out/core_gene_alignment.aln > roary_out/core_gene_tree.nwk
    
    (bacto) raxml-ng --all \
      --msa roary_out/core_gene_alignment.aln \
      --model GTR+G \
      --bs-trees 1000 \
      --threads 40 \
      --prefix core_gene_tree_1000
    
      运行完后,你会得到:
    
      core_gene_tree_1000.raxml.bestTree : 最佳最大似然树。
      core_gene_tree_1000.raxml.bootstraps : 100 个 bootstrap 树。
      core_gene_tree_1000.raxml.support : 带有 bootstrap 支持值的树(就是你要的)。
    
  4. Install ete3_env

    mamba create -n ete3_env python=3.10 ete3 -c etetoolkit -y
    mamba activate ete3_env
    mamba install -c etetoolkit ete3 pyqt
    
  5. Phylogenetic Tree Visualization using ete3

    #(ete3_env) python3 ~/Scripts/label_tree.py roary_out/core_gene_tree.nwk
    (ete3_env) python3 ~/Scripts/label_tree.py core_gene_tree_1000.raxml.support
    
    #!/usr/bin/env python3
    
    from ete3 import Tree, TreeStyle, TextFace, NodeStyle, faces
    import sys
    
    #(ete3_env) jhuang@WS-2290C:~/DATA/Data_Tam_DNAseq_2025_AYE/REVIEW$ python3 ~/Scripts/label_tree.py core_gene_tree_1000.raxml.support
    
    # -------------------------
    group_colors = {
        "A. baumannii": "#1f77b4",
        "A. junii": "#ff7f0e",
        "A. pittii": "#2ca02c",
        "A. oleivorans": "#d62728",
        "A. tandoii": "#9467bd",
    }
    
    leaf_name_map = {
        "CP191205": "A. baumannii HKAB-1",
        "CP009257": "A. baumannii AB030",
        "CP000521": "A. baumannii ATCC 17978",
        "CP059040": "A. baumannii ATCC 19606",
        "CU459141": "A. baumannii AYE",
        "CU468230": "A. baumannii SDF",
        "CP141284": "A. junii H1",
        "CP131470": "A. junii SC22",
        "CP002080": "A. oleivorans DR1",
        "CP002177": "A. pittii PHEA-2",
        "CP041365": "A. tandoii SE63"
    }
    
    def get_group(label):
        for group in group_colors:
            if label.startswith(group):
                return group
        return "Other"
    
    def colorize_node(node):
        if node.is_leaf():
            label = leaf_name_map.get(node.name, node.name)
            node.name = label
            group = get_group(label)
            color = group_colors.get(group, "black")
            face = TextFace(label, fsize=8, fgcolor=color)
            face.margin_left = 5  # <-- add margin to move label right from the point
            faces.add_face_to_node(face, node, column=0)
    
            nstyle = NodeStyle()
            nstyle["fgcolor"] = color
            nstyle["shape"] = "circle"
            node.set_style(nstyle)
    
    def render_tree(tree, filename, title="", circular=False):
        ts = TreeStyle()
        ts.mode = "c" if circular else "r"
        ts.show_leaf_name = False
        ts.title.add_face(TextFace(title, fsize=12, bold=True), column=0)
        ts.layout_fn = colorize_node
        ts.show_scale = True
        ts.branch_vertical_margin = 0
    
        # Customize scale bar:
        #ts.scale_len = 0.05  # Scale bar length in tree units
        #ts.scale_format = "%.2f"  # Format scale label to 2 decimals
    
        # Add a TextFace to show the scale you want
        #scale_face = TextFace("Scale: 0.05 substitutions/site", fsize=8)
        #ts.title.add_face(scale_face, column=1)
    
        tree.render(filename, w=1200, h=1200 if circular else 800, tree_style=ts)
    
    def main():
        if len(sys.argv) != 2:
            print("Usage: python3 label_tree.py <newick_file>")
            sys.exit(1)
    
        newick_file = sys.argv[1]
    
        try:
            with open(newick_file, "r") as f:
                nwk_str = f.read().strip()
                if not nwk_str.endswith(";"):
                    nwk_str += ";"
                t = Tree(nwk_str, format=1)
        except Exception as e:
            print(f"[ERROR] Failed to load tree: {e}")
            sys.exit(1)
    
        # Attempt to reroot at outgroup
        try:
            outgroup = t&"CP041365"
            if outgroup != t:
                t.set_outgroup(outgroup)
            else:
                print("[!] Outgroup is root already; skipping reroot.")
        except Exception as e:
            print(f"[!] Warning: Could not reroot tree at baumannii clade: {e}")
    
        #render_tree(t, "tree_colored_bootstrap.png", title="", circular=False)
        render_tree(t, "tree_colored_bootstrap.svg", title="", circular=False)
    
        print("✅ Saved: tree_colored_bootstrap.svg")
    
    if __name__ == "__main__":
        main()
    

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum