[{"data":1,"prerenderedAt":6965},["ShallowReactive",2],{"document-content":3},{"en":4,"cn":3579},{"id":5,"title":6,"body":7,"description":489,"extension":3572,"meta":3573,"navigation":3574,"path":3575,"seo":3576,"stem":3577,"__hash__":3578},"content\u002Fdocument\u002Fusermanual_en.md","OrthoVenn Plus User Manual",{"type":8,"value":9,"toc":3524},"minimark",[10,14,44,47,52,175,177,181,184,189,206,211,216,347,359,365,367,371,376,441,461,465,472,479,490,505,509,515,518,559,563,578,582,625,629,657,659,663,666,672,675,689,701,705,712,730,823,828,854,863,865,869,873,921,928,952,956,977,979,983,990,996,1002,1008,1013,1015,1019,1024,1027,1047,1053,1058,1067,1119,1134,1142,1160,1167,1175,1185,1193,1202,1206,1267,1271,1277,1315,1320,1461,1463,1467,1472,1482,1488,1491,1499,1561,1577,1585,1622,1636,1639,1694,1728,1731,1737,1752,1757,1809,1811,1815,1820,1828,1834,1837,1844,1895,1905,1914,1963,1968,2024,2029,2062,2065,2109,2112,2118,2126,2130,2192,2194,2198,2207,2219,2225,2228,2242,2312,2355,2363,2366,2410,2426,2429,2435,2472,2476,2538,2540,2544,2549,2559,2582,2589,2595,2598,2606,2614,2617,2671,2682,2685,2691,2705,2709,2781,2783,2787,2816,2824,2826,2830,2842,2844,2848,2852,2875,2879,2919,2923,2926,2928,2932,2938,2954,2958,2964,2968,2975,2979,2985,2992,3077,3089,3093,3130,3134,3140,3198,3210,3214,3286,3291,3293,3297,3302,3306,3309,3322,3328,3332,3335,3339,3342,3344,3348,3351,3383,3393,3395,3399,3504,3506],[11,12,6],"h1",{"id":13},"orthovenn-plus-user-manual",[15,16,17],"blockquote",{},[18,19,20,24,25,28,31,32,38,40,43],"p",{},[21,22,23],"strong",{},"Platform:"," OrthoVenn Plus · Online platform for multi-species comparative genomics",[26,27],"br",{},[21,29,30],{},"URL:"," ",[33,34,35],"a",{"href":35,"rel":36},"https:\u002F\u002Forthovenn.com",[37],"nofollow",[26,39],{},[21,41,42],{},"Intended for:"," Researchers who need to perform multi-species comparative genomics, with no programming or command-line experience 
required.",[45,46],"hr",{},[48,49,51],"h2",{"id":50},"table-of-contents","Table of Contents",[53,54,55,62,68,84,90,133,139,145,151,157,163,169],"ol",{},[56,57,58],"li",{},[33,59,61],{"href":60},"#1-platform-overview","Platform Overview",[56,63,64],{},[33,65,67],{"href":66},"#2-data-preparation--format-requirements","Data Preparation & Format Requirements",[56,69,70,74],{},[33,71,73],{"href":72},"#3-analysis-workflow-at-a-glance","Analysis Workflow at a Glance",[75,76,77],"ul",{},[56,78,79,80],{},"3.1 ",[33,81,83],{"href":82},"#31-parameter-changes--versioned-reruns-old-results-are-never-overwritten-orthovenn-plus-signature-feature","Parameter Changes & Versioned Reruns: Old Results Are Never Overwritten (signature feature)",[56,85,86],{},[33,87,89],{"href":88},"#4-step-1-select-species--upload-data","Step 1: Select Species & Upload Data",[56,91,92,96],{},[33,93,95],{"href":94},"#5-step-2-configure-analysis-modules","Step 2: Configure Analysis Modules",[75,97,98,105,112,119,126],{},[56,99,100,101],{},"5.1 ",[33,102,104],{"href":103},"#51-module-1--orthologous-cluster-analysis-required","Module 1 · Orthologous Cluster Analysis (required)",[56,106,107,108],{},"5.2 ",[33,109,111],{"href":110},"#52-module-2--species-tree-analysis","Module 2 · Species Tree Analysis",[56,113,114,115],{},"5.3 ",[33,116,118],{"href":117},"#53-module-3--divergence-time-estimation","Module 3 · Divergence Time Estimation",[56,120,121,122],{},"5.4 ",[33,123,125],{"href":124},"#54-module-4--gene-family-expansion--contraction-analysis","Module 4 · Gene Family Expansion & Contraction Analysis",[56,127,128,129],{},"5.5 ",[33,130,132],{"href":131},"#55-module-5--chromosomal-collinearity-analysis","Module 5 · Chromosomal Collinearity Analysis",[56,134,135],{},[33,136,138],{"href":137},"#6-step-3-preview--submit","Step 3: Preview & Submit",[56,140,141],{},[33,142,144],{"href":143},"#7-tracking-progress--task-history","Tracking Progress & Task 
History",[56,146,147],{},[33,148,150],{"href":149},"#8-interpreting--exporting-results","Interpreting & Exporting Results",[56,152,153],{},[33,154,156],{"href":155},"#9-positive-selection-analysis-on-gene-clusters-run-on-demand-after-results-are-generated","Positive Selection Analysis on Gene Clusters (run on demand after results are generated)",[56,158,159],{},[33,160,162],{"href":161},"#10-online-helper-tools-web-tools","Online Helper Tools (Web Tools)",[56,164,165],{},[33,166,168],{"href":167},"#11-local-deployment-docker","Local Deployment (Docker)",[56,170,171],{},[33,172,174],{"href":173},"#12-faq--troubleshooting","FAQ & Troubleshooting",[45,176],{},[48,178,180],{"id":179},"_1-platform-overview","1. Platform Overview",[18,182,183],{},"OrthoVenn Plus integrates the many algorithms used in comparative genomics — which would otherwise have to be installed separately and chained together by hand — into a single, complete online workflow. You simply pick your species on the web page, set a few key parameters, and click submit to run the entire chain, from orthologous gene identification all the way to positive selection detection.",[18,185,186],{},[21,187,188],{},"Scientific questions OrthoVenn Plus helps you answer:",[75,190,191,194,197,200,203],{},[56,192,193],{},"Which genes does my species share with its relatives, and which are unique to it?",[56,195,196],{},"What is the evolutionary relationship among these species, and roughly when did they diverge?",[56,198,199],{},"Which gene families underwent significant expansion or contraction during evolution?",[56,201,202],{},"Did any genes experience positive selection (adaptive evolution) along particular lineages?",[56,204,205],{},"Do the genomes of different species remain collinear at the chromosomal level?",[18,207,208],{},[21,209,210],{},"The six analyses and how they relate to one another:",[15,212,213],{},[18,214,215],{},"【Insert figure here: six-module workflow \u002F technical roadmap 
diagram】",[217,218,219,241],"table",{},[220,221,222],"thead",{},[223,224,225,229,232,235,238],"tr",{},[226,227,228],"th",{},"Analysis",[226,230,231],{},"Purpose",[226,233,234],{},"Input",[226,236,237],{},"Main Output",[226,239,240],{},"How it runs",[242,243,244,262,279,295,311,327],"tbody",{},[223,245,246,250,253,256,259],{},[247,248,249],"td",{},"① Orthologous cluster analysis",[247,251,252],{},"Group genes across species, extract single-copy orthologs, functional annotation",[247,254,255],{},"Protein sequences",[247,257,258],{},"Gene clusters, pan-genome structure, single-copy gene set, GO annotation",[247,260,261],{},"Required, runs with the task",[223,263,264,267,270,273,276],{},[247,265,266],{},"② Species tree analysis",[247,268,269],{},"Build the species phylogeny",[247,271,272],{},"Single-copy genes from ①",[247,274,275],{},"Species tree with support values",[247,277,278],{},"Runs with the task",[223,280,281,284,287,290,293],{},[247,282,283],{},"③ Divergence time estimation",[247,285,286],{},"Estimate absolute divergence ages",[247,288,289],{},"Species tree from ② + fossil calibration points",[247,291,292],{},"Time-calibrated tree",[247,294,278],{},[223,296,297,300,303,306,309],{},[247,298,299],{},"④ Gene family expansion & contraction",[247,301,302],{},"Detect significantly expanded\u002Fcontracted gene families",[247,304,305],{},"Time tree from ③ + gene counts from ①",[247,307,308],{},"Expansion\u002Fcontraction events on each branch",[247,310,278],{},[223,312,313,316,319,322,325],{},[247,314,315],{},"⑤ Chromosomal collinearity analysis",[247,317,318],{},"Detect conservation of chromosomal structure",[247,320,321],{},"GFF annotation",[247,323,324],{},"Collinear blocks, Sankey diagram",[247,326,278],{},[223,328,329,332,335,338,341],{},[247,330,331],{},"⑥ Positive selection analysis",[247,333,334],{},"Detect genes that underwent adaptive evolution",[247,336,337],{},"Gene cluster + CDS sequences",[247,339,340],{},"Positively selected 
branches\u002Fsites",[247,342,343,346],{},[21,344,345],{},"Run on demand on the clusters of interest after results are generated"," (see Chapter 9)",[15,348,349],{},[18,350,351,354,355,358],{},[21,352,353],{},"Why is positive selection \"run on demand\"?"," The first five analyses run automatically over the whole dataset, whereas positive selection targets ",[21,356,357],{},"one cluster of interest at a time"," (e.g. a family that expanded significantly). It requires you to first see the results and then choose which cluster to analyze, so it is not part of the submission wizard — instead it is triggered per cluster on the results page.",[18,360,361,364],{},[21,362,363],{},"Built-in species database:"," The platform ships with a built-in database covering six major groups — vertebrates, invertebrates (metazoa), protists, fungi, plants and bacteria — comprising 1,566 species and roughly 19.7 million protein sequences (data source: Ensembl, 2025). All sequences have been format-unified, de-duplicated and ID-standardized. You can start an analysis simply by selecting species in the interface, with no need to download or prepare data yourself.",[45,366],{},[48,368,370],{"id":369},"_2-data-preparation-format-requirements","2. 
Data Preparation & Format Requirements",[372,373,375],"h3",{"id":374},"_21-the-three-input-file-types","2.1 The Three Input File Types",[217,377,378,393],{},[220,379,380],{},[223,381,382,385,388,390],{},[226,383,384],{},"File type",[226,386,387],{},"Format",[226,389,231],{},[226,391,392],{},"When needed",[242,394,395,413,427],{},[223,396,397,401,404,407],{},[247,398,399],{},[21,400,255],{},[247,402,403],{},"FASTA (.fa \u002F .fasta)",[247,405,406],{},"Core input for orthologous cluster analysis",[247,408,409,412],{},[21,410,411],{},"Required"," (basis for all analyses)",[223,414,415,418,421,424],{},[247,416,417],{},"Gene annotation",[247,419,420],{},"GFF3 (.gff \u002F .gff3) or BED (.bed)",[247,422,423],{},"Provides gene positions on chromosomes",[247,425,426],{},"Chromosomal collinearity analysis",[223,428,429,432,435,438],{},[247,430,431],{},"CDS nucleotide sequences",[247,433,434],{},"FASTA (.cds.fa)",[247,436,437],{},"Provides codon-level information (dN\u002FdS)",[247,439,440],{},"Positive selection analysis on gene clusters",[15,442,443,454],{},[18,444,445,446,449,450,453],{},"Protein sequences are the ",[21,447,448],{},"only required"," file. GFF and CDS are both optional — they are used only if you need collinearity or positive selection. 
You can either provide them together when you upload a species, or ",[21,451,452],{},"add them later"," (adding them later unlocks only the corresponding analysis and does not affect results already completed).",[18,455,456,457,460],{},"For species chosen from the built-in database, all three file types are already prepared — ",[21,458,459],{},"no upload is needed at all",".",[372,462,464],{"id":463},"_22-the-single-most-important-rule-gene-ids-must-match-across-the-three-files","2.2 The Single Most Important Rule: Gene IDs Must Match Across the Three Files",[18,466,467,468,471],{},"This is the ",[21,469,470],{},"number-one cause"," of upload\u002Fanalysis failure, so please read it first.",[18,473,474,475,478],{},"Within one species, the protein FASTA, CDS and GFF files must use the ",[21,476,477],{},"exact same ID"," to refer to the same gene:",[480,481,486],"pre",{"className":482,"code":484,"language":485},[483],"language-text","Protein FASTA  >GeneA001   MSTDVPAK...\nCDS  FASTA     >GeneA001   ATGTCTACT...\nGFF  annotation  ... 
gene_id \"GeneA001\" ...\n","text",[487,488,484],"code",{"__ignoreMap":489},"",[75,491,492,495,498],{},[56,493,494],{},"If the CDS ID does not match the protein → positive selection cannot map the protein to its codons, and that cluster is flagged as not analyzable.",[56,496,497],{},"If the GFF ID does not match the protein → the collinearity plot is empty or fails to render.",[56,499,500,501,504],{},"The platform ",[21,502,503],{},"does not guess"," this correspondence from string similarity, so please confirm ID consistency before uploading (you can validate it in one click with the preprocessing tool in Chapter 10).",[372,506,508],{"id":507},"_23-protein-fasta-format","2.3 Protein FASTA Format",[480,510,513],{"className":511,"code":512,"language":485},[483],">GeneA001\nMSTDVPAKTSVILGQITTADTCLDPAGRKVIYLSE...\n>GeneA002\nMKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFK...\n",[487,514,512],{"__ignoreMap":489},[18,516,517],{},"Notes:",[75,519,520,530,544],{},[56,521,522,523,526,527,460],{},"Each sequence ID (the name after ",[487,524,525],{},">",") must be unique ",[21,528,529],{},"within each species file",[56,531,532,533,460],{},"Keep IDs concise and ",[21,534,535,536,539,540,543],{},"avoid spaces, slashes ",[487,537,538],{},"\u002F",", and quotes ",[487,541,542],{},"' \""," and other special characters",[56,545,546,547,550,551,554,555,558],{},"Use a different file name for each species; we recommend naming files after the full Latin species name (e.g. ",[487,548,549],{},"Arabidopsis_thaliana.fa",", ",[487,552,553],{},"Oryza_sativa.fa","). 
",[21,556,557],{},"The file name is used as the species name shown in the results",", so please use meaningful names.",[372,560,562],{"id":561},"_24-cds-nucleotide-sequence-format","2.4 CDS Nucleotide Sequence Format",[75,564,565,572],{},[56,566,567,568,571],{},"The gene ID must be ",[21,569,570],{},"exactly identical"," to the ID in the corresponding protein FASTA (see 2.2).",[56,573,574,575,460],{},"Sequences should ideally be complete codons (length a multiple of 3); if not, the platform handles it automatically — ",[21,576,577],{},"no manual padding required",[372,579,581],{"id":580},"_25-gff-bed-annotation-format","2.5 GFF \u002F BED Annotation Format",[75,583,584,587,590,607],{},[56,585,586],{},"Must contain gene position information: chromosome, gene ID, start, end, strand.",[56,588,589],{},"The gene ID must match the ID in the protein FASTA (see 2.2).",[56,591,592,593,550,596,550,599,602,603,606],{},"Supports ",[487,594,595],{},".gff",[487,597,598],{},".gff3",[487,600,601],{},".bed",". Standard GFF3 downloaded from Ensembl \u002F NCBI \u002F Phytozome can be converted in one click with the ",[21,604,605],{},"GFF to BED"," tool in Chapter 10.",[56,608,609,612,613,616,617,620,621,624],{},[21,610,611],{},"Requirement specific to collinearity analysis:"," be sure to use ",[21,614,615],{},"chromosome-level"," annotation, and keep the ",[21,618,619],{},"number of distinct chromosomes below 50",". If your annotation is at the scaffold\u002Fcontig level (many fragments), please ",[21,622,623],{},"keep only the 50 longest fragments"," before uploading, otherwise the collinearity plot becomes hard to read (see 5.5).",[372,626,628],{"id":627},"_26-data-preprocessing-tools-strongly-recommended","2.6 Data Preprocessing Tools (strongly recommended)",[15,630,631,638,654],{},[18,632,633,634,637],{},"Before uploading, we recommend checking your input files with the online helper tools under the ",[21,635,636],{},"TOOLS"," menu. 
They can automatically:",[75,639,640,643,646,649],{},[56,641,642],{},"detect and report duplicate sequence IDs;",[56,644,645],{},"remove illegal characters and unify line breaks;",[56,647,648],{},"convert GFF3 into the BED format the platform requires;",[56,650,651,460],{},[21,652,653],{},"validate ID consistency across the protein \u002F CDS \u002F GFF files",[18,655,656],{},"A downloadable local version (Windows \u002F macOS \u002F Linux) is also available on the Resources page of the homepage.",[45,658],{},[48,660,662],{"id":661},"_3-analysis-workflow-at-a-glance","3. Analysis Workflow at a Glance",[18,664,665],{},"The whole analysis runs in three steps, plus one optional on-demand analysis:",[480,667,670],{"className":668,"code":669,"language":485},[483],"Step 1 Select species \u002F upload data    Step 2 Configure modules        Step 3 Preview & submit\n   ├ Built-in or upload proteins  ──▶   ├ Module 1 Orthology (required) ──▶  Runs in the background\n   └ Optional: GFF \u002F CDS               ├ Modules 2–5 toggle as needed       Email notification on completion\n                                       └ Just set the Basic parameters\n\n                          ────────  After results are generated  ────────▶\n                          Chapter 9: run positive selection on demand for the clusters of interest\n",[487,671,669],{"__ignoreMap":489},[18,673,674],{},"Parameters for each module are shown in two layers:",[75,676,677,683],{},[56,678,679,682],{},[21,680,681],{},"Basic (expanded by default):"," the 2–4 key parameters that genuinely require a decision from you.",[56,684,685,688],{},[21,686,687],{},"Advanced (collapsed by default):"," tuning parameters that affect accuracy or runtime; the defaults are fine in the vast majority of cases.",[15,690,691],{},[18,692,693,696,697,700],{},[21,694,695],{},"About threads:"," the Advanced section of each module has a thread-count parameter, which is ",[21,698,699],{},"fixed at 48 and not editable in the online 
version"," (the platform schedules resources centrally). To customize threads and accelerate large-scale analyses, download the local deployment (see Chapter 11). Hovering over the parameter shows a download hint.",[372,702,704],{"id":703},"_31-parameter-changes-versioned-reruns-old-results-are-never-overwritten-orthovenn-plus-signature-feature","3.1 Parameter Changes & Versioned Reruns: Old Results Are Never Overwritten (OrthoVenn Plus signature feature)",[18,706,707,708,711],{},"The OrthoVenn Plus backend has been redesigned so that ",[21,709,710],{},"rerunning an analysis never overwrites the old results"," — a hallmark of this version that lets you explore the effect of different parameters quickly and efficiently.",[18,713,714,715,718,719,722,723,726,727,460],{},"Whenever you change a parameter that ",[21,716,717],{},"affects the computed result",", the platform generates a ",[21,720,721],{},"new version"," of the affected module while ",[21,724,725],{},"keeping the old version for comparison","; changes that only affect display (thresholds, colors, sorting, window size, etc.) 
merely refresh the view instantly and ",[21,728,729],{},"trigger no recomputation",[217,731,732,745],{},[220,733,734],{},[223,735,736,739,742],{},[226,737,738],{},"Your change",[226,740,741],{},"Typical effect",[226,743,744],{},"Rerun needed?",[242,746,747,758,769,780,790,801,812],{},[223,748,749,752,755],{},[247,750,751],{},"GO p-value filter, table sorting, plot colors",[247,753,754],{},"Display only",[247,756,757],{},"No (instant view refresh)",[223,759,760,763,766],{},[247,761,762],{},"Orthologous cluster algorithm or threshold",[247,764,765],{},"Affects orthogroups and almost all downstream",[247,767,768],{},"Rerun this module + all downstream",[223,770,771,774,777],{},[247,772,773],{},"Species tree method or rooting",[247,775,776],{},"Affects time tree and CAFE5",[247,778,779],{},"Rerun species tree and its downstream",[223,781,782,785,787],{},[247,783,784],{},"Divergence time calibration points \u002F Root Age",[247,786,776],{},[247,788,789],{},"Rerun time tree and CAFE5",[223,791,792,795,798],{},[247,793,794],{},"CAFE5 k value",[247,796,797],{},"Affects expansion\u002Fcontraction only",[247,799,800],{},"Rerun CAFE5 only",[223,802,803,806,809],{},[247,804,805],{},"Positive selection method or foreground branches",[247,807,808],{},"Affects only the positive selection result of the chosen cluster",[247,810,811],{},"Rerun only the corresponding positive selection task",[223,813,814,817,820],{},[247,815,816],{},"Collinearity display window size",[247,818,819],{},"View only",[247,821,822],{},"Usually no rerun",[18,824,825],{},[21,826,827],{},"Benefits of this design:",[75,829,830,836,842,848],{},[56,831,832,835],{},[21,833,834],{},"Nothing is lost, everything is comparable:"," old results are always kept, so you can compare results from different parameters side by side (e.g. 
Inflation 1.5 vs 1.2, CAFE5 k=2 vs Base, different tree-building methods) and pick the most reasonable one.",[56,837,838,841],{},[21,839,840],{},"Compute only what's needed, save time and resources:"," the platform reruns only the affected module and its downstream; unaffected upstream results are reused automatically, with no need to start over.",[56,843,844,847],{},[21,845,846],{},"Fully reproducible:"," every result version is bound to three things — input data + normalized parameters + tool versions — so anyone can reproduce it exactly.",[56,849,850,853],{},[21,851,852],{},"Zero wait for display tweaks:"," changes that only affect the view (thresholds, colors, sorting, windows) take effect instantly and consume no compute.",[15,855,856],{},[18,857,858,859,862],{},"Tip: on the results page you can see a ",[21,860,861],{},"version list"," for each module; switch versions to compare results under different parameters.",[45,864],{},[48,866,868],{"id":867},"_4-step-1-select-species-upload-data","4. 
Step 1: Select Species & Upload Data",[372,870,872],{"id":871},"_41-steps","4.1 Steps",[53,874,875,885],{},[56,876,877,878,881,882,460],{},"Open OrthoVenn Plus, click ",[21,879,880],{},"Create"," to start a new project, and enter ",[21,883,884],{},"Step 1 · Species",[56,886,887,888,890,893,901,903,906],{},"Choose your data source:",[26,889],{},[21,891,892],{},"Option A · Select from the built-in database (Cloud Species)",[75,894,895,898],{},[56,896,897],{},"Type the Latin species name in the search box (fuzzy search supported) and click to add it to the analysis list.",[56,899,900],{},"No files to upload; the sequences are already standardized.",[26,902],{},[21,904,905],{},"Option B · Upload custom data (Custom Upload)",[75,907,908,915],{},[56,909,910,911,914],{},"Drag your protein FASTA into the upload area, or click to choose files; multiple species can be uploaded at once (",[21,912,913],{},"up to 12 species in the online version",").",[56,916,917,920],{},[21,918,919],{},"Optional:"," if you plan to do collinearity or positive selection later, you can upload the corresponding GFF \u002F CDS here as well; you can also add them later.",[18,922,923],{},[924,925],"img",{"alt":926,"src":927},"Cloud_Species_Custom_Upload","\u002Fimages\u002Fdocument\u002F1-Cloud_Species_Custom_Upload.webp",[53,929,931,938,945],{"start":930},3,[56,932,933,934,937],{},"The ",[21,935,936],{},"Added Species"," panel on the right lists the species you have added and the status of their files (protein \u002F GFF \u002F CDS). 
Please confirm the count and file types are correct.",[56,939,940,941,944],{},"To practice first, click ",[21,942,943],{},"Load Example"," to load sample data.",[56,946,947,948,951],{},"When everything looks right, click ",[21,949,950],{},"Next"," to go to Step 2.",[372,953,955],{"id":954},"_42-notes","4.2 Notes",[75,957,958,961,964,967],{},[56,959,960],{},"The two options can be combined: you can both select built-in species and upload custom species.",[56,962,963],{},"GFF \u002F CDS are optional; leaving them out does not affect core analyses such as orthology and the species tree.",[56,965,966],{},"If a file has a format error, the platform flags it during upload.",[56,968,969,972,973,976],{},[21,970,971],{},"Compare within the same major group."," Built-in species are intended to be compared ",[21,974,975],{},"within the same major group"," (e.g. plants with plants, fungi with fungi). Cross-kingdom comparisons (e.g. plants vs. bacteria) rarely yield biologically meaningful results, so the interface restricts built-in selection to a single group by default. If you genuinely need a cross-group comparison, prepare the species via custom upload.",[45,978],{},[48,980,982],{"id":981},"_5-step-2-configure-analysis-modules","5. Step 2: Configure Analysis Modules",[18,984,985,986,989],{},"The list of analysis modules is on the left. ",[21,987,988],{},"Module 1 (Orthologous cluster analysis) is required","; Modules 2–5 can be toggled as needed. 
Click a module name to switch the parameter panel on the right.",[18,991,992],{},[924,993],{"alt":994,"src":995},"Overview_and_Detailed_Parameters","\u002Fimages\u002Fdocument\u002F2-Overview_and_Detailed_Parameters.webp",[18,997,998,1001],{},[21,999,1000],{},"Module dependencies"," (enabling a downstream module automatically includes its dependencies):",[480,1003,1006],{"className":1004,"code":1005,"language":485},[483],"Module 1 (Orthology) ──▶ Module 2 (Species tree) ──▶ Module 3 (Divergence time) ──▶ Module 4 (Gene families)\nModule 5 (Collinearity) is independent and only needs GFF\n",[487,1007,1005],{"__ignoreMap":489},[15,1009,1010],{},[18,1011,1012],{},"Tip: positive selection is not configured here. It is run on demand for individual clusters on the results page after results are generated (see Chapter 9).",[45,1014],{},[372,1016,1018],{"id":1017},"_51-module-1-orthologous-cluster-analysis-required","5.1 Module 1 · Orthologous Cluster Analysis (required)",[18,1020,1021],{},[21,1022,1023],{},"What does this module do for you?",[18,1025,1026],{},"It is the starting point of the whole workflow: it compares the proteins of all species pairwise and groups them into \"clusters\" (orthogroups) by similarity. Genes within the same cluster are considered to derive from a common ancestor. 
Outputs:",[75,1028,1029,1035,1041],{},[56,1030,1031,1034],{},[21,1032,1033],{},"Pan-genome structure:"," which clusters are shared by all species (the core genome) and which are unique to certain species.",[56,1036,1037,1040],{},[21,1038,1039],{},"Single-copy orthologs:"," the set of genes present in exactly one copy in every species — the ideal data for building a species tree.",[56,1042,1043,1046],{},[21,1044,1045],{},"GO functional annotation:"," the functional classification of each cluster, to help you understand its biological meaning.",[18,1048,1049],{},[924,1050],{"alt":1051,"src":1052},"Orthologous_Analysis","\u002Fimages\u002Fdocument\u002F3-Orthologous_Analysis.webp",[1054,1055,1057],"h4",{"id":1056},"basic-parameters","Basic Parameters",[18,1059,1060,1063,1064],{},[21,1061,1062],{},"Orthology algorithm (Algorithm)"," — default ",[21,1065,1066],{},"OrthoFinder",[217,1068,1069,1082],{},[220,1070,1071],{},[223,1072,1073,1076,1079],{},[226,1074,1075],{},"Algorithm",[226,1077,1078],{},"Characteristics",[226,1080,1081],{},"Use case",[242,1083,1084,1097,1108],{},[223,1085,1086,1091,1094],{},[247,1087,1088,1090],{},[21,1089,1066],{}," (recommended, default)",[247,1092,1093],{},"Based on gene-tree\u002Fspecies-tree reconciliation; distinguishes orthologs (from speciation) from paralogs (from gene duplication); high accuracy",[247,1095,1096],{},"Most comparative genomics analyses",[223,1098,1099,1102,1105],{},[247,1100,1101],{},"OrthoMCL (classic)",[247,1103,1104],{},"The classic method based on global sequence-similarity + Markov clustering (MCL), with cluster granularity controlled by the Inflation value",[247,1106,1107],{},"General use, moderate evolutionary distance; when you need to compare against classic OrthoMCL-based literature",[223,1109,1110,1113,1116],{},[247,1111,1112],{},"SonicParanoid2 (advanced)",[247,1114,1115],{},"An ultra-fast algorithm optimized for large datasets",[247,1117,1118],{},"Many species (>30) or rapid 
exploration",[15,1120,1121],{},[18,1122,1123,1126,1127,1129,1130,1133],{},[21,1124,1125],{},"How to choose?"," If unsure, use ",[21,1128,1066],{}," — currently the most accurate and general-purpose method, and the only one that explicitly distinguishes orthologs from paralogs. If you want the classic MCL clustering approach, or need to compare against existing OrthoMCL results, choose ",[21,1131,1132],{},"OrthoMCL"," (note it is more sensitive to the Inflation value, see below). Only consider SonicParanoid2 when you have many species and want a quick first pass.",[18,1135,1136,1063,1139],{},[21,1137,1138],{},"Search Sensitivity",[21,1140,1141],{},"Standard (diamond)",[75,1143,1144,1150],{},[56,1145,1146,1149],{},[21,1147,1148],{},"Standard (fast):"," suitable for most cases and quick.",[56,1151,1152,1155,1156,1159],{},[21,1153,1154],{},"Ultra-sensitive (diamond_ultra_sens, slow):"," finds more distant homology, suitable for ",[21,1157,1158],{},"evolutionarily distant"," species (e.g. cross-phylum comparisons), but slower.",[15,1161,1162],{},[18,1163,1164,1166],{},[21,1165,1125],{}," Standard is fine for closely related species; choose ultra-sensitive when the species are distant and you are worried about missing distant homologs.",[18,1168,1169,1063,1172],{},[21,1170,1171],{},"Inflation value (MCL clustering tightness)",[21,1173,1174],{},"1.5",[75,1176,1177,1180],{},[56,1178,1179],{},"Used for MCL-based clustering (OrthoMCL, and the MCL step inside OrthoFinder). It controls cluster \"tightness\": higher values give smaller, tighter clusters; lower values give larger, looser ones.",[56,1181,1182,1184],{},[21,1183,1125],{}," Keep 1.5 in most cases. If clearly related genes are being split into different clusters, lower it to 1.2; if a single cluster mixes functionally divergent genes, raise it to 2.0. 
The effect is more pronounced when OrthoMCL is selected.",[18,1186,1187,1063,1190],{},[21,1188,1189],{},"Functional annotation (Run Annotation)",[21,1191,1192],{},"on",[75,1194,1195],{},[56,1196,1197,1198,1201],{},"When on, it produces GO functional annotation and enrichment analysis. ",[21,1199,1200],{},"We strongly recommend keeping it on"," as the basis for downstream functional interpretation; turn it off only to save time.",[1054,1203,1205],{"id":1204},"advanced-parameters","Advanced Parameters",[217,1207,1208,1221],{},[220,1209,1210],{},[223,1211,1212,1215,1218],{},[226,1213,1214],{},"Parameter",[226,1216,1217],{},"Default",[226,1219,1220],{},"Description",[242,1222,1223,1234,1245,1256],{},[223,1224,1225,1228,1231],{},[247,1226,1227],{},"Alignment E-value",[247,1229,1230],{},"1e-5",[247,1232,1233],{},"Statistical significance threshold for homology calls. Looser (1e-2) suits distant species; stricter (1e-10) suits fine-grained comparison of close relatives. Rarely needs changing",[223,1235,1236,1239,1242],{},[247,1237,1238],{},"Annotation database",[247,1240,1241],{},"Swiss-Prot reviewed",[247,1243,1244],{},"Reference database for functional annotation",[223,1246,1247,1250,1253],{},[247,1248,1249],{},"GO multiple-testing correction",[247,1251,1252],{},"BH (FDR)",[247,1254,1255],{},"p-value correction method for enrichment analysis",[223,1257,1258,1261,1264],{},[247,1259,1260],{},"Threads",[247,1262,1263],{},"48",[247,1265,1266],{},"Read-only online; editable locally",[1054,1268,1270],{"id":1269},"interpreting-the-results","Interpreting the Results",[18,1272,1273],{},[924,1274],{"alt":1275,"src":1276},"Result_of_Orthologous","\u002Fimages\u002Fdocument\u002F4-Result_of_Orthologous.webp",[75,1278,1279,1285,1291,1297,1303,1309],{},[56,1280,1281,1284],{},[21,1282,1283],{},"UpSet plot:"," shows the distribution of clusters shared and unique among species; click an intersection bar to see the specific clusters in that 
intersection.",[56,1286,1287,1290],{},[21,1288,1289],{},"Venn diagram:"," the classic Venn (suitable for ≤6 species).",[56,1292,1293,1296],{},[21,1294,1295],{},"Pairwise shared-cluster heatmap:"," a matrix showing, for every species pair, the number of clusters they share — a quick read on overall similarity among species.",[56,1298,1299,1302],{},[21,1300,1301],{},"Occurrence Table:"," a matrix showing each cluster's copy number in every species; sort by column to find clusters with a specific distribution pattern.",[56,1304,1305,1308],{},[21,1306,1307],{},"Pan-genome statistics:"," total proteins, cluster count, single-copy gene count and singleton count for each species.",[56,1310,1311,1314],{},[21,1312,1313],{},"Single-cluster detail:"," click any cluster ID to see species composition, multiple sequence alignment, conserved motifs, the within-cluster gene tree, the similarity network, cluster-to-cluster relationships, and GO enrichment — this is also where you launch positive selection analysis (see Chapter 9).",[18,1316,1317],{},[21,1318,1319],{},"Quick glossary (cluster categories):",[217,1321,1322,1332],{},[220,1323,1324],{},[223,1325,1326,1329],{},[226,1327,1328],{},"Term",[226,1330,1331],{},"Meaning",[242,1333,1334,1344,1363,1382,1396,1410,1424,1434,1444],{},[223,1335,1336,1341],{},[247,1337,1338],{},[21,1339,1340],{},"Orthogroup \u002F Cluster",[247,1342,1343],{},"A set of homologous genes judged to descend from a common ancestral gene",[223,1345,1346,1352],{},[247,1347,1348,1351],{},[21,1349,1350],{},"1:1:1"," (single-copy core cluster)",[247,1353,1354,1355,1358,1359,1362],{},"An orthologous cluster with ",[21,1356,1357],{},"exactly one copy"," in ",[21,1360,1361],{},"every species"," — the ideal data for building a species tree",[223,1364,1365,1371],{},[247,1366,1367,1370],{},[21,1368,1369],{},"N:N:N"," (multi-copy core cluster)",[247,1372,1373,1374,1377,1378,1381],{},"A cluster containing ",[21,1375,1376],{},"all species"," but with 
",[21,1379,1380],{},"multiple copies"," in at least some of them",[223,1383,1384,1389],{},[247,1385,1386],{},[21,1387,1388],{},"Species-specific cluster",[247,1390,1391,1392,1395],{},"A cluster whose genes are ",[21,1393,1394],{},"all from one species",", often related to that species' unique functions",[223,1397,1398,1403],{},[247,1399,1400],{},[21,1401,1402],{},"Other \u002F Orthoer cluster",[247,1404,1405,1406,1409],{},"A (multi-copy) orthologous cluster containing only ",[21,1407,1408],{},"some"," of the species",[223,1411,1412,1417],{},[247,1413,1414],{},[21,1415,1416],{},"Singletons",[247,1418,1419,1420,1423],{},"Isolated genes ",[21,1421,1422],{},"not assigned to any cluster"," (no homology found)",[223,1425,1426,1431],{},[247,1427,1428],{},[21,1429,1430],{},"Core genome",[247,1432,1433],{},"The set of clusters shared by all species",[223,1435,1436,1441],{},[247,1437,1438],{},[21,1439,1440],{},"Pan-genome",[247,1442,1443],{},"The union of all clusters across all species",[223,1445,1446,1451],{},[247,1447,1448],{},[21,1449,1450],{},"Ortholog vs paralog",[247,1452,1453,1454,1457,1458],{},"Orthologs arise from ",[21,1455,1456],{},"speciation","; paralogs arise from ",[21,1459,1460],{},"gene duplication",[45,1462],{},[372,1464,1466],{"id":1465},"_52-module-2-species-tree-analysis","5.2 Module 2 · Species Tree Analysis",[18,1468,1469,1471],{},[21,1470,1023],{}," It builds a species phylogeny from the single-copy orthologs of Module 1. 
This tree is the \"evolutionary scaffold\" for downstream analyses such as divergence time, gene family dynamics and positive selection.",[18,1473,1474,1477,1478,1481],{},[21,1475,1476],{},"How to enable:"," tick ",[21,1479,1480],{},"Species Tree"," on the left.",[18,1483,1484],{},[924,1485],{"alt":1486,"src":1487},"Tree_Methods","\u002Fimages\u002Fdocument\u002F5-Tree_Methods.webp",[1054,1489,1057],{"id":1490},"basic-parameters-1",[18,1492,1493,1063,1496],{},[21,1494,1495],{},"Tree method",[21,1497,1498],{},"FastTree",[217,1500,1501,1516],{},[220,1502,1503],{},[223,1504,1505,1508,1511,1514],{},[226,1506,1507],{},"Method",[226,1509,1510],{},"Speed",[226,1512,1513],{},"Accuracy",[226,1515,1081],{},[242,1517,1518,1534,1548],{},[223,1519,1520,1525,1528,1531],{},[247,1521,1522,1524],{},[21,1523,1498],{}," (default)",[247,1526,1527],{},"Fastest (minutes)",[247,1529,1530],{},"Good",[247,1532,1533],{},"Online analysis, quick preview, many species",[223,1535,1536,1539,1542,1545],{},[247,1537,1538],{},"IQ-TREE 2",[247,1540,1541],{},"Slower (tens of minutes up)",[247,1543,1544],{},"High",[247,1546,1547],{},"When you want publication-grade accuracy; built-in ModelFinder selects the model automatically",[223,1549,1550,1553,1556,1558],{},[247,1551,1552],{},"RAxML-NG",[247,1554,1555],{},"Slower",[247,1557,1544],{},[247,1559,1560],{},"A strict maximum-likelihood method, comparable to IQ-TREE 2; good for cross-validating topology with a different method",[15,1562,1563],{},[18,1564,1565,1567,1568,1570,1571,1573,1574,1576],{},[21,1566,1125],{}," For online analysis we recommend ",[21,1569,1498],{}," — fast and good enough for most studies. If you need publication-grade accuracy and can accept a longer wait, choose ",[21,1572,1538],{}," (with automatic model selection); to cross-validate your tree with another mainstream maximum-likelihood implementation, use ",[21,1575,1552],{},". 
For large-scale analyses, use the local deployment.",[18,1578,1579,1063,1582],{},[21,1580,1581],{},"Root method",[21,1583,1584],{},"Midpoint",[217,1586,1587,1597],{},[220,1588,1589],{},[223,1590,1591,1593,1595],{},[226,1592,1507],{},[226,1594,1331],{},[226,1596,1081],{},[242,1598,1599,1611],{},[223,1600,1601,1605,1608],{},[247,1602,1603,1524],{},[21,1604,1584],{},[247,1606,1607],{},"Places the root at the midpoint of the longest path",[247,1609,1610],{},"Quick and convenient when the outgroup is uncertain",[223,1612,1613,1616,1619],{},[247,1614,1615],{},"Outgroup",[247,1617,1618],{},"Designates a known outgroup species as the root",[247,1620,1621],{},"More reliable when a clear outgroup exists",[15,1623,1624],{},[18,1625,1626,1627,1630,1631,1635],{},"When you choose Outgroup, an ",[21,1628,1629],{},"outgroup species selector"," appears below. The outgroup should be a species clearly related to all study species but lying outside the study group (e.g. grape ",[1632,1633,1634],"em",{},"Vitis vinifera"," as the outgroup when studying Rosaceae).",[1054,1637,1205],{"id":1638},"advanced-parameters-1",[217,1640,1641,1651],{},[220,1642,1643],{},[223,1644,1645,1647,1649],{},[226,1646,1214],{},[226,1648,1217],{},[226,1650,1220],{},[242,1652,1653,1664,1675,1686],{},[223,1654,1655,1658,1661],{},[247,1656,1657],{},"MSA algorithm",[247,1659,1660],{},"MAFFT Auto",[247,1662,1663],{},"MUSCLE v5 Super5 available for large data",[223,1665,1666,1669,1672],{},[247,1667,1668],{},"Substitution model",[247,1670,1671],{},"Auto-detect (MFP) \u002F LG+CAT",[247,1673,1674],{},"See note below",[223,1676,1677,1680,1683],{},[247,1678,1679],{},"Alignment trimming",[247,1681,1682],{},"automated1",[247,1684,1685],{},"gappyout \u002F none available (none for experts only)",[223,1687,1688,1690,1692],{},[247,1689,1260],{},[247,1691,1263],{},[247,1693,1266],{},[15,1695,1696,1702],{},[18,1697,1698,1701],{},[21,1699,1700],{},"About substitution models and auto-detection (MFP):"," the substitution model 
describes the frequency and pattern of amino-acid substitutions during evolution; choosing wrongly can produce an incorrect topology.",[75,1703,1704,1719],{},[56,1705,1706,1707,1710,1711,1714,1715,1718],{},"With ",[21,1708,1709],{},"IQ-TREE 2 \u002F RAxML-NG",", the default is ",[21,1712,1713],{},"MFP (ModelFinder Plus) auto-detection",": the algorithm uses information criteria (BIC \u002F AIC) to pick the best-fitting model from a set of candidates automatically, with ",[21,1716,1717],{},"no manual specification needed",". If you already know a suitable model, you can fill it in manually to skip detection time.",[56,1720,1706,1721,1723,1724,1727],{},[21,1722,1498],{},", a fixed ",[21,1725,1726],{},"LG+CAT"," model is used (WAG \u002F JTT also selectable); no model search is performed, which is why it is faster.",[1054,1729,1270],{"id":1730},"interpreting-the-results-1",[18,1732,1733],{},[924,1734],{"alt":1735,"src":1736},"Species_Tree","\u002Fimages\u002Fdocument\u002F6-Species_Tree.webp",[75,1738,1739,1749],{},[56,1740,1741,1742,1745,1746,460],{},"Hover over a tree node to see its bootstrap support: ",[21,1743,1744],{},"≥95% highly reliable",", 70%–95% moderate, ",[21,1747,1748],{},"\u003C70% interpret with caution",[56,1750,1751],{},"The tree can be exported as Newick text and as SVG \u002F PNG images.",[18,1753,1754],{},[21,1755,1756],{},"Quick glossary:",[217,1758,1759,1767],{},[220,1760,1761],{},[223,1762,1763,1765],{},[226,1764,1328],{},[226,1766,1331],{},[242,1768,1769,1779,1789,1799],{},[223,1770,1771,1776],{},[247,1772,1773],{},[21,1774,1775],{},"Topology",[247,1777,1778],{},"The branching structure of the tree, i.e. 
who is more closely related to whom",[223,1780,1781,1786],{},[247,1782,1783],{},[21,1784,1785],{},"Bootstrap support",[247,1787,1788],{},"A percentage assessing the reliability of a branch via resampling; higher is more reliable",[223,1790,1791,1796],{},[247,1792,1793],{},[21,1794,1795],{},"Newick",[247,1797,1798],{},"The standard text format using nested parentheses to represent a tree",[223,1800,1801,1806],{},[247,1802,1803],{},[21,1804,1805],{},"Single-copy orthologs",[247,1807,1808],{},"The 1:1:1 clusters from Module 1, used as input for tree building",[45,1810],{},[372,1812,1814],{"id":1813},"_53-module-3-divergence-time-estimation","5.3 Module 3 · Divergence Time Estimation",[18,1816,1817,1819],{},[21,1818,1023],{}," It converts the \"relative\" tree from Module 2 into a \"time tree\" labeled with absolute geological ages. Using fossil calibration points and a molecular-clock model, it estimates the divergence time of each node, letting you map genome-evolution events onto geological and climatic events.",[18,1821,1822,1477,1824,1827],{},[21,1823,1476],{},[21,1825,1826],{},"Time Tree"," (depends on Module 2).",[18,1829,1830],{},[924,1831],{"alt":1832,"src":1833},"Divergence_Time_Module","\u002Fimages\u002Fdocument\u002F7-Divergence_Time_Module.webp",[1054,1835,1057],{"id":1836},"basic-parameters-2",[18,1838,1839,1063,1841],{},[21,1840,1507],{},[21,1842,1843],{},"R8s",[217,1845,1846,1859],{},[220,1847,1848],{},[223,1849,1850,1852,1854,1857],{},[226,1851,1507],{},[226,1853,1510],{},[226,1855,1856],{},"Output",[226,1858,1081],{},[242,1860,1861,1879],{},[223,1862,1863,1867,1870,1876],{},[247,1864,1865,1524],{},[21,1866,1843],{},[247,1868,1869],{},"Fast (minutes)",[247,1871,1872,1875],{},[21,1873,1874],{},"Point estimates"," of node ages",[247,1877,1878],{},"A quick divergence-time framework",[223,1880,1881,1884,1886,1892],{},[247,1882,1883],{},"MCMCTree",[247,1885,1541],{},[247,1887,1888,1889],{},"Times + 95% HPD ",[21,1890,1891],{},"confidence 
intervals",[247,1893,1894],{},"Publication-grade; when uncertainty intervals are needed",[15,1896,1897],{},[18,1898,1899,1901,1902,1904],{},[21,1900,1125],{}," For a quick sense of divergence times, use ",[21,1903,1843],{},". For publication, when you need a 95% confidence interval per node, use MCMCTree (slower; large-scale analyses are best run locally).",[18,1906,1907,1910,1911],{},[21,1908,1909],{},"Calibration points"," — ",[21,1912,1913],{},"required; this is the most critical input of this module",[75,1915,1916,1930,1954],{},[56,1917,1918,1919,1922,1923,1926,1927,460],{},"Pick a species pair, and the platform can automatically look up their divergence time from the ",[21,1920,1921],{},"TimeTree"," database. Click ",[21,1924,1925],{},"+"," to add multiple sets; ",[21,1928,1929],{},"at least 1–2 are recommended",[56,1931,1932,1935],{},[21,1933,1934],{},"The two methods use the calibration information differently:",[75,1936,1937,1946],{},[56,1938,1939,1941,1942,1945],{},[21,1940,1843],{}," uses the ",[21,1943,1944],{},"median"," divergence time returned by TimeTree (a single point). R8s does not propagate calibration uncertainty and only gives point estimates, so using the median is more stable and more honest.",[56,1947,1948,1941,1950,1953],{},[21,1949,1883],{},[21,1951,1952],{},"range"," returned by TimeTree (minimum \u002F maximum) and performs Bayesian sampling within that interval, yielding a 95% HPD confidence interval.",[56,1955,1956,1959,1960],{},[21,1957,1958],{},"Source guidance:"," prefer field-recognized calibration points backed by paleontological\u002Fgeological evidence. 
",[21,1961,1962],{},"An incorrect calibration point will systematically bias the entire time tree.",[18,1964,1965],{},[21,1966,1967],{},"Example settings (Rosaceae):",[217,1969,1970,1986],{},[220,1971,1972],{},[223,1973,1974,1977,1980,1983],{},[226,1975,1976],{},"Species pair",[226,1978,1979],{},"Min (Ma)",[226,1981,1982],{},"Max (Ma)",[226,1984,1985],{},"Basis",[242,1987,1988,2008],{},[223,1989,1990,1999,2002,2005],{},[247,1991,1992,1995,1996],{},[1632,1993,1994],{},"Malus domestica"," – ",[1632,1997,1998],{},"Pyrus communis",[247,2000,2001],{},"12",[247,2003,2004],{},"20",[247,2006,2007],{},"Fossil record",[223,2009,2010,2016,2019,2022],{},[247,2011,2012,1995,2014],{},[1632,2013,1994],{},[1632,2015,1634],{},[247,2017,2018],{},"110",[247,2020,2021],{},"124",[247,2023,2007],{},[18,2025,2026],{},[21,2027,2028],{},"Root Age (Ma)",[75,2030,2031,2041,2051],{},[56,2032,2033,2036,2037,2040],{},[21,2034,2035],{},"Default:"," the platform automatically sets it to ",[21,2038,2039],{},"1.5× the largest branch divergence time"," in the current tree.",[56,2042,2043,2046,2047,2050],{},[21,2044,2045],{},"⚠️ We strongly recommend confirming this value manually."," Root Age is the global constraint on divergence times for the whole tree; it ",[21,2048,2049],{},"must be greater than the true divergence time of the oldest (root) node",", otherwise the entire time tree is compressed and you get incorrect age estimates.",[56,2052,2053,2056,2057,2061],{},[21,2054,2055],{},"If you are unsure of the exact value",", err on the high side — it serves only as an upper-bound constraint, so being too high does not distort results substantially, whereas being too low certainly causes errors. 
You can consult TimeTree (",[33,2058,2059],{"href":2059,"rel":2060},"https:\u002F\u002Ftimetree.org",[37],") for an approximate root age for your group.",[1054,2063,1205],{"id":2064},"advanced-parameters-2",[217,2066,2067,2077],{},[220,2068,2069],{},[223,2070,2071,2073,2075],{},[226,2072,1214],{},[226,2074,1217],{},[226,2076,1220],{},[242,2078,2079,2090,2101],{},[223,2080,2081,2084,2087],{},[247,2082,2083],{},"Cross-validation (R8s)",[247,2085,2086],{},"Off",[247,2088,2089],{},"Slower when on",[223,2091,2092,2095,2098],{},[247,2093,2094],{},"Chain length \u002F computational complexity (MCMCTree only)",[247,2096,2097],{},"Standard",[247,2099,2100],{},"Increase when intervals are too wide or repeated runs differ markedly, to ensure MCMC convergence",[223,2102,2103,2105,2107],{},[247,2104,1260],{},[247,2106,1263],{},[247,2108,1266],{},[1054,2110,1270],{"id":2111},"interpreting-the-results-2",[18,2113,2114],{},[924,2115],{"alt":2116,"src":2117},"Time_Tree","\u002Fimages\u002Fdocument\u002F8-Time_Tree.webp",[75,2119,2120],{},[56,2121,2122,2125],{},[21,2123,2124],{},"Time tree:"," an ultrametric tree with the horizontal axis as geological time (millions of years ago); MCMCTree additionally gives a 95% HPD interval for each node.",[18,2127,2128],{},[21,2129,1756],{},[217,2131,2132,2140],{},[220,2133,2134],{},[223,2135,2136,2138],{},[226,2137,1328],{},[226,2139,1331],{},[242,2141,2142,2152,2162,2172,2182],{},[223,2143,2144,2149],{},[247,2145,2146],{},[21,2147,2148],{},"Ultrametric tree",[247,2150,2151],{},"A tree in which all leaves are equidistant from the root, i.e. 
branch lengths converted to time",[223,2153,2154,2159],{},[247,2155,2156],{},[21,2157,2158],{},"Ma \u002F Mya",[247,2160,2161],{},"Time units; 1 Ma = 1 million years",[223,2163,2164,2169],{},[247,2165,2166],{},[21,2167,2168],{},"Molecular clock",[247,2170,2171],{},"The modeling assumption that uses the rate of sequence change to infer time",[223,2173,2174,2179],{},[247,2175,2176],{},[21,2177,2178],{},"95% HPD interval",[247,2180,2181],{},"Highest posterior density interval, the divergence-time confidence range from a Bayesian method (MCMCTree)",[223,2183,2184,2189],{},[247,2185,2186],{},[21,2187,2188],{},"Calibration point",[247,2190,2191],{},"A known divergence time of a species pair, used to anchor the relative tree to absolute ages",[45,2193],{},[372,2195,2197],{"id":2196},"_54-module-4-gene-family-expansion-contraction-analysis","5.4 Module 4 · Gene Family Expansion & Contraction Analysis",[18,2199,2200,2202,2203,2206],{},[21,2201,1023],{}," Using the time tree from Module 3 and the gene counts from Module 1, it identifies gene families that underwent ",[21,2204,2205],{},"statistically significant expansion (gene gain) or contraction (gene loss)"," on each branch of the species tree. Such changes are often linked to adaptive evolution, functional innovation or degeneration — for example, a significant expansion of a disease-resistance gene family on a cultivated lineage may point to selection during domestication.",[18,2208,2209,1477,2211,2214,2215,2218],{},[21,2210,1476],{},[21,2212,2213],{},"Gene Family Expansion & Contraction"," (depends on Modules 1 and 3). 
The algorithm is ",[21,2216,2217],{},"CAFE5"," (based on a stochastic birth-death model).",[18,2220,2221],{},[924,2222],{"alt":2223,"src":2224},"Gene_Family_Dynamics_Module","\u002Fimages\u002Fdocument\u002F9-Gene_Family_Dynamics_Module.webp",[1054,2226,1057],{"id":2227},"basic-parameters-3",[18,2229,2230,2233,2234,2237,2238,2241],{},[21,2231,2232],{},"k value (rate heterogeneity among gene families)"," — determines whether the ",[21,2235,2236],{},"Base model"," or the ",[21,2239,2240],{},"Gamma model"," is used",[217,2243,2244,2259],{},[220,2245,2246],{},[223,2247,2248,2251,2254,2256],{},[226,2249,2250],{},"Setting",[226,2252,2253],{},"Model used",[226,2255,1331],{},[226,2257,2258],{},"When to use",[242,2260,2261,2278,2295],{},[223,2262,2263,2268,2272,2275],{},[247,2264,2265],{},[21,2266,2267],{},"k left empty (none)",[247,2269,2270],{},[21,2271,2236],{},[247,2273,2274],{},"Assumes all gene families share exactly the same evolutionary rate (λ)",[247,2276,2277],{},"Small data, first pass; or for a robust analysis whose failures are easy to detect",[223,2279,2280,2285,2289,2292],{},[247,2281,2282],{},[21,2283,2284],{},"k = 2",[247,2286,2287],{},[21,2288,2240],{},[247,2290,2291],{},"Allows family rates to follow a 2-category gamma distribution",[247,2293,2294],{},"A common choice in most scenarios",[223,2296,2297,2302,2306,2309],{},[247,2298,2299],{},[21,2300,2301],{},"k = 3 or more",[247,2303,2304],{},[21,2305,2240],{},[247,2307,2308],{},"More rate categories, finer fit",[247,2310,2311],{},"Large data, pronounced rate differences among families",[15,2313,2314,2328,2341],{},[18,2315,2316,2319,2320,2323,2324,2327],{},[21,2317,2318],{},"How it works:"," the Gamma model (allowing different families to evolve at different rates) is enabled only ",[21,2321,2322],{},"when a k value is set","; ",[21,2325,2326],{},"leaving k empty uses the Base model"," (a single rate).",[18,2329,2330,2332,2333,2336,2337,2340],{},[21,2331,1125],{}," In real data, different families almost 
certainly evolve at different rates (immune genes fast, ribosomal proteins slow), so a ",[21,2334,2335],{},"k=2"," Gamma model is usually more reasonable than Base — we suggest starting from k=2. However, the Gamma model can ",[21,2338,2339],{},"fail silently"," when it does not converge, whereas problems with the Base model are easier to spot — so if you want the most robust, diagnosable baseline, run Base (k empty) once for comparison first.",[18,2342,2343,2346,2347,2350,2351,2354],{},[21,2344,2345],{},"Convergence safeguards for the Gamma model:"," because the Gamma model can fail silently, when you set a k value the platform automatically runs ",[21,2348,2349],{},"multiple restarts"," and ",[21,2352,2353],{},"reports the convergence quality"," of the run. Inspect this convergence report before trusting a Gamma result; if convergence is poor, prefer the Base model or adjust the parameters.",[18,2356,2357,1063,2360,2362],{},[21,2358,2359],{},"Use Poisson root distribution (Use Poisson)",[21,2361,1192],{},", recommended to keep.",[1054,2364,1205],{"id":2365},"advanced-parameters-3",[217,2367,2368,2378],{},[220,2369,2370],{},[223,2371,2372,2374,2376],{},[226,2373,1214],{},[226,2375,1217],{},[226,2377,1220],{},[242,2379,2380,2391,2402],{},[223,2381,2382,2385,2388],{},[247,2383,2384],{},"Max family size",[247,2386,2387],{},"100",[247,2389,2390],{},"Filters out very large families to avoid non-convergence",[223,2392,2393,2396,2399],{},[247,2394,2395],{},"Error model",[247,2397,2398],{},"None",[247,2400,2401],{},"Optional (expert); when empty, the platform automatically downgrades and retries",[223,2403,2404,2406,2408],{},[247,2405,1260],{},[247,2407,1263],{},[247,2409,1266],{},[15,2411,2412],{},[18,2413,2414,2417,2418,2421,2422,2425],{},[21,2415,2416],{},"Significance threshold (p-value, default 0.05):"," used to decide which families have a statistically ",[21,2419,2420],{},"significant"," size change on a given branch. 
This threshold also determines the family set used for the subsequent GO enrichment analysis (see results). Adjusting it on the ",[21,2423,2424],{},"results page"," only refreshes the view and does not rerun the analysis; set 0.01 for stricter, 0.10 to see more candidate families.",[1054,2427,1270],{"id":2428},"interpreting-the-results-3",[18,2430,2431],{},[924,2432],{"alt":2433,"src":2434},"Result_of_Expansion_and_Contraction","\u002Fimages\u002Fdocument\u002F10-Result_of_Expansion_and_Contraction.webp",[75,2436,2437,2456,2459,2469],{},[56,2438,2439,2440,2443,2444,2447,2448,2451,2452,2455],{},"Each branch of the species tree is labeled with two numbers: a ",[21,2441,2442],{},"red +"," for the number of families that ",[21,2445,2446],{},"expanded"," on that branch, and a ",[21,2449,2450],{},"blue −"," for the number that contracted. Note these are ",[21,2453,2454],{},"descriptive counts"," (all families with a size change on that branch), not limited to the statistically significant ones.",[56,2457,2458],{},"Click the number on a branch → view the list of families that changed on that branch; click a family ID → view its per-species copy number, member genes and GO annotation.",[56,2460,2461,2464,2465,2468],{},[21,2462,2463],{},"GO enrichment:"," when you click an expansion\u002Fcontraction node to view its GO enrichment, the platform runs enrichment ",[21,2466,2467],{},"only on the significant families (OG clusters) with p \u003C 0.05 at that node",", to ensure the enrichment reflects genuinely significant evolutionary events.",[56,2470,2471],{},"It is worth focusing on the families that expanded on the terminal branch leading to your target species, to see whether their functions relate to known phenotypes.",[18,2473,2474],{},[21,2475,1756],{},[217,2477,2478,2486],{},[220,2479,2480],{},[223,2481,2482,2484],{},[226,2483,1328],{},[226,2485,1331],{},[242,2487,2488,2498,2508,2518,2528],{},[223,2489,2490,2495],{},[247,2491,2492],{},[21,2493,2494],{},"Expansion \u002F 
Contraction",[247,2496,2497],{},"An increase \u002F decrease in the copy number of a gene family on a branch",[223,2499,2500,2505],{},[247,2501,2502],{},[21,2503,2504],{},"Birth-death model",[247,2506,2507],{},"The statistical model CAFE5 uses to describe gene gain (birth) and loss (death)",[223,2509,2510,2515],{},[247,2511,2512],{},[21,2513,2514],{},"λ (lambda)",[247,2516,2517],{},"The gene gain\u002Floss rate; one λ across the whole tree in the Base model, family-specific in the Gamma model",[223,2519,2520,2525],{},[247,2521,2522],{},[21,2523,2524],{},"Base vs Gamma model",[247,2526,2527],{},"See the Basic-parameter note on k: empty k uses Base, set k uses Gamma",[223,2529,2530,2535],{},[247,2531,2532],{},[21,2533,2534],{},"Significant family",[247,2536,2537],{},"A family with p \u003C 0.05; GO enrichment uses only these",[45,2539],{},[372,2541,2543],{"id":2542},"_55-module-5-chromosomal-collinearity-analysis","5.5 Module 5 · Chromosomal Collinearity Analysis",[18,2545,2546,2548],{},[21,2547,1023],{}," It analyzes structural conservation between species at the chromosomal level. If certain chromosomal segments of two species contain the same genes in roughly the same order, those segments are said to be \"collinear\". 
This can reveal: the degree of chromosomal-structure conservation, large-scale rearrangements (inversions\u002Ftranslocations\u002Ffusions\u002Ffissions), traces of whole-genome duplication (WGD), and the chromosomal distribution of particular gene families.",[18,2550,2551,2554,2555,2558],{},[21,2552,2553],{},"Data requirement:"," upload a ",[21,2556,2557],{},"GFF annotation file"," (see 2.5).",[15,2560,2561,2567],{},[18,2562,2563,2566],{},[21,2564,2565],{},"⚠️ Be sure to use a chromosome-level annotation file, and keep the number of distinct chromosomes below 50."," Collinearity analysis uses chromosomes as coordinate axes, and too many sequence fragments make the Sankey diagram unreadable.",[75,2568,2569,2579],{},[56,2570,2571,2572,2575,2576,2578],{},"If your annotation is at the ",[21,2573,2574],{},"scaffold \u002F contig level"," (many fragments), filter first and ",[21,2577,623],{}," before uploading.",[56,2580,2581],{},"Chromosome-level genomes (e.g. a reference genome already anchored to chromosomes) can be used directly.",[18,2583,2584,1477,2586,460],{},[21,2585,1476],{},[21,2587,2588],{},"Collinearity \u002F MCScanX",[18,2590,2591],{},[924,2592],{"alt":2593,"src":2594},"Collinearity_Module","\u002Fimages\u002Fdocument\u002F11-Collinearity_Module.webp",[1054,2596,1057],{"id":2597},"basic-parameters-4",[18,2599,2600,1063,2603],{},[21,2601,2602],{},"Run all species pairs (Run All Pairs)",[21,2604,2605],{},"on (small projects)",[75,2607,2608,2611],{},[56,2609,2610],{},"On: runs collinearity for every species pair. Recommended when there are few species.",[56,2612,2613],{},"Off: a species-pair selector appears so you run only the pairs you choose. 
Recommended when there are many species, to save time.",[1054,2615,1205],{"id":2616},"advanced-parameters-4",[217,2618,2619,2629],{},[220,2620,2621],{},[223,2622,2623,2625,2627],{},[226,2624,1214],{},[226,2626,1217],{},[226,2628,1220],{},[242,2630,2631,2642,2653,2663],{},[223,2632,2633,2636,2639],{},[247,2634,2635],{},"Match Size (-s)",[247,2637,2638],{},"5",[247,2640,2641],{},"Minimum number of anchor genes in a collinear block",[223,2643,2644,2647,2650],{},[247,2645,2646],{},"Max Gaps (-m)",[247,2648,2649],{},"25",[247,2651,2652],{},"Maximum gap allowed within a block",[223,2654,2655,2658,2660],{},[247,2656,2657],{},"Anchor E-value (-e)",[247,2659,1230],{},[247,2661,2662],{},"Anchor alignment threshold",[223,2664,2665,2667,2669],{},[247,2666,1260],{},[247,2668,1263],{},[247,2670,1266],{},[15,2672,2673],{},[18,2674,933,2675,2678,2679,2681],{},[21,2676,2677],{},"up\u002Fdown-stream gene window"," is adjusted on the ",[21,2680,2424],{}," and only refreshes the view.",[1054,2683,1270],{"id":2684},"interpreting-the-results-4",[18,2686,2687],{},[924,2688],{"alt":2689,"src":2690},"Result_of_Collinearity","\u002Fimages\u002Fdocument\u002F12-Result_of_Collinearity.webp",[75,2692,2693,2699],{},[56,2694,2695,2698],{},[21,2696,2697],{},"Sankey diagram:"," the two sides represent the chromosomes of two species, and the links are collinear blocks; denser links mean more conserved structure, while breaks and crossings represent rearrangement events.",[56,2700,2701,2704],{},[21,2702,2703],{},"Gene-search highlighting (signature feature):"," type a gene ID in the search box (e.g. 
a member of an expanded family found in Module 4), and the plot highlights the collinear blocks containing those genes — linking gene-family dynamics to chromosomal-structure change.",[18,2706,2707],{},[21,2708,1756],{},[217,2710,2711,2719],{},[220,2712,2713],{},[223,2714,2715,2717],{},[226,2716,1328],{},[226,2718,1331],{},[242,2720,2721,2731,2741,2751,2761,2771],{},[223,2722,2723,2728],{},[247,2724,2725],{},[21,2726,2727],{},"Collinearity \u002F Synteny",[247,2729,2730],{},"Chromosomal segments of two species containing the same genes in roughly the same order",[223,2732,2733,2738],{},[247,2734,2735],{},[21,2736,2737],{},"Collinear block",[247,2739,2740],{},"A region of consecutive homologous genes judged to be conserved",[223,2742,2743,2748],{},[247,2744,2745],{},[21,2746,2747],{},"Anchor",[247,2749,2750],{},"A pair of homologous genes within a block; the basis for the collinearity call",[223,2752,2753,2758],{},[247,2754,2755],{},[21,2756,2757],{},"Sankey diagram",[247,2759,2760],{},"A plot using links to show collinear relationships between the chromosomes of two species",[223,2762,2763,2768],{},[247,2764,2765],{},[21,2766,2767],{},"Rearrangement",[247,2769,2770],{},"Chromosomal structural changes such as inversion, translocation, fusion, fission",[223,2772,2773,2778],{},[247,2774,2775],{},[21,2776,2777],{},"WGD (whole-genome duplication)",[247,2779,2780],{},"Leaves traces of doubled blocks in collinearity",[45,2782],{},[48,2784,2786],{"id":2785},"_6-step-3-preview-submit","6. 
Step 3: Preview & Submit",[53,2788,2789,2796,2802,2809],{},[56,2790,2791,2792,2795],{},"Once the desired modules are configured, click ",[21,2793,2794],{},"Preview"," to review a summary of the task configuration.",[56,2797,2798,2799,460],{},"After confirming everything is correct, click ",[21,2800,2801],{},"Submit",[56,2803,2804,2805,2808],{},"The page shows a unique ",[21,2806,2807],{},"Task ID"," — save it so you can check progress.",[56,2810,2811,2812,2815],{},"The task runs in the background, so ",[21,2813,2814],{},"you may close the browser","; on completion the system sends an email with a link to the results page.",[15,2817,2818],{},[18,2819,2820,2823],{},[21,2821,2822],{},"Tip:"," keep your Task ID. Provide a valid email if you want the completion notification.",[45,2825],{},[48,2827,2829],{"id":2828},"_7-tracking-progress-task-history","7. Tracking Progress & Task History",[75,2831,2832,2839],{},[56,2833,2834,2835,2838],{},"Click ",[21,2836,2837],{},"Projects \u002F Task History"," to see all tasks and their status (queued \u002F running \u002F completed \u002F failed).",[56,2840,2841],{},"Click a Task ID to open its results page.",[45,2843],{},[48,2845,2847],{"id":2846},"_8-interpreting-exporting-results","8. Interpreting & Exporting Results",[372,2849,2851],{"id":2850},"_81-interactive-exploration","8.1 Interactive Exploration",[18,2853,2854,2855,2858,2859,2862,2863,2866,2867,2870,2871,2874],{},"All results are interactive visualizations. 
You can: ",[21,2856,2857],{},"click"," elements in a plot (clusters, tree nodes, Sankey blocks) to see details; ",[21,2860,2861],{},"hover"," to see exact values (support, divergence time, p-value); ",[21,2864,2865],{},"filter and sort"," tables; ",[21,2868,2869],{},"search"," gene IDs; and ",[21,2872,2873],{},"zoom and drag"," plots.",[372,2876,2878],{"id":2877},"_82-export","8.2 Export",[75,2880,2881,2894,2900,2906],{},[56,2882,2883,2886,2887,2890,2891,460],{},[21,2884,2885],{},"Graphics:"," all charts can be exported as ",[21,2888,2889],{},"SVG"," (vector, publication-ready) or ",[21,2892,2893],{},"PNG",[56,2895,2896,2899],{},[21,2897,2898],{},"Data:"," cluster lists, Newick tree files, statistics tables (TSV \u002F CSV), etc. can be downloaded.",[56,2901,2902,2905],{},[21,2903,2904],{},"BLAST database:"," the project also provides a pre-built BLAST database of all project proteins for download, so you can run your own sequence searches locally.",[56,2907,2908,2911,2912,2915,2916,460],{},[21,2909,2910],{},"Cloud:"," one-click export to ",[21,2913,2914],{},"Google Drive"," or ",[21,2917,2918],{},"Dropbox",[372,2920,2922],{"id":2921},"_83-analysis-report-reproducibility","8.3 Analysis Report (Reproducibility)",[18,2924,2925],{},"Each analysis automatically generates a report recording the parameters used, the versions of the integrated tools, and the full command history, so others can independently reproduce it with the same data and parameters.",[45,2927],{},[48,2929,2931],{"id":2930},"_9-positive-selection-analysis-on-gene-clusters-run-on-demand-after-results-are-generated","9. Positive Selection Analysis on Gene Clusters (run on demand after results are generated)",[18,2933,2934,2937],{},[21,2935,2936],{},"What does this analysis do for you?"," It detects which genes underwent positive selection (adaptive evolution). The molecular signal is a non-synonymous substitution rate significantly higher than the synonymous rate, i.e. 
ω = dN\u002FdS > 1 — meaning natural selection \"favors\" mutations that change protein function, often associated with adaptation to a new environment.",[15,2939,2940],{},[18,2941,2942,2945,2946,2949,2950,2953],{},[21,2943,2944],{},"It differs from the first five modules:"," positive selection targets a ",[21,2947,2948],{},"single gene cluster",", requiring you to first see the results and then pick a cluster of interest (e.g. a significantly expanded family, a cluster with significant GO enrichment). It is therefore ",[21,2951,2952],{},"not in the submission wizard"," but triggered on demand on the results page.",[372,2955,2957],{"id":2956},"_91-prepare-cds","9.1 Prepare CDS",[18,2959,2960,2961,2963],{},"This analysis requires the species' ",[21,2962,431],{}," (see 2.4). If you did not upload CDS when adding the species, you can add CDS to the species data at any time — adding it later unlocks only positive selection and does not affect results already completed. Built-in database species already have CDS ready.",[372,2965,2967],{"id":2966},"_92-entry-point","9.2 Entry Point",[18,2969,2970,2971,2974],{},"On the ",[21,2972,2973],{},"cluster detail page",", click \"Positive Selection Analysis\", or launch it directly from result highlights (CAFE5 significant families, GO-enriched clusters, search-hit clusters).",[372,2976,2978],{"id":2977},"_93-choose-your-scientific-question-key","9.3 Choose Your Scientific Question (key)",[18,2980,2981],{},[924,2982],{"alt":2983,"src":2984},"Positive_Selection_Module","\u002Fimages\u002Fdocument\u002F13-Positive_Selection_Module.webp",[18,2986,2987,2988,2991],{},"The top of the dialog asks: ",[21,2989,2990],{},"What do you want to find out about this cluster?"," You do not need to understand the internal differences between tools — just choose the question you want to answer; the algorithm name is shown as a subtitle.",[217,2993,2994,3009],{},[220,2995,2996],{},[223,2997,2998,3001,3003,3006],{},[226,2999,3000],{},"The question 
you want to answer (interface text)",[226,3002,1507],{},[226,3004,3005],{},"Output granularity",[226,3007,3008],{},"Extra input needed",[242,3010,3011,3031,3046,3060],{},[223,3012,3013,3018,3025,3028],{},[247,3014,3015],{},[21,3016,3017],{},"Which branches (lineages) show positive selection?",[247,3019,3020,3021,3024],{},"HyPhy aBSREL (",[21,3022,3023],{},"recommended, default",")",[247,3026,3027],{},"Branch level",[247,3029,3030],{},"No",[223,3032,3033,3038,3041,3044],{},[247,3034,3035],{},[21,3036,3037],{},"Which amino-acid sites show episodic positive selection?",[247,3039,3040],{},"HyPhy MEME",[247,3042,3043],{},"Site level",[247,3045,3030],{},[223,3047,3048,3053,3056,3058],{},[247,3049,3050],{},[21,3051,3052],{},"Does this family contain any positively selected sites?",[247,3054,3055],{},"PAML M7 vs M8 (expert)",[247,3057,3043],{},[247,3059,3030],{},[223,3061,3062,3067,3070,3072],{},[247,3063,3064],{},[21,3065,3066],{},"On branches I select, which sites are under positive selection?",[247,3068,3069],{},"PAML branch-site (expert)",[247,3071,3043],{},[247,3073,3074],{},[21,3075,3076],{},"Foreground branches must be selected on the tree",[15,3078,3079],{},[18,3080,3081,3084,3085,3088],{},[21,3082,3083],{},"Unsure which to choose?"," Start with ",[21,3086,3087],{},"aBSREL"," — the fastest and most robust, ideal for a first analysis. To pinpoint specific amino-acid sites, use MEME (when you suspect episodic selection on only some branches) or PAML M7\u002FM8 (to detect persistent positively selected sites across the whole tree). 
If you already have a hypothesis that a lineage is under selection, use PAML branch-site and mark the foreground branches on the tree.",[372,3090,3092],{"id":3091},"_94-other-options","9.4 Other Options",[75,3094,3095,3105,3111,3117,3127],{},[56,3096,3097,3100,3101,3104],{},[21,3098,3099],{},"Foreground branches (branch-site only):"," an interactive cluster tree pops up; click the species or lineages you hypothesize to be under selection; ",[21,3102,3103],{},"at least 1 must be selected"," to submit.",[56,3106,3107,3110],{},[21,3108,3109],{},"Genetic code (Advanced):"," defaults to the Universal code; for non-standard codes (mitochondria, ciliates, etc.), switch it here.",[56,3112,3113,3116],{},[21,3114,3115],{},"Threads (Advanced):"," fixed at 48 and read-only online; editable in the local version.",[56,3118,3119,3122,3123,3126],{},[21,3120,3121],{},"Online size limit:"," online analysis allows ",[21,3124,3125],{},"at most 100 proteins"," per cluster. For larger clusters, pick a smaller one or use the local deployment (see Chapter 11).",[56,3128,3129],{},"The first time you analyze a cluster, the platform builds its alignment and tree (once only), so please wait a moment.",[372,3131,3133],{"id":3132},"_95-interpreting-the-results","9.5 Interpreting the Results",[18,3135,3136],{},[924,3137],{"alt":3138,"src":3139},"Result_of_Positive_Selection","\u002Fimages\u002Fdocument\u002F14-Result_of_Positive_Selection.webp",[217,3141,3142,3154],{},[220,3143,3144],{},[223,3145,3146,3148,3151],{},[226,3147,1507],{},[226,3149,3150],{},"Result display",[226,3152,3153],{},"Filters adjustable on the results page",[242,3155,3156,3166,3177,3188],{},[223,3157,3158,3160,3163],{},[247,3159,3087],{},[247,3161,3162],{},"Branch-level table + tree (significant branches highlighted)",[247,3164,3165],{},"p-value",[223,3167,3168,3171,3174],{},[247,3169,3170],{},"MEME",[247,3172,3173],{},"Site-level table + alignment (significant sites highlighted)",[247,3175,3176],{},"p-value, 
EBF",[223,3178,3179,3182,3185],{},[247,3180,3181],{},"PAML M7\u002FM8",[247,3183,3184],{},"Site-level table (likelihood-ratio test + posterior)",[247,3186,3187],{},"p-value, BEB posterior",[223,3189,3190,3193,3196],{},[247,3191,3192],{},"PAML branch-site",[247,3194,3195],{},"Site-level table on the foreground branches",[247,3197,3187],{},[75,3199,3200,3207],{},[56,3201,3202,3203,3206],{},"Amino-acid sites identified as positively selected by the Bayesian methods (BEB \u002F EBF) are highlighted (e.g. ",[487,3204,3205],{},"128 A*",") — these are the specific positions that bear an adaptive signature at the molecular level.",[56,3208,3209],{},"Adjusting the thresholds on the results page only refreshes the view and does not rerun the analysis.",[18,3211,3212],{},[21,3213,1756],{},[217,3215,3216,3224],{},[220,3217,3218],{},[223,3219,3220,3222],{},[226,3221,1328],{},[226,3223,1331],{},[242,3225,3226,3236,3246,3256,3266,3276],{},[223,3227,3228,3233],{},[247,3229,3230],{},[21,3231,3232],{},"dN\u002FdS (ω)",[247,3234,3235],{},"The ratio of non-synonymous to synonymous substitution rates; ω > 1 suggests positive selection",[223,3237,3238,3243],{},[247,3239,3240],{},[21,3241,3242],{},"Positive selection",[247,3244,3245],{},"Natural selection favoring mutations that change protein function, i.e. 
adaptive evolution",[223,3247,3248,3253],{},[247,3249,3250],{},[21,3251,3252],{},"Branch level vs site level",[247,3254,3255],{},"Branch level answers \"which lineages are under selection\"; site level answers \"which amino-acid sites are under selection\"",[223,3257,3258,3263],{},[247,3259,3260],{},[21,3261,3262],{},"Foreground branch",[247,3264,3265],{},"In the branch-site test, the branch you hypothesize to be under selection, which you must mark on the tree",[223,3267,3268,3273],{},[247,3269,3270],{},[21,3271,3272],{},"Episodic selection",[247,3274,3275],{},"Positive selection occurring on only some branches or at only some times",[223,3277,3278,3283],{},[247,3279,3280],{},[21,3281,3282],{},"BEB \u002F EBF",[247,3284,3285],{},"Bayesian empirical methods giving the posterior probability \u002F empirical Bayes factor that a site is under positive selection",[15,3287,3288],{},[18,3289,3290],{},"Different methods detect different types of selection signal; we recommend trying several methods on the same cluster to obtain complementary evidence.",[45,3292],{},[48,3294,3296],{"id":3295},"_10-online-helper-tools-web-tools","10. Online Helper Tools (Web Tools)",[18,3298,933,3299,3301],{},[21,3300,636],{}," menu provides three standalone tools you can use without submitting a task.",[372,3303,3305],{"id":3304},"_101-cluster-venn-general-purpose-orthologous-cluster-venn-diagram","10.1 Cluster-Venn: General-Purpose Orthologous-Cluster Venn Diagram",[18,3307,3308],{},"Upload a custom cluster-membership file to directly generate an interactive Venn \u002F UpSet plot, with no need to re-run clustering on the platform. Ideal when you have already clustered with a third-party tool (OrthoFinder, OrthoMCL, etc.) 
and just want a quick visualization.",[18,3310,3311,3314,3315,3318,3319,460],{},[21,3312,3313],{},"Input format (.csv \u002F .txt):"," one cluster per line, genes within a cluster separated by spaces, gene names in the form ",[487,3316,3317],{},"SpeciesName|GeneID","; the platform identifies species membership by the prefix before ",[487,3320,3321],{},"|",[480,3323,3326],{"className":3324,"code":3325,"language":485},[483],"SpeciesA|bin1 SpeciesA|bin2 SpeciesB|fin1 SpeciesB|fin2 SpeciesC|gin2\nSpeciesA|bin22 SpeciesB|fin22 SpeciesC|gin24\nSpeciesB|fin32 SpeciesC|gin624\n",[487,3327,3325],{"__ignoreMap":489},[372,3329,3331],{"id":3330},"_102-gff-to-bed-annotation-format-conversion","10.2 GFF to BED: Annotation Format Conversion",[18,3333,3334],{},"Converts a standard 9-column GFF \u002F GFF3 into a 4-column \u002F 5-column BED that meets the input requirements of collinearity analysis. GFF3 downloaded from Ensembl \u002F NCBI \u002F Phytozome can be converted in one click and then uploaded.",[372,3336,3338],{"id":3337},"_103-newick-viewer-online-phylogenetic-tree-viewer","10.3 Newick Viewer: Online Phylogenetic Tree Viewer",[18,3340,3341],{},"Upload or paste a Newick tree file to view and interactively browse its topology online. Handy for quickly checking that a tree file is correct and previewing its shape.",[45,3343],{},[48,3345,3347],{"id":3346},"_11-local-deployment-docker","11. 
Local Deployment (Docker)",[18,3349,3350],{},"Intended for users with higher demands on analysis scale, compute speed, or data privacy.",[75,3352,3353,3359,3365,3371,3377],{},[56,3354,3355,3358],{},[21,3356,3357],{},"No limit on species count:"," the online version limits each analysis to 12 species to keep shared resources fair; the local version removes this limit, enabling large-scale comparisons of dozens of species.",[56,3360,3361,3364],{},[21,3362,3363],{},"Uses local compute, faster:"," tasks run locally with no queue; computation on large datasets is significantly faster.",[56,3366,3367,3370],{},[21,3368,3369],{},"Customizable thread count:"," the thread-count parameter in each module's Advanced section is editable in the local version, so you can fully use a multi-core CPU (fixed at 48 and non-editable online).",[56,3372,3373,3376],{},[21,3374,3375],{},"Larger positive-selection scale:"," the per-cluster protein cap can be raised (limited to 100 online).",[56,3378,3379,3382],{},[21,3380,3381],{},"Data privacy \u002F offline:"," data is processed entirely locally, suitable for unpublished or sensitive data; once deployed, it can run offline.",[18,3384,3385,3388,3389,3392],{},[21,3386,3387],{},"How to deploy:"," distributed as a Docker container; installation and configuration are on the ",[21,3390,3391],{},"DOWNLOAD"," page of the homepage.",[45,3394],{},[48,3396,3398],{"id":3397},"_12-faq-troubleshooting","12. 
FAQ & Troubleshooting",[217,3400,3401,3414],{},[220,3402,3403],{},[223,3404,3405,3408,3411],{},[226,3406,3407],{},"Problem",[226,3409,3410],{},"Possible cause",[226,3412,3413],{},"Solution",[242,3415,3416,3427,3438,3449,3460,3471,3482,3493],{},[223,3417,3418,3421,3424],{},[247,3419,3420],{},"Upload fails or reports a format error",[247,3422,3423],{},"Special characters in FASTA headers; GFF\u002FCDS IDs do not match the protein file",[247,3425,3426],{},"Preprocess and validate ID consistency with the online helper tools (Chapter 10)",[223,3428,3429,3432,3435],{},[247,3430,3431],{},"Task stays \"queued\" for a long time",[247,3433,3434],{},"High load on the public server",[247,3436,3437],{},"Wait for the email notification; for more speed, use the Docker local version",[223,3439,3440,3443,3446],{},[247,3441,3442],{},"Species-tree topology disagrees with known relationships",[247,3444,3445],{},"Too few single-copy genes (\u003C50); or insufficient method accuracy",[247,3447,3448],{},"Check the number of single-copy genes; switch to IQ-TREE 2 for higher accuracy",[223,3450,3451,3454,3457],{},[247,3452,3453],{},"Divergence-time intervals too wide or results unstable",[247,3455,3456],{},"Insufficient calibration points; MCMC did not converge",[247,3458,3459],{},"Add reliable calibration points; increase chain length \u002F complexity in MCMCTree",[223,3461,3462,3465,3468],{},[247,3463,3464],{},"No significant positive-selection signal",[247,3466,3467],{},"Too few substitution events; weak signal",[247,3469,3470],{},"Switch to aBSREL to detect episodic selection; look at genes with elevated but non-significant ω",[223,3472,3473,3476,3479],{},[247,3474,3475],{},"Positive selection reports protein count over the limit",[247,3477,3478],{},"The cluster has >100 proteins (online cap)",[247,3480,3481],{},"Choose a smaller cluster, or analyze large families with the local version",[223,3483,3484,3487,3490],{},[247,3485,3486],{},"CDS \u002F GFF reports a mismatch after 
upload",[247,3488,3489],{},"IDs do not match the protein file",[247,3491,3492],{},"Ensure IDs are exactly identical; validate with the online tool (see 2.2)",[223,3494,3495,3498,3501],{},[247,3496,3497],{},"Collinearity plot is very sparse",[247,3499,3500],{},"Species are too distant; GFF is incomplete",[247,3502,3503],{},"Compare more closely related species pairs; check gene coverage of the GFF",[45,3505],{},[15,3507,3508,3518],{},[18,3509,3510,3513,3514,3517],{},[21,3511,3512],{},"Getting help:"," for questions, contact us via the homepage, or see the ",[21,3515,3516],{},"DOCUMENTATION"," page.",[18,3519,3520,3523],{},[21,3521,3522],{},"Citing OrthoVenn:"," if you use OrthoVenn in your research, please cite the corresponding paper (refer to the latest release on the platform homepage).",{"title":489,"searchDepth":3525,"depth":3525,"links":3526},2,[3527,3528,3529,3537,3540,3544,3551,3552,3553,3558,3565,3570,3571],{"id":50,"depth":3525,"text":51},{"id":179,"depth":3525,"text":180},{"id":369,"depth":3525,"text":370,"children":3530},[3531,3532,3533,3534,3535,3536],{"id":374,"depth":930,"text":375},{"id":463,"depth":930,"text":464},{"id":507,"depth":930,"text":508},{"id":561,"depth":930,"text":562},{"id":580,"depth":930,"text":581},{"id":627,"depth":930,"text":628},{"id":661,"depth":3525,"text":662,"children":3538},[3539],{"id":703,"depth":930,"text":704},{"id":867,"depth":3525,"text":868,"children":3541},[3542,3543],{"id":871,"depth":930,"text":872},{"id":954,"depth":930,"text":955},{"id":981,"depth":3525,"text":982,"children":3545},[3546,3547,3548,3549,3550],{"id":1017,"depth":930,"text":1018},{"id":1465,"depth":930,"text":1466},{"id":1813,"depth":930,"text":1814},{"id":2196,"depth":930,"text":2197},{"id":2542,"depth":930,"text":2543},{"id":2785,"depth":3525,"text":2786},{"id":2828,"depth":3525,"text":2829},{"id":2846,"depth":3525,"text":2847,"children":3554},[3555,3556,3557],{"id":2850,"depth":930,"text":2851},{"id":2877,"depth":930,"text":2878},{"id":2921,"depth":
930,"text":2922},{"id":2930,"depth":3525,"text":2931,"children":3559},[3560,3561,3562,3563,3564],{"id":2956,"depth":930,"text":2957},{"id":2966,"depth":930,"text":2967},{"id":2977,"depth":930,"text":2978},{"id":3091,"depth":930,"text":3092},{"id":3132,"depth":930,"text":3133},{"id":3295,"depth":3525,"text":3296,"children":3566},[3567,3568,3569],{"id":3304,"depth":930,"text":3305},{"id":3330,"depth":930,"text":3331},{"id":3337,"depth":930,"text":3338},{"id":3346,"depth":3525,"text":3347},{"id":3397,"depth":3525,"text":3398},"md",{},true,"\u002Fdocument\u002Fusermanual_en",{"title":6,"description":489},"document\u002Fusermanual_en","qFgh_8BSheEVbGotTVCtEGTTHmPP1aR4E7wKvENfRXg",{"id":3580,"title":3581,"body":3582,"description":489,"extension":3572,"meta":6960,"navigation":3574,"path":6961,"seo":6962,"stem":6963,"__hash__":6964},"content\u002Fdocument\u002Fusermanual_cn.md","OrthoVenn Plus 使用手册",{"type":8,"value":3583,"toc":6913},[3584,3587,3609,3611,3614,3728,3730,3734,3737,3742,3759,3764,3769,3894,3906,3912,3914,3918,3922,3989,4009,4013,4020,4027,4033,4048,4052,4057,4060,4097,4101,4116,4120,4159,4163,4190,4192,4196,4199,4205,4208,4222,4234,4238,4245,4263,4356,4361,4387,4396,4398,4402,4406,4451,4455,4475,4479,4490,4492,4496,4503,4507,4513,4519,4524,4526,4530,4535,4538,4558,4562,4566,4574,4626,4640,4648,4666,4673,4680,4690,4698,4707,4711,4769,4772,4776,4807,4812,4950,4952,4956,4961,4969,4973,4976,4983,5044,5060,5068,5104,5116,5119,5172,5204,5207,5211,5226,5231,5282,5284,5288,5293,5301,5305,5308,5315,5365,5375,5383,5431,5436,5483,5488,5520,5523,5567,5570,5574,5582,5586,5648,5650,5654,5659,5669,5673,5676,5689,5758,5782,5790,5793,5836,5852,5855,5859,5896,5900,5962,5964,5968,5973,5983,6007,6013,6017,6020,6028,6036,6039,6091,6102,6105,6109,6123,6127,6199,6201,6205,6233,6235,6239,6250,6252,6256,6260,6283,6287,6317,6321,6324,6326,6330,6336,6352,6356,6362,6366,6372,6376,6380,6386,6478,6489,6493,6530,6534,6538,6592,6603,6607,6678,6683,6685,6689,6695,6699,6702,6715,6720,6724,6727
,6731,6734,6736,6740,6743,6775,6784,6786,6790,6895,6897],[11,3585,3581],{"id":3586},"orthovenn-plus-使用手册",[15,3588,3589],{},[18,3590,3591,3594,3595,3597,31,3600,3603,3605,3608],{},[21,3592,3593],{},"平台:"," OrthoVenn Plus · 多物种比较基因组学在线分析平台",[26,3596],{},[21,3598,3599],{},"网址:",[33,3601,35],{"href":35,"rel":3602},[37],[26,3604],{},[21,3606,3607],{},"适用对象:"," 需要做多物种比较基因组学分析的研究人员,无需编程或命令行经验。",[45,3610],{},[48,3612,3613],{"id":3613},"目录",[53,3615,3616,3622,3628,3642,3648,3686,3692,3698,3704,3710,3716,3722],{},[56,3617,3618],{},[33,3619,3621],{"href":3620},"#1-%E5%B9%B3%E5%8F%B0%E7%AE%80%E4%BB%8B","平台简介",[56,3623,3624],{},[33,3625,3627],{"href":3626},"#2-%E6%95%B0%E6%8D%AE%E5%87%86%E5%A4%87%E4%B8%8E%E6%A0%BC%E5%BC%8F%E8%A6%81%E6%B1%82","数据准备与格式要求",[56,3629,3630,3634],{},[33,3631,3633],{"href":3632},"#3-%E5%88%86%E6%9E%90%E6%B5%81%E7%A8%8B%E6%80%BB%E8%A7%88","分析流程总览",[75,3635,3636],{},[56,3637,79,3638],{},[33,3639,3641],{"href":3640},"#31-%E4%BF%AE%E6%94%B9%E5%8F%82%E6%95%B0%E4%B8%8E%E7%89%88%E6%9C%AC%E9%87%8D%E8%B7%91%E4%B8%8D%E8%A6%86%E7%9B%96%E6%97%A7%E7%BB%93%E6%9E%9Corthovenn-plus-%E7%89%B9%E8%89%B2","修改参数与版本重跑:不覆盖旧结果(特色)",[56,3643,3644],{},[33,3645,3647],{"href":3646},"#4-%E7%AC%AC%E4%B8%80%E6%AD%A5%E9%80%89%E6%8B%A9%E7%89%A9%E7%A7%8D%E4%B8%8E%E4%B8%8A%E4%BC%A0%E6%95%B0%E6%8D%AE","第一步:选择物种与上传数据",[56,3649,3650,3654],{},[33,3651,3653],{"href":3652},"#5-%E7%AC%AC%E4%BA%8C%E6%AD%A5%E9%85%8D%E7%BD%AE%E5%88%86%E6%9E%90%E6%A8%A1%E5%9D%97","第二步:配置分析模块",[75,3655,3656,3662,3668,3674,3680],{},[56,3657,100,3658],{},[33,3659,3661],{"href":3660},"#51-%E6%A8%A1%E5%9D%97%E4%B8%80--%E7%9B%B4%E7%B3%BB%E5%90%8C%E6%BA%90%E7%B0%87%E5%88%86%E6%9E%90%E5%BF%85%E9%80%89","模块一 · 直系同源簇分析(必选)",[56,3663,107,3664],{},[33,3665,3667],{"href":3666},"#52-%E6%A8%A1%E5%9D%97%E4%BA%8C--%E7%89%A9%E7%A7%8D%E8%BF%9B%E5%8C%96%E6%A0%91%E5%88%86%E6%9E%90","模块二 · 
物种进化树分析",[56,3669,114,3670],{},[33,3671,3673],{"href":3672},"#53-%E6%A8%A1%E5%9D%97%E4%B8%89--%E5%88%86%E6%AD%A7%E6%97%B6%E9%97%B4%E6%8E%A8%E6%96%AD","模块三 · 分歧时间推断",[56,3675,121,3676],{},[33,3677,3679],{"href":3678},"#54-%E6%A8%A1%E5%9D%97%E5%9B%9B--%E5%9F%BA%E5%9B%A0%E5%AE%B6%E6%97%8F%E6%94%B6%E7%BC%A9%E4%B8%8E%E6%89%A9%E5%BC%A0%E5%88%86%E6%9E%90","模块四 · 基因家族收缩与扩张分析",[56,3681,128,3682],{},[33,3683,3685],{"href":3684},"#55-%E6%A8%A1%E5%9D%97%E4%BA%94--%E6%9F%93%E8%89%B2%E4%BD%93%E5%85%B1%E7%BA%BF%E6%80%A7%E5%88%86%E6%9E%90","模块五 · 染色体共线性分析",[56,3687,3688],{},[33,3689,3691],{"href":3690},"#6-%E7%AC%AC%E4%B8%89%E6%AD%A5%E9%A2%84%E8%A7%88%E4%B8%8E%E6%8F%90%E4%BA%A4","第三步:预览与提交",[56,3693,3694],{},[33,3695,3697],{"href":3696},"#7-%E6%9F%A5%E7%9C%8B%E8%BF%9B%E5%BA%A6%E4%B8%8E%E5%8E%86%E5%8F%B2%E4%BB%BB%E5%8A%A1","查看进度与历史任务",[56,3699,3700],{},[33,3701,3703],{"href":3702},"#8-%E7%BB%93%E6%9E%9C%E8%A7%A3%E8%AF%BB%E4%B8%8E%E5%AF%BC%E5%87%BA","结果解读与导出",[56,3705,3706],{},[33,3707,3709],{"href":3708},"#9-%E5%9F%BA%E5%9B%A0%E7%B0%87%E6%AD%A3%E9%80%89%E6%8B%A9%E5%8E%8B%E5%8A%9B%E5%88%86%E6%9E%90%E7%BB%93%E6%9E%9C%E7%94%9F%E6%88%90%E5%90%8E%E6%8C%89%E9%9C%80%E8%BF%90%E8%A1%8C","基因簇正选择压力分析(结果生成后按需运行)",[56,3711,3712],{},[33,3713,3715],{"href":3714},"#10-%E5%9C%A8%E7%BA%BF%E8%BE%85%E5%8A%A9%E5%B7%A5%E5%85%B7web-tools","在线辅助工具(Web Tools)",[56,3717,3718],{},[33,3719,3721],{"href":3720},"#11-%E6%9C%AC%E5%9C%B0%E9%83%A8%E7%BD%B2%E7%89%88%E6%9C%ACdocker","本地部署版本(Docker)",[56,3723,3724],{},[33,3725,3727],{"href":3726},"#12-%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98%E4%B8%8E%E8%A7%A3%E5%86%B3%E6%96%B9%E6%A1%88","常见问题与解决方案",[45,3729],{},[48,3731,3733],{"id":3732},"_1-平台简介","1. 
平台简介",[18,3735,3736],{},"OrthoVenn Plus 把比较基因组学分析中原本需要分别安装、手动衔接的多种算法,整合成一条完整的在线分析流程。你只需在网页上选物种、调几个关键参数、点提交,就能完成从直系同源基因鉴定到正选择检测的全链条分析。",[18,3738,3739],{},[21,3740,3741],{},"OrthoVenn Plus 能帮你回答这些科学问题:",[75,3743,3744,3747,3750,3753,3756],{},[56,3745,3746],{},"我的物种和近缘物种共享了哪些基因?哪些是它独有的?",[56,3748,3749],{},"这些物种之间的进化关系如何?它们大约在什么时候分化?",[56,3751,3752],{},"进化过程中,哪些基因家族发生了显著的扩张或收缩?",[56,3754,3755],{},"是否有基因在特定谱系上经历了正选择(适应性进化)?",[56,3757,3758],{},"不同物种的基因组在染色体结构上是否保持共线性?",[18,3760,3761],{},[21,3762,3763],{},"六类分析及其逻辑关系:",[15,3765,3766],{},[18,3767,3768],{},"【此处插入图:六模块流程示意图(用文章的流程图 \u002F 技术路线图)】",[217,3770,3771,3790],{},[220,3772,3773],{},[223,3774,3775,3778,3781,3784,3787],{},[226,3776,3777],{},"分析",[226,3779,3780],{},"作用",[226,3782,3783],{},"输入",[226,3785,3786],{},"主要输出",[226,3788,3789],{},"运行方式",[242,3791,3792,3809,3826,3842,3858,3874],{},[223,3793,3794,3797,3800,3803,3806],{},[247,3795,3796],{},"① 直系同源簇分析",[247,3798,3799],{},"跨物种基因分组,提取单拷贝直系同源基因,功能注释",[247,3801,3802],{},"蛋白序列",[247,3804,3805],{},"基因簇、泛基因组结构、单拷贝基因集、GO 注释",[247,3807,3808],{},"必选,随任务运行",[223,3810,3811,3814,3817,3820,3823],{},[247,3812,3813],{},"② 物种进化树分析",[247,3815,3816],{},"构建物种进化树",[247,3818,3819],{},"① 的单拷贝基因",[247,3821,3822],{},"带支持度的物种树",[247,3824,3825],{},"随任务运行",[223,3827,3828,3831,3834,3837,3840],{},[247,3829,3830],{},"③ 分歧时间推断",[247,3832,3833],{},"估算物种分化的绝对年代",[247,3835,3836],{},"② 的物种树 + 化石校准点",[247,3838,3839],{},"含时间标注的时间树",[247,3841,3825],{},[223,3843,3844,3847,3850,3853,3856],{},[247,3845,3846],{},"④ 基因家族收缩与扩张分析",[247,3848,3849],{},"检测显著扩张\u002F收缩的基因家族",[247,3851,3852],{},"③ 的时间树 + ① 的基因计数",[247,3854,3855],{},"各分支上的扩张\u002F收缩事件",[247,3857,3825],{},[223,3859,3860,3863,3866,3869,3872],{},[247,3861,3862],{},"⑤ 染色体共线性分析",[247,3864,3865],{},"检测染色体结构保守性",[247,3867,3868],{},"GFF 注释",[247,3870,3871],{},"共线性区块、桑基图",[247,3873,3825],{},[223,3875,3876,3879,3882,3885,3888],{},[247,3877,3878],{},"⑥ 基因簇正选择压力分析",[247,3880,3881],{},"检测经历适应性进化的基因",[247,3883,3884],{},"基因簇 + CDS 
序列",[247,3886,3887],{},"正选择分支\u002F位点",[247,3889,3890,3893],{},[21,3891,3892],{},"结果生成后,针对感兴趣的簇按需运行","(见第 9 章)",[15,3895,3896],{},[18,3897,3898,3901,3902,3905],{},[21,3899,3900],{},"为什么正选择是\"按需运行\"?"," 前五类分析对整个数据集自动跑;而正选择针对的是",[21,3903,3904],{},"单个你感兴趣的基因簇","(例如某个显著扩张的家族),需要你先看到结果、再挑簇来分析,因此它不在提交向导里,而是在结果页对每个簇单独触发。",[18,3907,3908,3911],{},[21,3909,3910],{},"内置物种数据库:"," 平台内置植物、动物、真菌、原生生物、古菌、细菌六大类共 1,566 个物种、约 2,580 万条蛋白序列(数据来源:Ensembl 2025)。所有序列已完成格式统一、去冗余和 ID 标准化。你可以直接从界面选物种开始分析,无需自行下载和准备数据。",[45,3913],{},[48,3915,3917],{"id":3916},"_2-数据准备与格式要求","2. 数据准备与格式要求",[372,3919,3921],{"id":3920},"_21-三类输入文件","2.1 三类输入文件",[217,3923,3924,3940],{},[220,3925,3926],{},[223,3927,3928,3931,3934,3937],{},[226,3929,3930],{},"文件类型",[226,3932,3933],{},"格式",[226,3935,3936],{},"用途",[226,3938,3939],{},"何时需要",[242,3941,3942,3961,3975],{},[223,3943,3944,3949,3952,3955],{},[247,3945,3946],{},[21,3947,3948],{},"蛋白质序列",[247,3950,3951],{},"FASTA(.fa \u002F .fasta)",[247,3953,3954],{},"直系同源簇分析的核心输入",[247,3956,3957,3960],{},[21,3958,3959],{},"必需","(所有分析的基础)",[223,3962,3963,3966,3969,3972],{},[247,3964,3965],{},"基因注释",[247,3967,3968],{},"GFF3(.gff \u002F .gff3)或 BED(.bed)",[247,3970,3971],{},"提供基因在染色体上的位置",[247,3973,3974],{},"染色体共线性分析",[223,3976,3977,3980,3983,3986],{},[247,3978,3979],{},"CDS 核苷酸序列",[247,3981,3982],{},"FASTA(.cds.fa)",[247,3984,3985],{},"提供密码子水平信息(dN\u002FdS)",[247,3987,3988],{},"基因簇正选择压力分析",[15,3990,3991,4002],{},[18,3992,3993,3994,3997,3998,4001],{},"蛋白质序列是",[21,3995,3996],{},"唯一必需","的文件。GFF 和 CDS 都是可选的——只在你需要做共线性或正选择时才用到。两者既可以在上传物种时一起提供,也可以",[21,3999,4000],{},"之后再补充","(后补只解锁对应分析,不影响已完成的其它结果)。",[18,4003,4004,4005,4008],{},"从内置数据库选的物种,这三类文件都已就绪,",[21,4006,4007],{},"完全不用上传","。",[372,4010,4012],{"id":4011},"_22-最重要的一条三类文件的基因-id-必须一致","2.2 最重要的一条:三类文件的基因 ID 必须一致",[18,4014,4015,4016,4019],{},"这是上传分析失败的",[21,4017,4018],{},"头号原因",",务必先看。",[18,4021,4022,4023,4026],{},"同一个物种的蛋白质 FASTA、CDS、GFF 三个文件里,描述同一个基因时用的 ID 
必须",[21,4024,4025],{},"完全相同",":",[480,4028,4031],{"className":4029,"code":4030,"language":485},[483],"蛋白质 FASTA  >GeneA001   MSTDVPAK...\nCDS  FASTA    >GeneA001   ATGTCTACT...\nGFF  注释      ... gene_id \"GeneA001\" ...\n",[487,4032,4030],{"__ignoreMap":489},[75,4034,4035,4038,4041],{},[56,4036,4037],{},"如果 CDS 的 ID 和蛋白不一致 → 正选择分析无法把蛋白和密码子对应起来,该簇会被标记为无法分析。",[56,4039,4040],{},"如果 GFF 的 ID 和蛋白不一致 → 共线性图为空或绘制失败。",[56,4042,4043,4044,4047],{},"平台",[21,4045,4046],{},"不会靠字符串猜测","这种对应关系,所以请在上传前确认 ID 统一(可用第 10 章的预处理工具一键校验)。",[372,4049,4051],{"id":4050},"_23-蛋白质-fasta-格式","2.3 蛋白质 FASTA 格式",[480,4053,4055],{"className":4054,"code":512,"language":485},[483],[487,4056,512],{"__ignoreMap":489},[18,4058,4059],{},"注意事项:",[75,4061,4062,4072,4084],{},[56,4063,4064,4065,4067,4068,4071],{},"序列 ID(",[487,4066,525],{}," 后的名称)在",[21,4069,4070],{},"每个物种文件内","必须唯一。",[56,4073,4074,4075,4008],{},"ID 尽量简洁,",[21,4076,4077,4078,4080,4081,4083],{},"避免空格、斜杠 ",[487,4079,538],{},"、引号 ",[487,4082,542],{}," 等特殊字符",[56,4085,4086,4087,4089,4090,4092,4093,4096],{},"不同物种用不同文件名,建议用物种拉丁文全称命名(如 ",[487,4088,549],{},"、",[487,4091,553],{},")。",[21,4094,4095],{},"文件名会作为物种名显示在结果里",",请用有意义的命名。",[372,4098,4100],{"id":4099},"_24-cds-核苷酸序列格式","2.4 CDS 核苷酸序列格式",[75,4102,4103,4110],{},[56,4104,4105,4106,4109],{},"基因 ID 必须与对应蛋白质 FASTA 中的 ID ",[21,4107,4108],{},"完全一致","(见 2.2)。",[56,4111,4112,4113,4008],{},"序列最好是完整密码子(长度为 3 的倍数);若不是 3 的倍数,平台会自动处理,",[21,4114,4115],{},"无需手动补齐",[372,4117,4119],{"id":4118},"_25-gff-bed-注释格式","2.5 GFF \u002F BED 注释格式",[75,4121,4122,4125,4128,4141],{},[56,4123,4124],{},"需包含基因的位置信息:染色体、基因 ID、起始、终止、链方向。",[56,4126,4127],{},"基因 ID 必须与蛋白质 FASTA 中的 ID 一致(见 2.2)。",[56,4129,4130,4131,4089,4133,4089,4135,4137,4138,4140],{},"支持 ",[487,4132,595],{},[487,4134,598],{},[487,4136,601],{},"。从 Ensembl \u002F NCBI \u002F Phytozome 下载的标准 GFF3,可用第 10 章的 ",[21,4139,605],{}," 工具一键转换。",[56,4142,4143,4146,4147,4150,4151,4154,4155,4158],{},[21,4144,4145],{},"共线性分析专用要求:"," 
务必使用",[21,4148,4149],{},"染色体级别","的注释,且",[21,4152,4153],{},"染色体数目种类 \u003C 50","。若注释为 scaffold \u002F contig 级别(片段很多),请先",[21,4156,4157],{},"只保留最长的前 50 条片段","再上传,否则共线性图难以解读(详见 5.5)。",[372,4160,4162],{"id":4161},"_26-数据预处理工具强烈推荐","2.6 数据预处理工具(强烈推荐)",[15,4164,4165,4171,4187],{},[18,4166,4167,4168,4170],{},"上传前,建议用平台 ",[21,4169,636],{}," 入口下的在线辅助工具检查输入文件。它能自动:",[75,4172,4173,4176,4179,4182],{},[56,4174,4175],{},"检测并报告重复的序列 ID;",[56,4177,4178],{},"移除非法字符、统一换行符;",[56,4180,4181],{},"把 GFF3 转换为平台所需的 BED 格式;",[56,4183,4184,4008],{},[21,4185,4186],{},"校验蛋白质 \u002F CDS \u002F GFF 三者之间的 ID 一致性",[18,4188,4189],{},"该工具也提供可下载的本地版(Windows \u002F macOS \u002F Linux),见主页 Resources 页面。",[45,4191],{},[48,4193,4195],{"id":4194},"_3-分析流程总览","3. 分析流程总览",[18,4197,4198],{},"整个分析分三步走,加一个可选的按需分析:",[480,4200,4203],{"className":4201,"code":4202,"language":485},[483],"Step 1 选择物种 \u002F 上传数据      Step 2 配置分析模块        Step 3 预览并提交\n   ├ 内置物种或上传蛋白    ──▶   ├ 模块一 直系同源(必选)  ──▶   提交后台运行\n   └ 可选:GFF \u002F CDS           ├ 模块二~五 按需开关             邮件通知结果\n                              └ 调 Basic 参数即可\n\n                          ────────  结果生成后  ────────▶\n                          第 9 章:针对感兴趣的基因簇,按需运行正选择分析\n",[487,4204,4202],{"__ignoreMap":489},[18,4206,4207],{},"每个模块的参数分两层显示:",[75,4209,4210,4216],{},[56,4211,4212,4215],{},[21,4213,4214],{},"Basic(默认展开):"," 真正需要你决策的 2–4 个关键参数。",[56,4217,4218,4221],{},[21,4219,4220],{},"Advanced(默认折叠):"," 影响精度或耗时的调优参数,绝大多数情况保持默认即可。",[15,4223,4224],{},[18,4225,4226,4229,4230,4233],{},[21,4227,4228],{},"关于线程数(Threads):"," 每个模块的 Advanced 里有线程数参数,",[21,4231,4232],{},"线上版固定为 48 且不可修改","(由平台统一调度资源)。如需自定义线程加速大规模分析,请下载本地部署版(见第 11 章)。鼠标悬停该参数会给出下载提示。",[372,4235,4237],{"id":4236},"_31-修改参数与版本重跑不覆盖旧结果orthovenn-plus-特色","3.1 修改参数与版本重跑:不覆盖旧结果(OrthoVenn Plus 特色)",[18,4239,4240,4241,4244],{},"OrthoVenn Plus 
的后台架构经过重新设计,",[21,4242,4243],{},"重跑分析不会覆盖旧结果","——这是本版本的一大特色,让你能够快速、高效地探索不同参数对结果的影响。",[18,4246,4247,4248,4251,4252,4255,4256,4259,4260,4008],{},"每当你修改一个",[21,4249,4250],{},"会影响计算结果","的参数,平台会为受影响的模块生成一个",[21,4253,4254],{},"新版本",",同时",[21,4257,4258],{},"保留旧版本供对照",";而只影响展示的调整(阈值、颜色、排序、窗口大小等)只刷新视图、瞬时完成,",[21,4261,4262],{},"不触发任何重算",[217,4264,4265,4278],{},[220,4266,4267],{},[223,4268,4269,4272,4275],{},[226,4270,4271],{},"你的修改",[226,4273,4274],{},"通常影响",[226,4276,4277],{},"是否需要重跑",[242,4279,4280,4291,4302,4313,4323,4334,4345],{},[223,4281,4282,4285,4288],{},[247,4283,4284],{},"GO p-value 过滤、表格排序、图形颜色",[247,4286,4287],{},"只影响展示",[247,4289,4290],{},"不需要(瞬时刷新视图)",[223,4292,4293,4296,4299],{},[247,4294,4295],{},"直系同源簇算法或阈值",[247,4297,4298],{},"影响 orthogroups 及几乎全部下游",[247,4300,4301],{},"重跑当前模块 + 全部下游",[223,4303,4304,4307,4310],{},[247,4305,4306],{},"物种树算法或定根方式",[247,4308,4309],{},"影响时间树和 CAFE5",[247,4311,4312],{},"重跑物种树及其下游",[223,4314,4315,4318,4320],{},[247,4316,4317],{},"分歧时间校准点 \u002F Root Age",[247,4319,4309],{},[247,4321,4322],{},"重跑时间树和 CAFE5",[223,4324,4325,4328,4331],{},[247,4326,4327],{},"CAFE5 的 k 值",[247,4329,4330],{},"只影响扩张\u002F收缩分析",[247,4332,4333],{},"只重跑 CAFE5",[223,4335,4336,4339,4342],{},[247,4337,4338],{},"正选择方法或前景枝",[247,4340,4341],{},"只影响所选 cluster 的正选择结果",[247,4343,4344],{},"只重跑对应的正选择任务",[223,4346,4347,4350,4353],{},[247,4348,4349],{},"共线性展示窗口大小",[247,4351,4352],{},"只影响视图",[247,4354,4355],{},"通常不需要重跑",[18,4357,4358],{},[21,4359,4360],{},"这套机制带来的优势:",[75,4362,4363,4369,4375,4381],{},[56,4364,4365,4368],{},[21,4366,4367],{},"不丢历史、可对照:"," 旧结果始终保留,你可以并排比较不同参数(如 Inflation 1.5 vs 1.2、CAFE5 k=2 vs Base、不同建树方法)的结果,从中挑出最合理的。",[56,4370,4371,4374],{},[21,4372,4373],{},"只算该算的、省时省算力:"," 平台只重跑受影响的模块及其下游,未受影响的上游结果自动复用,无需从头再来。",[56,4376,4377,4380],{},[21,4378,4379],{},"完全可复现:"," 每个结果版本都绑定「输入数据 + 标准化参数 + 工具版本」三要素,任何人都能据此精确复现。",[56,4382,4383,4386],{},[21,4384,4385],{},"展示调整零等待:"," 
阈值、颜色、排序、窗口这类只影响视图的改动瞬时生效,不消耗计算资源。",[15,4388,4389],{},[18,4390,4391,4392,4395],{},"提示:在结果页可以看到每个模块的",[21,4393,4394],{},"版本列表",",切换版本即可对照不同参数下的结果。",[45,4397],{},[48,4399,4401],{"id":4400},"_4-第一步选择物种与上传数据","4. 第一步:选择物种与上传数据",[372,4403,4405],{"id":4404},"_41-操作步骤","4.1 操作步骤",[53,4407,4408,4416],{},[56,4409,4410,4411,4413,4414,4008],{},"进入 OrthoVenn Plus,点 ",[21,4412,880],{}," 新建项目,进入 ",[21,4415,884],{},[56,4417,4418,4419,4421,4424,4432,4434,4437],{},"选择数据来源:",[26,4420],{},[21,4422,4423],{},"方式一 · 从内置库选择(Cloud Species)",[75,4425,4426,4429],{},[56,4427,4428],{},"在搜索框输入物种拉丁名(支持模糊搜索),点击即可加入分析列表。",[56,4430,4431],{},"无需上传任何文件,序列已标准化。",[26,4433],{},[21,4435,4436],{},"方式二 · 上传自定义数据(Custom Upload)",[75,4438,4439,4445],{},[56,4440,4441,4442,4092],{},"把蛋白质 FASTA 拖入上传区,或点击选择文件,支持同时上传多个物种(",[21,4443,4444],{},"在线版最多 12 个物种",[56,4446,4447,4450],{},[21,4448,4449],{},"可选:"," 如果之后要做共线性或正选择,可在此一并上传对应的 GFF \u002F CDS;也可以之后再补。",[18,4452,4453],{},[924,4454],{"alt":926,"src":927},[53,4456,4457,4463,4469],{"start":930},[56,4458,4459,4460,4462],{},"右侧 ",[21,4461,936],{}," 区会列出已添加物种及其文件状态(蛋白 \u002F GFF \u002F CDS)。请确认数量和文件类型无误。",[56,4464,4465,4466,4468],{},"想先练手,可点 ",[21,4467,943],{}," 载入示例数据。",[56,4470,4471,4472,4474],{},"确认后点 ",[21,4473,950],{}," 进入第二步。",[372,4476,4478],{"id":4477},"_42-注意事项","4.2 注意事项",[75,4480,4481,4484,4487],{},[56,4482,4483],{},"两种方式可混用:既能选内置物种,又能上传自定义物种。",[56,4485,4486],{},"GFF \u002F CDS 是可选的,留空不影响直系同源、进化树等核心分析。",[56,4488,4489],{},"文件格式有误时,平台会在上传时提示。",[45,4491],{},[48,4493,4495],{"id":4494},"_5-第二步配置分析模块","5. 
第二步:配置分析模块",[18,4497,4498,4499,4502],{},"左侧是分析模块列表,",[21,4500,4501],{},"模块一(直系同源簇分析)为必选",",模块二~五可按需开关。点模块名切换右侧参数面板。",[18,4504,4505],{},[924,4506],{"alt":994,"src":995},[18,4508,4509,4512],{},[21,4510,4511],{},"模块依赖关系","(开启下游模块会自动包含其依赖):",[480,4514,4517],{"className":4515,"code":4516,"language":485},[483],"模块一(直系同源) ──▶ 模块二(进化树) ──▶ 模块三(分歧时间) ──▶ 模块四(基因家族)\n模块五(共线性) 独立,只需 GFF\n",[487,4518,4516],{"__ignoreMap":489},[15,4520,4521],{},[18,4522,4523],{},"提示:正选择不在此处配置。它会在结果生成后,在结果页针对单个基因簇按需运行(见第 9 章)。",[45,4525],{},[372,4527,4529],{"id":4528},"_51-模块一-直系同源簇分析必选","5.1 模块一 · 直系同源簇分析(必选)",[18,4531,4532],{},[21,4533,4534],{},"这个模块帮你做什么?",[18,4536,4537],{},"它是整条流程的起点:把所有物种的蛋白质两两比对,按相似性分到不同的\"簇\"(orthogroups)。同一簇内的基因被认为源自共同祖先。产出:",[75,4539,4540,4546,4552],{},[56,4541,4542,4545],{},[21,4543,4544],{},"泛基因组结构:"," 哪些簇是所有物种共有的(核心基因组),哪些是某些物种独有的。",[56,4547,4548,4551],{},[21,4549,4550],{},"单拷贝直系同源基因:"," 在每个物种中都恰好一个拷贝的基因集——构建物种树最理想的数据。",[56,4553,4554,4557],{},[21,4555,4556],{},"GO 功能注释:"," 每个簇的功能分类,帮助理解其生物学意义。",[18,4559,4560],{},[924,4561],{"alt":1051,"src":1052},[1054,4563,4565],{"id":4564},"basic-参数","Basic 参数",[18,4567,4568,4571,4572],{},[21,4569,4570],{},"直系同源算法(Algorithm)"," — 默认 ",[21,4573,1066],{},[217,4575,4576,4589],{},[220,4577,4578],{},[223,4579,4580,4583,4586],{},[226,4581,4582],{},"算法",[226,4584,4585],{},"特点",[226,4587,4588],{},"适用场景",[242,4590,4591,4604,4615],{},[223,4592,4593,4598,4601],{},[247,4594,4595,4597],{},[21,4596,1066],{},"(推荐,默认)",[247,4599,4600],{},"基于基因树-物种树协调,能区分直系同源(物种形成产生)与旁系同源(基因复制产生),精度高",[247,4602,4603],{},"绝大多数比较基因组学分析",[223,4605,4606,4609,4612],{},[247,4607,4608],{},"OrthoMCL(经典)",[247,4610,4611],{},"基于全局序列比对相似性 + 马尔可夫聚类(MCL)的经典方法,通过 Inflation 值控制聚类粒度",[247,4613,4614],{},"通用场景,物种间进化距离中等;需要与经典文献结果对照时",[223,4616,4617,4620,4623],{},[247,4618,4619],{},"SonicParanoid2(高级)",[247,4621,4622],{},"针对大规模数据优化的超快算法",[247,4624,4625],{},"物种很多(>30)或快速探索",[15,4627,4628],{},[18,4629,4630,4633,4634,4636,4637,4639],{},[21,4631,4632],{},"如何选择?"," 不确定就用 
",[21,4635,1066],{},"——它是当前精度最高、最通用的方法,能明确区分直系\u002F旁系同源。若你想用最经典的 MCL 聚类方法、或需要和已有 OrthoMCL 结果对照,可选 ",[21,4638,1132],{},"(注意它对 Inflation 值较敏感,见下)。只有当物种数很多、想快速得到初步结果时,才考虑 SonicParanoid2。",[18,4641,4642,4571,4645],{},[21,4643,4644],{},"搜索灵敏度(Search Sensitivity)",[21,4646,4647],{},"标准(diamond)",[75,4649,4650,4656],{},[56,4651,4652,4655],{},[21,4653,4654],{},"标准(快):"," 适合多数场景,速度快。",[56,4657,4658,4661,4662,4665],{},[21,4659,4660],{},"超灵敏(diamond_ultra_sens,慢):"," 能找到更多远距离同源关系,适合",[21,4663,4664],{},"进化距离较远","的物种(如跨门类比较),但更耗时。",[15,4667,4668],{},[18,4669,4670,4672],{},[21,4671,4632],{}," 近缘物种用标准即可;物种亲缘较远、担心漏掉远距离同源时,选超灵敏。",[18,4674,4675,4571,4678],{},[21,4676,4677],{},"Inflation 值(MCL 聚类松紧度)",[21,4679,1174],{},[75,4681,4682,4685],{},[56,4683,4684],{},"用于基于 MCL 的聚类(OrthoMCL 以及 OrthoFinder 内部的 MCL 步骤)。控制簇的\"松紧\":值越大,簇越小越紧密;越小,簇越大越宽松。",[56,4686,4687,4689],{},[21,4688,4632],{}," 多数情况保持 1.5。若发现明显相关的基因被分到了不同簇,可降到 1.2;若一个簇里混了功能差异很大的基因,可升到 2.0。选择 OrthoMCL 时该值影响更明显。",[18,4691,4692,4571,4695],{},[21,4693,4694],{},"功能注释(Run Annotation)",[21,4696,4697],{},"开启",[75,4699,4700],{},[56,4701,4702,4703,4706],{},"开启后产出 GO 功能注释和富集分析。",[21,4704,4705],{},"强烈建议保持开启",",为下游功能解读打基础;只在想省时间时关闭。",[1054,4708,4710],{"id":4709},"advanced-参数","Advanced 参数",[217,4712,4713,4726],{},[220,4714,4715],{},[223,4716,4717,4720,4723],{},[226,4718,4719],{},"参数",[226,4721,4722],{},"默认",[226,4724,4725],{},"说明",[242,4727,4728,4738,4748,4759],{},[223,4729,4730,4733,4735],{},[247,4731,4732],{},"比对 E-value",[247,4734,1230],{},[247,4736,4737],{},"同源判定的统计显著性阈值。更宽松(1e-2)适合远缘物种;更严格(1e-10)适合近缘精细比较。多数情况无需改",[223,4739,4740,4743,4745],{},[247,4741,4742],{},"注释数据库",[247,4744,1241],{},[247,4746,4747],{},"功能注释的参考库",[223,4749,4750,4753,4756],{},[247,4751,4752],{},"GO 多重检验校正",[247,4754,4755],{},"BH(FDR)",[247,4757,4758],{},"富集分析的 p 
值校正方法",[223,4760,4761,4764,4766],{},[247,4762,4763],{},"线程数",[247,4765,1263],{},[247,4767,4768],{},"线上只读,本地可改",[1054,4770,4771],{"id":4771},"结果解读",[18,4773,4774],{},[924,4775],{"alt":1275,"src":1276},[75,4777,4778,4784,4790,4795,4801],{},[56,4779,4780,4783],{},[21,4781,4782],{},"UpSet 交集图:"," 展示物种间共享与特有的基因簇分布;点交集条可查看该交集的具体簇。",[56,4785,4786,4789],{},[21,4787,4788],{},"Venn 图:"," 经典韦恩图(适合 ≤6 个物种)。",[56,4791,4792,4794],{},[21,4793,1301],{}," 矩阵展示每个簇在各物种中的拷贝数,可按列排序找特定分布模式的簇。",[56,4796,4797,4800],{},[21,4798,4799],{},"泛基因组统计:"," 每个物种的蛋白总数、簇数、单拷贝基因数、单体基因(singleton)数。",[56,4802,4803,4806],{},[21,4804,4805],{},"单簇详情:"," 点任意簇 ID,可看物种组成、多序列比对、保守基序、簇内进化树、相似性网络、簇间关系、GO 富集——也是从这里发起正选择分析(见第 9 章)。",[18,4808,4809],{},[21,4810,4811],{},"术语速览(基因簇分类):",[217,4813,4814,4824],{},[220,4815,4816],{},[223,4817,4818,4821],{},[226,4819,4820],{},"术语",[226,4822,4823],{},"含义",[242,4825,4826,4836,4854,4871,4885,4899,4912,4922,4932],{},[223,4827,4828,4833],{},[247,4829,4830],{},[21,4831,4832],{},"Orthogroup \u002F Cluster(簇)",[247,4834,4835],{},"一组被判定为源自共同祖先基因的同源基因",[223,4837,4838,4843],{},[247,4839,4840,4842],{},[21,4841,1350],{},"(单拷贝核心簇)",[247,4844,4845,4846,4849,4850,4853],{},"在",[21,4847,4848],{},"全部物种","中",[21,4851,4852],{},"各只有一个拷贝","的直系同源簇——构建物种树最理想的数据",[223,4855,4856,4861],{},[247,4857,4858,4860],{},[21,4859,1369],{},"(多拷贝核心簇)",[247,4862,4863,4864,4866,4867,4870],{},"包含",[21,4865,4848],{},"、但至少在某些物种中有",[21,4868,4869],{},"多个拷贝","的簇",[223,4872,4873,4878],{},[247,4874,4875],{},[21,4876,4877],{},"Species-specific(物种特异簇)",[247,4879,4880,4881,4884],{},"簇内基因",[21,4882,4883],{},"全部来自同一个物种",",常与该物种的特有功能相关",[223,4886,4887,4892],{},[247,4888,4889],{},[21,4890,4891],{},"Other \u002F Orthoer(其他簇)",[247,4893,4894,4895,4898],{},"只包含",[21,4896,4897],{},"部分物种","的(多拷贝)直系同源簇",[223,4900,4901,4906],{},[247,4902,4903],{},[21,4904,4905],{},"Singletons(单例基因)",[247,4907,4908,4911],{},[21,4909,4910],{},"未参与聚类","的孤立基因(没有找到同源关系)",[223,4913,4914,4919],{},[247,4915,4916],{},[21,4917,4918],{},"Core 
genome(核心基因组)",[247,4920,4921],{},"所有物种共有的簇集合",[223,4923,4924,4929],{},[247,4925,4926],{},[21,4927,4928],{},"Pan-genome(泛基因组)",[247,4930,4931],{},"所有物种的全部簇的并集",[223,4933,4934,4939],{},[247,4935,4936],{},[21,4937,4938],{},"直系同源 vs 旁系同源",[247,4940,4941,4942,4945,4946,4949],{},"直系同源(ortholog)由",[21,4943,4944],{},"物种形成","产生,旁系同源(paralog)由",[21,4947,4948],{},"基因复制","产生",[45,4951],{},[372,4953,4955],{"id":4954},"_52-模块二-物种进化树分析","5.2 模块二 · 物种进化树分析",[18,4957,4958,4960],{},[21,4959,4534],{}," 用模块一的单拷贝直系同源基因构建物种进化树。这棵树是分歧时间、基因家族动态、正选择等下游分析的\"进化骨架\"。",[18,4962,4963,4966,4967,4008],{},[21,4964,4965],{},"开启方式:"," 在左侧勾选 ",[21,4968,1480],{},[18,4970,4971],{},[924,4972],{"alt":1486,"src":1487},[1054,4974,4565],{"id":4975},"basic-参数-1",[18,4977,4978,4571,4981],{},[21,4979,4980],{},"建树方法(Tree Method)",[21,4982,1498],{},[217,4984,4985,5001],{},[220,4986,4987],{},[223,4988,4989,4992,4995,4998],{},[226,4990,4991],{},"方法",[226,4993,4994],{},"速度",[226,4996,4997],{},"精度",[226,4999,5000],{},"适用",[242,5002,5003,5019,5032],{},[223,5004,5005,5010,5013,5016],{},[247,5006,5007,5009],{},[21,5008,1498],{},"(默认)",[247,5011,5012],{},"最快(分钟级)",[247,5014,5015],{},"良好",[247,5017,5018],{},"在线分析、快速预览、物种数多",[223,5020,5021,5023,5026,5029],{},[247,5022,1538],{},[247,5024,5025],{},"较慢(数十分钟起)",[247,5027,5028],{},"高",[247,5030,5031],{},"追求发表级最高精度;内置 ModelFinder 自动选模型",[223,5033,5034,5036,5039,5041],{},[247,5035,1552],{},[247,5037,5038],{},"较慢",[247,5040,5028],{},[247,5042,5043],{},"严格最大似然法,精度与 IQ-TREE 2 相当;适合用不同方法交叉验证拓扑",[15,5045,5046],{},[18,5047,5048,5050,5051,5053,5054,5056,5057,5059],{},[21,5049,4632],{}," 在线分析推荐 ",[21,5052,1498],{},",速度快、结果对多数研究已足够。若你要发表级的最高精度、且能接受更长等待,选 ",[21,5055,1538],{},"(内置自动模型选择);若想用另一种主流最大似然实现交叉验证你的树,可用 ",[21,5058,1552],{},"。大规模分析建议用本地版。",[18,5061,5062,4571,5065],{},[21,5063,5064],{},"根位置确定方法(Root 
Method)",[21,5066,5067],{},"Midpoint(中点根)",[217,5069,5070,5080],{},[220,5071,5072],{},[223,5073,5074,5076,5078],{},[226,5075,4991],{},[226,5077,4823],{},[226,5079,5000],{},[242,5081,5082,5094],{},[223,5083,5084,5088,5091],{},[247,5085,5086,5009],{},[21,5087,1584],{},[247,5089,5090],{},"把根放在最长路径中点",[247,5092,5093],{},"不确定外群时,快速方便",[223,5095,5096,5098,5101],{},[247,5097,1615],{},[247,5099,5100],{},"指定一个已知外群物种作为根",[247,5102,5103],{},"有明确外群时,结果更可靠",[15,5105,5106],{},[18,5107,5108,5109,5112,5113,5115],{},"选 Outgroup 时,下方会出现",[21,5110,5111],{},"外群物种选择器","。外群应是与所有研究物种都有明确亲缘、但位于研究类群之外的物种(如研究蔷薇科时用葡萄 ",[1632,5114,1634],{}," 作外群)。",[1054,5117,4710],{"id":5118},"advanced-参数-1",[217,5120,5121,5131],{},[220,5122,5123],{},[223,5124,5125,5127,5129],{},[226,5126,4719],{},[226,5128,4722],{},[226,5130,4725],{},[242,5132,5133,5143,5154,5164],{},[223,5134,5135,5138,5140],{},[247,5136,5137],{},"多序列比对算法",[247,5139,1660],{},[247,5141,5142],{},"大数据可选 MUSCLE v5 Super5",[223,5144,5145,5148,5151],{},[247,5146,5147],{},"替换模型",[247,5149,5150],{},"自动检测(MFP)\u002F LG+CAT",[247,5152,5153],{},"见下方说明",[223,5155,5156,5159,5161],{},[247,5157,5158],{},"比对修剪",[247,5160,1682],{},[247,5162,5163],{},"可选 gappyout \u002F none(none 仅专家)",[223,5165,5166,5168,5170],{},[247,5167,4763],{},[247,5169,1263],{},[247,5171,4768],{},[15,5173,5174,5180],{},[18,5175,5176,5179],{},[21,5177,5178],{},"关于替换模型与自动检测(MFP):"," 替换模型描述了氨基酸在进化中替换的频率与模式,选错可能导致错误的拓扑。",[75,5181,5182,5196],{},[56,5183,5184,5185,5187,5188,5191,5192,5195],{},"选 ",[21,5186,1709],{}," 时,默认用 ",[21,5189,5190],{},"MFP(ModelFinder Plus)自动检测",":算法依据信息论准则(BIC \u002F AIC)从一系列候选模型中自动挑出最适合你数据的模型,",[21,5193,5194],{},"无需手动指定","。若你已知合适的模型,也可手动填写以省去检测时间。",[56,5197,5184,5198,5200,5201,5203],{},[21,5199,1498],{}," 时使用固定的 ",[21,5202,1726],{}," 模型(也可切换 WAG \u002F JTT),不做模型搜索,因此更快。",[1054,5205,4771],{"id":5206},"结果解读-1",[18,5208,5209],{},[924,5210],{"alt":1735,"src":1736},[75,5212,5213,5223],{},[56,5214,5215,5216,5219,5220,4008],{},"悬停树节点可查看 bootstrap 
支持度:",[21,5217,5218],{},"≥95% 高度可靠",",70%–95% 中等,",[21,5221,5222],{},"\u003C70% 需谨慎解读",[56,5224,5225],{},"树可导出为 Newick 文本和 SVG \u002F PNG 图。",[18,5227,5228],{},[21,5229,5230],{},"术语速览:",[217,5232,5233,5241],{},[220,5234,5235],{},[223,5236,5237,5239],{},[226,5238,4820],{},[226,5240,4823],{},[242,5242,5243,5253,5263,5272],{},[223,5244,5245,5250],{},[247,5246,5247],{},[21,5248,5249],{},"Topology(拓扑)",[247,5251,5252],{},"树的分支结构,即谁与谁更近缘的关系图样",[223,5254,5255,5260],{},[247,5256,5257],{},[21,5258,5259],{},"Bootstrap 支持度",[247,5261,5262],{},"通过重抽样评估某个分支可信度的百分比;越高越可靠",[223,5264,5265,5269],{},[247,5266,5267],{},[21,5268,1795],{},[247,5270,5271],{},"用括号嵌套表示树结构的标准文本格式",[223,5273,5274,5279],{},[247,5275,5276],{},[21,5277,5278],{},"单拷贝直系同源基因",[247,5280,5281],{},"来自模块一的 1:1:1 簇,是建树的输入",[45,5283],{},[372,5285,5287],{"id":5286},"_53-模块三-分歧时间推断","5.3 模块三 · 分歧时间推断",[18,5289,5290,5292],{},[21,5291,4534],{}," 把模块二的\"相对\"进化树,转化为标注了绝对地质年代的\"时间树\"。通过化石校准点 + 分子钟模型,估算每个节点的分化时间,从而把基因组进化事件与地质、气候事件对应起来。",[18,5294,5295,5297,5298,5300],{},[21,5296,4965],{}," 勾选 ",[21,5299,1826],{},"(依赖模块二)。",[18,5302,5303],{},[924,5304],{"alt":1832,"src":1833},[1054,5306,4565],{"id":5307},"basic-参数-2",[18,5309,5310,4571,5313],{},[21,5311,5312],{},"方法(Method)",[21,5314,1843],{},[217,5316,5317,5330],{},[220,5318,5319],{},[223,5320,5321,5323,5325,5328],{},[226,5322,4991],{},[226,5324,4994],{},[226,5326,5327],{},"输出",[226,5329,5000],{},[242,5331,5332,5350],{},[223,5333,5334,5338,5341,5347],{},[247,5335,5336,5009],{},[21,5337,1843],{},[247,5339,5340],{},"快(分钟级)",[247,5342,5343,5344],{},"各节点时间的",[21,5345,5346],{},"点估计",[247,5348,5349],{},"快速获取分歧时间框架",[223,5351,5352,5354,5356,5362],{},[247,5353,1883],{},[247,5355,5025],{},[247,5357,5358,5359],{},"时间 + 95% HPD",[21,5360,5361],{},"置信区间",[247,5363,5364],{},"发表级、需要不确定性区间",[15,5366,5367],{},[18,5368,5369,5371,5372,5374],{},[21,5370,4632],{}," 想快速了解大致分化时间,用 ",[21,5373,1843],{},"。需要发表、且想给出每个节点的 95% 置信区间,用 
MCMCTree(更慢,建议大规模分析用本地版)。",[18,5376,5377,1910,5380],{},[21,5378,5379],{},"化石校准点(Calibration Points)",[21,5381,5382],{},"必填,这是本模块最关键的输入",[75,5384,5385,5397,5422],{},[56,5386,5387,5388,5390,5391,5393,5394,4008],{},"选一对物种,平台可从 ",[21,5389,1921],{}," 数据库自动检索该物种对的分化时间。点 ",[21,5392,1925],{}," 可添加多组,",[21,5395,5396],{},"建议至少 1–2 个",[56,5398,5399,5402,5403],{},[21,5400,5401],{},"两种方法使用校准信息的方式不同",":\n",[75,5404,5405,5414],{},[56,5406,5407,5409,5410,5413],{},[21,5408,1843],{}," 使用 TimeTree 查询到的分化时间",[21,5411,5412],{},"中位值","(单个点)。R8s 不传播校准的不确定性、只给点估计,用中位值更稳定、也更诚实。",[56,5415,5416,5409,5418,5421],{},[21,5417,1883],{},[21,5419,5420],{},"范围","(最小值 Min \u002F 最大值 Max),并在此区间内进行贝叶斯采样,从而给出 95% HPD 置信区间。",[56,5423,5424,5427,5428],{},[21,5425,5426],{},"来源建议:"," 优先用本领域公认、有古生物\u002F地质证据的校准点。",[21,5429,5430],{},"错误的校准点会让整棵时间树系统性偏差。",[18,5432,5433],{},[21,5434,5435],{},"设置示例(蔷薇科):",[217,5437,5438,5452],{},[220,5439,5440],{},[223,5441,5442,5445,5447,5449],{},[226,5443,5444],{},"物种对",[226,5446,1979],{},[226,5448,1982],{},[226,5450,5451],{},"依据",[242,5453,5454,5469],{},[223,5455,5456,5462,5464,5466],{},[247,5457,5458,1995,5460],{},[1632,5459,1994],{},[1632,5461,1998],{},[247,5463,2001],{},[247,5465,2004],{},[247,5467,5468],{},"化石记录",[223,5470,5471,5477,5479,5481],{},[247,5472,5473,1995,5475],{},[1632,5474,1994],{},[1632,5476,1634],{},[247,5478,2018],{},[247,5480,2021],{},[247,5482,5468],{},[18,5484,5485],{},[21,5486,5487],{},"根节点年龄(Root Age, Ma)",[75,5489,5490,5499,5509],{},[56,5491,5492,5495,5496,4008],{},[21,5493,5494],{},"默认值:"," 平台自动设为当前树中",[21,5497,5498],{},"最大分支分歧时间的 1.5 倍",[56,5500,5501,5504,5505,5508],{},[21,5502,5503],{},"⚠️ 强烈建议手动确认此值。"," Root Age 是整棵树分歧时间的总约束,它",[21,5506,5507],{},"必须大于树中最古老(根)节点的真实分化时间",",否则会把整棵时间树压缩、得到错误的年代估计。",[56,5510,5511,5514,5515],{},[21,5512,5513],{},"如果不确定具体数值",",宁可填一个偏大的值——它只作为分歧时间的上界约束,偏大不会显著扭曲结果,而偏小一定会出错。可参考 
TimeTree(",[33,5516,5519],{"href":5517,"rel":5518},"https:\u002F\u002Ftimetree.org",[37],"https:\u002F\u002Ftimetree.org)查询你的类群根节点的大致年代。",[1054,5521,4710],{"id":5522},"advanced-参数-2",[217,5524,5525,5535],{},[220,5526,5527],{},[223,5528,5529,5531,5533],{},[226,5530,4719],{},[226,5532,4722],{},[226,5534,4725],{},[242,5536,5537,5548,5559],{},[223,5538,5539,5542,5545],{},[247,5540,5541],{},"交叉验证(R8s)",[247,5543,5544],{},"关",[247,5546,5547],{},"打开更慢",[223,5549,5550,5553,5556],{},[247,5551,5552],{},"链长 \u002F 运算复杂度(仅 MCMCTree)",[247,5554,5555],{},"标准",[247,5557,5558],{},"区间过宽或多次结果差异大时,调高以确保 MCMC 收敛",[223,5560,5561,5563,5565],{},[247,5562,4763],{},[247,5564,1263],{},[247,5566,4768],{},[1054,5568,4771],{"id":5569},"结果解读-2",[18,5571,5572],{},[924,5573],{"alt":2116,"src":2117},[75,5575,5576],{},[56,5577,5578,5581],{},[21,5579,5580],{},"时间树:"," 一棵超度量树,横轴为地质时间(百万年前);MCMCTree 会给出每个节点的 95% HPD 区间。",[18,5583,5584],{},[21,5585,5230],{},[217,5587,5588,5596],{},[220,5589,5590],{},[223,5591,5592,5594],{},[226,5593,4820],{},[226,5595,4823],{},[242,5597,5598,5608,5618,5628,5638],{},[223,5599,5600,5605],{},[247,5601,5602],{},[21,5603,5604],{},"Ultrametric tree(超度量树)",[247,5606,5607],{},"所有叶子到根的距离相等的树,即把分支长度换算成了时间",[223,5609,5610,5615],{},[247,5611,5612],{},[21,5613,5614],{},"Ma \u002F Mya(百万年 \u002F 百万年前)",[247,5616,5617],{},"时间单位,1 Ma = 100 万年",[223,5619,5620,5625],{},[247,5621,5622],{},[21,5623,5624],{},"Molecular clock(分子钟)",[247,5626,5627],{},"用序列变化速率推断时间的模型假设",[223,5629,5630,5635],{},[247,5631,5632],{},[21,5633,5634],{},"95% HPD 区间",[247,5636,5637],{},"最高后验密度区间,贝叶斯方法(MCMCTree)给出的分歧时间置信范围",[223,5639,5640,5645],{},[247,5641,5642],{},[21,5643,5644],{},"Calibration point(校准点)",[247,5646,5647],{},"已知的某物种对分化时间,用来把相对树标定到绝对年代",[45,5649],{},[372,5651,5653],{"id":5652},"_54-模块四-基因家族收缩与扩张分析","5.4 模块四 · 基因家族收缩与扩张分析",[18,5655,5656,5658],{},[21,5657,4534]," 
基于模块三的时间树和模块一的基因计数,识别在物种树各分支上发生了统计显著扩张(基因增多)或收缩(基因减少)的基因家族。这类变化常与适应性进化、功能创新或退化相关——例如抗病基因家族在某栽培谱系上显著扩张,可能提示驯化过程中的选择。",[18,5660,5661,5297,5663,5665,5666,5668],{},[21,5662,4965],{},[21,5664,2213],{},"(依赖模块一和模块三)。算法使用 ",[21,5667,2217],{},"(基于随机出生-死亡模型)。",[18,5670,5671],{},[924,5672],{"alt":2223,"src":2224},[1054,5674,4565],{"id":5675},"basic-参数-3",[18,5677,5678,5681,5682,5685,5686],{},[21,5679,5680],{},"k 值(基因家族间速率异质性)"," — 决定使用 ",[21,5683,5684],{},"Base 模型","还是 ",[21,5687,5688],{},"Gamma 模型",[217,5690,5691,5706],{},[220,5692,5693],{},[223,5694,5695,5698,5701,5703],{},[226,5696,5697],{},"设置",[226,5699,5700],{},"启用的模型",[226,5702,4823],{},[226,5704,5705],{},"何时使用",[242,5707,5708,5725,5741],{},[223,5709,5710,5715,5719,5722],{},[247,5711,5712],{},[21,5713,5714],{},"k 留空(none)",[247,5716,5717],{},[21,5718,5684],{},[247,5720,5721],{},"假设所有基因家族的进化速率(λ)完全相同",[247,5723,5724],{},"数据少、初步探索;或希望\"失败时能被明显察觉\"的稳健分析",[223,5726,5727,5731,5735,5738],{},[247,5728,5729],{},[21,5730,2284],{},[247,5732,5733],{},[21,5734,5688],{},[247,5736,5737],{},"允许家族速率服从 2 类 gamma 分布",[247,5739,5740],{},"多数场景的常用选择",[223,5742,5743,5748,5752,5755],{},[247,5744,5745],{},[21,5746,5747],{},"k = 3 及以上",[247,5749,5750],{},[21,5751,5688],{},[247,5753,5754],{},"更多速率类别,拟合更精细",[247,5756,5757],{},"数据量大、家族速率差异明显",[15,5759,5760,5774],{},[18,5761,5762,5765,5766,5769,5770,5773],{},[21,5763,5764],{},"机制说明:"," 只有在",[21,5767,5768],{},"设置了 k 值","时才会启用 Gamma 模型(允许不同家族以不同速率演化);",[21,5771,5772],{},"不设 k(留空)则使用 Base 模型","(单一速率)。",[18,5775,5776,5778,5779,5781],{},[21,5777,4632],{}," 真实数据里不同家族进化速率几乎必然不同(免疫基因快、核糖体蛋白慢),因此 ",[21,5780,2335],{}," 的 Gamma 模型通常比 Base 更合理,建议从 k=2 开始。但 Gamma 模型可能在不收敛时\"静默失败\",而 Base 模型的问题更容易被察觉——如果你需要一个最稳健、可诊断的基线结果,可以先用 Base(k 留空)跑一遍对照。",[18,5783,5784,4571,5787,5789],{},[21,5785,5786],{},"使用 Poisson 根分布(Use 
Poisson)",[21,5788,4697],{},",推荐保持。",[1054,5791,4710],{"id":5792},"advanced-参数-3",[217,5794,5795,5805],{},[220,5796,5797],{},[223,5798,5799,5801,5803],{},[226,5800,4719],{},[226,5802,4722],{},[226,5804,4725],{},[242,5806,5807,5817,5828],{},[223,5808,5809,5812,5814],{},[247,5810,5811],{},"最大家族大小",[247,5813,2387],{},[247,5815,5816],{},"过滤极大家族,避免模型不收敛",[223,5818,5819,5822,5825],{},[247,5820,5821],{},"误差模型",[247,5823,5824],{},"无",[247,5826,5827],{},"可选(专家);留空时平台自动降级重试",[223,5829,5830,5832,5834],{},[247,5831,4763],{},[247,5833,1263],{},[247,5835,4768],{},[15,5837,5838],{},[18,5839,5840,5843,5844,5847,5848,5851],{},[21,5841,5842],{},"显著性阈值(p-value,默认 0.05):"," 用于判定哪些家族在某分支上的大小变化达到",[21,5845,5846],{},"统计显著","。这一阈值决定了后续 GO 富集分析所用的家族集合(见结果解读)。在",[21,5849,5850],{},"结果页","调整该值只刷新视图、不重跑分析;更严格设 0.01,想多看候选家族设 0.10。",[1054,5853,4771],{"id":5854},"结果解读-3",[18,5856,5857],{},[924,5858],{"alt":2433,"src":2434},[75,5860,5861,5880,5883,5893],{},[56,5862,5863,5864,5867,5868,5871,5872,5875,5876,5879],{},"物种树每个分支上标两个数:",[21,5865,5866],{},"红色 +"," 为该分支上",[21,5869,5870],{},"全部发生扩张","的家族数,",[21,5873,5874],{},"蓝色 −"," 为全部发生收缩的家族数。注意这是",[21,5877,5878],{},"描述性计数","(统计该分支上所有大小变化的家族),不限于统计显著的那些。",[56,5881,5882],{},"点分支上的数字 → 查看该分支上发生变化的家族列表;点家族 ID → 查看其各物种拷贝数、成员基因和 GO 注释。",[56,5884,5885,5888,5889,5892],{},[21,5886,5887],{},"GO 富集分析",":当你点击某个扩张\u002F收缩节点查看其 GO 富集时,平台",[21,5890,5891],{},"仅对该节点上 p \u003C 0.05 的显著家族(OG 簇)做富集分析",",以保证富集结果反映的是真正显著的进化事件。",[56,5894,5895],{},"可重点检查通向目标物种的末端分支上扩张的家族,看功能是否与已知表型相关。",[18,5897,5898],{},[21,5899,5230],{},[217,5901,5902,5910],{},[220,5903,5904],{},[223,5905,5906,5908],{},[226,5907,4820],{},[226,5909,4823],{},[242,5911,5912,5922,5932,5942,5952],{},[223,5913,5914,5919],{},[247,5915,5916],{},[21,5917,5918],{},"Expansion \u002F Contraction(扩张 \u002F 收缩)",[247,5920,5921],{},"一个基因家族在某分支上拷贝数增多 \u002F 减少",[223,5923,5924,5929],{},[247,5925,5926],{},[21,5927,5928],{},"Birth-death model(出生-死亡模型)",[247,5930,5931],{},"CAFE5 
用来描述基因获得(birth)与丢失(death)的统计模型",[223,5933,5934,5939],{},[247,5935,5936],{},[21,5937,5938],{},"λ(lambda)",[247,5940,5941],{},"基因获得\u002F丢失速率;Base 模型全树一个 λ,Gamma 模型允许家族间不同",[223,5943,5944,5949],{},[247,5945,5946],{},[21,5947,5948],{},"Base vs Gamma 模型",[247,5950,5951],{},"见 Basic 参数 k 的说明:k 留空用 Base,设 k 用 Gamma",[223,5953,5954,5959],{},[247,5955,5956],{},[21,5957,5958],{},"显著家族",[247,5960,5961],{},"p \u003C 0.05 的家族;GO 富集只用这部分",[45,5963],{},[372,5965,5967],{"id":5966},"_55-模块五-染色体共线性分析","5.5 模块五 · 染色体共线性分析",[18,5969,5970,5972],{},[21,5971,4534],{}," 分析物种间在染色体水平的结构保守性。若两个物种的某些染色体区段含相同基因且排列顺序大致一致,则称这两段\"共线\"。可揭示:染色体结构保守程度、大规模重排(倒位\u002F易位\u002F融合\u002F裂解)、全基因组复制(WGD)痕迹、特定基因家族的染色体分布。",[18,5974,5975,5978,5979,5982],{},[21,5976,5977],{},"数据需求:"," 需上传 ",[21,5980,5981],{},"GFF 注释文件","(见 2.5)。",[15,5984,5985,5991],{},[18,5986,5987,5990],{},[21,5988,5989],{},"⚠️ 务必使用染色体级别(chromosome-level)的注释文件,且染色体数目种类 \u003C 50。"," 共线性分析依赖染色体作为坐标轴,过多的序列片段会让桑基图无法解读。",[75,5992,5993,6004],{},[56,5994,5995,5996,5999,6000,6003],{},"如果你的注释是 ",[21,5997,5998],{},"scaffold \u002F contig 级别","(片段数量很多),请先筛选,",[21,6001,6002],{},"仅保留最长的前 50 条片段","再上传。",[56,6005,6006],{},"染色体级别的基因组(如已挂载到染色体的参考基因组)可直接使用。",[18,6008,6009,5297,6011,4008],{},[21,6010,4965],{},[21,6012,2588],{},[18,6014,6015],{},[924,6016],{"alt":2593,"src":2594},[1054,6018,4565],{"id":6019},"basic-参数-4",[18,6021,6022,4571,6025],{},[21,6023,6024],{},"运行全部物种对(Run All Pairs)",[21,6026,6027],{},"开启(小项目)",[75,6029,6030,6033],{},[56,6031,6032],{},"开启:对所有物种两两做共线性。物种少时推荐。",[56,6034,6035],{},"关闭:出现物种对选择器,只跑你选的物种对。物种很多时建议关闭以省时。",[1054,6037,4710],{"id":6038},"advanced-参数-4",[217,6040,6041,6051],{},[220,6042,6043],{},[223,6044,6045,6047,6049],{},[226,6046,4719],{},[226,6048,4722],{},[226,6050,4725],{},[242,6052,6053,6063,6073,6083],{},[223,6054,6055,6058,6060],{},[247,6056,6057],{},"Match Size(-s)",[247,6059,2638],{},[247,6061,6062],{},"共线性区块的最少锚点基因数",[223,6064,6065,6068,6070],{},[247,6066,6067],{},"Max 
Gaps(-m)",[247,6069,2649],{},[247,6071,6072],{},"区块内允许的最大间隔",[223,6074,6075,6078,6080],{},[247,6076,6077],{},"Anchor E-value(-e)",[247,6079,1230],{},[247,6081,6082],{},"锚点比对阈值",[223,6084,6085,6087,6089],{},[247,6086,4763],{},[247,6088,1263],{},[247,6090,4768],{},[15,6092,6093],{},[18,6094,6095,6098,6099,6101],{},[21,6096,6097],{},"上下游基因窗口"," 在",[21,6100,5850],{},"调整,只刷新视图。",[1054,6103,4771],{"id":6104},"结果解读-4",[18,6106,6107],{},[924,6108],{"alt":2689,"src":2690},[75,6110,6111,6117],{},[56,6112,6113,6116],{},[21,6114,6115],{},"桑基图:"," 左右两侧为两物种的染色体,连线为共线性区块;连线越密集,结构越保守;中断与交叉代表重排事件。",[56,6118,6119,6122],{},[21,6120,6121],{},"基因搜索高亮(特色):"," 在搜索框输入基因 ID(例如模块四中发现的扩张家族成员),图中会高亮包含这些基因的共线性区块——把基因家族动态和染色体结构变化关联起来。",[18,6124,6125],{},[21,6126,5230],{},[217,6128,6129,6137],{},[220,6130,6131],{},[223,6132,6133,6135],{},[226,6134,4820],{},[226,6136,4823],{},[242,6138,6139,6149,6159,6169,6179,6189],{},[223,6140,6141,6146],{},[247,6142,6143],{},[21,6144,6145],{},"Collinearity \u002F Synteny(共线性)",[247,6147,6148],{},"两物种染色体区段含相同基因且排列顺序大致一致",[223,6150,6151,6156],{},[247,6152,6153],{},[21,6154,6155],{},"Collinear block(共线性区块)",[247,6157,6158],{},"一段被判定为保守的、成串的同源基因区域",[223,6160,6161,6166],{},[247,6162,6163],{},[21,6164,6165],{},"Anchor(锚点)",[247,6167,6168],{},"区块内成对的同源基因,是判定共线性的依据",[223,6170,6171,6176],{},[247,6172,6173],{},[21,6174,6175],{},"Sankey diagram(桑基图)",[247,6177,6178],{},"用连线展示两物种染色体间共线关系的图",[223,6180,6181,6186],{},[247,6182,6183],{},[21,6184,6185],{},"Rearrangement(重排)",[247,6187,6188],{},"倒位、易位、融合、裂解等染色体结构变化",[223,6190,6191,6196],{},[247,6192,6193],{},[21,6194,6195],{},"WGD(全基因组复制)",[247,6197,6198],{},"whole-genome duplication,会在共线性中留下成倍区块的痕迹",[45,6200],{},[48,6202,6204],{"id":6203},"_6-第三步预览与提交","6. 
第三步:预览与提交",[53,6206,6207,6213,6219,6226],{},[56,6208,6209,6210,6212],{},"配置好所需模块后,点 ",[21,6211,2794],{}," 预览任务配置概要。",[56,6214,6215,6216,6218],{},"确认无误后点 ",[21,6217,2801],{}," 提交。",[56,6220,6221,6222,6225],{},"页面显示唯一",[21,6223,6224],{},"任务 ID",",请保存以便查询进度。",[56,6227,6228,6229,6232],{},"任务在后台运行,",[21,6230,6231],{},"可关闭浏览器",";完成后系统会发邮件通知,含结果页链接。",[45,6234],{},[48,6236,6238],{"id":6237},"_7-查看进度与历史任务","7. 查看进度与历史任务",[75,6240,6241,6247],{},[56,6242,6243,6244,6246],{},"点 ",[21,6245,2837],{}," 查看所有任务及状态(排队中 \u002F 运行中 \u002F 已完成 \u002F 失败)。",[56,6248,6249],{},"点任务 ID 进入结果页。",[45,6251],{},[48,6253,6255],{"id":6254},"_8-结果解读与导出","8. 结果解读与导出",[372,6257,6259],{"id":6258},"_81-交互式探索","8.1 交互式探索",[18,6261,6262,6263,6266,6267,6270,6271,6274,6275,6278,6279,6282],{},"所有结果都是交互式可视化,你可以:",[21,6264,6265],{},"点击","图中元素(簇、树节点、桑基图区块)看详情;",[21,6268,6269],{},"悬停","看具体数值(支持度、分歧时间、p-value);对表格",[21,6272,6273],{},"筛选与排序",";",[21,6276,6277],{},"搜索","基因 ID;",[21,6280,6281],{},"缩放与拖拽","图形。",[372,6284,6286],{"id":6285},"_82-导出","8.2 导出",[75,6288,6289,6300,6306],{},[56,6290,6291,6294,6295,6297,6298,4008],{},[21,6292,6293],{},"图形:"," 所有图表可导出 ",[21,6296,2889],{},"(矢量,适合出版)或 ",[21,6299,2893],{},[56,6301,6302,6305],{},[21,6303,6304],{},"数据:"," 基因簇列表、Newick 树文件、统计表(TSV \u002F CSV)等可下载。",[56,6307,6308,6311,6312,6314,6315,4008],{},[21,6309,6310],{},"云端:"," 支持一键导出到 ",[21,6313,2914],{}," 或 ",[21,6316,2918],{},[372,6318,6320],{"id":6319},"_83-分析报告可重复性","8.3 分析报告(可重复性)",[18,6322,6323],{},"每次分析自动生成报告,记录所用参数、集成工具的版本、完整命令历史,便于他人用相同数据和参数独立复现。",[45,6325],{},[48,6327,6329],{"id":6328},"_9-基因簇正选择压力分析结果生成后按需运行","9. 
基因簇正选择压力分析(结果生成后按需运行)",[18,6331,6332,6335],{},[21,6333,6334],{},"这个分析帮你做什么?"," 检测哪些基因经历了正选择(适应性进化)。分子信号是非同义替换率显著高于同义替换率,即 ω = dN\u002FdS > 1——意味着自然选择\"偏好\"改变蛋白功能的突变,常与对新环境的适应有关。",[15,6337,6338],{},[18,6339,6340,6343,6344,6347,6348,6351],{},[21,6341,6342],{},"它和前五个模块不同:"," 正选择针对的是",[21,6345,6346],{},"单个基因簇",",需要你先看到结果、再挑感兴趣的簇(如显著扩张的家族、GO 富集显著的簇)来分析。所以它",[21,6349,6350],{},"不在提交向导里",",而是在结果页按需触发。",[372,6353,6355],{"id":6354},"_91-准备-cds","9.1 准备 CDS",[18,6357,6358,6359,6361],{},"本分析需要该物种的 ",[21,6360,3979],{},"(见 2.4)。如果上传物种时没传 CDS,可随时在物种数据里补充 CDS——后补只解锁正选择,不影响已完成的其它结果。内置库物种的 CDS 已就绪。",[372,6363,6365],{"id":6364},"_92-入口","9.2 入口",[18,6367,4845,6368,6371],{},[21,6369,6370],{},"簇详情页","点击\"正选择压力分析\",或在结果亮点(CAFE5 显著家族、GO 富集簇、搜索命中簇)上直接发起。",[372,6373,6375],{"id":6374},"_93-选择你的科学问题关键","9.3 选择你的科学问题(关键)",[18,6377,6378],{},[924,6379],{"alt":2983,"src":2984},[18,6381,6382,6383,6385],{},"弹窗顶部问:",[21,6384,2990],{}," 你不需要懂工具内部差异,只要选你想回答的问题——算法名作为副标题显示。",[217,6387,6388,6404],{},[220,6389,6390],{},[223,6391,6392,6395,6398,6401],{},[226,6393,6394],{},"你想回答的问题(界面文案)",[226,6396,6397],{},"对应方法",[226,6399,6400],{},"输出粒度",[226,6402,6403],{},"是否需额外输入",[242,6405,6406,6427,6443,6459],{},[223,6407,6408,6415,6421,6424],{},[247,6409,6410,6412,6414],{},[21,6411,3017],{},[26,6413],{},"哪些物种谱系(分支)经历过正选择?",[247,6416,6417,6418,3024],{},"HyPhy aBSREL(",[21,6419,6420],{},"推荐,默认",[247,6422,6423],{},"分支级",[247,6425,6426],{},"否",[223,6428,6429,6436,6438,6441],{},[247,6430,6431,6433,6435],{},[21,6432,3037],{},[26,6434],{},"哪些氨基酸位点经历过 episodic 正选择?",[247,6437,3040],{},[247,6439,6440],{},"位点级",[247,6442,6426],{},[223,6444,6445,6452,6455,6457],{},[247,6446,6447,6449,6451],{},[21,6448,3052],{},[26,6450],{},"这个家族整体是否存在正选择位点?",[247,6453,6454],{},"PAML M7 vs M8(专家)",[247,6456,6440],{},[247,6458,6426],{},[223,6460,6461,6468,6471,6473],{},[247,6462,6463,6465,6467],{},[21,6464,3066],{},[26,6466],{},"我指定的某些分支上,哪些位点正选择?",[247,6469,6470],{},"PAML 
branch-site(专家)",[247,6472,6440],{},[247,6474,6475],{},[21,6476,6477],{},"需在树上选前景枝",[15,6479,6480],{},[18,6481,6482,6485,6486,6488],{},[21,6483,6484],{},"不确定选哪个?"," 先用 ",[21,6487,3087],{},"——它最快、最稳健,适合作为第一次分析。想进一步定位到具体氨基酸位点,再用 MEME(怀疑只在部分分支上的阵发性选择)或 PAML M7\u002FM8(检测整树持续的正选择位点)。若你已经有\"某个谱系受选择\"的假说,用 PAML branch-site 并在树上标出前景枝。",[372,6490,6492],{"id":6491},"_94-其它选项","9.4 其它选项",[75,6494,6495,6505,6511,6517,6527],{},[56,6496,6497,6500,6501,6504],{},[21,6498,6499],{},"前景枝(仅 branch-site):"," 弹出可交互的簇树,点击你假设受选择的物种或谱系;",[21,6502,6503],{},"至少选 1 个","才能提交。",[56,6506,6507,6510],{},[21,6508,6509],{},"遗传密码(Advanced):"," 默认通用密码子(Universal);线粒体、纤毛虫等非标准密码子物种需在此切换。",[56,6512,6513,6516],{},[21,6514,6515],{},"线程数(Advanced):"," 线上只读 48,本地可改。",[56,6518,6519,6522,6523,6526],{},[21,6520,6521],{},"在线规模上限:"," 单个簇在线分析",[21,6524,6525],{},"最多 100 个蛋白","。超过时请挑更小的簇,或用本地版(见第 11 章)。",[56,6528,6529],{},"首次分析某个簇时,平台会先为它构建比对和树(只需一次),稍等片刻即可。",[372,6531,6533],{"id":6532},"_95-结果解读","9.5 结果解读",[18,6535,6536],{},[924,6537],{"alt":3138,"src":3139},[217,6539,6540,6552],{},[220,6541,6542],{},[223,6543,6544,6546,6549],{},[226,6545,4991],{},[226,6547,6548],{},"结果展示",[226,6550,6551],{},"结果页可调过滤",[242,6553,6554,6563,6573,6583],{},[223,6555,6556,6558,6561],{},[247,6557,3087],{},[247,6559,6560],{},"分支级表 + 树(显著分支高亮)",[247,6562,3165],{},[223,6564,6565,6567,6570],{},[247,6566,3170],{},[247,6568,6569],{},"位点级表 + 比对(显著位点高亮)",[247,6571,6572],{},"p-value、EBF",[223,6574,6575,6577,6580],{},[247,6576,3181],{},[247,6578,6579],{},"位点级表(似然比检验 + 后验)",[247,6581,6582],{},"p-value、BEB 后验",[223,6584,6585,6587,6590],{},[247,6586,3192],{},[247,6588,6589],{},"前景枝上的位点级表",[247,6591,6582],{},[75,6593,6594,6600],{},[56,6595,6596,6597,6599],{},"被贝叶斯方法(BEB \u002F EBF)鉴定为正选择的氨基酸位点会高亮标记(如 
",[487,6598,3205],{},"),这些就是在分子水平留下适应性印记的具体位置。",[56,6601,6602],{},"调整结果页的阈值只刷新视图,不会重跑分析。",[18,6604,6605],{},[21,6606,5230],{},[217,6608,6609,6617],{},[220,6610,6611],{},[223,6612,6613,6615],{},[226,6614,4820],{},[226,6616,4823],{},[242,6618,6619,6629,6639,6649,6659,6669],{},[223,6620,6621,6626],{},[247,6622,6623],{},[21,6624,6625],{},"dN\u002FdS(ω)",[247,6627,6628],{},"非同义替换率与同义替换率之比;ω > 1 提示正选择",[223,6630,6631,6636],{},[247,6632,6633],{},[21,6634,6635],{},"Positive selection(正选择)",[247,6637,6638],{},"自然选择偏好改变蛋白功能的突变,即适应性进化",[223,6640,6641,6646],{},[247,6642,6643],{},[21,6644,6645],{},"分支级 vs 位点级",[247,6647,6648],{},"分支级回答\"哪些谱系受选择\",位点级回答\"哪些氨基酸位点受选择\"",[223,6650,6651,6656],{},[247,6652,6653],{},[21,6654,6655],{},"Foreground branch(前景枝)",[247,6657,6658],{},"branch-site 中你假设受选择、需在树上标出的分支",[223,6660,6661,6666],{},[247,6662,6663],{},[21,6664,6665],{},"Episodic selection(阵发性选择)",[247,6667,6668],{},"只在部分分支或部分时间发生的正选择",[223,6670,6671,6675],{},[247,6672,6673],{},[21,6674,3282],{},[247,6676,6677],{},"贝叶斯经验方法,给出某位点受正选择的后验概率 \u002F 经验贝叶斯因子",[15,6679,6680],{},[18,6681,6682],{},"不同方法检测的是不同类型的选择信号,建议对同一个簇尝试多种方法,获得互补证据。",[45,6684],{},[48,6686,6688],{"id":6687},"_10-在线辅助工具web-tools","10. 
在线辅助工具(Web Tools)",[18,6690,6691,6692,6694],{},"导航栏 ",[21,6693,636],{}," 下提供三个独立工具,无需提交任务即可使用。",[372,6696,6698],{"id":6697},"_101-cluster-venn通用直系同源簇韦恩图","10.1 Cluster-Venn:通用直系同源簇韦恩图",[18,6700,6701],{},"上传自定义的基因簇分组文件,直接生成交互式韦恩图 \u002F UpSet 图,无需在平台重跑聚类。适合已用第三方工具(OrthoFinder、OrthoMCL 等)完成聚类、只想快速可视化的场景。",[18,6703,6704,6707,6708,6711,6712,6714],{},[21,6705,6706],{},"输入格式(.csv \u002F .txt):"," 每行一个簇,簇内基因以空格分隔,基因名采用 ",[487,6709,6710],{},"物种名|基因ID",",平台用 ",[487,6713,3321],{}," 前的前缀识别物种归属。",[480,6716,6718],{"className":6717,"code":3325,"language":485},[483],[487,6719,3325],{"__ignoreMap":489},[372,6721,6723],{"id":6722},"_102-gff-to-bed注释格式转换","10.2 GFF to BED:注释格式转换",[18,6725,6726],{},"把标准 9 列 GFF \u002F GFF3 转换为 4 列 \u002F 5 列 BED,满足共线性分析的输入要求。从 Ensembl \u002F NCBI \u002F Phytozome 下载的 GFF3 可一键转换后上传。",[372,6728,6730],{"id":6729},"_103-newick-viewer进化树在线查看器","10.3 Newick Viewer:进化树在线查看器",[18,6732,6733],{},"上传或粘贴 Newick 树文件,在线查看和交互浏览拓扑结构。适合快速检查树文件是否正确、预览树形。",[45,6735],{},[48,6737,6739],{"id":6738},"_11-本地部署版本docker","11. 本地部署版本(Docker)",[18,6741,6742],{},"适用于对分析规模、计算速度或数据隐私有更高需求的用户。",[75,6744,6745,6751,6757,6763,6769],{},[56,6746,6747,6750],{},[21,6748,6749],{},"不限物种数量:"," 在线版为保证公共资源公平使用,限制同时分析 12 个物种;本地版取消该限制,可做数十个物种的大规模比较。",[56,6752,6753,6756],{},[21,6754,6755],{},"调用本地算力,更快:"," 任务在本地运行,无需排队;大数据集运算显著提速。",[56,6758,6759,6762],{},[21,6760,6761],{},"支持自定义线程数:"," 各模块 Advanced 里的线程数在本地版可编辑,充分利用多核 CPU(在线版固定 48 不可改)。",[56,6764,6765,6768],{},[21,6766,6767],{},"更大的正选择规模:"," 单簇蛋白数上限可调高(在线版限 100)。",[56,6770,6771,6774],{},[21,6772,6773],{},"数据隐私 \u002F 离线:"," 数据全程本地处理,适合未发表或敏感数据;部署后可离线运行。",[18,6776,6777,6780,6781,6783],{},[21,6778,6779],{},"部署方式:"," 通过 Docker 容器分发,安装与配置见主页 ",[21,6782,3391],{}," 页面。",[45,6785],{},[48,6787,6789],{"id":6788},"_12-常见问题与解决方案","12. 
常见问题与解决方案",[217,6791,6792,6805],{},[220,6793,6794],{},[223,6795,6796,6799,6802],{},[226,6797,6798],{},"问题",[226,6800,6801],{},"可能原因",[226,6803,6804],{},"解决方案",[242,6806,6807,6818,6829,6840,6851,6862,6873,6884],{},[223,6808,6809,6812,6815],{},[247,6810,6811],{},"文件上传失败或提示格式错误",[247,6813,6814],{},"FASTA 头含特殊字符;GFF\u002FCDS 的 ID 与蛋白不匹配",[247,6816,6817],{},"用在线辅助工具(第 10 章)预处理并校验 ID 一致性",[223,6819,6820,6823,6826],{},[247,6821,6822],{},"任务长时间\"排队\"",[247,6824,6825],{},"公共服务器负载高",[247,6827,6828],{},"等待邮件通知;如需更快,用 Docker 本地版",[223,6830,6831,6834,6837],{},[247,6832,6833],{},"物种树拓扑与已知关系不符",[247,6835,6836],{},"单拷贝基因太少(\u003C50);或方法精度不足",[247,6838,6839],{},"检查单拷贝基因数;改用 IQ-TREE 2 提升精度",[223,6841,6842,6845,6848],{},[247,6843,6844],{},"分歧时间区间过宽或结果不稳定",[247,6846,6847],{},"校准点不足;MCMC 未收敛",[247,6849,6850],{},"增加可靠校准点;MCMCTree 调高链长 \u002F 复杂度",[223,6852,6853,6856,6859],{},[247,6854,6855],{},"正选择未检测到显著信号",[247,6857,6858],{},"替换事件不足;信号本身弱",[247,6860,6861],{},"换用 aBSREL 检测阵发性选择;关注 ω 偏高但未达显著的基因",[223,6863,6864,6867,6870],{},[247,6865,6866],{},"正选择提示蛋白数超限",[247,6868,6869],{},"该簇 >100 蛋白(在线上限)",[247,6871,6872],{},"选更小的簇,或用本地版分析大家族",[223,6874,6875,6878,6881],{},[247,6876,6877],{},"CDS \u002F GFF 上传后提示不匹配",[247,6879,6880],{},"ID 与蛋白质文件不一致",[247,6882,6883],{},"确保 ID 完全一致;用在线工具校验(见 2.2)",[223,6885,6886,6889,6892],{},[247,6887,6888],{},"共线性图非常稀疏",[247,6890,6891],{},"物种间进化距离太远;GFF 不完整",[247,6893,6894],{},"比较更近缘的物种对;检查 GFF 的基因覆盖度",[45,6896],{},[15,6898,6899,6907],{},[18,6900,6901,6904,6905,6783],{},[21,6902,6903],{},"获取帮助:"," 如有疑问,请通过主页联系方式与我们联系,或参考 ",[21,6906,3516],{},[18,6908,6909,6912],{},[21,6910,6911],{},"引用 OrthoVenn:"," 如果你在研究中使用了 
OrthoVenn,请引用对应论文(以平台主页最新发布为准)。",{"title":489,"searchDepth":3525,"depth":3525,"links":6914},[6915,6916,6917,6925,6928,6932,6939,6940,6941,6946,6953,6958,6959],{"id":3613,"depth":3525,"text":3613},{"id":3732,"depth":3525,"text":3733},{"id":3916,"depth":3525,"text":3917,"children":6918},[6919,6920,6921,6922,6923,6924],{"id":3920,"depth":930,"text":3921},{"id":4011,"depth":930,"text":4012},{"id":4050,"depth":930,"text":4051},{"id":4099,"depth":930,"text":4100},{"id":4118,"depth":930,"text":4119},{"id":4161,"depth":930,"text":4162},{"id":4194,"depth":3525,"text":4195,"children":6926},[6927],{"id":4236,"depth":930,"text":4237},{"id":4400,"depth":3525,"text":4401,"children":6929},[6930,6931],{"id":4404,"depth":930,"text":4405},{"id":4477,"depth":930,"text":4478},{"id":4494,"depth":3525,"text":4495,"children":6933},[6934,6935,6936,6937,6938],{"id":4528,"depth":930,"text":4529},{"id":4954,"depth":930,"text":4955},{"id":5286,"depth":930,"text":5287},{"id":5652,"depth":930,"text":5653},{"id":5966,"depth":930,"text":5967},{"id":6203,"depth":3525,"text":6204},{"id":6237,"depth":3525,"text":6238},{"id":6254,"depth":3525,"text":6255,"children":6942},[6943,6944,6945],{"id":6258,"depth":930,"text":6259},{"id":6285,"depth":930,"text":6286},{"id":6319,"depth":930,"text":6320},{"id":6328,"depth":3525,"text":6329,"children":6947},[6948,6949,6950,6951,6952],{"id":6354,"depth":930,"text":6355},{"id":6364,"depth":930,"text":6365},{"id":6374,"depth":930,"text":6375},{"id":6491,"depth":930,"text":6492},{"id":6532,"depth":930,"text":6533},{"id":6687,"depth":3525,"text":6688,"children":6954},[6955,6956,6957],{"id":6697,"depth":930,"text":6698},{"id":6722,"depth":930,"text":6723},{"id":6729,"depth":930,"text":6730},{"id":6738,"depth":3525,"text":6739},{"id":6788,"depth":3525,"text":6789},{},"\u002Fdocument\u002Fusermanual_cn",{"title":3581,"description":489},"document\u002Fusermanual_cn","pJGamoudjNHvJd2FqwPGjSnnzqB4-wZFIbuRTLVy6rU",1781705893047]