diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index b3279cd..d83aae3 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -28,36 +28,31 @@ jobs: runs-on: ubuntu-latest needs: build steps: - - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.2' # Not needed with a .ruby-version file - bundler-cache: true # runs 'bundle install' and caches installed gems automatically - cache-version: 0 # https://github.com/ruby/setup-ruby#dealing-with-a-corrupted-cache - - name: install htmlproofer - run: gem install html-proofer -v 5.0.8 - # Cache HTMLProofer external URL results for 30 days - # https://github.com/gjtorikian/html-proofer/#caching-with-continuous-integration - - uses: actions/cache/restore@v3 - id: restore-cache-htmlproofer - with: - path: tmp/.htmlproofer - key: ${{ runner.os }}-html-proofer + # Cache lychee external URL results for 30 days - name: Download site uses: actions/download-artifact@v3 with: name: github-pages - run: tar -xf artifact.tar && rm artifact.tar - - name: run htmlproofer - # keep --swap-urls in sync with mkdocs.yml site_url - run: | - htmlproofer --ignore-missing-alt --cache '{"timeframe": {"external": "30d"}}' --swap-urls /informatics-website/:/ - # https://github.com/actions/cache/blob/main/save/README.md#always-save-cache - - uses: actions/cache/save@v3 - id: save-cache-htmlproofer + # https://github.com/lycheeverse/lychee-action#utilising-the-cache-feature + - name: Restore lychee cache + id: restore-cache + uses: actions/cache/restore@v3 + with: + path: .lycheecache + key: cache-lychee-${{ github.sha }} + restore-keys: cache-lychee- + - name: Run lychee + uses: lycheeverse/lychee-action@v1.8.0 + with: + args: "--base . 
--cache --max-cache-age 30d --require-https --timeout 5 --exclude 'fonts.gstatic.com' --exclude 'www.microsoft.com/en-us/microsoft-365/onedrive/online-cloud-storage' --exclude-path 404.html -- './**/*.html' './**/*.css'" + fail: true + - name: Save lychee cache + uses: actions/cache/save@v3 if: always() with: - path: tmp/.htmlproofer - key: ${{ runner.os }}-html-proofer + path: .lycheecache + key: ${{ steps.restore-cache.outputs.cache-primary-key }} deploy: if: github.ref == format('refs/heads/{0}', github.event.repository.default_branch) diff --git a/data/resources/links_and_tags.csv b/data/resources/links_and_tags.csv deleted file mode 100644 index 8071f19..0000000 --- a/data/resources/links_and_tags.csv +++ /dev/null @@ -1,58 +0,0 @@ -R for data science book,https://r4ds.hadley.nz/,R -R for data science book,https://r4ds.hadley.nz/,data science -Software carpentry novice git lesson,https://swcarpentry.github.io/git-novice/,git -Software carpentry novice git lesson,https://swcarpentry.github.io/git-novice/,unix -Data carpentry data wrangling for genomics,https://datacarpentry.org/wrangling-genomics/index.html,bioinformatics -Data carpentry data wrangling for genomics,https://datacarpentry.org/wrangling-genomics/index.html,genomics -Data carpentry data wrangling for genomics,https://datacarpentry.org/wrangling-genomics/index.html,variant calling -Missing semester of your CS education,https://missing.csail.mit.edu/,unix -Missing semester of your CS education,https://missing.csail.mit.edu/,git -Modern statistics for modern biology,https://web.stanford.edu/class/bios221/book/,statistics -Modern statistics for modern biology,https://web.stanford.edu/class/bios221/book/,bioinformatics -Modern statistics for modern biology,https://web.stanford.edu/class/bios221/book/,R -Modern statistics for modern biology,https://web.stanford.edu/class/bios221/book/,data science -Bacterial and viral bioinformatics resource center,https://www.bv-brc.org/,microbiology -Bacterial and 
viral bioinformatics resource center,https://www.bv-brc.org/,genomics -Bacterial and viral bioinformatics resource center,https://www.bv-brc.org/,metagenomics -Tidy Tuesday,https://github.com/rfordatascience/tidytuesday ,data science -Tidy Tuesday,https://github.com/rfordatascience/tidytuesday ,R -Tidy Tuesday,https://github.com/rfordatascience/tidytuesday ,python -Metagenomics wiki,https://www.metagenomics.wiki/,bioinformatics -Metagenomics wiki,https://www.metagenomics.wiki/,metagenomics -Harvard library data services,https://hlrdm.library.harvard.edu/,data science -Harvard library data services youtube series on data management,https://www.youtube.com/playlist?list=PLWIsV2soJK-VaW7IhxYyyOwiamjVV_FuB,data science -FAS RC User Codes,https://github.com/fasrc/User_Codes,unix -FAS RC User Codes,https://github.com/fasrc/User_Codes,high-performance computing -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,data science -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,bioinformatics -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,statistics -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,R -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,genomics -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,high-performance computing -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,RNA seq -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,python -Data analysis for the life sciences Harvard EdX course,http://rafalab.dfci.harvard.edu/pages/harvardx.html,machine learning -Riffomonas project: R tutorials for biologist on 
youtube,https://www.youtube.com/c/riffomonasproject,R -Riffomonas project: R tutorials for biologist on youtube,https://www.youtube.com/c/riffomonasproject,data science -Riffomonas project: R tutorials for biologist on youtube,https://www.youtube.com/c/riffomonasproject,data visualization -Bioinformatics workbook,https://bioinformaticsworkbook.org,bioinformatics -Bioinformatics workbook,https://bioinformaticsworkbook.org,genomics -Bioinformatics workbook,https://bioinformaticsworkbook.org,RNA seq -Bioinformatics workbook,https://bioinformaticsworkbook.org,genome assembly -Bioinformatics workbook,https://bioinformaticsworkbook.org,metagenomics -Bioinformatics workbook,https://bioinformaticsworkbook.org,nextflow -Bioinformatics workbook,https://bioinformaticsworkbook.org,data science -Guided workflows for R package Suerat (for RNA-seq),https://satijalab.org/seurat/articles/get_started.html,bioinformatics -Guided workflows for R package Suerat (for RNA-seq),https://satijalab.org/seurat/articles/get_started.html,RNA seq -Guided workflows for R package Suerat (for RNA-seq),https://satijalab.org/seurat/articles/get_started.html,single cell -Guided workflows for R package Suerat (for RNA-seq),https://satijalab.org/seurat/articles/get_started.html,R -The Python Tutorial,https://docs.python.org/3/tutorial/index.html,python -The Python Tutorial,https://docs.python.org/3/tutorial/index.html,data science -The Python Tutorial,https://docs.python.org/3/tutorial/index.html,data visualization -Python for data science handbook,https://jakevdp.github.io/PythonDataScienceHandbook/,data science -Python for data science handbook,https://jakevdp.github.io/PythonDataScienceHandbook/,python -Python for data science handbook,https://jakevdp.github.io/PythonDataScienceHandbook/,machine learning -Python for data science handbook,https://jakevdp.github.io/PythonDataScienceHandbook/,data visualization -Harvard Research Computing quickstart 
guide,https://docs.rc.fas.harvard.edu/kb/quickstart-guide/,high-performance computing -Harvard Research Computing quickstart guide,https://docs.rc.fas.harvard.edu/kb/quickstart-guide/,unix -Harvard Statistics department consulting office hours,https://statistics.fas.harvard.edu/harvard-statistics-consulting-service,statistics diff --git a/data/resources/resources-primary.json b/data/resources/resources-primary.json index c039788..49311ad 100644 --- a/data/resources/resources-primary.json +++ b/data/resources/resources-primary.json @@ -84,7 +84,7 @@ "id": "9", "name": "Harvard library data services", "link": "https://hlrdm.library.harvard.edu/", - "tags": ["data sciecne", "harvard"], + "tags": ["data science", "harvard"], "date-added": "11-28-2023", "status" : "active", "description": "" diff --git a/data/resources/tag-csv/.gitignore b/data/resources/tag-csv/.gitignore new file mode 100644 index 0000000..72e8ffc --- /dev/null +++ b/data/resources/tag-csv/.gitignore @@ -0,0 +1 @@ +* diff --git a/data/resources/tag-csv/R.csv b/data/resources/tag-csv/R.csv deleted file mode 100644 index 80881e5..0000000 --- a/data/resources/tag-csv/R.csv +++ /dev/null @@ -1,7 +0,0 @@ -Resource,Tags -R for data science book,data science R -Modern statistics for modern biology,bioinformatics data science R statistics -Tidy Tuesday,data science python R -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Riffomonas project: R tutorials for biologist on YouTube,data science data visualization R -Guided workflows for R package Suerat (for RNA-seq),bioinformatics R RNA seq single cell diff --git a/data/resources/tag-csv/RNA-seq.csv b/data/resources/tag-csv/RNA-seq.csv deleted file mode 100644 index 689b518..0000000 --- a/data/resources/tag-csv/RNA-seq.csv +++ /dev/null @@ -1,4 +0,0 @@ -Resource,Tags -Data analysis for the life sciences Harvard EdX course,bioinformatics 
data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq -Guided workflows for R package Suerat (for RNA-seq),bioinformatics R RNA seq single cell diff --git a/data/resources/tag-csv/bioinformatics.csv b/data/resources/tag-csv/bioinformatics.csv deleted file mode 100644 index 9f6697f..0000000 --- a/data/resources/tag-csv/bioinformatics.csv +++ /dev/null @@ -1,7 +0,0 @@ -Resource,Tags -Data carpentry data wrangling for genomics,bioinformatics genomics variant calling -Modern statistics for modern biology,bioinformatics data science R statistics -Metagenomics wiki,bioinformatics metagenomics -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq -Guided workflows for R package Suerat (for RNA-seq),bioinformatics R RNA seq single cell diff --git a/data/resources/tag-csv/data-science.csv b/data/resources/tag-csv/data-science.csv deleted file mode 100644 index 3fdb8be..0000000 --- a/data/resources/tag-csv/data-science.csv +++ /dev/null @@ -1,10 +0,0 @@ -Resource,Tags -R for data science book,data science R -Modern statistics for modern biology,bioinformatics data science R statistics -Tidy Tuesday,data science python R -Harvard library data services youtube series on data management,data science harvard -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Riffomonas project: R tutorials for biologist on YouTube,data science data visualization R -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq -The Python Tutorial,data 
science data visualization python -Python for data science handbook,data science data visualization machine learning python diff --git a/data/resources/tag-csv/data-visualization.csv b/data/resources/tag-csv/data-visualization.csv deleted file mode 100644 index 95dc96d..0000000 --- a/data/resources/tag-csv/data-visualization.csv +++ /dev/null @@ -1,4 +0,0 @@ -Resource,Tags -Riffomonas project: R tutorials for biologist on YouTube,data science data visualization R -The Python Tutorial,data science data visualization python -Python for data science handbook,data science data visualization machine learning python diff --git a/data/resources/tag-csv/genome-assembly.csv b/data/resources/tag-csv/genome-assembly.csv deleted file mode 100644 index e016b9e..0000000 --- a/data/resources/tag-csv/genome-assembly.csv +++ /dev/null @@ -1,2 +0,0 @@ -Resource,Tags -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq diff --git a/data/resources/tag-csv/genomics.csv b/data/resources/tag-csv/genomics.csv deleted file mode 100644 index 0953e10..0000000 --- a/data/resources/tag-csv/genomics.csv +++ /dev/null @@ -1,5 +0,0 @@ -Resource,Tags -Data carpentry data wrangling for genomics,bioinformatics genomics variant calling -Bacterial and viral bioinformatics resource center,genomics metagenomics microbiology -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq diff --git a/data/resources/tag-csv/git.csv b/data/resources/tag-csv/git.csv deleted file mode 100644 index efef90d..0000000 --- a/data/resources/tag-csv/git.csv +++ /dev/null @@ -1,3 +0,0 @@ -Resource,Tags -Software carpentry novice git lesson,git unix -Missing semester of your CS education,git unix diff --git a/data/resources/tag-csv/harvard.csv 
b/data/resources/tag-csv/harvard.csv deleted file mode 100644 index e7971b7..0000000 --- a/data/resources/tag-csv/harvard.csv +++ /dev/null @@ -1,7 +0,0 @@ -Resource,Tags -Harvard library data services,data sciecne harvard -Harvard library data services youtube series on data management,data science harvard -FAS RC User Codes,harvard high-performance computing unix -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Harvard Research Computing quickstart guide,harvard high-performance computing unix -Harvard Statistics department consulting office hours,harvard statistics diff --git a/data/resources/tag-csv/high-performance-computing.csv b/data/resources/tag-csv/high-performance-computing.csv deleted file mode 100644 index dbc3e9b..0000000 --- a/data/resources/tag-csv/high-performance-computing.csv +++ /dev/null @@ -1,4 +0,0 @@ -Resource,Tags -FAS RC User Codes,harvard high-performance computing unix -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Harvard Research Computing quickstart guide,harvard high-performance computing unix diff --git a/data/resources/tag-csv/machine-learning.csv b/data/resources/tag-csv/machine-learning.csv deleted file mode 100644 index 3fa5ef3..0000000 --- a/data/resources/tag-csv/machine-learning.csv +++ /dev/null @@ -1,3 +0,0 @@ -Resource,Tags -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Python for data science handbook,data science data visualization machine learning python diff --git a/data/resources/tag-csv/metagenomics.csv b/data/resources/tag-csv/metagenomics.csv deleted file mode 100644 index 0f86231..0000000 --- a/data/resources/tag-csv/metagenomics.csv +++ 
/dev/null @@ -1,4 +0,0 @@ -Resource,Tags -Bacterial and viral bioinformatics resource center,genomics metagenomics microbiology -Metagenomics wiki,bioinformatics metagenomics -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq diff --git a/data/resources/tag-csv/microbiology.csv b/data/resources/tag-csv/microbiology.csv deleted file mode 100644 index a2e1191..0000000 --- a/data/resources/tag-csv/microbiology.csv +++ /dev/null @@ -1,2 +0,0 @@ -Resource,Tags -Bacterial and viral bioinformatics resource center,genomics metagenomics microbiology diff --git a/data/resources/tag-csv/nextflow.csv b/data/resources/tag-csv/nextflow.csv deleted file mode 100644 index e016b9e..0000000 --- a/data/resources/tag-csv/nextflow.csv +++ /dev/null @@ -1,2 +0,0 @@ -Resource,Tags -Bioinformatics workbook,bioinformatics data science genome assembly genomics metagenomics nextflow RNA seq diff --git a/data/resources/tag-csv/python.csv b/data/resources/tag-csv/python.csv deleted file mode 100644 index 1a9d032..0000000 --- a/data/resources/tag-csv/python.csv +++ /dev/null @@ -1,5 +0,0 @@ -Resource,Tags -Tidy Tuesday,data science python R -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -The Python Tutorial,data science data visualization python -Python for data science handbook,data science data visualization machine learning python diff --git a/data/resources/tag-csv/single-cell.csv b/data/resources/tag-csv/single-cell.csv deleted file mode 100644 index 03c0347..0000000 --- a/data/resources/tag-csv/single-cell.csv +++ /dev/null @@ -1,2 +0,0 @@ -Resource,Tags -Guided workflows for R package Suerat (for RNA-seq),bioinformatics R RNA seq single cell diff --git a/data/resources/tag-csv/statistics.csv b/data/resources/tag-csv/statistics.csv deleted file mode 100644 index 5b30cd9..0000000 --- 
a/data/resources/tag-csv/statistics.csv +++ /dev/null @@ -1,4 +0,0 @@ -Resource,Tags -Modern statistics for modern biology,bioinformatics data science R statistics -Data analysis for the life sciences Harvard EdX course,bioinformatics data science genomics harvard high-performance computing machine learning python R RNA seq statistics -Harvard Statistics department consulting office hours,harvard statistics diff --git a/data/resources/tag-csv/template.csv b/data/resources/tag-csv/template.csv deleted file mode 100644 index 484a142..0000000 --- a/data/resources/tag-csv/template.csv +++ /dev/null @@ -1 +0,0 @@ -Resource,Tags diff --git a/data/resources/tag-csv/unix.csv b/data/resources/tag-csv/unix.csv deleted file mode 100644 index eaaeb3c..0000000 --- a/data/resources/tag-csv/unix.csv +++ /dev/null @@ -1,5 +0,0 @@ -Resource,Tags -Software carpentry novice git lesson,git unix -Missing semester of your CS education,git unix -FAS RC User Codes,harvard high-performance computing unix -Harvard Research Computing quickstart guide,harvard high-performance computing unix diff --git a/data/resources/tag-csv/variant-calling.csv b/data/resources/tag-csv/variant-calling.csv deleted file mode 100644 index f29b756..0000000 --- a/data/resources/tag-csv/variant-calling.csv +++ /dev/null @@ -1,2 +0,0 @@ -Resource,Tags -Data carpentry data wrangling for genomics,bioinformatics genomics variant calling diff --git a/docs/resources/Workshops/R/R-workshop-2023-Part3.md b/docs/resources/Workshops/R/R-workshop-2023-Part3.md index c875c3b..fd3d795 100644 --- a/docs/resources/Workshops/R/R-workshop-2023-Part3.md +++ b/docs/resources/Workshops/R/R-workshop-2023-Part3.md @@ -74,7 +74,7 @@ Furthermore, while these basic plots are easy to generate, more complex plots, s ## Introduction to ggplot -[ggplot](https://ggplot2.tidyverse.org/) is a **package** (library of code with various functions) that is part of the **tidyverse**. 
It uses a somewhat standardized 'grammar of graphics' ([book](http://id.lib.harvard.edu/alma/990138412570203941/catalog); [paper](https://hollis.harvard.edu/permalink/f/1mdq5o5/TN_cdi_informaworld_taylorfrancis_310_1198_jcgs_2009_07098)) in its syntax to make almost every aspect of a plot customizable. Using ggplot it is easy to make reproducible scientific figures that look nice and are easily understandable. With ggplot, I rarely need to tweak my figures outside of R. +[ggplot](https://ggplot2.tidyverse.org/) is a **package** (library of code with various functions) that is part of the **tidyverse**. It uses a somewhat standardized 'grammar of graphics' ([book](https://id.lib.harvard.edu/alma/990138412570203941/catalog); [paper](https://hollis.harvard.edu/permalink/f/1mdq5o5/TN_cdi_informaworld_taylorfrancis_310_1198_jcgs_2009_07098)) in its syntax to make almost every aspect of a plot customizable. Using ggplot it is easy to make reproducible scientific figures that look nice and are easily understandable. With ggplot, I rarely need to tweak my figures outside of R. This workshop is also heavily influenced by the book [Fundamentals of Data Visualization](https://hollis.harvard.edu/permalink/f/1s5nto6/01HVD_ALMA512300276480003941) by [Claus Wilke](https://clauswilke.com/). diff --git a/docs/resources/Workshops/R/css/rmd_header.html b/docs/resources/Workshops/R/css/rmd_header.html index 9956e66..a85a95d 100644 --- a/docs/resources/Workshops/R/css/rmd_header.html +++ b/docs/resources/Workshops/R/css/rmd_header.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/resources/Workshops/R/end.html b/docs/resources/Workshops/R/end.html index 68037ac..b62dc0c 100644 --- a/docs/resources/Workshops/R/end.html +++ b/docs/resources/Workshops/R/end.html @@ -61,7 +61,7 @@
To begin, click the link for your operating system to bring up a list of files to download. diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day1.md b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day1.md index e2bf6df..d288d58 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day1.md +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day1.md @@ -29,7 +29,7 @@ pre code { Welcome to the first day of the [FAS Informatics](https://informatics.fas.harvard.edu/) [Bioinformatics Tips & Tricks workshop](https://harvardinformatics.github.io/workshops/2023-fall/biotips/)! -If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023_Day1-student.Rmd) +If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023-Day1-student.Rmd) This is the workshop file that should be opened in RStudio. This is an RMarkdown file, meaning that both formatted text and code blocks can be added to it, and the code blocks can be executed from the RStudio interface. RStudio also has an easy to access **Terminal** tab which is how one would normally execute **Unix** commands. For this workshop, we will be going through this document and copy-pasting code blocks within it to run in the terminal to demonstrate some basic concepts. We will also be doing exercises directly in the terminal panel of RStudio. Once you've got the correct command, you can paste it into the RMarkdown document to keep a record. 
@@ -641,7 +641,7 @@ Let's take a look at a SAM file. We could use the typical bash commands like `ca ## SAMtools -[SAMtools](http://www.htslib.org/doc/samtools.html) is a suite of programs that are extremely useful for processing mapped reads and for downstream analysis. As stated above, SAM/BAM files from different programs are (mostly) interchangeable, so `samtools` will work with a file SAM/BAM file no matter what program produced it. It has a ton of functions (which you can check out on the [manual page](http://www.htslib.org/doc/samtools.html)), but we will go through several of the most common uses. +[SAMtools](https://www.htslib.org/doc/samtools.html) is a suite of programs that are extremely useful for processing mapped reads and for downstream analysis. As stated above, SAM/BAM files from different programs are (mostly) interchangeable, so `samtools` will work with a SAM/BAM file no matter what program produced it. It has a ton of functions (which you can check out on the [manual page](https://www.htslib.org/doc/samtools.html)), but we will go through several of the most common uses. 
### samtools view diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2-student.Rmd b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2-student.Rmd index a86b90d..08800b3 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2-student.Rmd +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2-student.Rmd @@ -145,7 +145,7 @@ In addition to this optional fourth column for an ID, **bed** files have several For more information on bed files and these extra columns, visit the following links: - [Description of bed files from bedtools](https://bedtools.readthedocs.io/en/latest/content/general-usage.html) -- [Description of bed files from UCSC](http://genome.ucsc.edu/FAQ/FAQformat#format1) +- [Description of bed files from UCSC](https://genome.ucsc.edu/FAQ/FAQformat#format1) # Summarizing SVs from the command line diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2.md b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2.md index a75b466..53f8a9b 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2.md +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day2.md @@ -26,7 +26,7 @@ pre code { Welcome to the second day of the [FAS Informatics](https://informatics.fas.harvard.edu/) [Bioinformatics Tips & Tricks workshop](https://harvardinformatics.github.io/workshops/2023-fall/biotips/)! -If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023_Day2-student.Rmd) +If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. 
Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023-Day2-student.Rmd) Today we're going to continue our tour and explanation of common genomics file formats and their associated tools by talking about interval files, that is files which indicate regions of a genome (.bed files, .gff files). @@ -149,7 +149,7 @@ In addition to this optional fourth column for an ID, **bed** files have several For more information on bed files and these extra columns, visit the following links: - [Description of bed files from bedtools](https://bedtools.readthedocs.io/en/latest/content/general-usage.html) -- [Description of bed files from UCSC](http://genome.ucsc.edu/FAQ/FAQformat#format1) +- [Description of bed files from UCSC](https://genome.ucsc.edu/FAQ/FAQformat#format1) # Summarizing SVs from the command line diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day3.md b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day3.md index a09a46b..6b27a09 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day3.md +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day3.md @@ -10,7 +10,7 @@ output: Welcome to the third day of the [FAS Informatics](https://informatics.fas.harvard.edu/) [Bioinformatics Tips and Tricks Workshop](https://harvardinformatics.github.io/workshops/2023-fall/biotips/)! -If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023_Day3-student.Rmd) +If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit and execute the code in this file*. 
Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023-Day3-student.Rmd) Today we're going to continue our tour and explanation of common genomics file formats and their associated tools, starting with GFF files, which are typically used to store gene annotations. We'll then talk about VCF files, which are used to store variants. @@ -54,7 +54,7 @@ The format for encoding information about genic regions (commonly called a **gen For more detailed information on **GFF** files, see the following links: - [UCSC description (GFF2)](https://genome.ucsc.edu/FAQ/FAQformat.html#format3) -- [Ensembl description (GFF2)](http://www.ensembl.org/info/website/upload/gff.html?redirect=no) +- [Ensembl description (GFF2)](https://www.ensembl.org/info/website/upload/gff.html?redirect=no) - [NCBI description (GFF3)](https://www.ncbi.nlm.nih.gov/datasets/docs/v2/reference-docs/file-formats/annotation-files/about-ncbi-gff3/) - [GFF3 Specification](https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md) @@ -682,4 +682,4 @@ If everything is working, you should get the same answer as before. But now we c This is better, but it still requires a lot of typing if we want to run this on 10 files. If, for example, we wanted to compute SNP density separately for each chromosome, or for different interval types (e.g., genes, introns, exons), we'd have to type out each bed file separately. -We can get around this by using loops, which we'll cover next time \ No newline at end of file +We can get around this by using loops, which we'll cover next time diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4-instructor.html b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4-instructor.html index 4408bd0..b287b30 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4-instructor.html +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4-instructor.html @@ -364,7 +364,7 @@
If you’re viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will edit the file while -executing code in the terminal. Please download the raw file here
+executing code in the terminal. Please download the raw file here Today you’ll learn more about how to write scripts, control the behavior of your scripts using loops and conditional statements, and more!
diff --git a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4.md b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4.md index 92025df..e396c54 100644 --- a/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4.md +++ b/docs/resources/Workshops/Unix/Biotips-workshop-2023-Day4.md @@ -10,7 +10,7 @@ output: Welcome to the fourth day of the [FAS Informatics](https://informatics.fas.harvard.edu/) [Bioinformatics Tips and Tricks Workshop](https://harvardinformatics.github.io/workshops/2023-fall/biotips/)! -If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit the file while executing code in the terminal*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-spring/biotips/Biotips-workshop-2023_Day4-student.Rmd) +If you're viewing this file on the website, you are viewing the final, formatted version of the workshop. The workshop itself will take place in the RStudio program and you will *edit the file while executing code in the terminal*. Please download the raw file [here](https://harvardinformatics.github.io/workshops/2023-fall/biotips/Biotips-workshop-2023-Day4-student.Rmd) Today you'll learn more about how to write scripts, control the behavior of your scripts using loops and conditional statements, and more! 
diff --git a/docs/resources/Workshops/Unix/css/rmd_header.html b/docs/resources/Workshops/Unix/css/rmd_header.html index 9956e66..a85a95d 100644 --- a/docs/resources/Workshops/Unix/css/rmd_header.html +++ b/docs/resources/Workshops/Unix/css/rmd_header.html @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/docs/resources/Workshops/Unix/index.html b/docs/resources/Workshops/Unix/index.html index 52633e7..c96a761 100644 --- a/docs/resources/Workshops/Unix/index.html +++ b/docs/resources/Workshops/Unix/index.html @@ -123,7 +123,7 @@Gregg Thomas: A bioinformatics scientist in the FAS Informatics group at Harvard University and recent postdoc - at the University of Montana where he studied the phylogenetics and comparative + at the University of Montana where he studied the phylogenetics and comparative genomics of the mouse and rat radiation. He got his PhD at Indiana University where he worked on comparative genomics of arthropods, mutation rate evolution in primates, and convergent evolution using comparative genomics. In general, Gregg uses and develops computational methods to study molecular evolution and phylogenetics to determine what forces drive divergence and @@ -207,7 +207,7 @@
Program | Author | Year | Use cases | Link | Paper |
---|---|---|---|---|---|
bedtools | Quinnlan and Hall | 2010 | Perform operations on sets of genomic coordinates. | Website | Paper |
bcftools | NA | NA | Perform operations on VCF and BCF formatted files. | Website | NA |
samtools | Li | 2009 | Perform operations on SAM/BAM/CRAM formatted files. | Website | Paper |
Picard tools | Broad Institute | 2019 | Performs many operations on SAM/BAM/CRAM and VCF files. | Website | Paper |
gffread | Pertea & Pertea | 2020 | General purpose GFF file manipulation | Website | Paper |
seqtk | Li | NA | A fast and lightweight tool for processing sequences in the FASTA or FASTQ format | Website | NA | Program | Author | Year | Use cases | Link | Paper |
bedtools | Quinlan and Hall | 2010 | Perform operations on sets of genomic coordinates. | Website | Paper |
bcftools | NA | NA | Perform operations on VCF and BCF formatted files. | Website | NA |
samtools | Li | 2009 | Perform operations on SAM/BAM/CRAM formatted files. | Website | Paper |
Picard tools | Broad Institute | 2019 | Performs many operations on SAM/BAM/CRAM and VCF files. | Website | Paper |
gffread | Pertea & Pertea | 2020 | General purpose GFF file manipulation | Website | Paper |
seqtk | Li | NA | A fast and lightweight tool for processing sequences in the FASTA or FASTQ format | Website | NA |