major farmshare updates
cagancayco committed Oct 17, 2024
1 parent aaae732 commit 93a2e28
Showing 72 changed files with 1,341 additions and 450 deletions.
2 changes: 1 addition & 1 deletion Gemfile
@@ -5,7 +5,7 @@ source 'https://rubygems.org'
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# Synchronize with https://pages.github.com/versions
ruby '3.0.4'
ruby '3.3.4'

gem 'github-pages', group: :jekyll_plugins

34 changes: 17 additions & 17 deletions _config.yml
@@ -12,9 +12,9 @@
# below with those in `_config_options.yml` from the
# library. E.g, to customise for Cirrus at EPCC, running
# Slurm, we could replace the options below with those from
#
#
# _includes/snippets_library/EPCC_Cirrus_slurm/_config_options.yml
#
#
# If your cluster is not represented in the library, please
# copy an existing folder, rename it, and customize for your
# installation. Remember to keep the leading slash on the
@@ -32,23 +32,23 @@
#
# Compute responsibly.

snippets: "/snippets_library/HPCC_MagicCastle_slurm"
snippets: "/snippets_library/SRC_FarmShare_slurm"

local:
prompt: "[you@laptop:~]$"
bash_shebang: "#!/usr/bin/env bash"

remote:
name: "HPC Carpentry's Cloud Cluster"
login: "cluster.hpc-carpentry.org"
portal: "https://mokey.cluster.hpc-carpentry.org"
host: "login1"
node: "node1"
name: "FarmShare"
login: "login.farmshare.stanford.edu"
portal: "https://ondemand.farmshare.stanford.edu"
host: "rice-02"
node: "wheat-01"
location: "cluster.hpc-carpentry.org"
homedir: "/home"
user: "yourUsername"
module_python3: "Python"
prompt: "[yourUsername@login1 ~]$"
homedir: "/home/"
user: "SUNetID"
module_python3: "python"
prompt: "[SUNetID@rice-02:~]$"
bash_shebang: "#!/bin/bash"

sched:
@@ -57,11 +57,11 @@ sched:
name: "sbatch"
options: ""
queue:
debug: "smnode"
debug: "wheat-01"
testing: "cpubase_bycore_b1"
status: "squeue"
flag:
user: "-u yourUsername"
user: "-u $USER"
interactive: ""
histdetail: "-l -j"
name: "-J"
@@ -73,7 +73,7 @@ sched:
interactive: "srun"
info: "sinfo"
comment: "#SBATCH"
hist: "sacct -u yourUsername"
hist: "sacct -u $USER"
hist_filter: ""

episode_order:
@@ -97,7 +97,7 @@ episode_order:
# "dc": Data Carpentry
# "lc": Library Carpentry
# "cp": Carpentries (e.g., instructor training)
carpentry: "incubator"
carpentry: "cp"

# Overall title for pages.
title: "Introduction to High-Performance Computing"
@@ -106,7 +106,7 @@ title: "Introduction to High-Performance Computing"
email: "[email protected]"

# Life cycle stage of the lesson ("pre-alpha", "alpha", "beta", "stable")?
life_cycle: "alpha"
life_cycle: "stable"

#------------------------------------------------------------
# Generic settings (should not need to change).
18 changes: 9 additions & 9 deletions _episodes/10-hpc-intro.md
@@ -27,7 +27,7 @@ of the desktop or laptop computer where they started:
<div class="row">
<div class="col-sm-6 col-md-4">
<div class="thumbnail">
<img src="/hpc-intro/fig/dna-solid.svg" width="30%" height="30%" alt="icon of strand of DNA">
<img src="/hpc-intro/fig/dna-solid.svg" width="30%" height="30%" alt="...">
<div class="caption">
<h3>Genomics</h3>
<p>A genomics researcher has been using small datasets of sequence data,
@@ -43,23 +43,23 @@ of the desktop or laptop computer where they started:

<div class="col-sm-6 col-md-4">
<div class="thumbnail">
<img src="/hpc-intro/fig/cubes-solid.svg" width="38%" height="38%" alt="icon of three cubes stacked on top of each other">
<img src="/hpc-intro/fig/cubes-solid.svg" width="38%" height="38%" alt="...">
<div class="caption">
<h3>Engineering</h3>
<p>An engineer is using a fluid dynamics package that has an option to
run in parallel. In this research problem, the calculations in each
region of the simulation are largely independent of calculations in
other regions of the simulation. It's possible to run each region's
calculations simultaneously (in <b>parallel</b>), communicate selected
results to adjacent regions as needed, and repeat the calculations to
converge on a final set of results.</p>
run in parallel. In this research problem, the calculations in each region
of the simulation are largely independent of calculations in other regions
of the simulation. It's possible to run each region's calculations
simultaneously (in <b>parallel</b>), communicate selected results to
adjacent regions as needed, and repeat the calculations to converge on a
final set of results.</p>
</div>
</div>
</div>

<div class="col-sm-6 col-md-4">
<div class="thumbnail">
<img src="/hpc-intro/fig/book-open-solid.svg" width="38%" height="38%" alt="icon of an open book">
<img src="/hpc-intro/fig/book-open-solid.svg" width="38%" height="38%" alt="...">
<div class="caption">
<h3>Humanities</h3>
<p>A graduate student is using a named entity recognizer to identify named
35 changes: 13 additions & 22 deletions _episodes/11-connecting.md
@@ -19,14 +19,13 @@ keypoints:
## Secure Connections

The first step in using a cluster is to establish a connection from our laptop
to the cluster. When we are sitting at a computer (or standing, or holding it
in our hands or on our wrists), we have come to expect a visual display with
icons, widgets, and perhaps some windows or applications: a _graphical user
interface_, or GUI. Since computer clusters are remote resources that we
connect to over slow or intermittent interfaces (WiFi and VPNs especially), it
is more practical to use a _command-line interface_, or CLI, to send commands
as plain-text. If a command returns output, it is printed as plain text as
well. The commands we run today will not open a window to show graphical
to the cluster. When we are sitting at a computer, we have come to expect a
visual display with icons, widgets, and perhaps some windows or applications:
a _graphical user interface_, or GUI. Since computer clusters are remote resources
that we connect to over slow or intermittent interfaces (WiFi and VPNs
especially), it is more practical to use a _command-line interface_, or CLI, to
send commands as plain-text. If a command returns output, it is printed as plain
text as well. The commands we run today will not open a window to show graphical
results.

If you have ever opened the Windows Command Prompt or macOS Terminal, you have
@@ -53,17 +53,9 @@ email address: the "@" symbol is used to separate the personal ID from the
address of the remote machine.
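
As a sketch of that pattern (`SUNetID` and the FarmShare login address below stand in for your own user ID and cluster; the snippet only prints the command rather than running it):

```shell
# Build the login command from its two parts: who you are, and where you go.
user="SUNetID"                        # placeholder: your own user ID
host="login.farmshare.stanford.edu"   # the cluster's login address
echo ssh "${user}@${host}"            # prints the command you would run
```
{: .language-bash}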

When logging in to a laptop, tablet, or other personal device, a username,
password, or pattern are normally required to prevent unauthorized access. In
these situations, the likelihood of somebody else intercepting your password is
low, since logging your keystrokes requires a malicious exploit or physical
access. For systems like {{ site.remote.host }} running an SSH server, anybody
on the network can log in, or try to. Since usernames are often public or easy
to guess, your password is often the weakest link in the security chain. Many
clusters therefore forbid password-based login, requiring instead that you
generate and configure a public-private key pair with a much stronger password.
Even if your cluster does not require it, the next section will guide you
through the use of SSH keys and an SSH agent to both strengthen your security
_and_ make it more convenient to log in to remote systems.
password, or pattern are normally required to prevent unauthorized access.
In addition to your Stanford password, you will be required to use Duo Two-Factor
Authentication.

## Log In to the Cluster

@@ -136,7 +127,7 @@ Great, we know where we are! Let's see what's in our current directory:
```
{: .language-bash}
```
id_ed25519.pub
afs-home go
```
{: .output}

@@ -150,8 +141,8 @@ double-check, include hidden files in your directory listing:
```
{: .language-bash}
```
. .bashrc id_ed25519.pub
.. .ssh
. .bashrc afs-home
.. .ssh go
```
{: .output}

43 changes: 24 additions & 19 deletions _episodes/12-cluster.md
@@ -73,9 +73,10 @@ devices are anchored to the "root" directory, which is `/`:
```
{: .language-bash}
```
bin etc lib64 proc sbin sys var
boot {{ site.remote.homedir | replace: "/", "" }} mnt root scratch tmp working
dev lib opt run srv usr
afs etc lib32 lost+found proc sbin srv usr
bin farmshare lib64 media root scratch swap.img var
boot {{ site.remote.homedir | replace: "/", "" }} mnt root snap tmp sys
dev lib libx32 opt run software tmp
```
{: .output}
@@ -89,18 +90,12 @@ system files and change as you install new software or upgrade your OS.
> These differ in both the amount of space allocated and whether or not they
> are backed up.
>
> * __Home__ -- often a _network filesystem_, data stored here is available
> * __Home__ (`$HOME`) -- often a _network filesystem_, data stored here is available
> throughout the HPC system, and often backed up periodically. Files stored
> here are typically slower to access, since the data is actually stored on another
> computer and is being transmitted and made available over the network!
> * __Scratch__ -- typically faster than the networked Home directory, but not
> * __Scratch__ (`$SCRATCH`) -- typically faster than the networked Home directory, but not
> usually backed up, and should not be used for long term storage.
> * __Work__ -- sometimes provided as an alternative to Scratch space, Work is
> a fast file system accessed over the network. Typically, this will have
> higher performance than your home directory, but lower performance than
> Scratch; it may not be backed up. It differs from Scratch space in that
> files in a work file system are not automatically deleted for you: you must
> manage the space yourself.
{: .callout}
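
On clusters that define them, these locations can be reached through the variables named above. A quick check (a sketch; `$SCRATCH` may be unset on systems other than FarmShare):

```shell
echo "$HOME"                # network home directory, typically backed up
echo "${SCRATCH:-not set}"  # fast scratch space; prints "not set" if undefined
```
{: .language-bash}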
## Nodes
@@ -131,18 +126,17 @@ This may show only your user ID, but there are likely several other people
> ## Dedicated Transfer Nodes
>
> If you want to transfer larger amounts of data to or from the cluster, some
> systems offer dedicated nodes for data transfers only. The motivation for
> If you want to transfer larger amounts of data to or from the cluster, SRC
> offers dedicated nodes for data transfers only. The motivation for
> this lies in the fact that larger data transfers should not obstruct
> operation of the login node for anybody else. Check with your cluster's
> documentation or its support team if such a transfer node is available. As a
> rule of thumb, consider all transfers of a volume larger than 500 MB to 1 GB
> as large. But these numbers change, e.g., depending on the network connection
> of yourself and of your cluster or other factors.
> operation of the login node for anybody else. As a rule of thumb, consider all
> transfers of a volume larger than 500 MB to 1 GB as large. But these numbers
> change, e.g., depending on the network connection of yourself and of your
> cluster or other factors.
{: .callout}
The real work on a cluster gets done by the _compute_ (or _worker_) _nodes_.
compute nodes come in many shapes and sizes, but generally are dedicated to long
Compute nodes come in many shapes and sizes, but generally are dedicated to long
or hard tasks that require a lot of computational resources.
All interaction with the compute nodes is handled by a specialized piece of
@@ -208,20 +202,30 @@ connect to a shared, remote fileserver or cluster of servers.
> >
> > * Run system utilities
> > ```
> > # Linux
> > {{ site.local.prompt }} nproc --all
> > {{ site.local.prompt }} free -m
> >
> > # MacOS
> > {{ site.local.prompt }} sysctl -n hw.ncpu
> > ```
> > {: .language-bash}
> >
> > * Read from `/proc`
> > ```
> > # Linux
> > {{ site.local.prompt }} cat /proc/cpuinfo
> > {{ site.local.prompt }} cat /proc/meminfo
> >
> > # MacOS
> > {{ site.local.prompt }} sysctl -a | grep machdep.cpu
> > {{ site.local.prompt }} vm_stat
> > ```
> > {: .language-bash}
> >
> > * Run system monitor
> > ```
> > # Linux, and can be installed on MacOS
> > {{ site.local.prompt }} htop
> > ```
> > {: .language-bash}
@@ -246,6 +250,7 @@ connect to a shared, remote fileserver or cluster of servers.
> >
> > ```
> > {{ site.remote.prompt }} less /proc/meminfo
> > # Use "q" to exit
> > ```
> > {: .language-bash}
> >
2 changes: 1 addition & 1 deletion _episodes/13-scheduler.md
@@ -299,7 +299,7 @@ return of your command prompt indicates that the request to cancel the job was
successful.
```
{{ site.remote.prompt }} {{site.sched.del }} 38759
{{ site.remote.prompt }} {{site.sched.del }} 277347
# It might take a minute for the job to disappear from the queue...
{{ site.remote.prompt }} {{ site.sched.status }} {{ site.sched.flag.user }}
```
43 changes: 23 additions & 20 deletions _episodes/14-environment-variables.md
@@ -46,15 +46,14 @@ $ set
{: .language-bash}

~~~
COMPUTERNAME=TURING
HOME=/home/vlad
HOSTNAME=TURING
HOSTTYPE=i686
NUMBER_OF_PROCESSORS=4
PATH=/Users/vlad/bin:/usr/local/git/bin:/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin
PWD=/home/vlad
UID=1000
USERNAME=vlad
BASH=/bin/bash
BASHOPTS=checkwinsize:cmdhist:complete_fullquote:expand_aliases:extglob:extquote:force_fignore:globasciiranges:histappend:interactive_comments:login_shell:progcomp:promptvars:sourcepath
BASH_ALIASES=()
BASH_ARGC=([0]="0")
BASH_ARGV=()
BASH_CMDS=()
BASH_COMPLETION_VERSINFO=([0]="2" [1]="11")
BASH_ENV=/software/spack/opt/spack/linux-ubuntu22.04-x86_64_v3/gcc-12.3.0/lmod-8.7.24-zo2r3he7kqr2ohenyvha5mmsxh7t3x54/lmod/lmod/init/bash
...
~~~
{: .output}
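
`set` prints every shell variable and function; to list only the *exported* environment variables, `printenv` (or `env`) can be used -- a sketch:

```shell
printenv | sort | head -n 5   # first few environment variables, alphabetically
```
{: .language-bash}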
@@ -97,7 +96,7 @@ $ echo $HOME
{: .language-bash}

~~~
/home/vlad
/home/users/SUNetID
~~~
{: .output}
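
Creating a variable of our own follows the same pattern; `export` makes it visible to programs we launch afterwards (a minimal sketch; `MYVAR` is a made-up name):

```shell
MYVAR="hello"          # a new variable, visible only in this shell so far
export MYVAR           # now child processes inherit it too
sh -c 'echo $MYVAR'    # a child shell can read it
```
{: .language-bash}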

@@ -222,27 +221,31 @@ To show how this works,
here are the components of `PATH` listed one per line:

~~~
/Users/vlad/bin
/usr/local/git/bin
/usr/bin
/bin
/home/users/SUNetID/bin
/home/users/SUNetID/.local/bin
/usr/local/sbin
/usr/local/bin
/usr/sbin
/usr/bin
/sbin
/usr/local/bin
/bin
/usr/games
/usr/local/games
/snap/bin
~~~
{: .output}
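
A one-per-line listing like the one above can be produced directly from `$PATH` with `tr` (a sketch):

```shell
echo "$PATH" | tr ':' '\n'   # replace each colon separator with a newline
```
{: .language-bash}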

On our computer,
there are actually three programs called `analyze`
in three different directories:
`/bin/analyze`,
`/home/users/SUNetID/bin/analyze`,
`/usr/local/bin/analyze`,
and `/users/vlad/analyze`.
and `/bin/analyze`.
Since the shell searches the directories in the order they're listed in `PATH`,
it finds `/bin/analyze` first and runs that.
Notice that it will *never* find the program `/users/vlad/analyze`
it finds `/home/users/SUNetID/bin/analyze` first and runs that.
Notice that it will *never* find the program `/scratch/users/SUNetID/analyze`
unless we type in the full path to the program,
since the directory `/users/vlad` isn't in `PATH`.
since the directory `/scratch/users/SUNetID` isn't in `PATH`.
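
To check which copy of a program the shell will actually run, `command -v` and `type -a` can be used (`analyze` is the hypothetical program from the text, so the first line reports it missing on most machines):

```shell
# 'command -v' prints the first match for a name on PATH (nothing if absent):
command -v analyze || echo "analyze: not found on PATH"
# 'type -a' lists every match in search order, shown here with a real command:
type -a ls
```
{: .language-bash}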

This means that I can have executables in lots of different places as long as
I remember that I need to update my `PATH` so that my shell can find them.