nextstrain · jameshadfield · Jul 3, 2023 · Jul 2, 2023 · Jul 4, 2023 · Jul 5, 2023
diff --git a/src/sources/core.js b/src/sources/core.js
@@ -39,18 +39,25 @@ class CoreSource extends Source {
   async collectResources() {
     if (!this._allResources) this._allResources = new Map();
     const s3objects = await parseInventory();
     const datasets = new Map();
+    const files = new Map();
+    /* Group the inventory's objects by name, keeping datasets and files apart. Objects
+    without a name, or of any other resource type, are not collected. */
     s3objects.forEach((object) => {
       const [name, resourceType] = CoreCollectedResources.objectName(object);
       if (!name) return;
-      if (resourceType!=='dataset') return;
-      datasets.has(name) ? datasets.get(name).push(object) : datasets.set(name, [object]);
+      if (resourceType !== 'dataset' && resourceType !== 'file') return;
+      const grouped = resourceType === 'dataset' ? datasets : files;
+      if (!grouped.has(name)) grouped.set(name, []);
+      grouped.get(name).push(object);
     })
     this._allResources.set(
       'dataset',
       Array.from(datasets).map(([, objects]) => new CoreCollectedResources(this, objects))
     );
-    // TODO XXX narratives + files (etc)
+    const fileResources = Array.from(files).map(([, objects]) => new CoreCollectedResources(this, objects));
+    this._allResources.set('file', fileResources);
+    // TODO XXX narratives
   }
 
   // DEPRECATED availableDatasets is used by /charon/getAvailable and will be replaced once we move to a new API
@@ -180,11 +187,17 @@ class CoreCollectedResources extends CollectedResources {
   }
 
   nextstrainUrl(version=this._versions[0]) {
-    if (this._resourceType!=='dataset') return false;
-    // if the version is not the latest (in S3 terminology), then we don't yet have the
-    // ability to access it via Auspice. Or perhaps we do via /fetch? TODO
-    if (version.IsLatest!=="true") return false;
-    return version.Key.replace('.json', '').replace(/_/g, '/')
+    // Returns a URL (files) or URL path (datasets) for `version`, or false if there is none.
+    // If the version is not the latest (in S3 terminology), then we don't yet have the
+    // ability to access it via Auspice. Or perhaps we do via /fetch? TODO
+    if (!version || version.IsLatest!=="true") return false;
+    if (this._resourceType === 'file') {
+      return `https://nextstrain-data.s3.amazonaws.com/${version.Key}`
+    }
+    if (this._resourceType === 'dataset') {
+      return version.Key.replace('.json', '').replace(/_/g, '/')
+    }
+    return false;
   }
 }
 

diff --git a/src/utils/inventories.js b/src/utils/inventories.js
@@ -75,15 +75,36 @@ const DATESTAMP_REGEX = /_\d{4}-\d{2}-\d{2}$/;
  */
 const coreBucketKeyMunger = (object) => {
   const key = object.Key;
+  let name;
   /* now-deleted keys are not currently displayed, but we could consider grouping
   them with current keys if applicable */
   if (object.deleted) return [false];
-  /* keys with a directory-like structure + non-JSON keys are not considered,
-  but they will be when we start listing available intermediate files */
-  if (key.includes("/")) return [false];
+  /* keys with a directory-like structure may be 'files', but we perform a bunch of ad-hoc filtering
+  as we have a huge number of potential files in the core bucket and we don't want to expose them all! */
+  if (key.includes("/")) {
+    if (key.startsWith('files/')) {
+      if (
+        key.includes('/archive/')
+        || key.includes('/test/')
+        || key.includes('/branch/')
+        || key.includes('/trial/')
+        || key.includes('/test-data/')
+        || key.includes('/jen_test/')                     // a jen_test/ directory anywhere below files/
+        || key.match(/\/\d{4}-\d{2}-\d{2}_results\.json/) // forecasts-ncov
+        || key.endsWith('.png')                           // forecasts-ncov
+      ) {
+        return [false];
+      }
+      name = key.replace(/^files\//, '')  // don't want the files/ prefix in the displayed name
+        .replace(/^workflows\//, '')      // (similarly for workflows/)
+      return [name, 'file']
+    }
+    return [false];
+  }
+
   if (!key.endsWith('.json')) return [false];
   /* Attempts to exclude sidecar files, manifests, search results etc */
-  let name = key.replace(/\.json$/, '');
+  name = key.replace(/\.json$/, '');
   for (const suffix of SIDECARS) {
     if (name.endsWith(suffix)) return [false];
   }

diff --git a/static-site/gatsby-node.js b/static-site/gatsby-node.js
@@ -255,6 +255,10 @@ exports.createPages = ({graphql, actions}) => {
           path: "/pathogens",
           component: path.resolve("src/sections/pathogens.jsx")
         });
+        createPage({
+          path: "/pathogens/inputs",
+          component: path.resolve("src/sections/remote-inputs.jsx")
+        });
 
         /* NOTE: we are using "influenza" URLs for dev purposes only. This will be switched to "flu"
         when this functionality is released & publicized. For unknown reasons, if the component is named

diff --git a/static-site/src/components/CardsV2/card-component.jsx b/static-site/src/components/CardsV2/card-component.jsx
@@ -36,7 +36,7 @@ export const Card = ({data, outer=false}) => {
         color: '#4F4B50',
       }}>
         {outer && Logo(data.name)}
-        <Name name={data.name} isDataset={!!data.url}/>
+        <Name name={data.name} url={data.url}/>
         <SparkLine versions={data.versions || []} onClick={()=>{setDetails(details==='versions'?'':'versions')}}/>
       </div>
 
@@ -170,19 +170,22 @@ function Versions({card}) {
 /**
  * <Name> is the element for a collection's title. It may or may not be a link.
  */
-function Name({name, isDataset}) {
+function Name({name, url}) {
   const prettyName = name.replace(/\//g, " / ")
-
-  if (!isDataset) return (
+  if (!url) return (
     <div
       data-tooltip-id="iconTooltip" data-tooltip-place="top"
       data-tooltip-content={"Not an actual dataset - sort of an internal node in the naming hierarchy"}
     >
       {prettyName}
     </div>
   );
+  /* The structure of URLs in the API response is a WIP */
+  const href = url.startsWith('http') ?
+    url :
+    `https://nextstrain.org/${url}`;
   return (
-    <a href={`https://nextstrain.org/${name}`} target="_blank" rel="noreferrer"
+    <a href={href} target="_blank" rel="noreferrer"
       data-tooltip-id="iconTooltip" data-tooltip-place="top"
       data-tooltip-content={"Click to view the (current) dataset"}
       style={{ fontSize: '1.8rem', fontWeight: '300'}}

diff --git a/static-site/src/sections/remote-inputs.jsx b/static-site/src/sections/remote-inputs.jsx
@@ -0,0 +1,41 @@
+import React from "react";
+import {Link} from 'gatsby';
+import {SmallSpacer,HugeSpacer,FlexCenter} from "../layouts/generalComponents";
+import * as splashStyles from "../components/splash/styles";
+import GenericPage from "../layouts/generic-page";
+import CardsV2 from "../components/CardsV2/index";
+
+/* Heading and introductory blurb shown above the cards */
+const title = "Nextstrain-maintained pathogen analyses";
+const abstract = (
+  <>
+    This page lists Nextstrain's publicly available intermediate files.
+    Most of these files are used to generate phylogenetic analyses shown on{` `}
+    <Link to="/pathogens">the (core) pathogens page</Link>.
+  </>
+);
+
+/** Gatsby page registered at "/pathogens/inputs": lists the available intermediate files as cards. */
+class Index extends React.Component {
+  render() {
+    return (
+      <GenericPage location={this.props.location}>
+        <splashStyles.H1>{title}</splashStyles.H1>
+        <SmallSpacer />
+
+        <FlexCenter>
+          <splashStyles.CenteredFocusParagraph>
+            {abstract}
+          </splashStyles.CenteredFocusParagraph>
+        </FlexCenter>
+
+        <HugeSpacer />
+        <CardsV2 apiQuery={'prefix=/&versions&type=file'} dataType='file'/>
+        <HugeSpacer />
+      </GenericPage>
+    );
+  }
+}
+
+
+export default Index;