From da2c98a3cded17dcf4632936d3ea3160d0a9ce08 Mon Sep 17 00:00:00 2001
From: Paul McCarthy <pauldmccarthy@gmail.com>
Date: Thu, 30 Dec 2021 12:06:33 +0000
Subject: [PATCH] ENH,RF: Re-arrange code to incorporate update of externally
 hosted packages

---
 share/fsl/sbin/update_fsl_package | 139 +++++++++++++++++++-----------
 1 file changed, 90 insertions(+), 49 deletions(-)

diff --git a/share/fsl/sbin/update_fsl_package b/share/fsl/sbin/update_fsl_package
index f6956a4..f38209c 100755
--- a/share/fsl/sbin/update_fsl_package
+++ b/share/fsl/sbin/update_fsl_package
@@ -246,12 +246,10 @@ class Package:
 
 
 @ft.lru_cache
-def query_installed_packages(include_external : bool) -> Dict[str, Package]:
+def query_installed_packages() -> Dict[str, Package]:
     """Uses conda to find out the versions of all packages installed in
-    $FSLDIR, which are sourced from the FSL conda channels.
-
-    If "include_external" is True, externally hosted packages that are listed
-    in EXTERNALLY_HOSTED_PACKAGES are also queried.
+    $FSLDIR, and which are sourced from the FSL conda channels, or
+    which are listed in EXTERNALLY_HOSTED_PACKAGES.
 
     Returns a dict of {pkgname : Package} mappings. The "dependencies"
     attributes of the package objects are not populated.
@@ -277,7 +275,7 @@ def query_installed_packages(include_external : bool) -> Dict[str, Package]:
         internal = pkg['base_url'].rstrip() in channels
         external = pkg['name']              in EXTERNALLY_HOSTED_PACKAGES
 
-        if internal or (external and include_external):
+        if internal or external:
             pkgs[pkg['name']] = Package(pkg['name'],
                                         pkg['version'],
                                         pkg['channel'],
@@ -286,26 +284,35 @@ def query_installed_packages(include_external : bool) -> Dict[str, Package]:
 
 
 @ft.lru_cache
-def download_external_package_metadata(pkgname     : str,
-                                       channel_url : str,
-                                       development : bool) -> Optional[Package]:
+def download_package_metadata(pkgname     : str,
+                              channel     : str,
+                              development : bool) -> Optional[Package]:
     """Downloads metadata about one externally hosted package. The returned
-    Package object does not contain platform or dependency information.
+    Package object does not contain information about dependencies.
+
+    Returns None if the package does not appear to be hosted on the channel,
+    or if there are no suitable versions available for the host platform.
+
+    pkgname:     Name of package to lookup
+    channel:     Name of channel on anaconda.org. Can also be a full channel
+                 URL
+    development: Whether to consider development versions of packages
     """
 
-    # if channel_url is a full url, we
-    # download the full channel metdata
-    # and look up the package
-    if any(channel_url.startswith(p) for p in ('https:', 'http:', 'file:')):
-        chandata = download_channel_metadata(channel_url)
-        pkgs     = identify_packages([chandata], [pkgname], development)
-        return pkgs.get(pkgname, [None])[-1]
+    # if we've been given a full url, we
+    # download the full channel metadata
+    # and look up the package. This is
+    # expensive if downloading from
+    # anaconda.org
+    if any(channel.startswith(p) for p in ('https:', 'http:', 'file:')):
+        chandata = download_channel_metadata(channel)
+        pkgs     = parse_channel_metadata([chandata], [pkgname], development)
+        return pkgs.get(pkgname)
 
-    # Othrerwise channel_url is the name
-    # of an anaconda.org channel - we
-    # just retrieve information about
-    # the package
-    channel     = channel_url
+    # Otherwise channel is the name of
+    # an anaconda.org channel - we just
+    # retrieve information about the one
+    # package using the HTTP API.
     api_url     = f'https://api.anaconda.org/package/{channel}/'
     channel_url = f'https://anaconda.org/{channel}/'
 
@@ -385,19 +392,17 @@ def download_channel_metadata(channel_url : str, **kwargs) -> Tuple[Dict, Dict]:
     return chandata, platdata
 
 
-def identify_packages(
+def parse_channel_metadata(
         channeldata : List[Tuple[Dict, Dict]],
         pkgnames    : Sequence[str],
         development : bool
-) -> Dict[str, List[Package]]:
-    """Return metadata about the requested packages.
+) -> Dict[str, Package]:
+    """Extract metadata about the requested packages from the channel metadata.
 
-    Loads channel and platform metadata from the conda channels. Parses the
-    metadata, and creates a Package object for every requested package.
+    Parses the channel metadata, and creates a Package object for every
+    requested package.
 
-    Returns a dict of {name : [Package]} mappings, where each entry contains
-    Package objects for all available versions of the packae, ordered from
-    oldest (first) to newest (last).
+    Returns a dict of {name : Package} mappings.
 
     channeldata:  Sequence of channel data from one or more conda channels, as
                   returned by the download_channel_metadata function.
@@ -414,13 +419,18 @@ def identify_packages(
             if pkgname in cdata['packages']:
                 pkgchannels[pkgname] = (cdata, pdata)
                 break
-        # This package is not available
+        # This package is not present in
+        # the provided channel metadata
         else:
             log.debug(f'Package {pkgname} is not available - ignoring.')
             continue
 
     # Create Package objects for every available version of
-    # the requested packages. The packages dict has structure
+    # the requested packages. Information about available
+    # versions is not necessarily sorted, so we have to
+    # parse and sort every entry to find the most recent.
+    #
+    # The packages dict has structure
     #
     # {pkgname : [Package, Package, ...]}
     #
@@ -437,10 +447,12 @@ def identify_packages(
                     continue
                 bisect.insort(packages[pkgname], pkg)
 
-    return packages
+    # After sorting from oldest->newest we can just
+    # return the newest version for each package
+    return {pkgname : pkgs[-1] for pkgname, pkgs in packages.items()}
 
 
-def filter_packages(packages : Dict[str, List[Package]]) -> List[Package]:
+def filter_packages(packages : Dict[str, Package]) -> List[Package]:
     """Identifies the versions of packages that should be installed.
 
     Removes packages that are not installed, or that are already up to date.
@@ -451,7 +463,7 @@ def filter_packages(packages : Dict[str, List[Package]]) -> List[Package]:
 
     filtered = []
 
-    for pkgname, pkgs in packages.items():
+    for pkgname, pkg in packages.items():
 
         # Find the Package object corresponding
         # to the installed version
@@ -461,11 +473,6 @@ def filter_packages(packages : Dict[str, List[Package]]) -> List[Package]:
             log.debug(f'Package {pkgname} is not installed - ignoring')
             continue
 
-        # select the newest available
-        # version of the package as
-        # the installation candidate
-        pkg = pkgs[-1]
-
         if pkg <= installed:
             log.debug(f'{pkg.name} is already up to date (available: '
                       f'{pkg.version}, installed: {installed.version}) '
@@ -535,6 +542,8 @@ def parse_args(argv : Optional[Sequence[str]]) -> argparse.Namespace:
                         help='Install package[s] without prompting for confirmation')
     parser.add_argument('-a', '--all', action='store_true',
                         help='Update all installed FSL packages')
+    parser.add_argument('-e', '--external', action='store_true',
+                        help='Consider externally hosted packages')
 
     parser.add_argument('--internal', help=argparse.SUPPRESS)
     parser.add_argument('--username', help=argparse.SUPPRESS)
@@ -547,6 +556,11 @@ def parse_args(argv : Optional[Sequence[str]]) -> argparse.Namespace:
         parser.error('Specify at least one package, or use --all '
                      'to update all installed FSL packages.')
 
+    # enable --external if any requested package is externally hosted
+    if (len(args.package) > 0) and \
+       any(p in EXTERNALLY_HOSTED_PACKAGES for p in args.package):
+        args.external = True
+
     return args
 
 
@@ -568,10 +582,39 @@ def main(argv : Sequence[str] = None):
     if args.verbose: log.setLevel(logging.DEBUG)
     else:            log.setLevel(logging.INFO)
 
-    # Download information about all
-    # available packages on the FSL
-    # conda channels.
-    print('Downloading FSL conda channel metadata ...')
+    # Build a list of all packages
+    # to consider for the update
+    print('Building FSL package list...')
+    if args.all:
+        pkgnames = list(query_installed_packages().keys())
+    else:
+        pkgnames = args.package
+
+    # build a dict of {pkgname : Package}
+    # mappings for all candidate packages
+    packages = {}
+
+    # We start with externally hosted
+    # packages (e.g. from conda-forge),
+    # if requested
+    if args.external:
+        if args.all:
+            extpkgs = list(EXTERNALLY_HOSTED_PACKAGES.keys())
+        else:
+            extpkgs = [p for p in pkgnames if p in EXTERNALLY_HOSTED_PACKAGES]
+
+        print('Downloading externally hosted package metadata...')
+        for pkgname in extpkgs:
+            pkg = download_package_metadata(
+                pkgname,
+                EXTERNALLY_HOSTED_PACKAGES[pkgname],
+                args.development)
+            if pkg is not None:
+                packages[pkgname] = pkg
+
+    # Download information about all available
+    # packages on the FSL conda channels.
+    print('Downloading FSL conda channel metadata...')
     channeldata = [download_channel_metadata(PUBLIC_FSL_CHANNEL)]
     if args.internal:
         channeldata.insert(0, download_channel_metadata(
@@ -579,16 +622,14 @@ def main(argv : Sequence[str] = None):
             username=args.username,
             password=args.password))
 
-    print('Building FSL package list ...')
-    if args.all:
-        packages = list(query_installed_packages(args.external).keys())
-    else:
-        packages = args.package
+    # Extract metadata for all requested
+    # packages from the FSL channel metadata
+    packages.update(parse_channel_metadata(
+        channeldata,
+        pkgnames,
+        args.development))
 
-    # Identify the versions that are
-    # available for the packages the
-    # user has requested.
-    packages = identify_packages(channeldata, packages, args.development)
+    # Remove packages which are already up to date
     packages = filter_packages(packages)
 
     if len(packages) == 0:
-- 
GitLab