diff --git a/.travis.yml b/.travis.yml
index f5da67a9..4861796e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -24,7 +24,7 @@ before_script:
 - export DISPLAY=:99.0
 - sh -e /etc/init.d/xvfb start
 - sleep 3
-- wget http://marks.hms.harvard.edu/evcouplings_test_cases.tar.gz
+- wget https://marks.hms.harvard.edu/evcouplings/models/data/evcouplings_test_cases.tar.gz
 - tar -xf evcouplings_test_cases.tar.gz -C $HOME/
 script:
 - coverage run -m unittest discover -s test -p "Test*.py"
diff --git a/evcouplings/align/alignment.py b/evcouplings/align/alignment.py
index 6714e6cc..0de50fd5 100644
--- a/evcouplings/align/alignment.py
+++ b/evcouplings/align/alignment.py
@@ -30,6 +30,8 @@
 ALPHABET_RNA_NOGAP = "ACGU"
 ALPHABET_RNA = GAP + ALPHABET_RNA_NOGAP
 
+HMMER_PREFIX_WARNING = "# WARNING: seq names have been made unique by adding a prefix of"
+
 
 def read_fasta(fileobj):
     """
@@ -111,7 +113,7 @@ def write_aln(sequences, fileobj, width=80):
     )
 
 
-def read_stockholm(fileobj, read_annotation=False):
+def read_stockholm(fileobj, read_annotation=False, raise_hmmer_prefixes=True):
     """
     Generator function to read Stockholm format alignment
     file (e.g. from hmmer).
@@ -129,6 +131,10 @@
         Stockholm alignment file
     read_annotation : bool, optional (default=False)
         Read annotation columns from alignment
+    raise_hmmer_prefixes : bool, optional (default: True)
+        HMMER adds number prefixes to sequence identifiers if
+        identifiers are not unique. If True, the parser will
+        raise an exception if the alignment has such prefixes.
 
     Returns
     -------
@@ -165,6 +171,17 @@
                 "Header missing. {}".format(line.rstrip())
             )
 
+        if raise_hmmer_prefixes and line.startswith(HMMER_PREFIX_WARNING):
+            raise ValueError(
+                "HMMER added identifier prefixes to alignment because of non-unique "
+                "sequence identifiers. Either some sequence identifier is present "
+                "twice in the sequence database, or your target sequence identifier is "
+                "the same as an identifier in the database. In the first case, please fix "
+                "your sequence database. In the second case, please choose a different "
+                "sequence identifier for your target sequence that does not overlap with "
+                "the sequence database."
+            )
+
         # annotation lines
         if line.startswith("#"):
             if read_annotation:
@@ -584,7 +601,8 @@ def from_dict(cls, sequences, **kwargs):
 
     @classmethod
     def from_file(cls, fileobj, format="fasta",
-                  a3m_inserts="first", **kwargs):
+                  a3m_inserts="first", raise_hmmer_prefixes=True,
+                  **kwargs):
         """
         Construct an alignment object by reading in an
         alignment file.
@@ -598,6 +616,12 @@ def from_file(cls, fileobj, format="fasta",
         a3m_inserts : {"first", "delete"}, optional (default: "first")
             Strategy to deal with inserts in a3m alignment files
             (see read_a3m documentation for details)
+        raise_hmmer_prefixes : bool, optional (default: True)
+            HMMER adds number prefixes to sequence identifiers in Stockholm
+            files if identifiers are not unique. If True, the parser will
+            raise an exception if a Stockholm alignment has such prefixes.
+        **kwargs
+            Additional arguments to be passed to class constructor
 
         Returns
         -------
@@ -618,7 +642,12 @@ def from_file(cls, fileobj, format="fasta",
                 seqs[seq_id] = seq
         elif format == "stockholm":
             # only reads first Stockholm alignment contained in file
-            ali = next(read_stockholm(fileobj, read_annotation=True))
+            ali = next(
+                read_stockholm(
+                    fileobj, read_annotation=True,
+                    raise_hmmer_prefixes=raise_hmmer_prefixes
+                )
+            )
             seqs = ali.seqs
             annotation["GF"] = ali.gf
             annotation["GC"] = ali.gc
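
The new `raise_hmmer_prefixes` guard can be exercised directly. Below is a minimal sketch; the two-sequence Stockholm snippet is fabricated for illustration, and only the warning line's prefix matters (it must match `HMMER_PREFIX_WARNING` above):

```python
from io import StringIO

from evcouplings.align.alignment import Alignment

# fabricated Stockholm input carrying the HMMER uniqueness warning
STO = (
    "# STOCKHOLM 1.0\n"
    "# WARNING: seq names have been made unique by adding a prefix of \"<seq#>|\"\n"
    "1|QUERY_SEQ ---ACDEF\n"
    "2|QUERY_SEQ ---ACDEH\n"
    "//\n"
)

# default behaviour: fail fast instead of silently keeping mangled identifiers
try:
    Alignment.from_file(StringIO(STO), format="stockholm")
except ValueError as e:
    print("caught expected error:", e)

# opting out restores the previous permissive behaviour
ali = Alignment.from_file(
    StringIO(STO), format="stockholm", raise_hmmer_prefixes=False
)
print(ali.ids)
```
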
diff --git a/evcouplings/align/protocol.py b/evcouplings/align/protocol.py
index 4c4fe9ee..8732b047 100644
--- a/evcouplings/align/protocol.py
+++ b/evcouplings/align/protocol.py
@@ -41,6 +41,40 @@
 )
 
 
+def _verify_sequence_id(sequence_id):
+    """
+    Verify if a target sequence identifier is in proper
+    format for the pipeline to run without errors
+    (not None, and contains no whitespace)
+
+    Parameters
+    ----------
+    sequence_id : str
+        Target sequence identifier to verify
+
+    Raises
+    ------
+    InvalidParameterError
+        If sequence identifier is not valid
+    """
+    if sequence_id is None:
+        raise InvalidParameterError(
+            "Target sequence identifier (sequence_id) must be defined and "
+            "cannot be None/null."
+        )
+
+    try:
+        if len(sequence_id.split()) != 1 or len(sequence_id) != len(sequence_id.strip()):
+            raise InvalidParameterError(
+                "Target sequence identifier (sequence_id) may not contain any "
+                "whitespace (spaces, tabs, ...)"
+            )
+    except AttributeError:
+        raise InvalidParameterError(
+            "Target sequence identifier (sequence_id) must be a string"
+        )
+
+
 def _make_hmmsearch_raw_fasta(alignment_result, prefix):
     """
     HMMsearch results do not contain the query sequence
@@ -79,7 +113,9 @@ def _add_gaps_to_query(query_sequence_ali, ali):
     if len(match_index) != query_sequence_ali.L:
         raise ValueError(
             "HMMsearch result {} does not have a one-to-one"
-            " mapping to the query sequence columns".format(ar["raw_alignment_file"])
+            " mapping to the query sequence columns".format(
+                alignment_result["raw_alignment_file"]
+            )
         )
 
     gapped_query_sequence = ""
@@ -663,10 +699,8 @@ def existing(**kwargs):
     # Target sequence of alignment
     sequence_id = kwargs["sequence_id"]
 
-    if sequence_id is None:
-        raise InvalidParameterError(
-            "Parameter sequence_id must be defined"
-        )
+    # check if sequence identifier is valid
+    _verify_sequence_id(sequence_id)
 
     # First, find focus sequence in alignment
     focus_index = None
@@ -997,6 +1031,9 @@ def jackhmmer_search(**kwargs):
     )
     prefix = kwargs["prefix"]
 
+    # check if sequence identifier is valid
+    _verify_sequence_id(kwargs["sequence_id"])
+
     # make sure output directory exists
     create_prefix_folders(prefix)
 
@@ -1258,6 +1295,9 @@ def _format_alignment_for_hmmbuild(input_alignment_file, **kwargs):
     )
     prefix = kwargs["prefix"]
 
+    # check if sequence identifier is valid
+    _verify_sequence_id(kwargs["sequence_id"])
+
     # make sure output directory exists
     create_prefix_folders(prefix)
 
@@ -1342,7 +1382,6 @@ def _format_alignment_for_hmmbuild(input_alignment_file, **kwargs):
         "hittable_file": ali["domtblout"],
     }
 
-
     # convert the raw output alignment to fasta format
     # and add the appropriate query sequence
     raw_focus_alignment_file = _make_hmmsearch_raw_fasta(outcfg, prefix)
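
Since `_verify_sequence_id()` is now the single chokepoint for identifier validation across `existing()`, `jackhmmer_search()`, and the hmmsearch path, its contract is easy to check in isolation. A small sketch:

```python
from evcouplings.align.protocol import _verify_sequence_id
from evcouplings.utils.config import InvalidParameterError

# a well-formed identifier passes silently
_verify_sequence_id("RASH_HUMAN")

# None, embedded/leading whitespace, and non-string values are all rejected
for bad in [None, "RASH HUMAN", " RASH_HUMAN", 42]:
    try:
        _verify_sequence_id(bad)
    except InvalidParameterError as e:
        print("rejected {!r}: {}".format(bad, e))
```
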
diff --git a/evcouplings/compare/sifts.py b/evcouplings/compare/sifts.py
index 433c916f..1c11ec23 100644
--- a/evcouplings/compare/sifts.py
+++ b/evcouplings/compare/sifts.py
@@ -14,40 +14,36 @@
 """
 from os import path
-import json
 from collections import OrderedDict
 from copy import deepcopy
 
 import pandas as pd
 import requests
-import numpy as np
 
 from evcouplings.align.alignment import (
     Alignment, read_fasta, parse_header
 )
-
 from evcouplings.align.protocol import (
-    jackhmmer_search, hmmbuild_and_search, _make_hmmsearch_raw_fasta
+    jackhmmer_search, hmmbuild_and_search
 )
-
 from evcouplings.align.tools import read_hmmer_domtbl
-from evcouplings.compare.mapping import alignment_index_mapping, map_indices
+from evcouplings.compare.mapping import map_indices
 from evcouplings.utils.system import (
-    get_urllib, ResourceError, valid_file, tempdir
+    get_urllib, ResourceError, valid_file, tempdir, temp
 )
 from evcouplings.utils.config import (
-    parse_config, check_required
+    parse_config, check_required, InvalidParameterError
 )
 from evcouplings.utils.helpers import range_overlap
 
-UNIPROT_MAPPING_URL = "http://www.uniprot.org/mapping/"
-SIFTS_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/csv/pdb_chain_uniprot.csv.gz"
+UNIPROT_MAPPING_URL = "https://www.uniprot.org/mapping/"
+SIFTS_URL = "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/flatfiles/csv/uniprot_segments_observed.csv.gz"
 SIFTS_REST_API = "http://www.ebi.ac.uk/pdbe/api/mappings/uniprot_segments/{}"
 
 # TODO: make this default parametrization more explicit (e.g. a config file in repository)
 # these parameters are fed as a default into SIFTS.by_alignment so that the method can be
 # easily used without a configuration file/any further setup
-JACKHMMER_CONFIG = """
+HMMER_CONFIG = """
 prefix:
 sequence_id:
 sequence_file:
@@ -115,12 +111,15 @@ def fetch_uniprot_mapping(ids, from_="ACC", to="ACC", format="fasta"):
     return r.text
 
 
-def find_homologs(**kwargs):
+def find_homologs(pdb_alignment_method="jackhmmer", **kwargs):
     """
     Identify homologs using jackhmmer or hmmbuild/hmmsearch
 
     Parameters
     ----------
+    pdb_alignment_method : {"jackhmmer", "hmmsearch"},
+        optional (default: "jackhmmer")
+        Sequence alignment method used for searching the PDB
     **kwargs
         Passed into jackhmmer / hmmbuild_and_search protocol
         (see documentation for available options)
@@ -135,7 +134,7 @@
     """
     # load default configuration
-    config = parse_config(JACKHMMER_CONFIG)
+    config = parse_config(HMMER_CONFIG)
 
     # update with overrides from kwargs
     config = {
@@ -152,10 +151,10 @@
     )
 
     # run hmmsearch (possibly preceded by hmmbuild)
-    if kwargs["pdb_alignment_method"] == "hmmsearch":
+    if pdb_alignment_method == "hmmsearch":
         # set up config to run hmmbuild_and_search on the unfiltered alignment file
         updated_config = deepcopy(config)
-        updated_config["alignment_file"] = config["raw_focus_alignment_file"]
+        updated_config["alignment_file"] = config.get("raw_focus_alignment_file")
         ar = hmmbuild_and_search(**updated_config)
 
     # For hmmbuild and search, we have to read the raw focus alignment file
@@ -166,7 +165,7 @@
     # run jackhmmer against sequence database
     # at this point we have already checked to ensure
     # that the input is either jackhmmer or hmmsearch
-    else:
+    elif pdb_alignment_method == "jackhmmer":
         ar = jackhmmer_search(**config)
 
         with open(ar["raw_alignment_file"]) as a:
@@ -176,6 +175,11 @@
         # if necessary
         with open(config["prefix"] + "_raw.fasta", "w") as f:
             ali.write(f)
+    else:
+        raise InvalidParameterError(
+            "Invalid pdb_alignment_method selected. Valid options are: " +
+            ", ".join(["jackhmmer", "hmmsearch"])
+        )
 
     # read hmmer hittable and simplify
     hits = read_hmmer_domtbl(ar["hittable_file"])
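
With `pdb_alignment_method` promoted to an explicit parameter, dispatch no longer depends on a key being present in `**kwargs`, and unknown values fail fast. A hypothetical invocation; identifiers and paths are placeholders, and a real run also needs the search database configured (see the defaults in `HMMER_CONFIG`):

```python
from evcouplings.compare.sifts import find_homologs

# default: jackhmmer search starting from the target sequence
result = find_homologs(
    prefix="output/myquery_pdb",
    sequence_id="MYQUERY_ID",
    jackhmmer="jackhmmer",
)

# alternative: hmmbuild + hmmsearch starting from an existing alignment
result = find_homologs(
    pdb_alignment_method="hmmsearch",
    prefix="output/myquery_pdb",
    sequence_id="MYQUERY_ID",
    raw_focus_alignment_file="output/myquery_raw_focus.fasta",
    hmmbuild="hmmbuild",
    hmmsearch="hmmsearch",
)

# anything else now raises InvalidParameterError:
# find_homologs(pdb_alignment_method="blast")
```
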
@@ -268,7 +272,7 @@ def __init__(self, sifts_table_file, sequence_file=None):
         )
 
         # final table has still some entries where lengths do not match,
-        # remove thlse
+        # remove these
         self.table = self.table.query(
             "(resseq_end - resseq_start) == (uniprot_end - uniprot_start)"
         )
@@ -288,7 +292,7 @@
         """
         Create modified SIFTS mapping table (based on
         file at SIFTS_URL). For some of the entries,
-        the Uniprot sequence ranges do not map to a.
+        the Uniprot sequence ranges do not map to a
         SEQRES sequence range of the same length. These
         PDB IDs will be entirely replaced by a segment-
         based mapping extracted from the SIFTS REST API.
@@ -325,12 +329,14 @@ def extract_rows(M, pdb_id):
             return res
 
-        get_urllib(SIFTS_URL, sifts_table_file)
+        # download SIFTS table (gzip-compressed csv) to temp file
+        temp_download_file = temp()
+        get_urllib(SIFTS_URL, temp_download_file)
 
         # load table and rename columns for internal use, if SIFTS
         # ever decided to rename theirs
         table = pd.read_csv(
-            sifts_table_file, comment="#",
+            temp_download_file, comment="#",
             compression="gzip"
         ).rename(
             columns={
@@ -346,11 +352,17 @@
             }
         )
 
+        # TODO: remove the following if new segment-based table proves to be a robust solution
+        """
+        # this block disabled for now due to use of new table
+        # based on observed UniProt segments
+        # - can probably be removed eventually
+
         # identify problematic PDB IDs
         problematic_ids = table.query(
             "(resseq_end - resseq_start) != (uniprot_end - uniprot_start)"
         ).pdb_id.unique()
-
+
         # collect new mappings from segment based REST API
         res = []
         for i, pdb_id in enumerate(problematic_ids):
@@ -363,12 +375,17 @@
         # remove bad PDB IDs from table and add new mapping
         new_table = table.loc[~table.pdb_id.isin(problematic_ids)]
+
+        # also disabled due to use of new table based on observed
+        # UniProt segments - can probably be removed eventually
+
         new_table = new_table.append(
             pd.DataFrame(res).loc[:, table.columns]
         )
+        """
 
         # save for later reuse
-        new_table.to_csv(sifts_table_file, index=False)
+        table.to_csv(sifts_table_file, index=False)
 
     def _add_uniprot_ids(self):
         """
@@ -386,7 +403,7 @@ def _add_uniprot_ids(self):
         # add column to dataframe
         self.table.loc[:, "uniprot_id"] = self.table.loc[:, "uniprot_ac"].map(ac_to_id)
 
-    def create_sequence_file(self, output_file):
+    def create_sequence_file(self, output_file, chunk_size=1000, max_retries=100):
         """
         Create FASTA sequence file containing all UniProt
         sequences of proteins in SIFTS. This file is required
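
Downloading to a throwaway location means `sifts_table_file` only ever holds the processed, uncompressed table, so re-reading it later cannot trip over the gzip compression of the raw download. The pattern in isolation, using the two helpers visible in the diff (the output path is a placeholder):

```python
import pandas as pd

from evcouplings.utils.system import get_urllib, temp

SIFTS_URL = (
    "ftp://ftp.ebi.ac.uk/pub/databases/msd/sifts/"
    "flatfiles/csv/uniprot_segments_observed.csv.gz"
)

# fetch the gzip-compressed CSV into a temporary file ...
temp_download_file = temp()
get_urllib(SIFTS_URL, temp_download_file)

# ... parse it, stating the compression explicitly since the temp
# path carries no .gz suffix for pandas to infer from ...
table = pd.read_csv(temp_download_file, comment="#", compression="gzip")

# ... and persist only the processed, uncompressed table
table.to_csv("pdb_mapping_table.csv", index=False)
```
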
""" ids = self.table.uniprot_ac.unique().tolist() - CHUNK_SIZE = 1000 - chunks = [ - ids[i:i + CHUNK_SIZE] for i in range(0, len(ids), CHUNK_SIZE) + # retrieve sequences in chunks since ID mapping service + # tends to fail on large requests + id_chunks = [ + ids[i:i + chunk_size] for i in range(0, len(ids), chunk_size) ] - with open(output_file, "w") as f: - for ch in chunks: - # fetch sequence chunk - seqs = fetch_uniprot_mapping(ch) + # store individual retrieved chunks as list of strings + seq_chunks = [] + + # keep track of how many retries were necessary and + # abort if number exceeds max_retries + num_retries = 0 + + for ch in id_chunks: + # fetch sequence chunk; + # if there is a problem retry as long as we stay within + # maximum number of retries + while True: + try: + seqs = fetch_uniprot_mapping(ch) + break + except requests.ConnectionError as e: + # count as failed try + num_retries += 1 + + # if we retried too often, abort + if num_retries > max_retries: + raise ResourceError( + "Could not fetch sequences for SIFTS mapping tables from UniProt since " + "maximum number of retries after connection errors was exceeded. Retry " + "at a later time, or call SIFTS.create_sequence_file() with a higher value " + "for max_retries." + ) from e + + # rename identifiers in sequence file, so + # we can circumvent Uniprot sequence identifiers + # being prefixed by hmmer if a hit has exactly the + # same identifier as the query sequence + seqs = seqs.replace( + ">sp|", ">evsp|", + ).replace( + ">tr|", ">evtr|", + ) - # then store to FASTA file - f.write(seqs) + assert seqs.endswith("\n") + + # store for writing + seq_chunks.append(seqs) + + # store sequences to FASTA file in one go at the end + with open(output_file, "w") as f: + f.write("".join(seq_chunks)) self.sequence_file = output_file @@ -608,8 +672,6 @@ def by_alignment(self, min_overlap=20, reduce_chains=False, **kwargs): Find structures by sequence alignment between query sequence and sequences in PDB. - # TODO: offer option to start from HMM profile for this - Parameters ---------- min_overlap : int, optional (default: 20) @@ -621,11 +683,29 @@ def by_alignment(self, min_overlap=20, reduce_chains=False, **kwargs): protein in PDB structures). Should be set to False to identify homomultimeric contacts. **kwargs - Passed into jackhmmer_search protocol - (see documentation for available options). - Additionally, if "prefix" is given, individual - mappings will be saved to files suffixed by - the respective key in mapping table. + Defines the behaviour of find_homologs() function + used to find homologs by sequence alignment: + - which alignment method is used + (pdb_alignment_method: {"jackhmmer", "hmmsearch"}, + default: "jackhmmer"), + - parameters passed into the protocol for the selected + alignment method (evcouplings.align.jackhmmer_search or + evcouplings.align.hmmbuild_and_search). 
diff --git a/evcouplings/couplings/protocol.py b/evcouplings/couplings/protocol.py
index e453bd20..cf7ee323 100644
--- a/evcouplings/couplings/protocol.py
+++ b/evcouplings/couplings/protocol.py
@@ -234,7 +234,7 @@ def infer_plmc(**kwargs):
     # store useful information about model in outcfg
     outcfg.update({
         "num_sites": plmc_result["num_valid_sites"],
-        "num_sequences": plmc_result["num_valid_seqs"],
+        "num_valid_sequences": plmc_result["num_valid_seqs"],
         "effective_sequences": plmc_result["effective_samples"],
         "region_start": plmc_result["region_start"],
     })
@@ -607,7 +607,7 @@ def mean_field(**kwargs):
     # store useful information about model in outcfg
     outcfg.update({
         "num_sites": model.L,
-        "num_sequences": model.N_valid,
+        "num_valid_sequences": model.N_valid,
         "effective_sequences": float(round(model.N_eff, 1)),
         "region_start": int(model.index_list[0]),
     })
diff --git a/evcouplings/visualize/pairs.py b/evcouplings/visualize/pairs.py
index dfadb321..4a9f27db 100644
--- a/evcouplings/visualize/pairs.py
+++ b/evcouplings/visualize/pairs.py
@@ -1204,6 +1204,10 @@ def enrichment_pymol_script(enrichment_table, output_file,
         # medium
         t.loc[t.iloc[boundary1:boundary2].index, "color"] = "orange"
+
+        # set the boundary for number of residues to be rendered as spheres
+        sphere_boundary = boundary2
+
     else:
         t = deepcopy(enrichment_table)
@@ -1248,8 +1252,11 @@
             t.loc[t.iloc[prior_boundary:boundary].index, "color"] = 'color{}'.format(idx)
             prior_boundary = boundary
 
+        # set the boundary for number of residues to be rendered as spheres
+        sphere_boundary = boundary_list[1]
+
     if sphere_view:
-        t.loc[t.iloc[0:boundary].index, "show"] = "spheres"
+        t.loc[t.iloc[0:sphere_boundary].index, "show"] = "spheres"
 
     if chain is not None:
         chain_sel = ", chain '{}'".format(chain)
diff --git a/setup.py b/setup.py
index ca44e7be..1d5b1964 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
     name='evcouplings',
 
     # Version:
-    version='0.0.4',
+    version='0.0.5',
 
     description='A Framework for evolutionary couplings analysis',
     long_description=readme,
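
Note that the `num_sequences` → `num_valid_sequences` rename in the couplings stage output is a breaking change for anything that reads `outcfg` written by earlier runs. A defensive lookup for a hypothetical downstream consumer that must handle output from both versions:

```python
def get_num_valid_sequences(outcfg):
    """
    Read the valid-sequence count from couplings stage output,
    accepting both the old (pre-0.0.5) and the new key name.
    """
    try:
        return outcfg["num_valid_sequences"]
    except KeyError:
        # output written by evcouplings <= 0.0.4
        return outcfg["num_sequences"]

print(get_num_valid_sequences({"num_valid_sequences": 12345}))  # new key
print(get_num_valid_sequences({"num_sequences": 12345}))        # old key
```
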