Source code for academic_tracker.user_input_checking

# -*- coding: utf-8 -*-
"""
User Input Checking
~~~~~~~~~~~~~~~~~~~

Functions that check the user input for errors.
"""


import sys
import re
import copy

import jsonschema

from . import tracker_schema
from . import helper_functions






[docs]
def tracker_validate(instance, schema, pattern_messages=None, cls=None, *args, **kwargs):
    """Wrapper around jsonschema.validate to give better error messages.
    
    If validation fails with one of the validator keywords handled below, a 
    readable message is printed and the program exits through sys.exit(). 
    Any other validation error is re-raised unchanged.
    
    Args:
        instance (dict): JSON as a dict to validate
        schema (dict): JSON schema as a dict to validate instance against. Its "title" is used in the error message.
        pattern_messages (dict): if the instance has a ValidationError of the pattern type then look up the attribute that failed the pattern in this dict and see if there is a custom message
        cls: validator class passed through to jsonschema.validate.
        *args: extra positional arguments passed through to jsonschema.validate.
        **kwargs: extra keyword arguments passed through to jsonschema.validate.
        
    Raises:
        jsonschema.ValidationError: If an unexpected jsonschema error happens this is raised rather than a system exit.
    """
    
    ## A None default is used so that one dict is not shared between calls.
    if pattern_messages is None:
        pattern_messages = {}
    
    ## Suffixes that do not depend on the value of the failed keyword.
    fixed_messages = {"minLength": " cannot be an empty string.",
                      "maxLength": " is too long.",
                      "minItems": " cannot be empty."}
    
    try:
        ## Everything is passed positionally so that *args cannot collide with instance, schema, or cls.
        jsonschema.validate(instance, schema, cls, *args, **kwargs)
    except jsonschema.ValidationError as e:
        ## To see the contents of the error when building a better message, print the items of e._contents().
        
        message = "ValidationError: An error was found in the " + schema["title"] + ". \n"
        custom_message = ""
        ## Location of the failing value, formatted like ['Authors']['name'][0].
        entry_path = "[%s]" % "][".join(repr(index) for index in e.relative_path)
        
        if e.validator == "minProperties":
            message += "The " + schema["title"] + " cannot be empty."
        elif e.validator == "required":
            property_match = re.match(r"(\'.*\')", e.message)
            if property_match is None:
                ## The message is not in the expected form, so treat it as an unexpected error.
                raise
            required_property = property_match.group(1)
            if len(e.relative_path) == 0:
                message += "The required property " + required_property + " is missing."
            else:
                message += "The entry " + entry_path + " is missing the required property " + required_property + "."
        ## In an older version of JSON Schema the keyword was "dependencies" instead of "dependentRequired".
        elif e.validator in ("dependencies", "dependentRequired"):
            message += "The entry " + entry_path + " is missing a dependent property.\n"
            message += e.message
        elif e.validator in fixed_messages:
            custom_message = fixed_messages[e.validator]
        elif e.validator == "type":
            if isinstance(e.validator_value, list):
                allowed_types = ", ".join("\'" + allowed_type + "\'" for allowed_type in e.validator_value)
                custom_message = " is not any of the allowed types: [" + allowed_types + "]."
            else:
                custom_message = " is not of type \"" + e.validator_value + "\"."
        elif e.validator == "enum":
            custom_message = " is not one of [" + "%s" % ", ".join(repr(index) for index in e.validator_value) + "]"
        elif e.validator == "format":
            custom_message = " is not a valid " + e.validator_value + "."
        ## A pattern failure at the root has an empty path, so there is no attribute name to look up.
        elif e.validator == "pattern" and e.relative_path and e.relative_path[-1] in pattern_messages:
            custom_message = pattern_messages[e.relative_path[-1]]
        elif e.validator == "minimum":
            custom_message = " must be greater than or equal to " + str(e.validator_value)
        elif e.validator == "maximum":
            custom_message = " must be less than or equal to " + str(e.validator_value)
        else:
            raise
        
        
        if custom_message:
            message = message + "The value for " + entry_path + custom_message
        print(message)
        sys.exit()






[docs]
def cli_inputs_check(args):
    """Validate the command line inputs.
    
    The inputs are validated with tracker_validate against the cli_schema 
    in the tracker_schema module, with format checking turned on.
            
    Args:
        args (dict): dict from docopt.
    """
    
    ## The values are validated exactly as given. Splitting the comma separated 
    ## options (--grants, --affiliations, --cc_email) into lists and converting 
    ## --cutoff_year to an int is not done here.
    cli_schema = tracker_schema.cli_schema
    format_checker = jsonschema.FormatChecker()
    tracker_validate(instance=args, schema=cli_schema, format_checker=format_checker)









[docs]
def config_file_check(config_json, no_ORCID, no_GoogleScholar, no_Crossref, no_PubMed):
    """Validate the configuration JSON against a copy of the config schema.
    
    The schema comes from the tracker_schema module. Sections for sources 
    that will not be searched are removed from the copy, both from its 
    "properties" and from its "required" list, before validating. 
    
    Args:
        config_json (dict): dict with the same structure as the configuration JSON file.
        no_ORCID (bool): if True the ORCID_search section is not checked.
        no_GoogleScholar (bool): if True and no_Crossref is True the Crossref_search section is not checked.
        no_Crossref (bool): if True and no_GoogleScholar is True the Crossref_search section is not checked.
        no_PubMed (bool): if True the PubMed_search section is not checked.
    """
    
    ## Work on a copy so the schema in tracker_schema is left untouched.
    schema = copy.deepcopy(tracker_schema.config_schema)
    
    ## Each schema section paired with whether it should be dropped.
    sections_to_drop = (("ORCID_search", no_ORCID),
                        ("Crossref_search", no_Crossref and no_GoogleScholar),
                        ("PubMed_search", no_PubMed))
    for section, drop_section in sections_to_drop:
        if drop_section:
            del schema["properties"][section]
            schema["required"].remove(section)
    
    orcid_message = " is not a valid ORCID. It must match the regex \\d{4}-\\d{4}-\\d{4}-\\d{3}[0,1,2,3,4,5,6,7,8,9,X]"
    tracker_validate(instance=config_json, 
                     schema=schema, 
                     pattern_messages={"ORCID":orcid_message}, 
                     format_checker=jsonschema.FormatChecker())



            


[docs]
def ref_config_file_check(config_json, no_Crossref, no_PubMed):
    """Validate the truncated configuration JSON against a copy of the ref_config schema.
    
    The schema comes from the tracker_schema module. Sections for sources 
    that will not be searched are removed from the copy, both from its 
    "properties" and from its "required" list, before validating.
    
    Args:
        config_json (dict): dict with a truncated structure of the configuration JSON file.
        no_Crossref (bool): if True the Crossref_search section is not checked.
        no_PubMed (bool): if True the PubMed_search section is not checked.
    """
    
    ## Work on a copy so the schema in tracker_schema is left untouched.
    schema = copy.deepcopy(tracker_schema.ref_config_schema)
    
    ## Each schema section paired with whether it should be dropped.
    sections_to_drop = (("Crossref_search", no_Crossref),
                        ("PubMed_search", no_PubMed))
    for section, drop_section in sections_to_drop:
        if drop_section:
            del schema["properties"][section]
            schema["required"].remove(section)
    
    format_checker = jsonschema.FormatChecker()
    tracker_validate(instance=config_json, schema=schema, format_checker=format_checker)





[docs]
def _check_report_attributes(report, report_description):
    """Exit if the "sort" or "column_order" of one report conflict with its "columns".
    
    Nothing is checked if the report has no "columns". On the first conflict 
    found a message is printed with helper_functions.vprint and the program 
    exits through sys.exit().
    
    Args:
        report (dict): the attributes of a single report.
        report_description (str): text that identifies the report in the error messages.
    """
    if "columns" not in report:
        return
    
    columns = report["columns"]
    for attribute in ("sort", "column_order"):
        if attribute not in report:
            continue
        
        names_not_in_columns = [name for name in report[attribute] if name not in columns]
        if names_not_in_columns:
            helper_functions.vprint("ValidationError: The \"" + attribute + "\" attribute for the " + report_description + " has values that are not column names in \"columns\".")
            helper_functions.vprint("The following names in \"" + attribute + "\" could not be matched to a column in \"columns\":\n\n" + "\n".join(names_not_in_columns))
            sys.exit()
        
        ## Every name is a known column at this point, so only the count is compared.
        if attribute == "column_order" and len(report["column_order"]) != len(columns):
            helper_functions.vprint("ValidationError: The \"column_order\" attribute for the " + report_description + " does not have all of the column names in \"columns\". Every column in \"columns\" must be in \"column_order\".")
            sys.exit()


def config_report_check(config_json):
    """Check that the report attributes don't have conflicts.
    
    Make sure that the values in sort and column_order are in columns, 
    and that column_order has as many entries as columns. The summary_report, 
    and the collaborator_report and project_report of every project and 
    every author, are checked. If a conflict is found a message is printed 
    and the program exits.
    
    Args:
        config_json (dict): dict with the same structure as the configuration JSON file.
    """
    if "summary_report" in config_json:
        _check_report_attributes(config_json["summary_report"], "summary_report")
    
    ## Defined outside of the section checks so it is available for "Authors" 
    ## even when there is no "project_descriptions" section.
    report_keys = ["collaborator_report", "project_report"]
    
    ## Each section that can hold reports, paired with the text used to name its entries in messages.
    sections = (("project_descriptions", " in project "),
                ("Authors", " for author "))
    for section, entry_text in sections:
        if section not in config_json:
            continue
        for entry_name, entry_attributes in config_json[section].items():
            for report_key in report_keys:
                if report_key in entry_attributes:
                    _check_report_attributes(entry_attributes[report_key], report_key + entry_text + entry_name)





[docs]
def prev_pubs_file_check(prev_pubs):
    """Validate the previous publications dict.
    
    The dict is validated with tracker_validate against the 
    publications_schema in the tracker_schema module, with format checking 
    turned on.
    
    Args:
        prev_pubs (dict): dict with the same structure as the previous publications JSON file.
    """
    
    publications_schema = tracker_schema.publications_schema
    format_checker = jsonschema.FormatChecker()
    tracker_validate(instance=prev_pubs, schema=publications_schema, format_checker=format_checker)

    



[docs]
def tok_reference_check(tok_ref):
    """Validate the tokenized reference dict.
    
    The dict is validated with tracker_validate against the tok_schema in 
    the tracker_schema module, with format checking turned on.
            
    Args:
        tok_ref (dict): dict with the same structure as the tokenized reference JSON file.
    """
    
    tok_schema = tracker_schema.tok_schema
    format_checker = jsonschema.FormatChecker()
    tracker_validate(instance=tok_ref, schema=tok_schema, format_checker=format_checker)