package util;

import java.io.*;
import java.net.*;
import java.nio.*;
import java.nio.charset.Charset;
import java.util.*;
import java.text.*;
import java.util.regex.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Validates a Library of Congress Control Number (LCCN) received as a request
 * parameter and classifies it as a bibliographic or authority record.
 *
 * <p>All work happens in the constructor; afterwards the getters expose the
 * outcome. Typical use:
 *
 * <pre>
 * PermaLinkLccnValidatorUtil perma01 = new PermaLinkLccnValidatorUtil(
 *         lccn, authLccnPrefixList, bibLccnPrefixList, masterLccnPrefixList);
 * perma01.getLccnRawMfer();       // decoded input
 * perma01.getLccnPadded();        // prefix + "%20" padding + serial, or "NULL"
 * perma01.getLccnNoWhiteSpace();  // LCCN without blanks (form used for Voyager)
 * perma01.getRecordType();        // "BIBLIOGRAPHIC", "AUTHORITY" or "ERROR"
 * perma01.getValidateResult();    // true when the LCCN validated
 * perma01.getTraceMessage();      // human-readable trace of the decision
 * </pre>
 *
 * <p>LCCN structure notes carried over from the original author:
 * <ul>
 *   <li>Leading whitespace is trimmed from the LCCN before it reaches this class.</li>
 *   <li>Serial of 8 digits: LCCNs assigned before 1/1/2001 have a 3 character
 *       prefix + 8 character serial + trailing space. In bib records the prefix
 *       may be 3 blanks, 1 alpha + 2 blanks, 2 alphas + 1 blank, or 3 alphas.
 *       In authority records it may be 1 alpha + 2 blanks or 2 alphas + 1 blank
 *       (a 3-alpha "tes" prefix is ignored or shortened to 2 characters).</li>
 *   <li>Serial of 10 digits: LCCNs assigned after 1/1/2001 have a 2 character
 *       prefix + 10 character serial. In bib records the prefix may be 2 blanks,
 *       1 alpha + 1 blank, or 2 alphas; in authority records 1 alpha + 1 blank
 *       or 2 alphas.</li>
 *   <li>Prefix "no" with an 8 digit serial: serials starting with 3 or 40 are
 *       bibliographic, everything else is an authority record.</li>
 *   <li>LCDB padding: 8 digit serial with a 1 or 2 character prefix gets a
 *       space, a 3 character prefix does not; 10 digit serial with a 1
 *       character prefix gets a space, a 2 character prefix does not.</li>
 * </ul>
 *
 * <p>NOTE(review): the class is {@code Serializable} without a
 * {@code serialVersionUID}; the (partly unused) instance fields are therefore
 * left untouched so the default serialized form does not change. The instance
 * {@code logger} is not guaranteed to be serializable - confirm whether
 * instances are ever actually serialized.
 */
class PermaLinkLccnValidatorUtil implements java.io.Serializable {

    /** Width to which the part after a hyphen is zero-padded during normalization. */
    private static final int HYPHEN_SERIAL_WIDTH = 6;

    /** Fixed parts of the SRU searchRetrieve request. */
    private static final String QUERY_HEAD =
            "?operation=searchRetrieve&version=1.1&query=bath.lccn=%22";
    private static final String QUERY_TAIL =
            "%22&recordSchema=marcxml&recordPacking=xml&startRecord=1&maximumRecords=10";

    /** Accepted raw prefixes (letters followed by optional blanks) by raw prefix length. */
    private static final Pattern PREFIX_LEN_1 = Pattern.compile("[a-z]");
    private static final Pattern PREFIX_LEN_2 = Pattern.compile("[a-z][\\s]|[a-z][a-z]");
    private static final Pattern PREFIX_LEN_3 =
            Pattern.compile("[a-z][\\s][\\s]|[a-z][a-z][\\s]|[a-z][a-z][a-z]");

    private String userDirString = System.getProperty("user.dir");
    private String lineEnd = System.getProperty("line.separator");
    private String fSep = System.getProperty("file.separator");
    private File userDirFile = new File(userDirString);
    private File propertiesFile = new File(userDirFile, "config/configuration.xml");
    private Date dt = new Date();
    private SimpleDateFormat sdf = new SimpleDateFormat("EEE, d MMMMM yyyy HH:mm:ss a z");
    private SimpleDateFormat sdfShort = new SimpleDateFormat("d_MMMMM_yyyy_HH.mm.ss_a");
    private SimpleDateFormat sdfdd = new SimpleDateFormat("yyyyMMdd");
    private SimpleDateFormat sdfRecordCreateDate = new SimpleDateFormat("MM-dd-yyyy"); // mm-dd-yyyy

    private LinkedList<String> authLccnPrefixList = new LinkedList<String>();
    private LinkedList<String> bibLccnPrefixList = new LinkedList<String>();
    private LinkedList<String> masterLccnPrefixList = new LinkedList<String>();

    private static final Log loggerStatic = LogFactory.getLog(PermaLinkLccnValidatorUtil.class);
    private Log logger = LogFactory.getLog(PermaLinkLccnValidatorUtil.class);

    private String baseURL_LX2 = "http://lx2.loc.gov:210/";
    // voyager search : marcxml
    private String voyagerSearch = "lcdb";
    // name authority records : marcxml
    private String nameAuthority = "NAF";
    // subject authority records : marcxml
    private String subjectAuthority = "SAF";

    private String lccnNoWhiteSpace = null;
    private String lccnRawMfer = null;
    private String lccnCleanedMfer = null;
    private String lccnPadded = null;
    private String serialMfer = null;
    private Integer serialMferLen = null;
    private String gatewayVarGeneratorRecordTypeString = null;

    private String lccn = null;
    private String recordType = null;
    private Boolean validateResult = true;
    private String traceMessage = null;

    /**
     * Decodes, normalizes and validates {@code lccnRawMfer}.
     *
     * <p>The constructor never throws. Any failure (null arguments, malformed
     * percent-encoding, empty prefix lists, ...) is logged and recorded as an
     * invalid result: {@code getValidateResult()} is {@code false},
     * {@code getRecordType()} is {@code "ERROR"} and {@code getTraceMessage()}
     * carries the cause.
     *
     * @param lccnRawMfer          URL-encoded LCCN as received from the request
     * @param authLccnPrefixList   prefixes that identify authority records
     * @param bibLccnPrefixList    prefixes that identify bibliographic records
     * @param masterLccnPrefixList every prefix that is accepted at all
     */
    @SuppressWarnings("unchecked") // the public signature takes raw Lists; elements are expected to be Strings
    public PermaLinkLccnValidatorUtil(
            String lccnRawMfer,
            List authLccnPrefixList,
            List bibLccnPrefixList,
            List masterLccnPrefixList) {
        try {
            this.authLccnPrefixList = new LinkedList<String>(authLccnPrefixList);
            // Fix: the original copied authLccnPrefixList here as well.
            this.bibLccnPrefixList = new LinkedList<String>(bibLccnPrefixList);
            this.masterLccnPrefixList = new LinkedList<String>(masterLccnPrefixList);

            this.lccnRawMfer = URLDecoder.decode(lccnRawMfer, "UTF-8");
            this.lccnCleanedMfer = cleanLccnParam(lccnRawMfer);
            // Serial = the digits of the cleaned LCCN ("\\D" rather than the original
            // "[^\\d+]", which also kept literal '+' characters).
            this.serialMfer = lccnCleanedMfer.replaceAll("\\D", "");
            this.serialMferLen = Integer.valueOf(serialMfer.length());

            doMain();
        } catch (Exception e) {
            // Fix: the original only printed the stack trace, leaving
            // validateResult == true and recordType == null for a failed run.
            logger.error("LCCN validation failed for input: " + lccnRawMfer, e);
            setValidateResult(false);
            setRecordType("ERROR");
            setTraceMessage("ERROR: " + e);
            if (lccnPadded == null || lccnPadded.length() == 0) {
                setLccnPadded("NULL");
            }
        }
    }

    /** Intentionally empty entry point kept from the original file. */
    public static void main(String[] args) throws Exception {
    }

    /**
     * Runs the validation and fills in the result fields read by the getters.
     *
     * @throws Exception if one of the three prefix lists is empty, or if
     *                   {@link #generateLccnCinclude()} fails
     */
    public void doMain() throws Exception {
        requireNonEmpty(authLccnPrefixList, "authLccnPrefixList");
        requireNonEmpty(bibLccnPrefixList, "bibLccnPrefixList");
        // Fix: the original reported "bibLccnPrefixList" when the master list was empty.
        requireNonEmpty(masterLccnPrefixList, "masterLccnPrefixList");

        Collections.sort(authLccnPrefixList);
        Collections.sort(bibLccnPrefixList);
        Collections.sort(masterLccnPrefixList);

        String result = generateLccnCinclude();

        if (lccnPadded == null || lccnPadded.length() == 0) {
            setLccnPadded("NULL");
        }
        // A bibliographic lookup (Voyager) needs the LCCN without blanks.
        setLccnNoWhiteSpace(lccn.replaceAll("\\s+", ""));

        if (result.contains("ERROR")) {
            setValidateResult(false);
            setRecordType("ERROR");
        } else {
            setRecordType(gatewayVarGeneratorRecordTypeString);
        }

        if (getValidateResult()) {
            setTraceMessage("Valid LCCN" + "\n" + result);
        } else {
            setTraceMessage(result);
        }
    }

    /** Logs the size of {@code list}, or logs and throws when it is empty. */
    private void requireNonEmpty(List<String> list, String name) {
        if (list.isEmpty()) {
            String message = "========== ERROR: " + name + " IS EMPTY ==========";
            logger.error(message);
            throw new IllegalArgumentException(message);
        }
        logger.info(name + ".size():" + list.size() + ":::");
    }

    public void setLccnNoWhiteSpace(String lccnNoWhiteSpace) {
        this.lccnNoWhiteSpace = lccnNoWhiteSpace;
    }

    public String getLccnNoWhiteSpace() {
        return lccnNoWhiteSpace;
    }

    public void setLccnPadded(String lccnPadded) {
        this.lccnPadded = lccnPadded;
    }

    public String getLccnPadded() {
        return lccnPadded;
    }

    public void setLccnRawMfer(String lccnRawMfer) {
        this.lccnRawMfer = lccnRawMfer;
    }

    public String getLccnRawMfer() {
        return lccnRawMfer;
    }

    public void setValidateResult(Boolean test) {
        this.validateResult = test;
    }

    /** @return {@code true} only when validation succeeded (a {@code null} flag counts as failure) */
    public boolean getValidateResult() {
        return Boolean.TRUE.equals(validateResult);
    }

    public void setRecordType(String recordType) {
        this.recordType = recordType;
    }

    public String getRecordType() {
        return recordType;
    }

    public void setTraceMessage(String traceMessage) {
        this.traceMessage = traceMessage;
    }

    public String getTraceMessage() {
        return traceMessage;
    }

    /**
     * Produces the cleaned LCCN ({@code lccnCleanedMfer}) from the raw parameter.
     *
     * <p>Steps: URL-decode; drop a backslash and everything after it; if a hyphen
     * is present and is followed by one to six digits, remove the hyphen and
     * left-pad those digits with zeros to six; lower-case the result. Input
     * that consists only of {@code a-z}, {@code 0-9} and blanks comes back
     * unchanged.
     *
     * <p>Changes against the original implementation:
     * <ul>
     *   <li>{@code null} no longer raises a NullPointerException (the guard used
     *       the non-short-circuit {@code &}).</li>
     *   <li>The hyphen branch appended the padded serial to the complete input
     *       ("n79-18774" became "n79-18774-018774") and failed on an empty or very
     *       long suffix; it now follows the LCCN normalization rule.</li>
     *   <li>Hyphen handling and lower-casing are applied to every input instead of
     *       only to input containing characters outside {@code [a-z0-9-]}.</li>
     * </ul>
     *
     * <p>NOTE(review): the published LCCN normalization strips at a forward slash;
     * this code has always looked for a backslash. Left as is - confirm which is
     * wanted.
     *
     * <p>Side effect: stores the decoded input in {@code lccnRawMfer}.
     *
     * @param lccn URL-encoded LCCN; may be {@code null}
     * @return the cleaned LCCN, or the current {@code lccnRawMfer} when
     *         {@code lccn} is {@code null}
     * @throws IllegalArgumentException if {@code lccn} contains malformed percent-encoding
     */
    public String cleanLccnParam(String lccn) {
        if (lccn == null) {
            return lccnRawMfer;
        }

        String decoded;
        try {
            decoded = URLDecoder.decode(lccn, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Cannot happen in practice: every JVM is required to support UTF-8.
            logger.error("UTF-8 not supported while decoding LCCN", e);
            decoded = lccn;
        }
        lccnRawMfer = decoded;

        String result = decoded;
        int backslash = result.indexOf('\\');
        if (backslash >= 0) {
            result = result.substring(0, backslash);
        }

        int hyphen = result.indexOf('-');
        if (hyphen >= 0) {
            String suffix = result.substring(hyphen + 1);
            boolean paddable = suffix.length() > 0
                    && suffix.length() <= HYPHEN_SERIAL_WIDTH
                    && suffix.replaceAll("\\d", "").length() == 0;
            if (paddable) {
                // Manual padding keeps leading zeros and avoids locale-specific digits.
                String zeros = "000000".substring(suffix.length());
                result = result.substring(0, hyphen) + zeros + suffix;
            }
            // Otherwise the text is left alone and is expected to fail validation.
        }

        return result.toLowerCase(Locale.ROOT);
    }

    /**
     * Validates the cleaned LCCN and builds the trace text.
     *
     * <p>An all-digit LCCN of length 8 or 10 is accepted as bibliographic.
     * Anything else must carry a prefix that passes
     * {@link #testLccnPrefixSyntax} and is contained in the master prefix list;
     * then the padded LCCN and the gateway URL are generated. Otherwise the
     * trace contains one or more lines starting with {@code "ERROR"}.
     *
     * <p>NOTE(review): the prefix branch of the original file referenced
     * variables that were declared nowhere and did not compile. It is
     * reconstructed here from the helper methods of this class; deriving the
     * prefix as "raw prefix without whitespace" is an assumption to confirm.
     *
     * @return the trace text; contains {@code "ERROR"} when the LCCN is invalid
     * @throws Exception propagated from the helper methods
     */
    public String generateLccnCinclude() throws Exception {
        StringBuffer sb = new StringBuffer();
        logger.info("serialMfer:" + serialMfer + ":::");
        logger.info("serialMferLen:" + serialMferLen + ":::");
        logger.info("lccnCleanedMfer:" + lccnCleanedMfer + ":::");
        logger.info("lccnCleanedMferLen:" + lccnCleanedMfer.length() + ":::");

        int serialLen = serialMferLen.intValue();
        boolean allDigits = lccnCleanedMfer.length() == serialLen;

        if (allDigits && (serialLen == 8 || serialLen == 10)) {
            // CASE 01 / CASE 02: nothing but an 8 or 10 digit serial.
            lccn = lccnCleanedMfer;
            gatewayVarGeneratorRecordTypeString = "BIBLIOGRAPHIC";
            sb.append("testedLCCN: " + lccnCleanedMfer + "\n");
            sb.append("verifiedLCCN: " + lccnCleanedMfer + "\n");
            sb.append("mferRecordType: BIBLIOGRAPHIC \n");
            return sb.toString();
        }

        // OTHER CASES: a prefix is required.
        String prefixRawMfer = processPrefixRawMfer();
        String prefixMfer = prefixRawMfer.replaceAll("\\s+", "");
        boolean testLccnPrefixSyntaxResult = testLccnPrefixSyntax(
                prefixRawMfer, Integer.valueOf(prefixRawMfer.length()), serialMfer, serialMferLen);
        boolean lccnPrefixCheck = masterLccnPrefixList.contains(prefixMfer);

        if (testLccnPrefixSyntaxResult && lccnPrefixCheck) {
            // (PREFIX SYNTAX) = true and (PREFIX LITERAL STRING) = true
            lccnPadded = lccnPaddedGenerator(
                    prefixMfer, Integer.valueOf(prefixMfer.length()), serialMfer, serialMferLen);
            sb.append("lccnPadded:" + lccnPadded + ":::" + "\n");

            lccn = lccnCleanedMfer;
            sb.append("testedLCCN: " + lccnCleanedMfer + "\n");
            sb.append("verifiedLCCN: " + lccnCleanedMfer + "\n");

            String gateway = gatewayVarGenerator(prefixMfer, lccnPadded);
            sb.append("mferRecordType:" + gatewayVarGeneratorRecordTypeString + "\n");
            sb.append("gateway: " + gateway + ":::" + "\n");
        } else {
            // Prefix malformed or not found in the lists: does not validate.
            lccn = lccnRawMfer;
            sb.append("testedLCCN: " + lccnRawMfer + "\n");
            sb.append("ERROR: Invalid LCCN" + "\n");
            if (!testLccnPrefixSyntaxResult) {
                sb.append("ERROR: testLccnprefixSyntaxResult == NULL" + "\n");
            }
            if (!lccnPrefixCheck) {
                sb.append("ERROR: prefix not found : "
                        + (prefixMfer.length() == 0 ? "NULL" : prefixMfer) + ":::");
            }
        }
        return sb.toString();
    }

    /**
     * Builds the gateway (SRU search) URL for a prefix and records the record
     * type in {@code gatewayVarGeneratorRecordTypeString}.
     *
     * @param prefixMfer prefix without blanks, or {@code "no"} for "no prefix"
     * @param lccnPadded padded LCCN as produced by {@link #lccnPaddedGenerator}
     * @return the URL, or {@code null} when the prefix is {@code null}, is in
     *         neither list, or is an authority prefix that starts with none of
     *         {@code n}, {@code s}, {@code g}, {@code ts}
     * @throws Exception declared for compatibility with existing callers
     */
    public String gatewayVarGenerator(String prefixMfer, String lccnPadded) throws Exception {
        if (prefixMfer == null) {
            return null;
        }

        if (prefixMfer.equals("no")) {
            if (serialMfer.startsWith("3") || serialMfer.startsWith("40")) {
                gatewayVarGeneratorRecordTypeString = "BIBLIOGRAPHIC";
                return bibliographicUrl(lccnPadded);
            }
            gatewayVarGeneratorRecordTypeString = "AUTHORITY";
            return authorityUrl(nameAuthority, lccnPadded);
        }

        if (authLccnPrefixList.contains(prefixMfer)) {
            if (prefixMfer.startsWith("n") || prefixMfer.equals("tn")) {
                gatewayVarGeneratorRecordTypeString = "AUTHORITY";
                return authorityUrl(nameAuthority, lccnPadded);
            }
            if (prefixMfer.startsWith("s") || prefixMfer.startsWith("g")
                    || prefixMfer.startsWith("ts")) {
                gatewayVarGeneratorRecordTypeString = "AUTHORITY";
                return authorityUrl(subjectAuthority, lccnPadded);
            }
            // Authority prefix of an unknown family: no URL (the bib list is not consulted).
            return null;
        }

        if (bibLccnPrefixList.contains(prefixMfer)) {
            gatewayVarGeneratorRecordTypeString = "BIBLIOGRAPHIC";
            return bibliographicUrl(lccnPadded);
        }
        return null;
    }

    /** Search URL against the Voyager database; the query value is prefixed with '^'. */
    private String bibliographicUrl(String lccnPadded) {
        return baseURL_LX2 + voyagerSearch + QUERY_HEAD + "^" + lccnPadded + QUERY_TAIL;
    }

    /** Search URL against one of the authority databases. */
    private String authorityUrl(String database, String lccnPadded) {
        return baseURL_LX2 + database + QUERY_HEAD + lccnPadded + QUERY_TAIL;
    }

    /**
     * Extracts the raw prefix of the cleaned LCCN: digits are dropped, letters
     * a-z and blanks are kept.
     *
     * @return {@code ""} when the cleaned LCCN is {@code null} or all digits;
     *         the text up to and including the first blank when it contains a
     *         blank; otherwise all of its {@code a-z} characters
     * @throws Exception declared for compatibility with existing callers
     */
    public String processPrefixRawMfer() throws Exception {
        if (lccnCleanedMfer == null) {
            return "";
        }
        if (lccnCleanedMfer.replaceAll("\\d", "").length() == 0) {
            return "";
        }
        int firstBlank = lccnCleanedMfer.indexOf(' ');
        if (firstBlank >= 0) {
            return lccnCleanedMfer.substring(0, firstBlank + 1);
        }
        return lccnCleanedMfer.replaceAll("[^a-z]", "");
    }

    /**
     * Creates the padded LCCN used in the search query.
     *
     * <p>8 digit serial: 1 or 2 character prefix gets a {@code %20}, a 3
     * character prefix does not. 10 digit serial: 1 character prefix gets a
     * {@code %20}, a 2 character prefix does not.
     *
     * @return the padded LCCN, or {@code null} for any other length combination
     *         or a {@code null} length
     * @throws Exception declared for compatibility with existing callers
     */
    public String lccnPaddedGenerator(String prefixMfer, Integer prefixMferLen,
            String serialMfer, Integer serialMferLen) throws Exception {
        if (prefixMferLen == null || serialMferLen == null) {
            return null;
        }
        int prefixLen = prefixMferLen.intValue();
        int serialLen = serialMferLen.intValue();

        // Longest prefix that still fills the fixed-width field without a blank.
        int fullPrefixLen;
        if (serialLen == 8) {
            fullPrefixLen = 3;
        } else if (serialLen == 10) {
            fullPrefixLen = 2;
        } else {
            return null;
        }

        if (prefixLen == fullPrefixLen) {
            return prefixMfer.concat(serialMfer);
        }
        if (prefixLen >= 1 && prefixLen < fullPrefixLen) {
            return prefixMfer + "%20" + serialMfer;
        }
        return null;
    }

    /**
     * Checks that the raw prefix has a shape that is legal for the serial length.
     *
     * <p>With an 8 digit serial the raw prefix may be 1 to 3 characters, with a
     * 10 digit serial 1 or 2 characters: lower-case letters optionally
     * followed by blanks. An empty prefix and every other serial length yield
     * {@code false}.
     *
     * @param prefixRawMfer    prefix including trailing blanks
     * @param prefixRawMferLen length of {@code prefixRawMfer}
     * @param serialMfer       serial digits (not inspected)
     * @param serialMferLen    length of the serial
     * @return {@code true} when the prefix syntax is acceptable
     * @throws Exception declared for compatibility with existing callers
     */
    public boolean testLccnPrefixSyntax(String prefixRawMfer, Integer prefixRawMferLen,
            String serialMfer, Integer serialMferLen) throws Exception {
        if (prefixRawMfer == null || prefixRawMferLen == null || serialMferLen == null) {
            return false;
        }
        int prefixLen = prefixRawMferLen.intValue();
        int serialLen = serialMferLen.intValue();

        int maxPrefixLen;
        if (serialLen == 8) {
            maxPrefixLen = 3;
        } else if (serialLen == 10) {
            maxPrefixLen = 2;
        } else {
            return false;
        }
        if (prefixLen < 1 || prefixLen > maxPrefixLen) {
            return false;
        }

        switch (prefixLen) {
            case 1:
                return PREFIX_LEN_1.matcher(prefixRawMfer).matches();
            case 2:
                return PREFIX_LEN_2.matcher(prefixRawMfer).matches();
            default:
                return PREFIX_LEN_3.matcher(prefixRawMfer).matches();
        }
    }
}