2016-01-18

Euler-98

By replacing each of the letters in the word CARE with 1, 2, 9, and 6 respectively, we form a square number: 1296 = 36^2. What is remarkable is that, by using the same digital substitutions, the anagram, RACE, also forms a square number: 9216 = 96^2. We shall call CARE (and RACE) a square anagram word pair and specify further that leading zeroes are not permitted, neither may a different letter have the same digital value as another letter.

Using words.txt, a 16K text file containing nearly two-thousand common English words, find all the square anagram word pairs (a palindromic word is NOT considered to be an anagram of itself).

What is the largest square number formed by any member of such a pair?

NOTE: All anagrams formed must be contained in the given text file.


rm(list=ls(all=TRUE))
#I want all large integers manipulated without scientific notation
options( scipen = 20 ) ##don't use scientific notation
options(digits=22)

words <- scan( file= paste(getwd(),"p098_words.txt", sep="/"), what="list", sep=",",skip=0, quote="\"")


> words <- scan( file= paste(getwd(),"p098_words.txt", sep="/"), what="list", sep=",",skip=0, quote="\"")
Read 1786 items
> 
> counts <- nchar(words)
> max.counts <- max(counts)
> max.counts
[1] 14
> 
> min.counts <- min(counts)
> min.counts
[1] 1
> 
> words.len <- length(words)
> 
> d <- data.frame(counts, words)
> head(d)
  counts   words
1      1       A
2      7 ABILITY
3      4    ABLE
4      5   ABOUT
5      5   ABOVE
6      7 ABSENCE
> tail(d)
     counts    words
1781      3      YET
1782      3      YOU
1783      5    YOUNG
1784      4     YOUR
1785      8 YOURSELF
1786      5    YOUTH
>

There are 1786 words, the longest is 14 characters and the smallest is 1 character. Since we have already been given the square anagram word pair CARE / RACE I will assume the answer is greater than 4 characters and ignore all 1-4 character words.

Write a function compare.word that will take 2 words, sort the characters and determine if the two words have the same characters.

> compare.word <- function( a, b){
+     aa <-sort(strsplit (a,"")[[1]])
+     bb <-sort(strsplit (b,"")[[1]])
+   (length(aa)==sum(aa==bb))    
+ }
> 

##process through all words looking for words that have the same characters
##get words from d by counts.  Only consider words with 5 or more characters

for(i in 5:14){

    
    d2 <- d[d$counts==i,]
    d2.len <- nrow(d2)
    col.index <- 2
    row.index <- 1

    for(row in row.index:d2.len){
        for(col in col.index:d2.len){   
            if(compare.word(as.character(d2[col,2]), as.character(d2[row,2]))){
                mywords <- rbind(mywords,c(i,row,col,as.character(d2[row,2]),as.character(d2[col,2])))
            }
        }
        if(col.index < d2.len) col.index <- col.index+1
    }
      
    
}

mywords <- mywords[!is.na(mywords$a),]
> mywords$same <- mywords$a!=mywords$b
> mywords <- mywords[mywords$same==TRUE,]
> mywords
   len row col         a         b same
21   5  18 175     ARISE     RAISE TRUE
22   5  31  36     BOARD     BROAD TRUE
23   5  68 101     EARTH     HEART TRUE
24   5 118 219     LEAST     STEAL TRUE
25   5 141 237     NIGHT     THING TRUE
26   5 157 194     PHASE     SHAPE TRUE
27   5 172 173     QUIET     QUITE TRUE
28   5 196 236     SHEET     THESE TRUE
29   5 199 208     SHOUT     SOUTH TRUE
30   5 240 279     THROW     WORTH TRUE
32   6  53 228    CENTRE    RECENT TRUE
33   6  69 272    COURSE    SOURCE TRUE
34   6  71  89    CREDIT    DIRECT TRUE
35   6  74 135    DANGER    GARDEN TRUE
36   6 110 111    EXCEPT    EXPECT TRUE
37   6 132 231    FORMER    REFORM TRUE
38   6 144 234    IGNORE    REGION TRUE
41   8  35 125  CREATION  REACTION TRUE
43   9  62  87 INTRODUCE REDUCTION TRUE
> 
> nrow(mywords)
[1] 19
> 
> len.of.int <- unique(mywords$len)
> len.of.int
[1] 5 6 8 9
>

There are 19 anagrams with lengths of 5, 6, 8, or 9 characters.

Now figure out how many squares of length 5,6,8, or 9 exist.

These squares will be between 100^2 to 31623^2


nums <- vector(mode="numeric", 31523)

counter <- 1
for(i in 100:31623){
    nums[counter] <- as.numeric(i^2)
    counter <- counter +1
}

num.lengths <- nchar(nums)

d.nums <- data.frame(nums, num.lengths)

> head(d.nums)
   nums num.lengths
1 10000           5
2 10201           5
3 10404           5
4 10609           5
5 10816           5
6 11025           5
> tail(d.nums)
            nums num.lengths
31519  999697924           9
31520  999761161           9
31521  999824400           9
31522  999887641           9
31523  999950884           9
31524 1000014129          10
> 

mynums <- data.frame(matrix(nrow=1,ncol=5))
names(mynums) <- c("len","row","col","a","b")

for( i in len.of.int){

    ##d.nums is the data.frame that holds the squares
    ##and their lengths
temp.nums <- as.numeric(d.nums[d.nums$num.lengths==i,"nums"])
temp.len <- length(temp.nums)

col.index <- 2
row.index <- 1

for(row in row.index:temp.len){
    for(col in col.index:temp.len){
        if(compare.square(as.integer(temp.nums[col]),as.integer(temp.nums[row]))){
            mynums <- rbind(mynums, c(i,col,row,temp.nums[col],temp.nums[row]))
        }
    }
    if(col.index < temp.len) col.index <- col.index+1
}
}

> head(mynums)
  len row col     a     b
2   5  11   2 12100 10201
3   5  21   3 14400 10404
4   5 102   3 40401 10404
5   5 111   3 44100 10404
6   5  31   4 16900 10609
7   5  41   4 19600 10609
> tail(mynums)
      len   row   col         a         b
40991   9 21612 21516 999255321 993195225
40992   9 21526 21517 993825625 993258256
40993   9 21589 21530 997801744 994077841
40994   9 21574 21538 996854329 994582369
40995   9 21594 21573 998117649 996791184
40996   9 21623 21623 999950884 999950884

> nrow(mynums)
[1] 40994
>

There are about 41,000 squares. Assign a square to a word, rearrange according to the square pair, determine if the new word is in the anagram list.


scramble <- function( w, n1, n2){
#w: the word; n1: the first square; n2 the rearranged square

    w.vector <-strsplit(w,"")[[1]]
    n1.vector <-   as.integer(strsplit(as.character(n1),"")[[1]])
    n2.vector <-   as.integer(strsplit(as.character(n2),"")[[1]])

    d1 <-data.frame(col1=w.vector, col2= n1.vector, stringsAsFactors = FALSE)

    paste(as.character(d1$col1[match(n2.vector, d1$col2)]),sep="", collapse="")
    
}

holder <- data.frame(matrix(nrow=1,ncol=5))
names(holder) <- c("len","wa","wb","na","nb")


for(i in len.of.int){
    temp.words <- mywords[mywords3len==i,]
    temp.nums <- mynums[mynums$len==i,]
    for(j in 1:nrow(temp.words)){
        for(k in 1:nrow(temp.nums)){
            new.word <-temp.words[ match(scramble(temp.words[j,"a"], temp.nums[k,"a"], temp.nums[k,"b"]),temp.words$a), "a"]
            if( !is.na( new.word)){
                holder <- rbind(holder,c(i, temp.words[j,"a"], as.character(new.word),temp.nums[k,"a"], temp.nums[k,"b"] ))
            }
        }
    }
}


> holder
   len     wa     wb     na     nb
1    5  BROAD  BOARD  18769  17689
2    6 CENTRE RECENT 436921 214369

CENTRE assigns different numbers to the ‘E’ violating one of the rules so BROAD/BOARD is the anagram pair and the largest square is 18769