Solving dense acrostics quickly

# A puzzle:

The letters can be arranged to make four words across and four words down. How quickly can we solve it?

(By "we" I mean "your machine and I" – together we shall achieve Perfect Scalability through Edge Computing! I've included sample runtimes below code samples, but there's a button to re-run on your device.)

First we need a dictionary of words

let words = new Set(['aahs','abbe','abed','abet','able','ably','abut','acai','aced','aces','ache','achy','acid','acme','acne','acre','acts','adds','ades','ados','adze','aeon','afar','afro','agar','aged','agee','ages','agog','ague','ahem','ahoy','aide','aids','ails','aims','airs','airy','ajar','akin','alar','alas','albs','alee','ales','alga','alit','ally','alms','aloe','also','alto','alum','amen','amid','ammo','amok','amps','amyl','anal','ands','anew','anil','ankh','anon','ante','anti','ants','anus','aped','aper','apes','apex','apod','apps','apse','aqua','arch','arcs','area','aria','arid','arks','arms','army','arts','arty','arum','asea','ashy','asks','asps','atom','atop','auks','aunt','aura','auto','aver','avid','avow','away','awed','awes','awls','awol','awry','axed','axel','axes','axis','axle','axon','ayes','baas','babe','baby','back','bade','bags','bail','bait','bake','bald','bale','balk','ball','balm','band','bane','bang','bank','bans','barb','bard','bare','barf','bark','barn','bars','base','bash','bask','bass','bate','bath','bats','baud','bawl','bays','bead','beak','beam','bean','bear','beat','beau','beck','beds','beef','been','beep','beer','bees','beet','begs','bell','bels','belt','bend','bent','berg','berm','best','beta','bets','bevy','bias','bibs','bide','bids','bier','bike','bile','bilk','bill','bind','bins','bios','bird','bite','bits','blab','blah','bled','blew','blip','blob','bloc','blog','blot','blow','blue','blur','boar','boas','boat','bobs','bock','bode','body','bogs','boil','bold','bole','boll','bolo','bolt','bomb','bond','bone','bong','bonk','bony','boob','book','boom','boon','boor','boos','boot','bops','bore','born','boss','both','bout','bowl','bows','boxy','boys','bozo','brad','brag','bran','bras','brat','bray','bred','brew','brie','brig','brim','bris','brow','brrr','buck','buds','buff','bugs','bulb','bulk','bull','bump','bums','bung','bunk','buns','bunt','buoy','burg','burn','burp','burr','bury','bush','busk','buss','bust','busy','buts','butt','buys','buzz','byes','byte','cabs','caca','cads','cafe','cage','cagy','cake','caky','calf','call','calm','came','camp','cams','cane','cans','cant','capo','cape','caps','carb','card','care','carp','cars','cart','case','cash','cask','cast','cats','cave','caws','cede','cell','cels','cent','chad','chai','chap','char','chat','chaw','chef','chew','chia','chic','chin','chip','chit','chop','chow','chug','chum','ciao','cite','city','clad','clam','clan','clap','claw','clay','clef','clip','clod','clog','clop','clot','cloy','club','clue','coal','coat','coax','cobs','coca','cock','coda','code','cods','cogs','coif','coil','coin','coir','coke','cola','cold','colt','coma','comb','come','cone','conk','conn','cons','cook','cool','coop','coos','coot','cope','cops','copy','cord','core','cork','corm','corn','cost','cosy','cote','cots','coup','cove','cowl','cows','cozy','crab','crag','cram','crap','craw','cred','crew','crib','croc','crop','crow','crud','crux','cube','cubs','cuds','cued','cues','cuff','cuke','cull','cult','cups','curb','curd','cure','curl','curs','curt','cusp','cuss','cute','cuts','cyan','cyst','czar','dabs','dada','dado','dads','daft','dais','dale','dame','damn','damp','dams','dank','dare','dark','darn','dart','dash','data','date','daub','dawn','days','daze','dead','deaf','deal','dean','dear','debt','deck','deco','deed','deem','deep','deer','deft','defy','deke','deli','dell','demo','dens','dent','deny','desk','dews','dewy','dhow','dial','dibs','dice','dido','died','dies','diet','digs','dike','dill','dime','dims','dine','ding','dino','dins','dint','dips','dire','dirk','dirt','disc','dish','disk','diss','ditz','diva','dive','dock','docs','dodo','doer','does','doff','dogs','dojo','dole','doll','dolt','dome','done','dons','doom','door','dope','dopy','dork','dorm','dose','dost','dote','doth','dots','dour','dove','down','doze','drab','drag','dram','drat','draw','dray','dreg','drew','drib','drip','drop','drub','drug','drum','drys','dual','dubs','duck','duct','dude','duds','duel','dues','duet','duff','duke','dull','duly','dumb','dump','dune','dung','dunk','duns','duos','dupe','dura','dusk','dust','duty','dyad','dyed','dyer','dyes','dyke','each','earl','earn','ears','ease','east','easy','eats','eave','ebbs','echo','ecru','edam','eddy','edge','edgy','edit','eels','efts','egad','eggs','eggy','egos','eked','eker','ekes','elan','ells','elms','else','emit','emos','emus','ends','enid','envy','eons','epee','epic','eras','ergo','ergs','erns','errs','espy','etch','even','ever','eves','evil','ewer','ewes','exam','exec','exes','exit','expo','eyed','eyer','eyes','face','fact','fade','fads','fail','fain','fair','fake','fall','fame','fang','fans','fare','farm','faro','fart','fast','fate','fats','faun','faux','fava','fawn','faze','fear','feat','feds','feed','feel','fees','feet','fell','felt','fend','fens','fern','fest','feta','fete','feud','fiat','fibs','fief','fife','figs','file','fill','film','filo','find','fine','fink','fins','fire','firm','firs','fish','fist','fits','five','fizz','flab','flag','flak','flan','flap','flat','flaw','flax','flay','flea','fled','flee','flew','flex','flip','flit','floe','flog','flop','flow','flub','flue','flus','flux','foal','foam','fobs','foci','foes','fogs','fogy','foil','fold','folk','fond','font','food','fool','foot','fops','fora','ford','fore','fork','form','fort','foul','four','fowl','foxy','frag','frat','fray','free','fret','frit','frog','from','fuel','fugu','full','fume','fund','funk','furl','furs','fury','fuse','fuss','futz','fuzz','gabs','gads','gaga','gags','gain','gait','gala','gale','gall','gals','game','gams','gamy','gang','gape','gaps','garb','gars','gash','gasp','gate','gave','gawk','gays','gaze','gear','geek','gees','geez','geld','gels','gelt','gems','gene','gent','germ','gets','ghee','gibe','gift','gigs','gila','gild','gill','gilt','gins','gird','girl','girt','gist','give','glad','glam','glee','glen','glia','glib','glid','glob','glop','glow','glue','glug','glum','glut','gnat','gnaw','gnus','goad','goal','goas','goat','gobs','goby','gods','goer','goes','gogo','goji','gold','golf','gone','gong','good','goof','goon','goop','goos','gore','gory','gosh','gout','gown','grab','grad','gram','gray','grew','grid','grim','grin','grip','grit','grog','grow','grub','guar','guff','gulf','gull','gulp','gums','gunk','guns','guru','gush','gust','guts','guys','gyms','gyre','gyro','hack','haft','hags','hail','hair','hake','hale','half','hall','halo','halt','hams','hand','hang','hard','hare','hark','harm','harp','hart','hash','hasp','hast','hate','hath','hats','haul','have','hawk','haws','hays','haze','hazy','head','heal','heap','hear','heat','heck','heed','heel','heft','heir','held','hell','helm','help','heme','hemp','hems','hens','herb','herd','here','hero','hers','hewn','hews','hick','hide','hied','hies','high','hike','hill','hilt','hind','hint','hips','hire','hiss','hits','hive','hoar','hoax','hobo','hobs','hock','hods','hoed','hoer','hoes','hogs','hold','hole','holy','home','homy','hone','honk','hood','hoof','hook','hoop','hoot','hope','hops','hora','horn','hose','host','hots','hour','hove','howl','hows','hubs','hued','hues','huff','huge','hugs','hula','hulk','hull','hump','hums','hung','hunk','hunt','hurl','hurt','hush','husk','huts','hymn','hype','iamb','ibex','ibis','iced','icer','ices','icky','icon','idea','ides','idle','idly','idol','iffy','ilks','ills','imam','imps','inch','info','inks','inky','inns','into','ions','iota','ired','ires','iris','irks','iron','isle','isms','itch','item','jabs','jack','jade','jags','jail','jake','jamb','jams','jape','jars','java','jaws','jays','jazz','jean','jeep','jeer','jeez','jerk','jest','jets','jibe','jibs','jigs','jilt','jinx','jive','jobs','jock','joes','joey','jogs','john','join','joke','jolt','josh','joss','jots','jowl','joys','judo','jugs','juju','juke','jump','junk','jury','just','jute','juts','kale','kava','keel','keen','keep','kegs','kelp','keno','kept','kern','keto','keys','kick','kids','kill','kiln','kilo','kilt','kind','kine','king','kink','kiss','kite','kith','kits','kiva','kiwi','knap','knee','knew','knit','knob','knot','know','koan','kobe','kohl','kola','kook','kora','koto','kuru','labs','lace','lack','lacy','lade','lads','lady','lags','laic','laid','lain','lair','lake','lama','lamb','lame','lamp','lams','land','lane','lank','laps','lard','lark','larp','lash','lass','last','late','lath','lats','laud','lava','lavs','lawn','laws','lays','laze','lazy','lead','leaf','leak','lean','leap','leas','lede','leek','leer','lees','left','legs','leis','lend','lens','lent','less','lest','lets','levy','lewd','liar','lice','lick','lids','lied','lien','lies','lieu','life','lift','like','lilt','lily','lima','limb','lime','limn','limo','limp','limy','line','link','lino','lint','lion','lips','lisp','list','lite','live','load','loaf','loam','loan','lobe','lobs','loch','loci','lock','lode','loft','loge','logo','logs','logy','loin','loll','lone','long','look','loom','loon','loop','loos','loot','lope','lops','lord','lore','lose','loss','lost','loth','lots','loud','lout','love','lows','luau','lube','luck','luge','lugs','lull','lump','lung','lure','lurk','lush','lust','lute','lyes','lyme','lynx','lyre','lyse','mace','mach','mack','made','magi','maid','mail','maim','main','make','male','mall','malt','mama','mane','mans','many','maps','mare','mark','marl','mars','mart','masa','mash','mask','mass','mast','mate','math','mats','maul','maws','mayo','maze','mead','meal','mean','meat','meek','meet','meld','melt','meme','memo','mend','menu','meow','mere','mesa','mesh','mess','meta','mete','meth','mewl','mews','mica','mice','mids','mien','miff','mike','mild','mile','milk','mill','mils','mime','mind','mine','mini','mink','mint','minx','mire','miso','miss','mist','mite','mitt','moan','moas','moat','mobs','mock','mode','mods','moil','mojo','mold','mole','moll','molt','moms','monk','mono','mood','moon','moor','moos','moot','mope','mops','more','morn','mosh','moss','most','mote','moth','move','mown','mows','much','muck','muds','muff','mugs','mule','mull','mums','mumu','muon','murk','muse','mush','musk','muss','must','mute','mutt','myna','myth','naan','nabs','nags','naif','nail','name','napa','nape','naps','narc','nard','nary','nave','navy','nays','neap','near','neat','neck','need','neem','neon','nerd','nest','nets','news','newt','next','nibs','nice','nick','nigh','nine','nips','nits','node','nods','noel','noes','noir','none','nook','noon','nope','nori','norm','nose','nosh','nosy','note','noun','nous','nova','nubs','nude','nuke','null','numb','nuns','nuts','oafs','oaks','oaky','oars','oast','oath','oats','oaty','obey','obis','obit','oboe','ocas','odds','odes','odic','odor','offs','ogle','ogre','ohms','oils','oily','oink','okay','okra','oleo','olio','olla','omen','omit','once','oner','ones','only','onto','onus','onyx','oohs','oops','ooze','opal','open','opts','opus','orad','oral','orbs','orca','orcs','ores','orgy','orts','oryx','orzo','otic','ouch','ouds','ours','oust','outs','ouzo','oval','oven','over','ovum','owed','ower','owes','owls','owns','oxen','pace','pack','pact','pads','page','paid','pail','pain','pair','pale','pall','palm','pals','pane','pang','pans','pant','papa','pare','park','part','pass','past','pate','path','pats','pave','pawl','pawn','paws','pays','peak','peal','pear','peas','peat','peck','pecs','peed','peek','peel','peen','peep','peer','pees','pegs','pelf','pelt','pend','pens','pent','peon','peps','perk','perm','pert','perv','pest','pets','pews','pfft','phew','pica','pick','pics','pied','pier','pies','pigs','pike','pile','pill','pimp','pine','ping','pink','pins','pint','piny','pipe','pips','piss','pita','pith','pits','pity','plan','play','plea','pled','plod','plop','plot','plow','ploy','plug','plum','plus','pock','pods','poem','poet','pogo','pois','poke','poky','pole','poll','polo','pomp','pond','pone','pong','pony','poof','pooh','pool','poop','poor','poos','poot','pope','pops','pore','pork','porn','port','pose','posh','post','posy','pots','pouf','pour','pout','poxy','pram','prat','pray','prep','prey','prig','prim','proa','prod','prom','prop','pros','prow','psst','pubs','puce','puck','puff','pugs','puke','pull','pulp','puma','pump','punk','puns','punt','puny','pupa','pups','pure','purl','purr','push','puss','puts','putt','putz','pyre','pyro','qats','quad','quay','quid','quim','quip','quit','quiz','race','rack','racy','raft','raga','rage','rags','raid','rail','rain','rake','ramp','rams','rang','rank','rant','raps','rapt','rare','rash','rasp','rate','rats','rave','rays','raze','razz','read','real','ream','reap','rear','redo','reds','reed','reef','reek','reel','refs','rein','rely','rend','rent','repo','reps','rest','revs','rhea','rias','ribs','rice','rich','ride','rids','rife','riff','rift','rigs','rile','rill','rime','rims','rind','ring','rink','riot','ripe','rips','rise','risk','rite','road','roam','roan','roar','robe','robs','rock','rocs','rode','rods','roes','roil','role','roll','romp','rood','roof','rook','room','root','rope','ropy','rose','rosy','rote','roti','rots','roue','rout','roux','rove','rows','rube','rubs','ruby','rude','rued','ruer','rues','ruff','rugs','ruin','rule','ruly','rump','rums','rune','rung','runs','runt','ruse','rush','rusk','rust','ruts','ryes','sack','sacs','safe','saga','sage','sago','sags','said','sail','sake','sale','salt','same','sand','sane','sang','sank','sans','saps','sari','sash','sass','sate','save','sawn','saws','says','scab','scad','scam','scan','scar','scat','scot','scow','scry','scud','scum','seal','seam','sear','seas','seat','secs','sect','seed','seek','seem','seen','seep','seer','sees','self','sell','semi','send','sent','sera','sere','serf','sets','sewn','sews','sext','sexy','shad','shag','sham','shat','shea','shed','shim','shin','ship','shit','shiv','shod','shoe','shoo','shop','shot','show','shun','shut','sick','sics','side','sift','sigh','sign','silk','sill','silo','silt','sine','sing','sink','sins','sips','sire','sirs','site','sits','sitz','size','skew','skid','skim','skin','skip','skis','skit','slab','slag','slam','slap','slat','slaw','slay','sled','slew','slid','slim','slip','slit','slob','sloe','slog','slop','slot','slow','slue','slug','slum','slur','smog','smug','smut','snag','snap','snip','snit','snob','snot','snow','snub','snug','soak','soap','soar','sobs','sock','soda','sods','sofa','soft','soil','sold','sole','solo','some','song','sons','soon','soot','sops','sore','sort','sots','soul','soup','sour','sown','sows','soya','spam','span','spar','spas','spat','spay','spaz','spec','sped','spew','spin','spit','spot','spry','spud','spun','spur','stab','stag','star','stat','stay','stem','step','stew','stir','stop','stow','stub','stud','stun','stye','subs','such','suck','suds','sued','suer','sues','suet','suit','sulk','sumo','sump','sums','sung','sunk','suns','sups','sure','surf','suss','swab','swag','swam','swan','swap','swat','sway','swig','swim','swum','sync','tabs','tack','taco','tact','tads','tags','tail','take','talc','tale','talk','tall','tame','tamp','tams','tang','tank','tans','tape','taps','tare','tarn','taro','tarp','tars','tart','task','tats','taut','taxi','teak','teal','team','tear','teas','teat','tech','teed','teem','teen','tees','tell','temp','tend','tens','tent','term','tern','test','text','than','that','thaw','thee','them','then','they','thin','this','thou','thud','thug','thus','tick','tics','tide','tidy','tied','tier','ties','tiff','tiki','tile','till','tilt','time','tine','tins','tint','tiny','tipi','tips','tire','tits','toad','toed','toes','tofu','toga','togs','toil','toke','told','toll','tomb','tome','toms','tone','tons','tony','took','tool','toon','toot','tops','tore','tori','torn','tors','tort','tory','toss','tote','tots','tour','tout','town','tows','toys','tram','trap','tray','tree','trek','trig','trim','trio','trip','trod','trot','true','tsks','tuba','tube','tubs','tuck','tuft','tugs','tuna','tune','tuns','turd','turf','turn','tush','tusk','tutu','twee','twig','twin','twit','twos','tyke','type','typo','udon','ugly','ukes','ulna','umph','umps','undo','unit','unto','updo','upon','urea','urge','uric','urns','used','user','uses','uvea','vain','vale','vamp','vane','vans','vape','vary','vase','vast','vats','veal','veer','veil','vein','veld','vend','vent','verb','very','vest','veto','vets','vial','vibe','vice','vids','vied','vies','view','vile','vine','viny','viol','visa','vise','viva','void','vole','volt','vote','vows','wack','wade','wadi','wads','waft','wage','wags','waif','wail','wain','wait','wake','wale','walk','wall','wand','wane','wang','wank','want','ward','ware','warm','warn','warp','wars','wart','wary','wash','wasp','wast','watt','wave','wavy','waxy','ways','weak','weal','wean','wear','webs','weds','weed','week','weep','weft','weir','weld','well','welt','wend','went','wept','were','west','weta','wets','wham','what','whee','when','whet','whew','whey','whig','whim','whip','whir','whit','whiz','whoa','whom','whys','wick','wide','wife','wigs','wiki','wild','wile','will','wilt','wily','wimp','wind','wine','wing','wink','wino','wins','wipe','wire','wiry','wise','wish','wisp','with','wits','wive','woad','woes','woke','woks','wolf','womb','wonk','wont','wood','woof','wool','woos','word','wore','work','worm','worn','wort','wove','wows','wrap','wren','writ','wuss','yaks','yams','yank','yaps','yard','yarn','yawn','yaws','yeah','year','yeas','yell','yelp','yens','yeti','yews','yips','yoga','yogi','yoke','yolk','yoni','yore','your','yowl','yuck','yuks','yule','yurt','zags','zany','zaps','zeal','zebu','zero','zest','zeta','zigs','zinc','zine','zing','zips','ziti','zits','zoic','zone','zonk','zoom','zoos']);
        

and the letters in the puzzle

let seed = [...'eras'];
let letters = [...'ceeiklmorstw'];
        

If we take them in the order given (across), we can verify it's not the solution – glue together three words across and four words down, and bail out early if one of them isn't in the set of words.

function isSolution(letters) {
    for (let i=0; i<3; ++i) {
        if (!words.has(letters[4*i] + letters[4*i+1] + letters[4*i+2] + letters[4*i+3]))
            return false;
    }
    for (let i=0; i<4; ++i) {
        if (!words.has(seed[i] + letters[i] + letters[4+i] + letters[8+i]))
            return false;
    }
    return true;
}

console.log(isSolution(letters) ? 'yes' : 'no');

We can iterate through permutations until we find an answer, using a recursive generator. Each level will need the letters chosen so far, along with a mask of which letters are still available, and the deepest level will yield 12-letter permutations.

function* permutations(nRemain=letters.length, head=[], used=[]) {
    for (let i=0; i<letters.length; ++i) {
        if (used[i]) continue;
        used[i] = true;
        const nextHead = [...head, letters[i]];
        if (nRemain <= 1) yield nextHead;
        else yield* permutations(nRemain - 1, nextHead, used);
        used[i] = false;
    }
}

let n = 0;
for (const permutation of permutations()) {
    if (isSolution(permutation)) {
        console.log(permutation);
        break;
    }
    ++n;
}
console.log('Tested ' + (n+1) + ' permutations');
        
l,i,c,k,m,o,r,e,s,t,e,w
Tested 210562809 permutations
Finished in 172.554 seconds.
        
Flame graph

Looks like it's spending all of its time making permutations. Can we do better with a published "unranking" algorithm? (Myrvold & Ruskey, 2000)

function unrank1(n, r, p) {
    if (n <= 0) return p;
    const x = p[n - 1]; // swap with p[r % n]
    const j = r % n;
    p[n - 1] = p[j];
    p[j] = x;
    return unrank1(n - 1, Math.floor(r / n), p);
}
function* mrPermutations(pIn=letters) {
    const p = pIn.slice();
    const n = p.length;
    let r = n; // #r = n!
    for (let i=n; i-->1;) r *= i;
    while (r --> 0) {
        for (let j=0; j<p.length; ++j) p[j] = pIn[j];
        yield [r, unrank1(n, r, p)];
    }
}
        
let n = 0;
for (const [r, permutation] of mrPermutations()) {
    if (isSolution(permutation)) {
        console.log(r, permutation);
        break;
    }
    ++n;
}
console.log('Tested ' + (n+1) + ' permutations');
        
l,i,c,k,m,o,r,e,s,t,e,w
Tested 214099885 permutations
Finished in 61.197 seconds.
        

That's a little better. Still, some of the permutations are duplicates, since 'e' === 'e'.

# Use a library

Let's use a proper multiset permutation algorithm: Cool-Lex order as implemented in github.com/nicolaitanes/multipermute (upstream).

/*
from https://github.com/ekg/multipermute
version 1.0.1

The MIT License (MIT)

Copyright (c) 2014 Erik Garrison

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

This module encodes functions to generate the permutations of a multiset
following this algorithm:

Algorithm 1
Visits the permutations of multiset E. The permutations are stored
in a singly-linked list pointed to by head pointer h. Each node in the linked
list has a value field v and a next field n. The init(E) call creates a
singly-linked list storing the elements of E in non-increasing order with h, i,
and j pointing to its first, second-last, and last nodes, respectively. The
null pointer is given by φ. Note: If E is empty, then init(E) should exit.
Also, if E contains only one element, then init(E) does not need to provide a
value for i.

A linked list must be used instead of an array because, as long as we keep a
reference to the nodes at the removal and insertion points, arbitrary shifts
(rotations of sub-lists) can be performed in constant time; this is what h, i,
and j are for.

[h, i, j] ← init(E)
visit(h)
while j.n ≠ φ or j.v < h.v do
    if j.n ≠ φ and i.v ≥ j.n.v then
        s←j
    else
        s←i
    end if
    t←s.n
    s.n ← t.n
    t.n ← h
    if t.v < h.v then
        i←t
    end if
    j←i.n
    h←t
    visit(h)
end while
*/
function mp_visit(h, l, p) {
    for (let i = l - 1; i >= 0; i--) {
        p[i] = h.r;
        h = h.n;
    }
    return p;
}

// p must be sorted in ascending order
function* mp_gen(p, remap, cycle) {
    const l = p.length;
    const pOut = p.slice();
    // If the input is empty, exit.
    // There is only one permutation of the empty set.
    if (l === 0) {
        yield [];
        return;
    }
    // Init
    let h = { v: p[0], r: remap[p[0]], n: null };
    let i = h; // penultimate node
    let j = h; // final node
    if (l > 1)
        h = i = { v: p[1], r: remap[p[1]], n: h };
    for (let k = 2; k < l; ++k) {
        h = { v: p[k], r: remap[p[k]], n: h };
    }
    for (;;) {
        // Visit permutations of the list
        yield mp_visit(h, l, pOut);
        let s;
        for (;;) {
            if (j.n)
                s = i.v >= j.n.v ? j : i;
            else if (j.v >= h.v)
                break;
            else
                s = i;
            const t = s.n;
            s.n = t.n;
            t.n = h;
            if (t.v < h.v) {
                i = t;
            }
            j = i.n;
            h = t;
            yield mp_visit(h, l, pOut);
        }
        if (cycle) {
            // Reset
            // This is different from initialization
            // because we don't need to allocate new
            // list nodes.
            i = h; // penultimate node
            j = h; // final node
            for (let k = l - 1; k > 0; k--) {
                j.v = p[k];
                j.r = remap[p[k]];
                i = j;
                j = j.n;
            }
            j.v = p[0];
            j.r = remap[p[0]];
        }
        else
            return;
    }
}

function multiplicity2sorted(mults) {
    const n = mults.length;
    const permutation = [];
    for (let k = 0; k < n; k++) {
        for (let m = mults[k]; m > 0; m--) {
            permutation.push(k);
        }
    }
    return permutation;
}

function ms2multiplicities(multiset, eq) {
    const remap = [];
    const mults = [];
    if (eq) {
        // If we have a custom equality function,
        // fall back on an implicit O(n^2) map.
        outer: for (const e of multiset) {
            for (let i = remap.length - 1; i >= 0; i--) {
                if (eq(remap[i], e)) {
                    mults[i]++;
                    continue outer;
                }
            }
            remap.push(e);
            mults.push(1);
        }
    }
    else {
        // Otherwise, we can use the native map,
        // with implied === comparison.
        const m = new Map();
        for (const e of multiset) {
            m.set(e, (m.get(e) || 0) + 1);
        }
        for (const e of multiset) {
            if (!m.has(e))
                continue;
            remap.push(e);
            mults.push(m.get(e));
            m.delete(e);
        }
    }
    return { remap: remap, mults };
}

function ms2permutation(multiset, eq) {
    const { remap, mults } = ms2multiplicities(multiset, eq);
    return { remap, permutation: multiplicity2sorted(mults) };
}

function multipermute(multiset, opts = {}) {
    let cb;
    let eq;
    let cycle = false;
    if (typeof opts === 'function')
        cb = opts;
    else
        ({ cb, eq, cycle = false } = opts);
    const { permutation, remap } = ms2permutation(multiset, eq);
    if (typeof cb !== 'function')
        return mp_gen(permutation, remap, cycle);
    for (const p of mp_gen(permutation, remap, cycle))
        cb(p);
    return;
}

        
let n = 0;
for (const permutation of multipermute(letters)) {
    if (isSolution(permutation)) {
        console.log(permutation);
        break;
    }
    ++n;
}
console.log('Tested ' + (n+1) + ' permutations');
        
l,i,c,k,m,o,r,e,s,t,e,w
Tested 90670467 permutations
Finished in 12.157 seconds.
        
Flame graph

Much better! Now it's spending a substantial amount of time in isSolution(); can we make it faster?

# Use integers

Let's replace the letters with their ordinal value 0..25, and make a table[a][b][c] with all allowed letters d. Each letter fits in 5 bits, so the table can be indexed quickly with 26*32*32 entries:

function intFromLetter(s) { return s ? s.charCodeAt() - 'a'.charCodeAt() : 26; };

function letterFromInt(i) { return String.fromCharCode(i + 'a'.charCodeAt()); };

let masks3 = new Int32Array(26*32**2);
for (const word of words) {
    const [a, b, c, d] = [...word].map(intFromLetter);
    masks3[(a<<10) + (b<<5) + c] |= (1 << d);
}

function isWord(a, b, c, d) {
    return !!(masks3[(a<<10) + (b<<5) + c] & (1 << d));
}

        
const seedInts = seed.map(intFromLetter);
const letterInts = letters.map(intFromLetter);

function isSolutionB(ints) {
    for (let i=0; i<3; ++i) {
        if (!isWord(ints[4*i], ints[4*i+1], ints[4*i+2], ints[4*i+3]))
            return false;
    }
    for (let i=0; i<4; ++i) {
        if (!isWord(seedInts[i], ints[i], ints[4+i], ints[8+i]))
            return false;
    }
    return true;
}

for (const permutation of multipermute(letterInts)) {
    if (isSolutionB(permutation)) {
        console.log(permutation.map(letterFromInt));
        break;
    }
}
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 5.619 seconds.
        
Flame graph

# Short-circuit linked list traversal

Now it's spending a bit of time in mp_visit – multipermute uses a linked list internally, and crawls it to copy each permutation into the output array – but if we could rule out the first word when letter 4 arrives (or the second after 8) we can skip the rest of the linked list and save some time. Might as well check the third word across after 12, and only check the down direction if they all succeed.

function mp_visitB(h, l, p) {
    for (let i = l - 1; i >= 0; i--) {
        p[i] = h.r;
        h = h.n;
        const fullWord = (i % 4) === 0;
        if (fullWord && !isWord(p[i], p[i+1], p[i+2], p[i+3])) return null;
    }
    return p;
}

// p must be sorted in ascending order
function* mp_genB(p, remap, cycle) {
    const l = p.length;
    const pOut = p.slice();
    // If the input is empty, exit.
    // There is only one permutation of the empty set.
    if (l === 0) {
        yield [];
        return;
    }
    // Init
    let h = { v: p[0], r: remap[p[0]], n: null };
    let i = h; // penultimate node
    let j = h; // final node
    if (l > 1)
        h = i = { v: p[1], r: remap[p[1]], n: h };
    for (let k = 2; k < l; ++k) {
        h = { v: p[k], r: remap[p[k]], n: h };
    }
    for (;;) {
        // Visit permutations of the list
        const x = mp_visitB(h, l, pOut);
        if (x) yield x;
        let s;
        for (;;) {
            if (j.n)
                s = i.v >= j.n.v ? j : i;
            else if (j.v >= h.v)
                break;
            else
                s = i;
            const t = s.n;
            s.n = t.n;
            t.n = h;
            if (t.v < h.v) {
                i = t;
            }
            j = i.n;
            h = t;
            const x = mp_visitB(h, l, pOut);
            if (x) yield x;
        }
        if (cycle) {
            // Reset
            // This is different from initialization
            // because we don't need to allocate new
            // list nodes.
            i = h; // penultimate node
            j = h; // final node
            for (let k = l - 1; k > 0; k--) {
                j.v = p[k];
                j.r = remap[p[k]];
                i = j;
                j = j.n;
            }
            j.v = p[0];
            j.r = remap[p[0]];
        }
        else
            return;
    }
}

function multipermuteB(multiset, opts = {}) {
    let cb;
    let eq;
    let cycle = false;
    if (typeof opts === 'function')
        cb = opts;
    else
        ({ cb, eq, cycle = false } = opts);
    const { permutation, remap } = ms2permutation(multiset, eq);
    if (typeof cb !== 'function')
        return mp_genB(permutation, remap, cycle);
    for (const p of mp_genB(permutation, remap, cycle))
        cb(p);
    return;
}

const seedInts = seed.map(intFromLetter);
const letterInts = letters.map(intFromLetter);

function isSolutionDown(ints) {
    for (let i=0; i<4; ++i) {
        if (!isWord(seedInts[i], ints[i], ints[4+i], ints[8+i]))
            return false;
    }
    return true;
}

for (const permutation of multipermuteB(letterInts)) {
    if (isSolutionDown(permutation)) {
        console.log(permutation.map(letterFromInt));
        break;
    }
}
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 1.375 seconds.
        

# Hash table

Would a hash table of words be any more efficient? If we treat a sequence of four integers (0..25) as a little-endian 4-byte integer, we can hash it by multiplying by a large prime and masking/shifting some bits to match the desired table size.

Since we know the dictionary ahead of time, we can find a prime multiplier that fits all the words above into 4096 bins -- 2 words per bin, each word found in its bin or the next one. The total table size is 32K, smaller than the 104KiB table we used before, so maybe more will stay in cache?

function isPrime(n) {
    if ((n%2) === 0 || (n%3) === 0) return false;
    for (let i = 5; i*i <= n; i += 6) {
        if ((n%i) === 0 || (n % (i+2)) === 0) return false;
    }
    return true;
}

function* primegen() {
    while (true) {
        const p = 0x10000000 + (0x6fffffff * Math.random())|0;
        if (isPrime(p)) yield p;
    }
}

function buildHash(hashKey, hashVal, size=4096, limitOverflow=3) {
    const capacity = size*2 ;
    const hash = new Int32Array(capacity);
    const key_words = [...words].map(word => {
        const key = hashKey(...[...word].map(intFromLetter));
        const offset = hashVal(key);
        return { word, key, offset };
    });
    key_words.sort((a, b) => a.offset - b.offset);

    let maxOverflow = 0;

    for (const { word, key, offset } of key_words) {
        let h = offset;
        while (h < capacity) {
            if (!hash[h]) {
                hash[h] = key;
                maxOverflow = Math.max(maxOverflow, h - offset);
                break;
            }
            ++h;
            if (maxOverflow > limitOverflow) break;
        }
        if (h === capacity) return [null, 0];
    }

    return [hash, maxOverflow];
}

function setupHashing(p, size=4096, limitOverflow=3) {
    const mask = (size - 1) << 1;

    function hashKey(a, b, c, d) {
        return a + (b === undefined ? 0 : (d<<24) + (c<<16) + (b<<8));
    }
    function hashVal(x) {
        return ((p * x) >> 16) & mask;
    }
    const [table, maxOverflow] = buildHash(hashKey, hashVal, size, limitOverflow);
    if (table && maxOverflow <= limitOverflow) {
        function isWord(a, b, c, d) {
            const x = hashKey(a, b, c, d);
            const h = hashVal(x);
            return table[h] === x || table[h+1] === x || table[h+2] === x || table[h+3] === x;
        }
        
        return { p, table, hashKey, hashVal, isWord };
    }
    return null;
}

function findTablePrime() {
    const primes = primegen();
    let hashPrime = 0;
    let hashData = '';
    while (!hashData) {
        const p = primes.next().value;
        
        const hashing = setupHashing(p);
        if (!hashing) continue;
        
        hashPrime = p;
        const bytes = new Uint8Array(hashing.table.buffer);
        for (const b of bytes) hashData += `\\` + b.toString(16).padStart(2, '0');
    }
    return { hashPrime, hashData };
}

const { hashPrime } = findTablePrime();
let hashing = setupHashing(hashPrime);

function mp_visitC(h, l, p) {
    for (let i = l - 1; i >= 0; i--) {
        p[i] = h.r;
        h = h.n;
        const fullWord = (i % 4) === 0;
        if (fullWord && !hashing.isWord(p[i], p[i+1], p[i+2], p[i+3])) return null;
    }
    return p;
}

// p must be sorted in ascending order
function* mp_genC(p, remap) {
    const l = p.length;
    const pOut = p.slice();
    // If the input is empty, exit.
    // There is only one permutation of the empty set.
    if (l === 0) {
        yield [];
        return;
    }
    // Init
    let h = { v: p[0], r: remap[p[0]], n: null };
    let i = h; // penultimate node
    let j = h; // final node
    if (l > 1)
        h = i = { v: p[1], r: remap[p[1]], n: h };
    for (let k = 2; k < l; ++k) {
        h = { v: p[k], r: remap[p[k]], n: h };
    }

        // Visit permutations of the list
        const x = mp_visitC(h, l, pOut);
        if (x) yield x;
        let s;
        for (;;) {
            if (j.n)
                s = i.v >= j.n.v ? j : i;
            else if (j.v >= h.v)
                break;
            else
                s = i;
            const t = s.n;
            s.n = t.n;
            t.n = h;
            if (t.v < h.v) {
                i = t;
            }
            j = i.n;
            h = t;
            const x = mp_visitC(h, l, pOut);
            if (x) yield x;
        }
}

function multipermuteC(multiset, opts = {}) {
    let cb;
    let eq;
    if (typeof opts === 'function')
        cb = opts;
    else
        ({ cb, eq } = opts);
    const { permutation, remap } = ms2permutation(multiset, eq);
    if (typeof cb !== 'function')
        return mp_genC(permutation, remap);
    for (const p of mp_genC(permutation, remap))
        cb(p);
    return;
}

function solveC(seed, letters) {
    const seedInts = seed.map(intFromLetter);
    const letterInts = letters.map(intFromLetter);

    function isSolutionDownC(ints) {
        for (let i=0; i<4; ++i) {
            if (!hashing.isWord(seedInts[i], ints[i], ints[4+i], ints[8+i]))
                return false;
        }
        return true;
    }

    for (const permutation of multipermuteC(letterInts)) {
        if (isSolutionDownC(permutation)) return permutation.map(letterFromInt);
    }
}
        
console.log(solveC(seed, letters));
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 2.050 seconds.
        

A little slower. Our high water mark was 125x faster (=== 172.554/1.375) than we started, and 8.8x faster than plain multipermute with string concat, but maybe webassembly would be even faster?

# WebAssembly and simd

Let's generate a module in WebAssembly Text (wat)[2] format, and implement multipermuteC for a fixed length of 12. We'll stick with hash-based testing to simplify the wasm.

// a few utilities to emit WAT opcodes with computed memory addresses

let indent = '';

function emitter(output, op, open) {
    return (...v) => {
        if (op === 'raw') return output(indent + v.join(' '));
        if (!v.length && !open) return output(indent + op);
        const paren = open || ['call', 'export', 'local', 'memory', 'param', 'result'].includes(op);
        output(`${indent}${paren ? '(' : ''}${op} ${v.join(' ')}${paren && !open ? ')' : ''}`);
    }
}

function watter(output=console.log, target={}, stem='', open=false) {
    return new Proxy(
        target,
        {
            get: (target, p) => {
                if (p === 'begin') return (op, ...v) => {
                    emitter(output, stem + op, true)(v);
                    indent += '  ';
                    return watter(output);
                }
                if (p === 'end') return () => {
                    if (indent) indent = indent.slice(0, indent.length - 2);
                    output(indent + ')');
                };
                return watter(output, emitter(output, stem + p), stem + p + '.');
            }
        }
    );
}

class pointermath {
    constructor(w, p, b, n, m) {
        this.w = w;
        this.p = p;
        this.s = (Math.log(b)/Math.log(2))|0;
        this.m = m || 1;
        this.n = n;
    }
    offset(i, j) {
        return this.p + ((((i || 0) * this.m + (j || 0))) << this.s);
    }
    at(i, j) {
        return this.w.raw(this.wat(i, j));
    }
    wat(i, j) {
        return `i32.const ${this.offset(i, j)}`;
    }
    after() { return this.offset(this.n); }
}

class mem {
    constructor(w, items) {
        this.w = w;
        let at = 0;
        for (const { k, b, n, m } of items) {
            const a = this[k] = new pointermath(w, at, b, n, m);
            at = a.after();
        }
        this.size = at;
    }
    memory(name='memory', exported=true) {
        this.w.memory('$' + name, Math.ceil(this.size / 65536));
        if (exported) this.w.raw(`(export "${name}" (memory $${name}))`);
    }
}

        
const w = watter();
const {
    begin, end, block, br, br_if, call, drop, local, loop, memory, param, raw, result, select,
    i8x16, i16x8, i32x4, i32, v128 } = w;

const m = new mem(w, [
    { k: 'tbl', b: 4, n: 4096, m: 2 },
    { k: 'scuts', b: 4, n: 3, m: 16 },
    { k: 'seed', b: 1, n: 4, m: 4 },
    // includes { k: 'ints', b: 1, n: 12 },
    { k: 'remap', b: 1, n: 4, m: 4 },
]);

const { hashPrime, hashData } = findTablePrime();

begin('module');
{
    m.memory();

    const testdown = () => { // inline
        // param('$words', 'v128');
        // param('$prime', 'i32');
        // result('i32'); in $result
        // local('$result', 'i32');
        // local('$down', 'v128');
        // local('$hash', 'v128');
        i32.const(0); local.set('$result');
        begin('block', '$top');
        {
            // transpose $words so down becomes across
            local.get('$words');
            v128.const('i8x16',0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15);
            i8x16.swizzle();
            local.tee('$down');
            
            // SIMD hash
            local.get('$prime'); i32x4.splat();
            i32x4.mul();
            v128.const('i16x8', 0x7ff8, 0x7ff8, 0x7ff8, 0x7ff8, 0x7ff8, 0x7ff8, 0x7ff8, 0x7ff8);
            v128.and();
            local.tee('$hash');

            for (let i=0; i<4; ++i) {
                // get hash bucket for word i
                if (i) local.get('$hash');
                i16x8.extract_lane_s(2*i + 1); v128.load('align=8');
                local.get('$down');
                const iBytes = [4*i, 4*i+1, 4*i+2, 4*i+3];
                // compare word i with each item in this and next bucket
                local.get('$down'); i8x16.shuffle(...iBytes, ...iBytes, ...iBytes, ...iBytes);
                i32x4.eq(); v128.any_true(); i32.eqz();
                br_if('$top');
            }

            i32.const(1); local.set('$result');
        
            end();
        }

        local.get('$result');
    };

    // given inputs $h, $prime, and $seed
    // fills $words [seed+letters] for the current permutation if all words are matched
    // output $t != 0 if found, else 0.
    // uses scratch locals $s and $word
    const testacross = () => { // inline
        // param('$s', 'i32');
        // param('$prime', 'i32');
        // result('v128');
        // local('$words', 'v128');
        // local('$word', 'i32');
        // local('$u', 'i32');
        // local('$v', 'i32');

        local.get('$h'); local.set('$s');
        i32.const(0); local.set('$t');

        begin('block', '$subtop');
        {
          for (let i = 12; i --> 0;) {
            // iterating backwards D, C, B, A, ...
            const isD = !((i+1)%4);
            const isA = !(i%4);
            // word = A<<24 + B<<16 + C<<8 + D
            if (!isD) {
                i32.const(8); i32.shl();
            }
            local.get('$s'); i32.load('offset=128');
            if (!isD) i32.add();
            if (isA) { // handle completed word
                // set word aside for output; hash and load 2 buckets
                const reg = ['$word', '$u', '$v'][i / 4]
                local.tee(reg);
                // compare word with each item in buckets
                local.get('$prime'); i32.mul();
                i32.const(16); i32.shr_u();
                i32.const(0x7ff8); i32.and();
                v128.load('align=8');
                local.get(reg); i32x4.splat();
                i32x4.eq(); v128.any_true(); i32.eqz();
                // early return if not found
                br_if('$subtop');
            }
            if (i) {
                local.get('$s'); i32.load();
                local.set('$s');
            }
          }

          // fill output vector if successful
          local.get('$words');
          local.get('$word');
          i32x4.replace_lane(1);
          local.get('$u');
          i32x4.replace_lane(2);
          local.get('$v');
          i32x4.replace_lane(3);
          local.set('$words');
          i32.const(1); local.set('$t');

          end();
        }
    };

    begin('func', '$linklist');
    {
        local('$i', 'i32');
        for (let i=0; i<12; ++i) {
            m.scuts.at(1, 12 - i - 1);
            m.seed.at(1, i);
            i32.load8_s();
            local.tee('$i');
            i32.store();
            m.scuts.at(2, 12 - i - 1);
            m.remap.at(1); local.get('$i'); i32.add();
            i32.load8_s();
            i32.store();
            m.scuts.at(0, 12 - i - 1);
            if (i) {
                m.scuts.at(0, 12 - i);
            } else {
                i32.const(0);
            }
            i32.store();
        }
        end();
    }

    begin('func', '$solve');
    {
        w.export('"solve"');
        result('i32');
        local('$result', 'i32');
        local('$h', 'i32');
        local('$i', 'i32');
        local('$j', 'i32');
        local('$s', 'i32');
        local('$t', 'i32');
        local('$u', 'i32');
        local('$v', 'i32');
        local('$jn', 'i32');
        local('$prime', 'i32');
        local('$word', 'i32');
        local('$words', 'v128');
        local('$down', 'v128');
        local('$hash', 'v128');
        i32.const(0); local.set('$result');

        begin('block', '$top');
        {
            // Init
            m.seed.at(0); v128.load('align=4'); local.set('$words');
            i32.const('0x' + hashPrime.toString(16)); local.set('$prime');

            call('$linklist');
            m.scuts.at(0, 0); local.set('$h');
            m.scuts.at(0, 10); local.set('$i');
            m.scuts.at(0, 11); local.set('$j');

            // test initial permutation
            begin('block', '$cd');
            {
                begin('block', '$crossdown');
                {
                    testacross();
                    local.get('$t'); br_if('$crossdown');
                    br('$cd');

                    end();
                }
                testdown();
                br_if('$top');

                end();
            }
        
            i32.const(0);
            local.set('$jn');

            begin('loop', '$perms');
            {
                // visit permutations of the list
                
                begin('block', '$noJN'); // less likely
                {
                    begin('block', '$hasJN'); // more likely
                    {
                        // if (j.n)
                        local.get('$jn'); i32.eqz(); br_if('$hasJN');

                        begin('block', '$iSwap');
                        {
                            // s = i.v >= j.n.v ? j : i;
                            local.get('$i'); i32.load('offset=64');
                            local.get('$jn'); i32.load('offset=64');
                            i32.ge_s();
                            br_if('$iSwap');
    
                            local.get('$i'); local.set('$s');
                            br('$noJN');
                            end();
                        }

                        local.get('$j'); local.set('$s');
                        br('$noJN');
                        end();
                    }

                    // else s = i;
                    local.get('$i'); local.set('$s');

                    local.get('$j'); i32.load('offset=64');
                    local.get('$h'); i32.load('offset=64');
                    // else if (j.v >= h.v) break;
                    i32.lt_s(); br_if('$noJN');
                    br('$top');
                    end();
                }

                local.get('$s');
                // t = s.n;
                local.get('$s'); i32.load(); local.tee('$t');
                // s.n = t.n;
                i32.load(); i32.store();
                // t.n = h;
                local.get('$t'); local.get('$h'); i32.store();

                // if (t.v < h.v) i = t;
                local.get('$t'); i32.load('offset=64');
                local.get('$h'); i32.load('offset=64');
                i32.lt_s();
                raw(`(if (then local.get $t local.set $i))`);

                // j = i.n;
                local.get('$i'); i32.load(); local.tee('$j');
                i32.load(); local.set('$jn');
                // h = t;
                local.get('$t');
                local.set('$h');

                begin('block', '$cd2');
                {
                    begin('block', '$crossdown2');
                    {
                        testacross();
                        local.get('$t'); br_if('$crossdown2');
                        br('$cd2');

                        end();
                    }
                    testdown();
                    br_if('$top');

                    end();
                }

                br('$perms');
                end();
            }
            end();
        }

        // store final permutation in m.seed
        m.seed.at();
        local.get('$words');
        v128.store();
        local.get('$result');
        end();
    }

    raw(`(data (;0;) (i32.const 0) "${hashData}")`);
    end();
}
        

We'll need the WebAssembly Binary Toolkit (wabt) to build it, and a Makefile

w4hash.wasm: w4hash-generated.wat
	wat2wasm w4hash-generated.wat -o w4hash.wasm
w4hash-generated.wat: w4hash-generator.js
	node w4hash-generator.js >w4hash-generated.wat
        

It boils down to 36KiB of wasm: a few K of opcodes plus a 32KiB lookup table. We'll need some js to load the wasm, set up the input buffers, invoke it, and read out outputs. Note that the init code for hash tables and permutations isn't in the hot loop, so it can stay in js.

let w4hash = null;
let w4hashbuf = null;
let w4seed = null;
let w4across = null;
let w4remap = null;
WebAssembly.instantiateStreaming(fetch("./w4hash.wasm"), {}).then(mod => {
    w4hash = mod.instance.exports;
    w4hashbuf = mod.instance.exports.memory.buffer;
    const pSeed = (2*4096 + 3*16) << 2;
    w4seed = new Int8Array(w4hashbuf, pSeed, 32);
    w4across = new Int32Array(w4hashbuf, pSeed, 4);
    w4remap = new Int32Array(w4hashbuf, pSeed+16, 4);
}).catch(console.error);
        
function solveD(seed, letters) {
    const seedInts = seed.map(intFromLetter);
    const letterInts = letters.map(intFromLetter);
    const { permutation, remap } = ms2permutation(letterInts);

    for (let i=0; i<4; ++i) w4seed[i] = seedInts[i];
    for (let i=0; i<12; ++i) {
        w4seed[4+i] = permutation[i];
        w4seed[20+i] = remap[i];
    }
    if (w4hash.solve()) return [...w4seed.slice(4, 16)].map(letterFromInt);
}
        
console.log(solveD(seed, letters));
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 0.447 seconds.
        

A new record! Now 386x faster (=== 172.554/0.447) than we started. And 4.6x faster than the same algorithm in js.

# Zig with simd

Maybe we can get the same results by compiling a more ergonomic language down to wasm? I looked into Rewriting in Rust™ but this algorithm is so simple it doesn't need an allocator, let alone a borrow checker. Zig turned out to be a better fit.

Thanks to comptime specialization, the implementations of MultiPermute and hashing could be disentangled while still allowing short-circuit evaluation. And, like the hand-generated wasm, we can discover a prime and build the table into the binary at comptime.

// MultiPermute.zig
const std = @import("std");
const testing = std.testing; 

pub const MaxN = 1024;
pub const MaxNsmall = 16;

// Specialized to keep results in a register, avoiding ram.
pub fn MultiPermuteSmall(comptime N: u32) type {
    return MultiPermuteSmallWithVisitor(N, struct {});
}

fn ms2permutation(symbols: []const u8, permu: []u8, remap: []u8, N: u16) void {
    // ms2multiplicities
    const capacity = symbols.len;
    var counters = [_]u8{0} ** 256;
    for (0..N) |i| {
        const e = symbols[capacity - i - 1];
        counters[e] += 1;
    }
    var mults = [_]u8{0} ** MaxN;
    var nMult: usize = 0;
    for (0..N) |i| {
        const e = symbols[capacity - i - 1];
        const n = counters[e];
        if (n == 0) continue;
        remap[nMult] = e;
        mults[nMult] = n;
        counters[e] = 0;
        nMult += 1;
    }
    // multiplicity2sorted:
    var j: usize = 0;
    for (0..N) |k| {
        var m = mults[k];
        while (m > 0) : (m -= 1) {
            permu[j] = @intCast(k);
            j += 1;
        }
    }
}

const MultiPermuteNode = struct { v: u32, r: u32, n: ?*MultiPermuteNode };

fn initMultiPermuteList(nodes: []MultiPermuteNode, permu: []u8, remap: []u8) [3]?*MultiPermuteNode {
    const N = nodes.len;
    for (0..N) |i| {
        nodes[i].v = permu[N - i - 1];
        const v: u32 = @bitCast(nodes[i].v);
        nodes[i].r = remap[v];
        nodes[i].n = if (i < N - 1) &nodes[i + 1] else null;
    }

    return [_]?*MultiPermuteNode{ &nodes[0], &nodes[N - 2], &nodes[N - 1] };
}

fn readMultiPermuteList(head: ?*MultiPermuteNode, symbols: []u8) bool {
    var h = head;
    if (h == null) return false;

    for (0..symbols.len) |j| {
        const i = symbols.len - j - 1;
        symbols[i] = @intCast(h.?.r);
        h = h.?.n;
    }

    return true;
}

/// Rewires the linked list [h, ..., i, j] for the next permutation, and returns the next [h, i, j]
// ([head, penultimate, ultimate]).
inline fn nextFrom(h: ?*MultiPermuteNode, i: ?*MultiPermuteNode, j: ?*MultiPermuteNode) [3]?*MultiPermuteNode {
    var s = i.?;

    // slower code uses CMOV/select:
    //if (j.n != null) {
    //    s = if (i.v >= j.n.?.v) j else i;
    if (j.?.n != null and i.?.v >= j.?.n.?.v) {
        @branchHint(.likely);
        s = j.?;
    } else if (j.?.n != null) {
        @branchHint(.unlikely);
        s = i.?;
    } else if (j.?.v < h.?.v) {
        @branchHint(.unlikely);
        s = i.?;
    } else {
        @branchHint(.cold);
        return [_]?*MultiPermuteNode{null} ** 3;
    }

    const t = s.n.?;
    s.n = t.n;
    t.n = h.?;
    var iOut = i.?;

    if (t.v < h.?.v) {
        @branchHint(.unlikely);
        iOut = t;
    }

    return [_]?*MultiPermuteNode{ t, iOut, iOut.n.? };
}

/// For applications where reading each permutation (traversing a linked list)
/// is a significant fraction of runtime, provide a Visitor that can matchQuad(fourBytes, index in 0..4) --
/// if not matched, list traversal is abandoned and we move on to the next permutation.
/// Visitor: { .skipBeforeReading() bool, .matchQuad(x, i) bool, .match(x) bool }
pub fn MultiPermuteSmallWithVisitor(comptime N: u32, comptime Visitor: type) type {
    if (N > MaxNsmall) @compileError("Max 16 symbols.");
    return struct {
        visitor: Visitor,
        nodes: [N]Node,
        permu: [N]u8,
        remap: [N]u8,
        N: u32 = N,
        h: ?*Node = null,
        i: ?*Node = null,
        j: ?*Node = null,
        input: @Vector(16, u8) = @splat(0),

        const Self = @This();
        const Node = MultiPermuteNode;

        pub inline fn initVisitor(visitor: Visitor) Self {
            return Self{ .visitor = visitor, .nodes = [_]Node{Node{ .v = 0, .r = 0, .n = null }} ** N, .permu = [_]u8{0} ** N, .remap = [_]u8{0} ** N };
        }

        pub inline fn init() Self {
            return initVisitor(.{});
        }

        pub inline fn reset(self: *Self) void {
            self.h = null;
        }

        /// Inputs symbols for permutation. The first (N-16) symbols will be untouched in the output;
        /// the remaining N symbols will be permuted.
        pub fn initSymbols(self: *Self, symbols: @Vector(16, u8)) void {
            self.input = symbols;

            var symbolArr = [_]u8{0} ** 16;
            symbolArr[0..].* = symbols;
            ms2permutation(&symbolArr, &self.permu, &self.remap, N);

            self.reset();
        }

        /// Initializes the linked list with input symbols and initial traversal order.
        pub fn initNodes(self: *Self) void {
            if (self.permu[N - 1] == 0)
                return self.initSymbols(self.input);

            self.h, self.i, self.j = initMultiPermuteList(&self.nodes, &self.permu, &self.remap);
        }

        /// Moves to the next permutation.
        pub inline fn moveNext(self: *Self) bool {
            self.h, self.i, self.j = nextFrom(self.h, self.i, self.j);
            if (self.h == null) {
                self.reset();
                return false;
            }
            return true;
        }

        /// Moves to the first or next permutation (if any) and reads it out.
        pub inline fn nextSeq(self: *Self) ?@Vector(16, u8) {
            if (self.h == null) {
                self.initNodes();
            } else if (!moveNext(self)) {
                return null;
            }

            return readSymbols(self, self.h, self.input);
        }

        /// Reads out the current permutation, or null if there are no more.
        inline fn readSymbols(self: *Self, head: ?*Node, input: @Vector(16, u8)) ?@Vector(16, u8) {
            var h = head;
            if (h == null) return null;

            const canMatchQuad = comptime std.meta.hasMethod(Visitor, "matchQuad");
            const Nquadded = 4 * (N / 4);

            const canSkip = comptime std.meta.hasMethod(Visitor, "skipBeforeReading");
            if (canSkip and self.visitor.skipBeforeReading())
                return input;

            var x: u32 = 0;

            if (N == Nquadded) {
                var quads: @Vector(4, u32) = @bitCast(input);

                inline for (0..N) |j| {
                    const r: u32 = h.?.r;
                    if (j < (N - 1))
                        h = h.?.n;

                    // accumulate up to 4 bytes in x
                    x += r;
                    const i = Nquadded - j - 1;
                    if ((i % 4) != 0) {
                        x = x << 8;
                    } else if (!self.visitor.matchQuad(x, 3 - j / 4)) {
                        return null;
                    } else {
                        quads[3 - j / 4] = x;
                        x = 0;
                    }
                }

                return @bitCast(quads);
            } else {
                var symbols: @Vector(16, u8) = input;

                inline for (0..N) |j| {
                    const r: u32 = h.?.r;
                    symbols[MaxNsmall - j - 1] = @intCast(r);
                    if (j < (N - 1))
                        h = h.?.n;

                    // accumulate up to 4 bytes in x
                    if (canMatchQuad and j < Nquadded) {
                        x += r;
                        const i = Nquadded - j - 1;
                        if ((i % 4) != 0) {
                            x = x << 8;
                        } else if (!self.visitor.matchQuad(x, 3 - j / 4)) {
                            return null;
                        } else if (i != 0) {
                            x = 0;
                        }
                    }
                }

                return symbols;
            }
        }

        /// Iterates through permutations, returning the first one for which `Visitor.match(seq)` is true.
        pub fn nextMatch(self: *Self) ?@Vector(16, u8) {
            if (comptime !std.meta.hasMethod(Visitor, "match")) {
                @compileError("Visitor.match is not defined.");
            }

            // stuff that's faster if it's in a register
            const input: @Vector(16, u8) = self.input;

            if (self.h == null) {
                @call(.never_inline, initNodes, .{self});
                if (self.readSymbols(self.h, input)) |puz| {
                    if (self.visitor.match(puz)) {
                        return puz;
                    }
                }
            }

            var h = self.h;
            var i = self.i;
            var j = self.j;

            while (true) {
                h, i, j = nextFrom(h, i, j);
                if (h == null) {
                    self.reset();
                    return null;
                }

                if (self.readSymbols(h, input)) |puz| {
                    if (self.visitor.match(puz)) {
                        self.h = h;
                        self.i = i;
                        self.j = j;
                        return puz;
                    }
                }
            }
        }
    };
}
        
// z4hash.zig
const std = @import("std");

const MultiPermute = @import("MultiPermute");

const zero128 = @Vector(4, u32){ 0, 0, 0, 0 };
const nHash = 4096;
const hashMask = (nHash - 1) << 1;
const aVal = 97;
const shift = 16;

const WordKey = struct {
    key: u32,
    offset: u32
};

fn compareWordKey(context: void, a: WordKey, b: WordKey) bool {
    _ = context;
    return a.offset < b.offset;
}

fn findPrime(comptime wordLetters: anytype, comptime S: usize) u32 {
    if (wordLetters.len == 0)
        return 0;

    @setEvalBranchQuota(10000000);

    const seed: u64 = 0;
    var prng = std.Random.DefaultPrng.init(seed);
    const rand = prng.random();

    var tbl align(8) = [_]u32{0} ** (2 * nHash);
    var p: u32 = 0;

    nextprime: while(true) {
        @memset(tbl[0..2*nHash], 0);
        
        getprime: while(true) {
            p = rand.int(u32);
            if ((p%2) == 0 or (p%3) == 0) continue :getprime;
            var i = 5;
            while (i*i <= p) : (i += 6) {
                if ((p%i) == 0 or (p % (i+2)) == 0) continue :getprime;
            }
            break :getprime;
        }
        
        const n = wordLetters.len / S;
        const word_keys = init: {
            var wks: [n]WordKey = undefined;
            for (&wks, 0..) |*wk, i| {
                if (S > 4 and words[S + 4] == '9') {
                    wk.* = WordKey{ .key = 0, .offset = 0 };
                    continue;
                } else {
                    const a: u32 = words[S * i + 0] - aVal;
                    const b: u32 = words[S * i + 1] - aVal;
                    const c: u32 = words[S * i + 2] - aVal;
                    const d: u32 = words[S * i + 3] - aVal;
                    const x: u32 = (d << 24) + (c << 16) + (b << 8) + a;
                    const h: u32 = @intCast(((p *% x) >> 16) & hashMask);
                    wk.* = WordKey{ .key = x, .offset = h };
                }
            }
            std.mem.sort(WordKey, &wks, {}, compareWordKey);
            break :init wks;
        };

        nextword: for (word_keys) |wk| {
            for (0..4) |j| {
                if (tbl[wk.offset + j] == 0) {
                    tbl[wk.offset + j] = wk.key;
                    continue :nextword;
                }
            }
            continue :nextprime;
        }

        break :nextprime;
    }

    return p;
}

const words = @embedFile("./words4.txt");
const hashPrime = findPrime(words, 4);

// Lookup table for English 4-letter words; stores ~2.5k words in ~32KiB,
// all words fit within either the 2 slots for that hash value, or in the following 2 slots,
// for a single SIMD lookup.

pub fn StaticWordHash() type {
    return struct {
        tbl: [*]const u32 = &emptyTbl,

        const Self = @This();
        const emptyTbl = [0]u32{};
        const emptyTbl8 = [0]u8{};

        fn hashKey(a: u32, b: u32, c: u32, d: u32) u32 {
            return (d << 24) + (c << 16) + (b << 8) + a;
        }

        fn hashVal(x: u32) u32 {
            return @intCast(((hashPrime *% x) >> shift) & hashMask);
        }

        pub inline fn initComptime(comptime wordLetters: anytype, comptime S: usize) Self {
            comptime {
                var self = Self{};
                if (wordLetters.len == 0)
                    return self;

                @setEvalBranchQuota(1000000);

                var tbl align(8) = [_]u32{0} ** (2 * nHash);

                const n = wordLetters.len / S;
                const word_keys = init: {
                    var wks: [n]WordKey = undefined;
                    for (&wks, 0..) |*wk, i| {
                        if (S > 4 and words[S + 4] == '9') {
                            wk.* = WordKey{ .key = 0, .offset = 0 };
                            continue;
                        } else {
                            const a = words[S * i + 0] - aVal;
                            const b = words[S * i + 1] - aVal;
                            const c = words[S * i + 2] - aVal;
                            const d = words[S * i + 3] - aVal;
                            const x = hashKey(a, b, c, d);
                            const h = hashVal(x);
                            wk.* = WordKey{ .key = x, .offset = h };
                        }
                    }
                    std.mem.sort(WordKey, &wks, {}, compareWordKey);
                    break :init wks;
                };

                nextword: for (word_keys) |wk| {
                    for (0..4) |j| {
                        if (tbl[wk.offset + j] == 0) {
                            tbl[wk.offset + j] = wk.key;
                            continue :nextword;
                        }
                    }
                    @compileLog("Excessive hash collisions", wk.key);
                }

                const finalTbl = tbl;
                self.tbl = &finalTbl;
                
                return self;
            }
        }
                
        pub inline fn randomWord(self: *const Self) u32 {
            const rand = std.crypto.random;
            
            while (true) {
                const c = rand.int(u32) & ((2 * nHash) - 1);
                const w = self.tbl[c];
                if (w != 0) return w;
            }
        }

        pub inline fn isWord(self: *const Self, x: u32) bool {
            const h = hashVal(x);
            const vx: @Vector(4, u32) = @splat(x);
            const vh: @Vector(4, u32) = self.tbl[h..][0..4].*;
            return @reduce(.Or, vx == vh);
        }

        pub inline fn testAcrossDown(self: *const Self, puz: @Vector(4, u32)) bool {
            return self.isWord(puz[1]) and self.isWord(puz[2]) and self.isWord(puz[3]) and self.testDown(puz);
        }

        pub fn testDown(self: *const Self, puz: @Vector(16, u8)) bool {
            const mask = @Vector(16, u8){ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
            const transposed = @shuffle(u8, puz, undefined, mask);
            const down: @Vector(4, u32) = @bitCast(transposed);

            const hh = down *% @as(@Vector(4, u32), @splat(hashPrime));
            var hShort: @Vector(8, u16) = @bitCast(hh);
            hShort &= @as(@Vector(8, u16), @splat(hashMask));

            for (0..4) |i| {
                const x = down[i];
                const h = hShort[2*i + 1];
                const vx: @Vector(4, u32) = @splat(x);
                const vh: @Vector(4, u32) = self.tbl[h..][0..4].*;
                if (!@reduce(.Or, vx == vh))
                   return false;
            }
            
            return true;
        }

        // interface Visitor:

        pub inline fn matchQuad(self: *const Self, x: u32, comptime i: usize) bool {
            _ = i;
            return self.isWord(x);
        }

        pub inline fn match(self: *const Self, x: @Vector(16, u8)) bool {
            return self.testDown(x);
        }
    };
}

const Hashing = StaticWordHash();
pub const hashing: Hashing = .initComptime(words, 7);

var mp: MultiPermute.MultiPermuteSmallWithVisitor(12, Hashing) = .initVisitor(hashing);

// exports for WASM:

var letterIO = [_]u8{0} ** (MultiPermute.MaxNsmall);
var wordIO: [*]u32 = @alignCast(@ptrCast((&letterIO)[0..MultiPermute.MaxNsmall]));

export fn letteraddr() usize {
    return @intFromPtr(&letterIO);
}

export fn solve() bool {
    mp.initSymbols(letterIO[0..].*);
    return nextSolution();
}

export fn nextSolution() bool {
    if (mp.nextMatch()) |puz| {
        letterIO[0..].* = puz;
        return true;
    }
    return false;
}
        
// build.zig
const std = @import("std");
const Target = @import("std").Target;
const Feature = @import("std").Target.Cpu.Feature;

pub fn build(b: *std.Build) void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const package = b.dependency("MultiPermute", .{
        .target = target,
        .optimize = optimize,
    });
    const module = package.module("MultiPermute");

    var wasmFeatures = Feature.Set.empty;
    const features = Target.wasm.Feature;
    wasmFeatures.addFeature(@intFromEnum(features.bulk_memory));
    wasmFeatures.addFeature(@intFromEnum(features.extended_const));
    wasmFeatures.addFeature(@intFromEnum(features.multimemory));
    wasmFeatures.addFeature(@intFromEnum(features.multivalue));
    wasmFeatures.addFeature(@intFromEnum(features.mutable_globals));
    wasmFeatures.addFeature(@intFromEnum(features.reference_types));
    wasmFeatures.addFeature(@intFromEnum(features.relaxed_simd));
    wasmFeatures.addFeature(@intFromEnum(features.sign_ext));
    wasmFeatures.addFeature(@intFromEnum(features.simd128));
    wasmFeatures.addFeature(@intFromEnum(features.tail_call));

    const wasm = b.addExecutable(.{ .name = "z4hash", .root_source_file = b.path("z4hash.zig"), .target = b.resolveTargetQuery(.{ .cpu_arch = .wasm32, .os_tag = .freestanding, .cpu_features_add = wasmFeatures }), .optimize = .ReleaseFast, .strip = true });
    wasm.entry = .disabled;
    wasm.export_memory = true;
    wasm.rdynamic = true;
    wasm.stack_size = 16384;
    wasm.root_module.addImport("MultiPermute", module);

    b.installArtifact(wasm);
}
        
let z4hash = null;
let z4seed = null;
let z4across = null;
let z4remap = null;
WebAssembly.instantiateStreaming(fetch("./z4hash.wasm"), {}).then(mod => {
    z4hash = mod.instance.exports;
    const z4hashbuf = mod.instance.exports.memory.buffer;
    const pSeed = z4hash.letteraddr();
    z4seed = new Int8Array(z4hashbuf, pSeed, 16);
    z4across = new Int32Array(z4hashbuf, pSeed, 4);
}).catch(console.error);
        
function solveZig(seed, letters) {
    const seedInts = seed.map(intFromLetter);
    const letterInts = letters.map(intFromLetter).reverse();
    for (let i=0; i<4; ++i) z4seed[i] = seedInts[i];
    for (let i=0; i<12; ++i) z4seed[4+i] = letterInts[i];
    if (z4across[0] !== hashing.hashKey(...seedInts)) throw new Error('Big-endian CPU detected, and this WASM code relies on little-endian conventions.')
    const solved = z4hash.solve();
    if (solved) return [...z4seed.slice(4, 16)].map(letterFromInt);
}
        
console.log(solveZig(seed, letters));
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 0.400 seconds.
        

# WebGL2 Transform Feedback

Multipermute isn't embarassingly parallel, but we could test any number of permutations in parallel if we fall back to unrank1. Will the GPU be any faster? I found some instructions and utility code for Transform Feedback, and wrote a shader which tests a batch of permutations for each worker/invocation and writes the matching permutation number (if any) to the output.

The trouble with GPUs is that we have to transfer the results from GPU memory and read through them sequentially to find a nonzero output. We want a workgroup/transfer size that's big enough to use the GPU effectively, but small enough to transfer results quickly. And if the answer is found early, we should avoid waiting for every permutation. Batch and transfer sizes were tuned empirically.

/*
from https://github.com/gfxfundamentals/webgl2-fundamentals

# Copyright 2021, GFXFundamentals.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of GFXFundamentals. nor the names of his
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

function createDataTexture(gl, data, internalFormat, format, type, tex) {
    let numElements = data.length;

    // next highest square power-of-two
    const side = Math.sqrt(numElements);
    const p2side = 2**Math.ceil(Math.log(side) / Math.log(2));
    numElements = p2side**2;
    
    // compute a size that will hold all of our data
    const width = 4*Math.ceil(Math.sqrt(numElements)/4);
    const height = Math.ceil(numElements / width);

    if (data.length < (width * height)) {
        const srcData = data;
        data = new (data.constructor)(width * height);
        data.set(srcData);
    }
    
    tex ??= gl.createTexture();
    gl.bindTexture(gl.TEXTURE_2D, tex);
    gl.texImage2D(
        gl.TEXTURE_2D,
        0,        // mip level
        internalFormat,
        width,
        height,
        0,        // border
        format,
        type,
        data,
    );
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
    gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
    return {tex, dimensions: { width, height }};
}
    
function createShader(gl, type, src) {
    const shader = gl.createShader(type);
    gl.shaderSource(shader, src);
    gl.compileShader(shader);
    if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {
        throw new Error(gl.getShaderInfoLog(shader));
    }
    return shader;
}
    
function makeBuffer(gl, sizeOrData, usage=gl.STATIC_DRAW) {
    const buf = gl.createBuffer();
    gl.bindBuffer(gl.ARRAY_BUFFER, buf);
    gl.bufferData(gl.ARRAY_BUFFER, sizeOrData, usage);
    return buf;
}
    
function makeTransformFeedback(gl, r) {
    const tf = gl.createTransformFeedback();
    gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, tf);
    const foundBuffer = makeBuffer(gl, r * 4, gl.DYNAMIC_READ);
    gl.bindBufferBase(gl.TRANSFORM_FEEDBACK_BUFFER, 0, foundBuffer);
    gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, null);
    gl.bindBuffer(gl.ARRAY_BUFFER, null);
    return [tf, foundBuffer];
}

let minimalFragmentSource = `#version 300 es
precision highp float;
void main() {}
`;
        
let vertexSource = `#version 300 es
precision highp float;
uniform highp isampler2D hashes;
uniform int k, k0, k1;
uniform ivec4 p0, p1, p2, p3;
flat out int found;

int isNonWord(int a, int b, int c, int d, ivec2 hashDim) {
    int k = a + b*(1<<8) + c*(1<<16) + d*(1<<24);
    int h = ((0x${hashing.p.toString(16)} * k) >> 16) & 0x1ffe;
    int y = h / hashDim.x;
    int x = h % hashDim.x;
    if (
        k == texelFetch(hashes, ivec2(x, y), 0).r ||
        k == texelFetch(hashes, ivec2(x+1, y), 0).r ||
        k == texelFetch(hashes, ivec2(x+2, y), 0).r ||
        k == texelFetch(hashes, ivec2(x+3, y), 0).r
    ) return 0;
    return 1;
 }

void main() {
   int p[12];
   int r = gl_VertexID * k;
   ivec2 hashDim = textureSize(hashes, 0);
   found = 0;

   for (int i=k0; i<k1; ++i) {
     p[0] = p1[0]; p[1] = p1[1]; p[2]  = p1[2]; p[3]  = p1[3];
     p[4] = p2[0]; p[5] = p2[1]; p[6]  = p2[2]; p[7]  = p2[3];
     p[8] = p3[0]; p[9] = p3[1]; p[10] = p3[2]; p[11] = p3[3];

     int rr = r + i;
     for (int n=12; n>0; --n) {
        int x = p[n - 1]; // swap with p[rr % n]
        int j = rr % n;
        p[n - 1] = p[j];
        p[j] = x;
        rr = rr / n;
     }

     if (
        0 == isNonWord(p[0], p[1], p[2], p[3], hashDim) &&
        0 == isNonWord(p[4], p[5], p[6], p[7], hashDim) &&
        0 == isNonWord(p[8], p[9], p[10], p[11], hashDim) &&
        0 == isNonWord(p0[0], p[0], p[4], p[8], hashDim) &&
        0 == isNonWord(p0[1], p[1], p[5], p[9], hashDim) &&
        0 == isNonWord(p0[2], p[2], p[6], p[10], hashDim) &&
        0 == isNonWord(p0[3], p[3], p[7], p[11], hashDim)
    ) {
       found = r + i + 1;
       break;
     }
  }
 }
`;

let rFull = letters.length;
// r = min(7, n)! * 4
let r = Math.min(7, letters.length);
for (let i = rFull; i --> 1; rFull *= i) if (i < r) r *= i;
r *= 4;
let k = rFull / r;

const glcanvas = new OffscreenCanvas(256, 256);
const gl = glcanvas.getContext('webgl2');

let hashesTex = null;
let gpuResults = null;

function updateGLHashing(hashing) {
    const { tex, dimensions } =
        createDataTexture(gl, hashing.table, gl.R32I, gl.RED_INTEGER, gl.INT, hashesTex);
    hashesTex = tex;
    gpuResults = new Int32Array(dimensions.width * dimensions.height);
}
updateGLHashing(hashing);

const vShader = createShader(gl, gl.VERTEX_SHADER, vertexSource);
const fShader = createShader(gl, gl.FRAGMENT_SHADER, minimalFragmentSource);

const program = gl.createProgram();
gl.attachShader(program, vShader);
gl.attachShader(program, fShader);
gl.transformFeedbackVaryings(
    program,
    ['found'],
    gl.SEPARATE_ATTRIBS,
);
gl.linkProgram(program);
if (!gl.getProgramParameter(program, gl.LINK_STATUS)) {
    throw new Error(gl.getProgramParameter(program));
}

const [tf, foundBuffer] = makeTransformFeedback(gl, r);
gl.enable(gl.RASTERIZER_DISCARD);

const locs = {
    hashes: gl.getUniformLocation(program, 'hashes'),
    k: gl.getUniformLocation(program, 'k'),
    k0: gl.getUniformLocation(program, 'k0'),
    k1: gl.getUniformLocation(program, 'k1'),
    p0: gl.getUniformLocation(program, 'p0'),
    p1: gl.getUniformLocation(program, 'p1'),
    p2: gl.getUniformLocation(program, 'p2'),
    p3: gl.getUniformLocation(program, 'p3'),
};

gl.useProgram(program);
gl.uniform1i(locs.hashes, hashesTex);
gl.uniform1i(locs.k, k);

function solveGPU(seed, letters) {
    const seedInts = seed.map(intFromLetter);
    const letterInts = letters.map(intFromLetter);

    gl.useProgram(program);
    gl.uniform4i(locs.p0, seedInts[0], seedInts[1], seedInts[2], seedInts[3]);
    gl.uniform4i(locs.p1, letterInts[0], letterInts[1], letterInts[2], letterInts[3]);
    gl.uniform4i(locs.p2, letterInts[4], letterInts[5], letterInts[6], letterInts[7]);
    gl.uniform4i(locs.p3, letterInts[8], letterInts[9], letterInts[10], letterInts[11]);
 
    const batch = 1536;
    for (let k0 = 0; k0 < k; k0 += batch) {
        gl.uniform1i(locs.k0, k0);
        gl.uniform1i(locs.k1, Math.min(k, k0 + batch));
    
        gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, tf);
        gl.beginTransformFeedback(gl.POINTS);
        gl.drawArrays(gl.POINTS, 0, r);
        gl.endTransformFeedback();
        gl.bindTransformFeedback(gl.TRANSFORM_FEEDBACK, null);

        /*
        const fence = gl.fenceSync(gl.SYNC_GPU_COMMANDS_COMPLETE, 0);
        gl.flush();

        const status = gl.clientWaitSync(fence, 0, 0);
        if (status === gl.CONDITION_SATISFIED || status === gl.ALREADY_SIGNALED) {
            gl.deleteSync(fence);
        */

        gl.bindBuffer(gl.ARRAY_BUFFER, foundBuffer);
        gl.getBufferSubData(gl.ARRAY_BUFFER, 0, gpuResults);
        gl.bindBuffer(gl.ARRAY_BUFFER, null);

        for (let i=r; i-->0;) {
            if (gpuResults[i]) return unrank1(letters.length, gpuResults[i] - 1, letters.slice());
        }
    }
}
        
console.log(solveGPU(seed, letters));
        
l,i,c,k,m,o,r,e,s,t,e,w
Finished in 0.166 seconds.
        

# Random puzzle

How about generating a random puzzle? With four copies of the word list, we can shuffle each, then try combinations in order (across) until there are four valid words down.

function shuffle(arr) {
    let pos = arr.length;
    while (pos != 0) {
        const randIx = Math.floor(Math.random() * pos--);
        [ arr[pos], arr[randIx] ] = [ arr[randIx], arr[pos] ];
    }
    return arr;
}
function shuffledInts(words) {
    return shuffle([...words].map(w => [...w].map(intFromLetter)));
}

function randomPuzzle() {
    const wl = [0, 1, 2, 3].map(() => shuffledInts(words));
    for (const a of wl[0]) {
        for (const b of wl[1]) {
            for (const c of wl[2]) {
                for (const d of wl[3]) {
                    if (!a.every((a, i) => isWord(a, b[i], c[i], d[i]))) continue;
                    return [a, b, c, d].map(l => l.map(letterFromInt).join(''));
                }
            }
        }
    }
    return null;
}
const [seed, ...ltrWords] = randomPuzzle();
console.log(seed, ltrWords);
const letters = ltrWords.flatMap(s => [...s]).sort().join('');
console.log(seed, letters);
        
loot,edge,fora,trek
loot,adeeefgkorrt
Finished in 10.356 seconds.
        

But it's pretty slow. Before we speed it up, let's average out ten runs for a better baseline.

function randomPuzzle() {
    const wl = [0, 1, 2, 3].map(() => shuffledInts(words));
    for (const a of wl[0]) {
        for (const b of wl[1]) {
            for (const c of wl[2]) {
                for (const d of wl[3]) {
                    if (!a.every((a, i) => isWord(a, b[i], c[i], d[i]))) continue;
                    return [a, b, c, d].map(l => l.map(letterFromInt).join(''));
                }
            }
        }
    }
    return null;
}

for (let iter=10; iter --> 0;) {
    const [seed, ...ltrWords] = randomPuzzle();
    const letters = ltrWords.flatMap(s => [...s]).sort().join('');
    console.log(seed, letters);
}
        
hurt,addeeknoosss
drab,bbeeeillorst
cops,aaeeklmnrswy
sink,iilnnoopsstt
sews,aaeeglmnrtuv
beep,eeeeeelprsst
rely,aaaddeeginrs
host,aabcekoortuz
pelf,aacdhiinostw
joes,beelnooosstw
Finished in 83.901 seconds.        
        

# Prefix tables

If we add tables for partial words, we can check if any of the two- and three-letter prefixes are impossible, and skip the iteration over c and d.

let masks1 = new Int32Array(26);
for (const word of words) {
    const [a, b] = [...word].map(intFromLetter);
    masks1[a] |= (1 << b);
}
function isPrefix2(a, b) {
    return !!(masks1[a] & (1 << b));
}

let masks2 = new Int32Array(26*32);
for (const word of words) {
    const [a, b, c] = [...word].map(intFromLetter);
    masks2[(a<<5) + b] |= (1 << c);
}
function isPrefix3(a, b, c) {
    return !!(masks2[(a<<5) + b] & (1 << c));
}

function randomPuzzleFast() {
    const wl = [0, 1, 2, 3].map(() => shuffledInts(words));
    for (const a of wl[0]) {
        for (const b of wl[1]) {
            if (!a.every((a, i) => isPrefix2(a, b[i]))) continue;
            for (const c of wl[2]) {
                if (!a.every((a, i) => isPrefix3(a, b[i], c[i]))) continue;
                for (const d of wl[3]) {
                    if (!a.every((a, i) => isWord(a, b[i], c[i], d[i]))) continue;
                    return [a, b, c, d].map(l => l.map(letterFromInt).join(''));
                }
            }
        }
    }
    return null;
}
        
for (let iter=10; iter --> 0;) {
    const [seed, ...ltrWords] = randomPuzzleFast();
    const letters = ltrWords.flatMap(s => [...s]).sort().join('');
    console.log(seed, ltrWords, letters);
}
        
bock,ague,ally,sets,aaeegllsstuy
anus,nave,omen,neat,aaeeemnnnotv
ribs,unit,stay,hose,aehinossttuy
sect,agar,kora,espy,aaaegkoprrsy
heft,ayes,leek,orts,aeeeklorssty
oval,dale,ires,cyst,acdeeilrssty
acai,cart,alee,imam,aaaceeilmmrt
moas,arum,taro,slag,aaaglmorrstu
boll,olio,year,sore,aeeilooorrsy
lady,acai,camp,yips,aaacciimppsy
Finished in 0.054 seconds.
        

Much better! A 1500x improvement, without touching WebAssembly!

# Next Steps

There are 12,478 5-letter words in SOWPODS, and 2,315 5-letter solutions to a famous guessing game. We could extend the code above to work with 5x5 grids of those words, and go beyond to 6 or even 7 letters.

At larger sizes, some techniques will start running out of bits (and ram). Even a hash table will require more sophisticated multi-word hashing. If Set<String>-based tests are competitive for longer prefix lengths, we might want a multi-level strategy using simple tables for short prefixes, hash tables for medium length, and Set<String> for anything longer.

# Strive for Five

Needed githack.com to defeat CORS:

let words5 = [];
fetch('https://gistcdn.githack.com/cfreshman/a03ef2cba789d8cf00c08f767e0fad7b/raw/45c977427419a1e0edee8fd395af1e0a4966273b/wordle-answers-alphabetical.txt')
    .then(r => r.text())
    .then(t => {
        words5 = t.split('\n');
        for (const word of words5) {
            const [a, b, c, d, e] = [...word].map(intFromLetter);
            masks15[a] |= (1 << b);
            masks25[(a<<5) + b] |= (1 << c);
            masks35[(a<<10) + (b<<5) + c] |= (1 << d);
        }
    })
    .catch(console.error);

let masks15 = new Int32Array(26);
let masks25 = new Int32Array(26*32);
let masks35 = new Int32Array(26*32**2);
function isPrefix25(a, b) {
    return !!(masks15[a] & (1 << b));
}
function isPrefix35(a, b, c) {
    return !!(masks25[(a<<5) + b] & (1 << c));
}
function isPrefix45(a, b, c, d) {
    return !!(masks35[(a<<10) + (b<<5) + c] & (1 << d));
}

function buildHashTable5(hashKey, hashVal) {
    const hash = new Int32Array(8192*4);
    for (const word of words5) {
        const [a, b, c, d, e] = [...word].map(intFromLetter);
        const x = hashKey(a, b, c, d, e);
        const h = hashVal(x);
        for (let i=0;i<4;++i) {
            if (i === 3) return null;
            if (!hash[h+i]) {
                hash[h+i] = x;
                break;
            }
        }
    }
    return hash;
}
function setupHashing5(p=0x8815379) {
    function hashKey(a, b, c, d, e) {
        return (e<<20) + (d<<15) + (c<<10) + (b<<5) + a;
    }
    function hashVal(x) {
        return (((p * x) & 0x1ffffffe) >> 16) << 2;
    }
    const table = buildHashTable5(hashKey, hashVal);
    if (!table) return null;

    function isWord(a, b, c, d, e=26) {
        const x = hashKey(a, b, c, d, e);
        const h = hashVal(x);
        for (let i=0;i<4;++i) {
            if (table[h+i] === x) return true;
        }
        return false;
    }

    for (const word of words5) {
        const [a, b, c, d, e] = [...word].map(intFromLetter);
        if (!isWord(a, b, c, d, e)) return null;
    }
    
    return { p, table, hashKey, hashVal, isWord };
}

let hashing5 = null;
let prime5 = null; // 0x58e8c8c;
            
while (!prime5) {
    const p = 0x1000000 + (0xeffffff * Math.random())|0;
    if (!isPrime(p)) continue;

    hashing5 = setupHashing5(p);
    if (!hashing5) continue;

    prime5 = p;
    console.log('0x' + p.toString(16));
}
function randomPuzzle5() {
    const wl = [0, 1, 2, 3, 4].map(() => shuffledInts(words5));
    for (const a of wl[0]) {
        for (const b of wl[1]) {
            if (!a.every((a, i) => isPrefix25(a, b[i]))) continue;
            for (const c of wl[2]) {
                if (!a.every((a, i) => isPrefix35(a, b[i], c[i]))) continue;
                for (const d of wl[3]) {
                    if (!a.every((a, i) => isPrefix45(a, b[i], c[i], d[i]))) continue;
                    for (const e of wl[4]) {
                        if (!a.every((a, i) => hashing5.isWord(a, b[i], c[i], d[i], e[i]))) continue;
                        return [a, b, c, d, e].map(l => l.map(letterFromInt).join(''));
                    }
                }
            }
        }
    }
    return null;
}
const [seed, ...ltrWords] = randomPuzzle5();
const letters = ltrWords.flatMap(s => [...s]).sort().join('');
console.log(seed, letters);
for (const w of ltrWords) console.log(w);
        
0x827c545
older,aaaaaddeeeelmmnrrrvv
larva
dream
evade
ramen
Finished in 0.247 seconds.
        
function mp_visit5(h, l, p) {
    for (let i = l - 1; i >= 0; i--) {
        p[i] = h.r;
        h = h.n;
        const fullWord = (i % 5) === 0;
        if (fullWord && !hashing5.isWord(p[i], p[i+1], p[i+2], p[i+3], p[i+4])) return null;
    }
    return p;
}

// p must be sorted in ascending order
function* mp_gen5(p, remap, cycle) {
    let nperm = 0;
    const l = p.length;
    const pOut = p.slice();
    // If the input is empty, exit.
    // There is only one permutation of the empty set.
    if (l === 0) {
        yield [];
        return;
    }
    // Init
    let h = { v: p[0], r: remap[p[0]], n: null };
    let i = h; // penultimate node
    let j = h; // final node
    if (l > 1)
        h = i = { v: p[1], r: remap[p[1]], n: h };
    for (let k = 2; k < l; ++k) {
        h = { v: p[k], r: remap[p[k]], n: h };
    }
    for (;;) {
        // Visit permutations of the list
        const x = mp_visit5(h, l, pOut);
        if (x) yield x;
        let s;
        for (;;) {
            if (j.n)
                s = i.v >= j.n.v ? j : i;
            else if (j.v >= h.v)
                break;
            else
                s = i;
            const t = s.n;
            s.n = t.n;
            t.n = h;
            if (t.v < h.v) {
                i = t;
            }
            j = i.n;
            h = t;
            ++nperm;
            if (!(nperm % 10000000000)) console.log(nperm);
            const x = mp_visit5(h, l, pOut);
            if (x) yield x;
        }
        if (cycle) {
            // Reset
            // This is different from initialization
            // because we don't need to allocate new
            // list nodes.
            i = h; // penultimate node
            j = h; // final node
            for (let k = l - 1; k > 0; k--) {
                j.v = p[k];
                j.r = remap[p[k]];
                i = j;
                j = j.n;
            }
            j.v = p[0];
            j.r = remap[p[0]];
        }
        else
            return;
    }
}

function multipermute5(multiset, opts = {}) {
    let cb;
    let eq;
    let cycle = false;
    if (typeof opts === 'function')
        cb = opts;
    else
        ({ cb, eq, cycle = false } = opts);
    const { permutation, remap } = ms2permutation(multiset, eq);
    if (typeof cb !== 'function')
        return mp_gen5(permutation, remap, cycle);
    for (const p of mp_gen5(permutation, remap, cycle))
        cb(p);
    return;
}

function solve5(seed, letters) {
    const seedInts = seed.map(intFromLetter);
    const letterInts = letters.map(intFromLetter);

    function isSolutionDown5(ints) {
        for (let i=0; i<5; ++i) {
            if (!hashing5.isWord(seedInts[i], ints[i], ints[5+i], ints[10+i], ints[15+i]))
                return false;
        }
        return true;
    }

    for (const permutation of multipermute5(letterInts)) {
        if (isSolutionDown5(permutation)) return permutation.map(letterFromInt);
    }
}

        
hashing5 = setupHashing5();
console.log(solve5([...'older'], [...'aaaaaddeeeelmmnrrrvv']));
        
[Cancelled after 12 hours and 1.27 trillion unsuccessful permutations]
        

# Conclusions

Henry Lytton as the Major-General (1919)
"I answer hard acrostics, I've a pretty taste for paradox" – The Modern Major General

[1] Run Times

laptop c.2011 laptop c.2017 laptop c.2017 (FF) android c.2018 android c.2021 ipad c.2022 laptop c.2024 this device
naive 248.275 172.554 666.413 470.668 185.993 121.287 100.739
unpack1 86.949 61.197 107.729 170.558 57.523 41.895 37.809
multipermute 16.996 12.157 21.376 34.385 10.924 6.193 6.522
Integers 8.293 5.619 15.100 14.982 5.599 2.234 3.349
Short-circuit 2.373 1.375 4.228 5.179 1.376 1.431 0.876
Hash Table 3.345 2.050 6.894 7.658 2.228 2.178 1.345
wasm / simd 0.705 0.447 0.437 2.610 0.705 0.453 0.397
Zig 0.625 0.400 0.427 2.548 0.718 0.555 0.427
webgl2 / gpu 5.680 0.166 0.158 0.450 0.150 0.001 0.051
© 2025, Christopher Nicolai