One of the (many) things that I do is various ad-hoc statistical reports on data in SQL databases, so I’m forever churning out weird little SQL routines and cursors with the hope of extracting meaning from data.

Here’s a handy little number I put together just recently which counts the number of words in a field in a table. Words are counted individually (the delimiter inside a field is a space). So this routine will count the field “John” as one entry and the field “John Smith” as two separate entries, “John” and “Smith”. It then counts the results and displays a quick table with the most common words at the top and the number of occurrences.

Modify the cursor declaration to suit your own schema…

create table #tblCommonNames
(namepart varchar(100))

DECLARE @f varchar(50), @count int, @temp smallint

DECLARE cur CURSOR
FOR select distinct company_name
from tblCompany

set @count = 1

OPEN cur
FETCH NEXT FROM cur INTO @f

WHILE (@@fetch_status <> -1) -- and @count < 100)
BEGIN
   IF (@@fetch_status <> -2)
   BEGIN
      --find spaces
      while (charindex(' ', @f) <> 0)
      begin
         set @temp = charindex(' ', @f)
         insert into #tblCommonNames
            (namepart)
         values
            (ltrim(rtrim(left (@f,@temp))))
         
         set @f = ltrim(rtrim(substring(@f, @temp, 99)))
      end
   END
   FETCH NEXT FROM cur INTO @f
   SELECT @count = @count + 1
END

CLOSE cur
DEALLOCATE cur

delete
from #tblCommonNames
where ltrim(rtrim(namepart)) = ''

select namepart, count(namepart)
from #tblCommonNames
group by namepart
order by count(namepart) desc

drop table #tblCommonNames
go