ateneva
6/3/2018 - 3:16 PM

How do I quickly evaluate a text string in SQL?

How do I quickly evaluate a text string in SQL?


------------------------------------MySQL-----------------------------------------------------------
# MySQL: profile each title in ted_main_utf -- word count, punctuation class
# and character-length bucket. Column aliases and label strings are kept
# verbatim (including their spelling) so the result set's shape is unchanged.
select
    title as Adtitle,

    # Word count = number of space characters + 1.
    # Consecutive, leading or trailing spaces inflate the count.
    char_length(title) - char_length(replace(title, ' ', '')) + 1 as AdTitleWordLength,

    # Punctuation class. CASE returns the FIRST matching branch, so the order
    # below is a priority order: leading digit, then embedded punctuation,
    # then the final character.
    case
        when left(title, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'starts with a number'

        when instr(title, '-') != 0 then 'contains a dash'
        when instr(title, ':') != 0 then 'contains a colon'
        when instr(title, ',') != 0 then 'contains a comma'
        when instr(title, '"') != 0 then 'contains quoatation marks'

        when right(title, 1) = '!' then 'ends with an exclamation mark'
        when right(title, 1) = '?' then 'ends with a question mark'
        when right(title, 1) = '.' then 'ends with a dot'
        # The digit test must come before the catch-all below; a digit is also
        # "not in ('?', '!', '.')", so in the opposite order it can never match.
        when right(title, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'ends with a number'
        when right(title, 1) not in ('?', '!', '.') then 'ends with no punctuation'
    end as AdPunctuation,

    # Character count and its bucket. char_length() counts characters, whereas
    # length() counts bytes and over-reports multi-byte (UTF-8) text.
    char_length(title) as TitleCharLength,
    case
        when char_length(title) < 40 then '0-40 characters'
        when char_length(title) < 70 then '40-70 charcaters'
        when char_length(title) >= 70 then 'more than 70 characters'
    end as AdTitleCharength

    # TODO: number of upper-case characters in the title. The earlier draft
    # used regexp_count(name, '[A-Z]'), which is not a MySQL function.

from
    datageeking.ted_main_utf
;

-------------------------SQL Server-------------------------------------------------------------------

-- SQL Server: profile each title in ted_main_utf -- word count, punctuation
-- class and character-length bucket. Column aliases and label strings are kept
-- verbatim (including their spelling) so the result set's shape is unchanged.
select
    title as Adtitle,

    -- Word count = number of space characters + 1.
    -- len() ignores trailing spaces, so only leading/interior spaces are counted;
    -- consecutive spaces inflate the count.
    len(title) - len(replace(title, ' ', '')) + 1 as AdTitleWordLength,

    -- Punctuation class. CASE returns the FIRST matching branch, so the order
    -- below is a priority order: leading digit, then embedded punctuation,
    -- then the final character.
    case
        when left(title, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'starts with a number'

        -- charindex(expressionToFind, expressionToSearch): the needle comes first.
        when charindex('-', title) != 0 then 'contains a dash'
        when charindex(':', title) != 0 then 'contains a colon'
        when charindex(',', title) != 0 then 'contains a comma'
        when charindex('"', title) != 0 then 'contains quoatation marks'

        when right(title, 1) = '!' then 'ends with an exclamation mark'
        when right(title, 1) = '?' then 'ends with a question mark'
        when right(title, 1) = '.' then 'ends with a dot'
        -- The digit test must come before the catch-all below; a digit is also
        -- "not in ('?', '!', '.')", so in the opposite order it can never match.
        when right(title, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'ends with a number'
        when right(title, 1) not in ('?', '!', '.') then 'ends with no punctuation'
    end as AdPunctuation,

    -- Character count (excluding trailing spaces) and its bucket.
    len(title) as TitleCharLength,
    case
        when len(title) < 40 then '0-40 characters'
        when len(title) < 70 then '40-70 charcaters'
        when len(title) >= 70 then 'more than 70 characters'
    end as AdTitleCharength

from
    datageeking.dbo.ted_main_utf
;


-----------------------------------PostgreSQL-------------------------------------------------------------
-- PostgreSQL: profile each row of public.film -- word count, punctuation class
-- and character-length bucket.
-- NOTE(review): every metric below is computed on "description", while the
-- output aliases still say "Title". Aliases are kept so the result set's shape
-- is unchanged -- confirm which column is actually intended.
select
    title as Adtitle,

    -- Word count of the description = number of space characters + 1.
    -- Consecutive, leading or trailing spaces inflate the count.
    length(description) - length(replace(description, ' ', '')) + 1 as AdTitleWordLength,

    -- Punctuation class. CASE returns the FIRST matching branch, so the order
    -- below is a priority order: leading digit, then embedded punctuation,
    -- then the final character.
    case
        when left(description, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'starts with a number'

        when position('-' in description) != 0 then 'contains a dash'
        when position(':' in description) != 0 then 'contains a colon'
        when position(',' in description) != 0 then 'contains a comma'
        when position('"' in description) != 0 then 'contains quoatation marks'

        when right(description, 1) = '!' then 'ends with an exclamation mark'
        when right(description, 1) = '?' then 'ends with a question mark'
        when right(description, 1) = '.' then 'ends with a dot'
        -- The digit test must come before the catch-all below; a digit is also
        -- "not in ('?', '!', '.')", so in the opposite order it can never match.
        when right(description, 1) in ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9') then 'ends with a number'
        when right(description, 1) not in ('?', '!', '.') then 'ends with no punctuation'
    end as AdPunctuation,

    -- Character count of the description and its bucket.
    length(description) as TitleCharLength,
    case
        when length(description) < 40 then '0-40 characters'
        when length(description) < 70 then '40-70 charcaters'
        when length(description) >= 70 then 'more than 70 characters'
    end as AdTitleCharength

    -- TODO: number of upper-case characters in the string. The earlier draft
    -- used regexp_count(name, '[A-Z]'); "name" is not a column used anywhere
    -- else in this query, so the target column needs confirming first.

from
    public.film
;