Example - Sentence in Cell A1
is WO# 000000.0 for ITSHTJ in Q0 20000_ITAI0006:TBD Replacement PO
and I want to extract ITAI0006
.
I have used below UDF but it's not helpful.
Function findUcase(Rng As Range) As Boolean
Dim Txt
Dim Desc() As String
Desc = Split(Rng.Value, " ")
For Each Txt In Desc
If (UCase(Txt) = Txt) Then
findUcase = True
Exit Function
End If
Next
End Function
CodePudding user response:
Regex method - This will return any 8 characters (A-Z and numbers) from the given cell.
Function findUcase(Rng As Range) As String
If Rng.Cells.Count = 1 Then
Dim regex As Object
Set regex = CreateObject("vbScript.Regexp")
regex.Global = False
regex.Pattern = "[\dA-Z]{8}"
If regex.Test(Rng.Value) Then
findUcase = regex.Execute(Rng.Value)(0)
End If
End If
End Function
Edit - Another version that will return string that has at least A-Z
in the string and also supports format of 8 characters of any amount of A-Z, a space then any amount of numbers.
Function findUcase(Rng As Range) As String
Const firstPattern As String = "[\dA-Z]{8}"
Const secondPattern As String = "[A-Z] [\d] "
Const charLimit As Long = 8
If Rng.Cells.Count = 1 Then
Dim rngValue As String
rngValue = Rng.Value
Dim regex As Object
Set regex = CreateObject("vbScript.Regexp")
Dim matchColl As Object
Dim match As Object
regex.Global = True
regex.Pattern = firstPattern
If regex.test(rngValue) Then
Set matchColl = regex.Execute(rngValue)
For Each match In matchColl
If Not IsNumeric(match) Then
findUcase = match
Exit For
End If
Next match
Else
regex.Pattern = secondPattern
If regex.test(rngValue) Then
Set matchColl = regex.Execute(rngValue)
For Each match In matchColl
If Len(match) = charLimit Then
findUcase = match
Exit For
End If
Next match
End If
End If
End If
Set matchColl = Nothing
Set regex = Nothing
End Function
CodePudding user response:
You could try:
Formula in B1
:
=FILTERXML("<t><s>"&SUBSTITUTE(SUBSTITUTE(SUBSTITUTE(A1,"_"," "),":"," ")," ","</s><s>")&"</s></t>","//s[string-length(.)=8][translate(.,'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', '')='']")
Where the XPath expressions filter nodes on:
[string-length(.)=8]
- All substrings of 8 characters long;[translate(.,'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', '')='']
- Make sure that when all uppercase alphachars and numbers are substituted with nothing, the node would also equal nothing.
Note that a formulae-approach can get much simpler once you have more specific rules down, e.g.:
- Is it always the 8 characters before the first colon?
- Is it always between underscore and colon?
- Is it always alphanumeric?
- Etc.