I have a text file which contains the following data:
#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: [email protected]
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 AM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: [email protected]
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: [email protected]
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A
The data needs to be organized so that only the Customer_ID, MMYY and Txn_ID values are displayed, one record per line, with the values separated by the | symbol. Everything else in this text file should be ignored.
Example:
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
This is what I've tried, but I get an "Invalid file!" message after the text file is opened. The reference was taken from this post.
/// <summary>
/// Lets the user pick a text file, extracts the Customer_ID, MMYY and Txn_ID
/// values of every record in it, and shows them in <c>richTextBox</c> as one
/// "Customer_ID | MMYY | Txn_ID" line per record. All other lines are ignored.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void openFile_Click(object sender, EventArgs e)
{
    const string customerPrefix = "Customer_ID:";
    const string mmyyPrefix = "MMYY:";
    const string txnPrefix = "Txn_ID:";

    // The dialog holds native resources, so dispose it once we are done.
    using (OpenFileDialog ofdtmp = new OpenFileDialog())
    {
        if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return;
        }

        try
        {
            System.Text.StringBuilder output = new System.Text.StringBuilder();
            string customerId = null;
            string mmyy = null;

            foreach (string rawLine in File.ReadLines(ofdtmp.FileName))
            {
                string line = rawLine.Trim();

                // Only the three wanted prefixes are inspected; every other
                // line falls through and is skipped without an exclusion list.
                if (line.StartsWith(customerPrefix, StringComparison.Ordinal))
                {
                    // Take everything after the prefix instead of a fixed
                    // (start, length) pair, which throws on short lines.
                    customerId = line.Substring(customerPrefix.Length).Trim();
                    mmyy = null; // a new record starts here
                }
                else if (line.StartsWith(mmyyPrefix, StringComparison.Ordinal))
                {
                    mmyy = line.Substring(mmyyPrefix.Length).Trim();
                }
                else if (line.StartsWith(txnPrefix, StringComparison.Ordinal)
                         && customerId != null
                         && mmyy != null)
                {
                    string txnId = line.Substring(txnPrefix.Length).Trim();
                    output.AppendLine(customerId + " | " + mmyy + " | " + txnId);

                    // Reset so a stray Txn_ID line cannot reuse stale values.
                    customerId = null;
                    mmyy = null;
                }
            }

            if (output.Length == 0)
            {
                // Readable file, but it contains no complete record.
                MessageBox.Show("Invalid file!");
                return;
            }

            richTextBox.Text = output.ToString();
        }
        catch (IOException)
        {
            MessageBox.Show("Invalid file!");
        }
        catch (UnauthorizedAccessException)
        {
            MessageBox.Show("Invalid file!");
        }
    }
}
I've looked up alternative solutions in similar online posts, and it appears that using a regular expression is the right approach. The difficulty is figuring out how to skip over all of the unnecessary characters and symbols in the text file and extract only the data that is required. What would be the best solution for this problem?
CodePudding user response:
Maybe you are looking for something like this: https://dotnetfiddle.net/vSEs5e
CodePudding user response:
My solution using regex.
You can test/run this code on https://replit.com/@JomaCorpFX/SO70374465
You can check the regex on https://regex101.com/r/F8Npd9/1
Code
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System;
using System.Linq;
/// <summary>
/// Console demo that extracts "Customer_ID | MMYY | Txn_ID" lines from a
/// multi-record text dump and prints them.
/// </summary>
public class Program
{
    // Matches three consecutive lines: Customer_ID, MMYY, Txn_ID.
    // - '^' with RegexOptions.Multiline anchors each record at a line start.
    // - '\r?\n' accepts both LF and CRLF line endings.
    // - '\S+' stops at the first whitespace, so a trailing '\r' is never
    //   captured and the last record needs no trailing newline.
    // Built once and reused across calls.
    private static readonly Regex RecordRegex = new Regex(
        @"^Customer_ID:[ \t]*(?<CustomerId>\d+)[ \t]*\r?\n" +
        @"MMYY:[ \t]*(?<ExpiryDate>\S+)[ \t]*\r?\n" +
        @"Txn_ID:[ \t]*(?<TxnId>\S+)",
        RegexOptions.Multiline | RegexOptions.Compiled);

    /// <summary>
    /// Extracts every record found in <paramref name="data"/>.
    /// </summary>
    /// <param name="data">Raw text containing zero or more records.</param>
    /// <returns>
    /// One "CustomerId | ExpiryDate | TxnId" string per record, in input
    /// order; an empty list when <paramref name="data"/> is null, empty, or
    /// contains no complete record.
    /// </returns>
    static List<string> GetStrings(string data)
    {
        List<string> strings = new List<string>();
        if (string.IsNullOrEmpty(data))
        {
            return strings;
        }

        foreach (Match match in RecordRegex.Matches(data))
        {
            // Named groups can be read directly from the match.
            string customerId = match.Groups["CustomerId"].Value;
            string expiryDate = match.Groups["ExpiryDate"].Value;
            string txnId = match.Groups["TxnId"].Value;
            strings.Add(customerId + " | " + expiryDate + " | " + txnId);
        }

        return strings;
    }

    /// <summary>
    /// Runs the extraction on a built-in sample and prints one line per record,
    /// then waits for Enter so the console window stays open.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        string input = @"#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: [email protected]
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 PM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: [email protected]
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: [email protected]
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A";

        foreach (var value in GetStrings(input))
        {
            Console.WriteLine(value);
        }

        Console.ReadLine();
    }
}
Output
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474