Home > OS >  Speed up large nested loop with DataTable and FileInfo
Speed up large nested loop with DataTable and FileInfo

Time:07-07

I have a DataTable that contains approximately 150,000 rows. I am trying to compare those rows to the files (approximately 200,000 files) in a specific folder. My code looks like this:

// For every row: build the customer target folder from the configured listbox2 items,
// create it if missing, then scan all files in PathtoGesamtablage and move (copy + delete)
// each file whose name and rounded-up size in kB match the row.
foreach (DataRow row in KundenundDateinamenohneDoppelte.Rows)
                {
                    // Every row starts again from the base path; the loop below appends to it.
                    OrdnerSchadenanlage = @"" + Settings.Default.PathtoKundenablage + "\\Kundenablage\\";

                    // Each listbox2 item is either a literal token or the name of a column in the row.
                    foreach (string item in listbox2.Items)
                    {
                        if (item == "Leerzeile")
                        {
                            // "Leerzeile" stands for a single blank in the folder name.
                            OrdnerSchadenanlage += " ";
                        }
                        else if (item == "-" || item == ",")
                        {
                            // Separators are appended literally.
                            OrdnerSchadenanlage += item;
                        }
                        else
                        {
                            // Column value: "/" becomes " u. ", double quotes and a "00:00:00" time part are dropped.
                            OrdnerSchadenanlage += row[item].ToString().Replace("/", " u. ").Replace(@"""", "").Replace("00:00:00", "");
                        }
                    }

                    if (!Directory.Exists(OrdnerSchadenanlage))
                    {
                        Directory.CreateDirectory(OrdnerSchadenanlage);
                    }

                    // NOTE(review): these three lines do not use `row`, yet the directory is re-listed
                    // for every row. DIRECTORIES is not used anywhere in the visible code.
                    DirectoryInfo DIR = new DirectoryInfo(Settings.Default.PathtoGesamtablage);
                    FileInfo[] FILES = DIR.GetFiles("*.*");
                    DirectoryInfo[] DIRECTORIES = DIR.GetDirectories();

                    foreach (FileInfo f in FILES)
                    {
                        double _fileSize = new System.IO.FileInfo(f.FullName).Length;
                        string filenametocheck = string.Empty;

                        // Strip a " (digits)" counter in front of the extension, e.g. "name (2).pdf" -> "name.pdf".
                        string filenamewithoutklammern = f.Name;
                        string pattern = @"\s\([0-9]+\)\.";
                        bool m = Regex.IsMatch(filenamewithoutklammern, pattern);
                        if (m == true)
                        {
                            filenamewithoutklammern = Regex.Replace(filenamewithoutklammern, pattern, ".");
                            filenametocheck = filenamewithoutklammern;
                        }
                        else
                        {
                            filenametocheck = f.Name;
                        }

                        // A file belongs to the row when the name (without extension) equals "Dateiname"
                        // and the size rounded up to whole kB equals "Größe in kB".
                        if (System.IO.Path.GetFileNameWithoutExtension(filenametocheck) == row["Dateiname"].ToString() && Math.Ceiling(_fileSize / 1024) == Convert.ToDouble(row["Größe in kB"].ToString()))
                        {
                            string folderfound = "";
                            if (newtable.Rows.Count > 0)
                            {
                                // The first rule in newtable that matches the file name decides the sub-folder:
                                // "beginnend" means prefix match, any other Suchart means substring match.
                                foreach (DataRow dr in newtable.Rows)
                                {
                                    if (dr["Suchart"].ToString() == "beginnend")
                                    {
                                        if (f.Name.StartsWith(dr["Dateiname"].ToString()))
                                        {
                                            folderfound = dr["Ordnername"].ToString();
                                            break;
                                        }
                                        else
                                        {
                                            folderfound = string.Empty;
                                        }
                                    }
                                    else
                                    {
                                        if (f.Name.Contains(dr["Dateiname"].ToString()))
                                        {
                                            folderfound = dr["Ordnername"].ToString();
                                            break;
                                        }
                                        else
                                        {
                                            folderfound = string.Empty;
                                        }
                                    }

                                }

                                if (folderfound != string.Empty)
                                {
                                    OrdnermitUnterodner = OrdnerSchadenanlage + "\\" + folderfound + "";
                                    if (!Directory.Exists(OrdnermitUnterodner))
                                    {
                                        Directory.CreateDirectory(OrdnermitUnterodner);
                                    }
                                    // Move only if the file exists neither in the customer folder nor in the sub-folder.
                                    if (!File.Exists(System.IO.Path.Combine(OrdnerSchadenanlage, f.Name)) && !File.Exists(System.IO.Path.Combine(OrdnermitUnterodner, f.Name)))
                                    {
                                        File.Copy(f.FullName, System.IO.Path.Combine(OrdnermitUnterodner, f.Name));
                                        File.Delete(f.FullName);
                                        //kopiervorgang.Rows.Add(1, "Copied file " + f.FullName + " to " + System.IO.Path.Combine(OrdnermitUnterodner, f.Name) + "\n");
                                    }
                                }
                                else
                                {
                                    // NOTE(review): OrdnermitUnterodner is not assigned on this path, so the second
                                    // check uses whatever value it held before - confirm this is intended.
                                    if (!File.Exists(System.IO.Path.Combine(OrdnerSchadenanlage, f.Name)) && !File.Exists(System.IO.Path.Combine(OrdnermitUnterodner, f.Name)))
                                    {
                                        File.Copy(f.FullName, System.IO.Path.Combine(OrdnerSchadenanlage, f.Name));
                                        File.Delete(f.FullName);
                                        //kopiervorgang.Rows.Add(1, "Copied file " + f.FullName + " to " + System.IO.Path.Combine(OrdnerSchadenanlage, f.Name) + "\n");
                                    }
                                }

                            }
                            else
                            {
                                // No sub-folder rules at all: the file goes straight into the customer folder.
                                // NOTE(review): OrdnermitUnterodner is not assigned on this path either.
                                if (!File.Exists(System.IO.Path.Combine(OrdnerSchadenanlage, f.Name)) && !File.Exists(System.IO.Path.Combine(OrdnermitUnterodner, f.Name)))
                                {
                                    File.Copy(f.FullName, System.IO.Path.Combine(OrdnerSchadenanlage, f.Name));
                                    File.Delete(f.FullName);
                                    //kopiervorgang.Rows.Add(1, "Copied file " + f.FullName + " to " + System.IO.Path.Combine(OrdnerSchadenanlage, f.Name) + "\n");
                                }
                            }
                        }
                        else
                        {
                            //kopiervorgang.Rows.Add(0, "Copy failed cause there is no match between filenmame and filesize.\n");
                        }

                    }
                    // NOTE(review): forces a garbage collection after every row - confirm this is really needed.
                    GC.Collect();
                    progressrow++;
                    double Percentage = Convert.ToDouble(progressrow) * 100 / KundenundDateinamenohneDoppelte.Rows.Count;
                    // The percentage is passed as the second argument; the first argument is always 0.
                    übertragenworker.ReportProgress(0, Percentage);
                }
                kopiervorgang.Rows.Add(0, "Übertragung abgeschlossen.");
                e.Result = kopiervorgang;
            }

This works just fine: the files are compared and copied to the destination folder. However, processing just 0.2 % of this task takes my application about 1 hour. Extrapolated to 100 %, this would be about 17 days.

Is there any way to make this loop more efficient and faster?

CodePudding user response:

This needs what we call "refactoring" to achieve the desired optimization. There are too many nested loops containing code that can be removed from the loops.

One particular kind of refactoring needed here is moving everything that is not affected by a loop out of that loop. Anything that depends only on such loop-independent values can then be moved out as well (as long as it has no dependency on the loop itself, e.g. on the loop variable).

For example, I would start with removing this chunk from the loop because it is non-trivial (expensive, computationally) but it does not depend on the loop at all.

// Loop-invariant: none of these lines use the loop variable `row`, so run them once
// before the outer foreach instead of re-listing the directory for every DataRow.
DirectoryInfo DIR = new DirectoryInfo(Settings.Default.PathtoGesamtablage);
FileInfo[] FILES = DIR.GetFiles("*.*");
DirectoryInfo[] DIRECTORIES = DIR.GetDirectories();

The process of refactoring involves identifying these cases and improving the code by moving them to more suitable places. Another thing to do is to split the big method into smaller methods.

More things that can be removed from loop:

// Loop-invariant values: neither right-hand side depends on `row` or `f`.
OrdnerSchadenanlage = @"" + Settings.Default.PathtoKundenablage + "\\Kundenablage\\";

string pattern = @"\s\([0-9]+\)\.";

FILES

Now that FILES is removed from the loop, we can also remove this from the loop: double _fileSize = new System.IO.FileInfo(f.FullName).Length; Since we are looping all the FILES we can get all the sizes ONCE not for each data row, this is because the files are the same for each row in KundenundDateinamenohneDoppelte.Rows (the main foreach).

Note: `_fileSize` should be a `long`, not a `double` (`FileInfo.Length` returns a `long`).

The way to remove this from the loop is to create a dictionary of file sizes. For example:

// outside the loop, just under `FILES`
// Maps each file's full path to its size in bytes. The FileInfo objects in FILES already
// expose Length, so no second FileInfo has to be created per file.
            Dictionary<string, long> fileSizes = FILES.ToDictionary(file => file.FullName, file => file.Length);

You can then use this dictionary like this to get the file size without doing many calls to FileInfo.Length!

// inside the FILES loop, I replace double with long
// NOTE: once _fileSize is a long, `_fileSize / 1024` is integer division; write
// `_fileSize / 1024.0` in the Math.Ceiling(...) size check so the kB value is still rounded up.
long _fileSize = fileSizes[f.FullName];

newtable

I don't know what's inside `newtable` or how it changes, but I don't see it depending on the outer loop over the data rows (it only uses the file name), so it should be possible to remove this whole section from the nested loops:

// Take the sub-folder of the first rule in newtable whose "Dateiname" matches the file name:
// "beginnend" rules match by prefix, all other rules match by substring.
foreach (DataRow rule in newtable.Rows) {
    bool prefixRule = rule["Suchart"].ToString() == "beginnend";
    string namePart = rule["Dateiname"].ToString();
    bool isMatch = prefixRule ? f.Name.StartsWith(namePart) : f.Name.Contains(namePart);

    if (isMatch) {
        folderfound = rule["Ordnername"].ToString();
        break;
    }

    // This rule did not match: reset the result and try the next rule.
    folderfound = string.Empty;
}
  • Related