Question:
Program? To split text file and save as multiple named files?
srg_rector
2010-04-10 16:03:14 UTC
I need a program or script (Windows 7 or Unix/Linux) that will:
1. Take a large text file as input
2. Separate it into multiple sections by an arbitrary set of characters (perhaps ‘=’ or ‘***’ or MS Word hard page break or something else)
3. Save each grouping to its own text file
4. *** Important *** Name each text file after the character string on the first line of its text grouping (i.e. the filename is on the first line, with the data on the following lines).

I am not picky about what is used to accomplish the 4 items listed above. I would prefer using a windows/dos program but if it can only be accomplished on Unix/Linux I could make that work as well.

Thank you for your time!
SR
Three answers:
anonymous
2010-04-15 13:19:44 UTC
Hi there, I've just spent some time creating a Windows program which I think can do what you want. I've hosted it at https://sourceforge.net/projects/textsplitter/ so you should be able to download it. Or, if you don't trust some random stranger on the internet telling you to download exes ;), here's the C# .NET code so you can compile it yourself:



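(Requires `using System.IO;` and `using System.Text.RegularExpressions;`, in addition to the default `using System;`, `using System.Collections.Generic;` and `using System.Linq;` directives.)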



/// <summary>
/// Interactive console entry point. Asks for a text file, splits it either
/// every N characters or at each occurrence of a separator (plain string or
/// regular expression), and writes each section to its own file. The file
/// name is built from a user-supplied pattern in which {0} is the section
/// number and {1} is the first line of the section.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Console.WriteLine("Welcome to Text Splitter, thanks for using :). - D.E.");

    string loc;
    while (true)
    {
        Console.Write("Please type the location of the text file to split>");
        loc = Console.ReadLine();
        if (File.Exists(loc))
        {
            break;
        }
        Console.WriteLine("That file does not exist, please try again");
    }
    Console.WriteLine("");

    string text = File.ReadAllText(loc);
    if (text.Length == 0)
    {
        // No interval between 1 and 0 exists, so the character-count prompt
        // could never be satisfied for an empty file.
        Console.WriteLine("That file is empty, so there is nothing to split. Press any key to exit.");
        Console.ReadKey();
        return;
    }

    Console.Write("Choose how to decide where to split the text...\r\nOPTION: After a certain [n]umber of characters.\r\nOPTION: At the [i]ndex of a certain string.\r\n>");
    bool count = ReadChoice("\r\nPlease enter either \"n\" or \"i\">", ConsoleKey.N, ConsoleKey.I) == ConsoleKey.N;
    Console.WriteLine("");

    List<string> textFiles;
    if (count)
    {
        Console.Write("\r\nPlease enter the interval for how many characters should be in each text file>");
        int interval = PromptForInterval(text.Length);
        Console.WriteLine("");
        textFiles = SplitByLength(text, interval);
    }
    else
    {
        Console.Write("\r\nWould you like to use a [r]egular expression or [n]ot>");
        bool rg = ReadChoice("\r\nPlease enter either \"r\" or \"n\">", ConsoleKey.N, ConsoleKey.R) == ConsoleKey.R;
        Console.WriteLine("");
        Console.Write("\r\nPlease enter the string to search for as the location(s) to split the text at>");
        textFiles = PromptAndSplitBySeparator(text, rg);
        Console.WriteLine("");
    }

    Console.WriteLine("Separate files created: " + textFiles.Count);

    Console.WriteLine("\r\nPlease enter the FOLDER/DIRECTORY to save to>");
    string folder = PromptForOutputFolder();

    Console.WriteLine("\r\n\r\nPlease enter the name of the file(s) where \"{0}\" is the file number, and \"{1}\" is the first line (suggestion: \"{1}.txt\")>");
    string name = PromptForNamePattern();

    Console.Write("\r\nWould you like to [r]emove the first line from each file, or [n]ot>");
    bool removefirst = ReadChoice("\r\nPlease enter either \"r\" or \"n\">", ConsoleKey.N, ConsoleKey.R) == ConsoleKey.R;

    // Cleared once the user answers "ignore [a]ll" to an overwrite question.
    bool check = true;

    for (int s = 0; s < textFiles.Count; s++)
    {
        int restStart;
        string firstline = GetFirstLine(textFiles[s], out restStart);

        // A section without any line break is saved whole even when the
        // user asked for the first line to be removed.
        string texttosave = (removefirst && restStart > -1)
            ? textFiles[s].Substring(restStart)
            : textFiles[s];

        string saveto;
        try
        {
            saveto = Path.Combine(folder, String.Format(name, (s + 1).ToString(), ToSafeFileNamePart(firstline)));
        }
        catch (ArgumentException ex)
        {
            Console.WriteLine("\r\nCould not build a file name for section " + (s + 1) + ": " + ex.Message);
            continue;
        }

        if (check && File.Exists(saveto))
        {
            Console.Write("\r\nFile already exists at " + saveto + ". Would you like to [i]gnore, ignore [a]ll, or [s]kip>");
            ConsoleKey answer = ReadChoice("\r\nPlease enter either \"i\", \"a\", or \"s\">", ConsoleKey.S, ConsoleKey.I, ConsoleKey.A);
            if (answer == ConsoleKey.S)
            {
                continue;
            }
            if (answer == ConsoleKey.A)
            {
                check = false;
            }
        }

        try
        {
            File.WriteAllText(saveto, texttosave);
        }
        catch (Exception ex)
        {
            if (!IsPathError(ex))
            {
                throw;
            }
            // One unwritable file should not abort the remaining sections.
            Console.WriteLine("\r\nCould not write " + saveto + ": " + ex.Message);
        }
    }

    Console.WriteLine("\r\nFinished! Press any key to exit.");
    Console.ReadKey();
}

/// <summary>Blocks until one of the allowed keys is pressed and returns it, re-prompting after any other key.</summary>
private static ConsoleKey ReadChoice(string retryPrompt, params ConsoleKey[] allowed)
{
    while (true)
    {
        ConsoleKey pressed = Console.ReadKey().Key;
        if (Array.IndexOf(allowed, pressed) >= 0)
        {
            return pressed;
        }
        Console.Write(retryPrompt);
    }
}

/// <summary>Reads lines until the user enters a whole number between 1 and <paramref name="textLength"/>.</summary>
private static int PromptForInterval(int textLength)
{
    while (true)
    {
        int interval;
        if (!int.TryParse(Console.ReadLine(), out interval))
        {
            Console.Write("\r\nPlease enter a numeric value for the interval>");
        }
        else if (interval < 1)
        {
            // Zero would never consume any text; a negative value is not a valid length.
            Console.Write("\r\nThe interval must be at least 1, please enter a larger number>");
        }
        else if (interval > textLength)
        {
            Console.Write("\r\nThe text isn't that long, please enter a smaller number>");
        }
        else
        {
            return interval;
        }
    }
}

/// <summary>Cuts <paramref name="text"/> into consecutive pieces of <paramref name="interval"/> characters; the last piece holds whatever remains.</summary>
private static List<string> SplitByLength(string text, int interval)
{
    List<string> pieces = new List<string>();
    for (int start = 0; start < text.Length; start += interval)
    {
        pieces.Add(text.Substring(start, Math.Min(interval, text.Length - start)));
    }
    return pieces;
}

/// <summary>Reads a non-empty separator (re-prompting on an empty value or an invalid regular expression) and returns the text split at every occurrence of it.</summary>
private static List<string> PromptAndSplitBySeparator(string text, bool useRegex)
{
    while (true)
    {
        string splitter = Console.ReadLine();
        if (string.IsNullOrEmpty(splitter))
        {
            // An empty separator matches at every position and cannot delimit sections.
            Console.Write("\r\nThe search string cannot be empty, please try again>");
            continue;
        }

        if (!useRegex)
        {
            // Ordinal, case-sensitive split at each occurrence of the literal string.
            return new List<string>(text.Split(new string[] { splitter }, StringSplitOptions.None));
        }

        try
        {
            return new List<string>(Regex.Split(text, splitter, RegexOptions.IgnoreCase));
        }
        catch (ArgumentException ex)
        {
            Console.Write("\r\nThat is not a valid regular expression (" + ex.Message + "), please try again>");
        }
    }
}

/// <summary>Reads a folder path, creating the folder if it does not exist yet; re-prompts when the path cannot be used.</summary>
private static string PromptForOutputFolder()
{
    while (true)
    {
        string folder = Console.ReadLine();
        try
        {
            Directory.CreateDirectory(folder);
            return folder;
        }
        catch (Exception ex)
        {
            if (!IsPathError(ex))
            {
                throw;
            }
            Console.Write("\r\nThat folder could not be used (" + ex.Message + "), please try again>");
        }
    }
}

/// <summary>Reads the file name pattern and re-prompts until it is a non-empty, valid composite format string for two arguments.</summary>
private static string PromptForNamePattern()
{
    while (true)
    {
        string pattern = Console.ReadLine();
        if (!string.IsNullOrEmpty(pattern))
        {
            try
            {
                // Trial run so that a malformed pattern is rejected here
                // instead of failing later while files are being written.
                String.Format(pattern, "1", "x");
                return pattern;
            }
            catch (FormatException)
            {
            }
        }
        Console.Write("\r\nThat is not a valid file name pattern, please try again>");
    }
}

/// <summary>
/// Returns the first line of <paramref name="section"/> without its line break.
/// <paramref name="restStart"/> receives the index just past that line break
/// ("\r\n", "\n" or "\r"), or -1 when the section contains no line break.
/// </summary>
private static string GetFirstLine(string section, out int restStart)
{
    int lineEnd = section.IndexOfAny(new char[] { '\r', '\n' });
    if (lineEnd == -1)
    {
        restStart = -1;
        return section;
    }

    bool isCrLf = section[lineEnd] == '\r'
        && lineEnd + 1 < section.Length
        && section[lineEnd + 1] == '\n';
    restStart = lineEnd + (isCrLf ? 2 : 1);
    return section.Substring(0, lineEnd);
}

/// <summary>Trims <paramref name="value"/> and replaces every character that is not allowed in a file name (including path separators) with an underscore.</summary>
private static string ToSafeFileNamePart(string value)
{
    char[] invalid = Path.GetInvalidFileNameChars();
    char[] chars = value.Trim().ToCharArray();
    for (int k = 0; k < chars.Length; k++)
    {
        if (Array.IndexOf(invalid, chars[k]) >= 0)
        {
            chars[k] = '_';
        }
    }
    return new string(chars);
}

/// <summary>Tells whether <paramref name="ex"/> is one of the exception types that path and file operations raise for a bad or inaccessible path.</summary>
private static bool IsPathError(Exception ex)
{
    return ex is ArgumentException
        || ex is IOException
        || ex is UnauthorizedAccessException
        || ex is NotSupportedException;
}

}

}
Brian
2010-04-10 21:07:30 UTC
You can use 'sed' on Unix/Linux to break up files. 'awk' might work too. Check the sed link below.



You can use 'head -c 4' to get the first 4 chars.
?
2010-04-10 16:06:55 UTC
elance.com is your friend.


This content was originally posted on Y! Answers, a Q&A website that shut down in 2021.
Loading...