搜索文本
搜索文件夹中的文件是否包含某个文本,使用多线程搜索,输出文件名、行、列,命令行输出带高亮,支持导出CSV
用法:FuckSearch 要搜索的文本 选项
选项:
- -p 要搜索的文件夹路径
- -sm 搜索模式,Contains,包含;Regex,正则表达式;
- -mf 匹配模式,Line,一行一行地匹配;Global,全局匹配;
- -ic 可选,默认n,忽略大小写,y,忽略大小写;n,区分大小写;
- -pl 可选,默认n,预加载,y,加载整个文件到内存;n,加载一行到内存;
- -b 可选,默认50,显示匹配位置前后的字符数
- -e 可选,默认y,是否输出错误,y,输出错误;n,隐藏错误
- -ep 可选,导出文件路径
- -ef 可选,默认CSV,导出文件格式,Text,导出文本;CSV,导出CSV;
#include <algorithm>
#include <cctype>
#include <charconv>
#include <cstdint>
#include <deque>
#include <execution>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <regex>
#include <sstream>
#include <string>
#include <system_error>
#include <tuple>
#include <unordered_map>
#include <variant>
#include <vector>
#include "Arguments.h"
#include "Convert.h"
#include "Macro.h"
#include "CSV.h"
// Command-line option enums. ArgumentOption is a project macro that is not visible in this
// file (presumably from Arguments.h); main() relies on it providing the enum itself plus the
// ToString / To<Name> / <Name>Desc helpers -- TODO confirm against the macro definition.
ArgumentOption(MatchFlag, Line, Global)
ArgumentOption(SearchMode, Contains, Regex)
ArgumentOption(ExportFileType, Text, CSV)
// Serializes console output written by SearchLog and SearchErrCombine.
static std::mutex LogMutex{};
// Serializes writes performed through StreamCombine.
static std::mutex RecordMutex{};
// When false, SearchErrCombine drops its message; main() sets it from the -e option.
static bool Err = true;
// Streams every argument into `stream`, in order, while holding RecordMutex so that
// one record is written as a unit. Returns the same stream to allow further chaining.
template<typename Stream, typename ...Args>
Stream& StreamCombine(Stream& stream, Args&&...args)
{
    const std::lock_guard<std::mutex> recordGuard{ RecordMutex };
    // Left fold: ((stream << a) << b) << ...
    (stream << ... << args);
    return stream;
}
// Formats all arguments, in order, into a fresh string stream and returns it.
// Callers typically finish with `.str()` to obtain the concatenated text.
template<typename ...Args>
std::ostringstream StringCombine(Args&&...args)
{
    std::ostringstream combined;
    // Insert each argument in turn; an empty pack leaves the stream empty.
    ((combined << args), ...);
    return combined;
}
// Prints all arguments followed by a newline to std::cout.
// LogMutex is held for the whole line so concurrent callers do not interleave output.
template<typename ...Args>
void SearchLog(Args&&...args)
{
    const std::lock_guard<std::mutex> logGuard{ LogMutex };
    ((std::cout << args), ...);
    std::cout << '\n';
}
// Prints all arguments followed by a newline to std::cerr, unless error output
// has been disabled through the global Err flag. Shares LogMutex with SearchLog.
template<typename ...Args>
void SearchErrCombine(Args&&...args)
{
    if (!Err)
    {
        return;
    }
    const std::lock_guard<std::mutex> logGuard{ LogMutex };
    ((std::cerr << args), ...);
    std::cerr << '\n';
}
// Reports an error through SearchErrCombine, prefixed with "<file>: <line>: <function>:\n".
// MacroLine is a project macro (presumably the stringified __LINE__ from Macro.h).
// The function name is passed as a separate argument via the standard __func__: unlike
// MSVC's __FUNCTION__, it is a variable rather than a string literal on GCC/Clang, so it
// cannot be concatenated with adjacent literals. The emitted text is unchanged.
#define SearchErr(...)\
    SearchErrCombine(__FILE__ ": " MacroLine ": ", __func__, ":\n", __VA_ARGS__)
// Options shared by all search routines, together with the optional export target.
// It is brace-initialized positionally in main(), so the member order is part of its contract.
struct SearchParams
{
// Overload-set helper for std::visit: inherits the call operators of every callable passed in.
template<typename ...Ts> struct VisitFunc : Ts... { using Ts::operator()...; };
// Deduction guide so that `VisitFunc{ lambdaA, lambdaB }` deduces its template arguments.
template<typename ...Ts> VisitFunc(Ts...) -> VisitFunc<Ts...>;
// Export target used for plain-text output.
using ExportFileTextType = std::ofstream;
// Export target used for CSV output (project type from CSV.h).
using ExportFileCsvType = CSV::CsvFile;
// Empty optional means "no export"; otherwise holds one of the two targets above.
using ExportFileValueType = std::optional<std::variant<ExportFileTextType, ExportFileCsvType>>;
// Contains (substring) or Regex matching.
SearchMode searchMode;
// Line: each line is searched separately; Global: the whole file is searched as one buffer.
MatchFlag matchFlag;
// True to match case-insensitively.
bool ignoreCase;
// In Line mode: true reads all lines first and then searches them in parallel.
bool preLoad;
// Number of context characters shown before and after each match.
int16_t border;
// Reference to the export target; the referenced object is owned by the caller (a local in main()).
ExportFileValueType& exportFile;
};
// Emits a single match.
//   file    - location text printed first (path / line / column, already formatted by the caller)
//   prefix  - context preceding the match
//   keyword - the matched text; shown in red on the console
//   suffix  - context following the match
// The console always receives the record; when searchParams.exportFile holds a target,
// the location and the un-coloured context are also written to it as one record.
void SearchRecord(const std::string& file, const std::string& prefix, const std::string& keyword, const std::string& suffix, const SearchParams& searchParams)
{
    // ANSI "red" before the keyword and "reset" after it.
    SearchLog(file, prefix, "\x1B[31m", keyword, "\033[0m", suffix);

    if (!searchParams.exportFile.has_value())
    {
        return;
    }

    std::visit(SearchParams::VisitFunc
        {
            [&](SearchParams::ExportFileTextType& textFile)
            {
                StreamCombine(textFile, file, StringCombine(prefix, keyword, suffix).str(), "\n");
            },
            [&](SearchParams::ExportFileCsvType& csvFile)
            {
                StreamCombine(csvFile, file, StringCombine(prefix, keyword, suffix).str(), CSV::CsvFile::endl);
            },
        }, searchParams.exportFile.value());
}
// Searches `buf` for every occurrence of `search` and reports each one through SearchRecord.
//   buf          - text to scan (one line or a whole file, depending on the caller)
//   search       - substring (SearchMode::Contains) or ECMAScript pattern (SearchMode::Regex)
//   searchParams - search mode, case sensitivity, context width and export target
//   prefix       - location text placed in front of the 1-based match column
// Exceptions (for example an invalid regular expression) are caught and reported via
// SearchErr; they never propagate to the caller.
// NOTE(review): in Regex mode the pattern is compiled on every call, i.e. once per line in
// Line mode. Hoisting the compiled regex out of this function would need an interface change.
void Search(const std::string& buf, const std::string& search, const SearchParams& searchParams, const std::string& prefix = "")
{
    try
    {
        // A negative border would wrap to a huge value once converted to size_t
        // (and could make substr throw); treat it as "no context".
        const auto border = static_cast<std::size_t>(searchParams.border > 0 ? searchParams.border : 0);

        // Reports the match occupying [pos, pos + length) of buf with up to `border`
        // characters of context on each side. The text always comes from the original buf.
        const auto report = [&](const std::size_t pos, const std::size_t length)
        {
            const std::size_t contextBegin = pos > border ? pos - border : 0;
            SearchRecord(
                StringCombine(prefix, pos + 1, ": ").str(),
                buf.substr(contextBegin, pos - contextBegin),
                buf.substr(pos, length),
                buf.substr(pos + length, border),
                searchParams);
        };

        if (searchParams.searchMode == SearchMode::Regex)
        {
            auto regexParams = std::regex::ECMAScript;
            if (searchParams.ignoreCase) regexParams |= std::regex::icase;
            const std::regex re(search, regexParams);
            // Iterating with const iterators avoids copying the whole buffer.
            for (std::sregex_iterator match(buf.begin(), buf.end(), re), end; match != end; ++match)
            {
                report(static_cast<std::size_t>(match->position()), static_cast<std::size_t>(match->length()));
            }
        }
        else
        {
            // Finds every (possibly overlapping) occurrence of needle in haystack.
            // haystack has the same length as buf, so positions map directly onto buf.
            const auto scan = [&](const std::string& haystack, const std::string& needle)
            {
                for (auto pos = haystack.find(needle); pos != std::string::npos; pos = haystack.find(needle, pos + 1))
                {
                    report(pos, needle.length());
                }
            };

            if (searchParams.ignoreCase)
            {
                // std::tolower requires a value representable as unsigned char; passing a
                // negative char (any byte >= 0x80, e.g. UTF-8 text) is undefined behaviour.
                const auto toLower = [](std::string text)
                {
                    std::transform(text.begin(), text.end(), text.begin(),
                        [](const unsigned char c) { return static_cast<char>(std::tolower(c)); });
                    return text;
                };
                scan(toLower(buf), toLower(search));
            }
            else
            {
                scan(buf, search);
            }
        }
    }
    catch (const std::exception& e)
    {
        SearchErr(e.what());
    }
}
// Searches a single file for `searchString`.
//   path         - file to read
//   searchString - text or pattern forwarded to Search
//   searchParams - MatchFlag::Global searches the whole file as one buffer;
//                  MatchFlag::Line searches line by line, either streaming (preLoad == false)
//                  or after loading all lines and searching them in parallel (preLoad == true)
// Failures are reported through SearchErr and never propagate to the caller.
void SearchFile(
    const std::filesystem::path& path,
    const std::string& searchString,
    const SearchParams& searchParams)
{
    try
    {
        // The stream buffer storage must outlive the stream using it, so it is declared
        // before the stream (and therefore destroyed after it).
        constexpr std::size_t ioBufferSize = 4096;
        const auto ioBuffer = std::make_unique<char[]>(ioBufferSize);

        const auto pathText = path.string();
        std::ifstream fs(pathText);
        if (!fs.is_open())
        {
            SearchErr(" ", pathText, ":\n ", "cannot open file");
            return;
        }
        fs.rdbuf()->pubsetbuf(ioBuffer.get(), ioBufferSize);

        const auto prefix = pathText + ": ";
        if (searchParams.matchFlag == MatchFlag::Global)
        {
            std::ostringstream buf{};
            buf << fs.rdbuf();
            Search(buf.str(), searchString, searchParams, prefix);
            return;
        }

        // Searches one line; the 1-based line number becomes part of the location prefix.
        const auto searchLine = [&](const std::string& line, const int lineNumber)
        {
            Search(line, searchString, searchParams, prefix + std::to_string(lineNumber) + ": ");
        };

        if (searchParams.preLoad)
        {
            std::vector<std::tuple<int, std::string>> lines{};
            auto lineNumber = 1;
            for (std::string line; std::getline(fs, line); ++lineNumber)
            {
                lines.emplace_back(lineNumber, line);
            }
            // `par`, not `par_unseq`: the callback locks mutexes and allocates, which is
            // not permitted under an unsequenced execution policy.
            std::for_each(std::execution::par, lines.begin(), lines.end(), [&](const auto& numberedLine)
                {
                    searchLine(std::get<1>(numberedLine), std::get<0>(numberedLine));
                });
        }
        else
        {
            auto lineNumber = 1;
            for (std::string line; std::getline(fs, line); ++lineNumber)
            {
                searchLine(line, lineNumber);
            }
        }
    }
    catch (const std::exception& e)
    {
        SearchErr(" ", e.what());
    }
}
void SearchDirectory(
const std::filesystem::path& path,
const std::string& searchString,
const SearchParams& searchParams)
{
std::list<std::filesystem::path> files{};
std::error_code errorCode;
const std::error_code nonErrorCode;
std::deque<std::filesystem::path> queue{};
const std::filesystem::directory_iterator end;
queue.emplace_back(std::filesystem::path(path));
while (!queue.empty())
{
try
{
for (std::filesystem::directory_iterator file(queue.front(), std::filesystem::directory_options::none, errorCode); file != end; ++file)
{
if (errorCode != nonErrorCode)
{
std::cerr << file->path().string() << " " << errorCode.message();
errorCode.clear();
continue;
}
if (file->is_symlink())
{
continue;
}
if (file->is_regular_file())
{
files.push_back(file->path());
}
else if (file->is_directory())
{
queue.emplace_back(file->path());
}
}
}
catch (const std::exception& e)
{
SearchErr(" ", queue.front().string(), ":\n ", e.what());
}
queue.pop_front();
}
std::for_each(std::execution::par_unseq, files.begin(), files.end(), [&](const auto& file)
{
SearchFile(file, searchString, searchParams);
});
}
// Entry point: declares the command-line arguments, parses them, opens the optional
// export target and runs the search on the given file or directory.
// Returns 0 on success and 1 when argument parsing or the search setup throws.
int main(int argc, char* argv[])
{
    using Arguments::Argument;
    // Helpers for building argument constraints (Arguments API from Arguments.h).
#define InvalidArgument(v) Argument<>::ConstraintFuncMsg{ (v) + ": Invalid argument" }
#define InvalidArgumentFunc(func) [](const auto& v) { return (func) ? std::nullopt : InvalidArgument(v); }
#define NilConstraint [](auto) { return std::nullopt; }
    Arguments::Arguments args{};
    // Positional argument: the text or pattern to search for; must not be empty.
    Argument searchStringArg(
        {},
        "search string",
        {},
        { InvalidArgumentFunc(!v.empty()) });
    // -p: file or directory to search; must exist.
    Argument searchPathArg(
        "-p",
        "directory path",
        {},
        { InvalidArgumentFunc(std::filesystem::exists(v)) });
    Argument<SearchMode> modeArg(
        "-sm",
        "search mode " + SearchModeDesc(ToString(SearchMode::Contains)),
        { SearchMode::Contains },
        { NilConstraint },
        { ToSearchMode });
    Argument<MatchFlag> flagArg(
        "-mf",
        "match flag " + MatchFlagDesc(ToString(MatchFlag::Line)),
        { MatchFlag::Line },
        { NilConstraint },
        { ToMatchFlag });
    Argument<bool> ignoreCaseArg(
        "-ic",
        "ignore case [y|(n)]",
        { false },
        { NilConstraint },
        { [](const auto& v) { return v == "y"; } });
    Argument<bool> preLoadArg(
        "-pl",
        "pre load [y|(n)]",
        { false },
        { NilConstraint },
        { [](const auto& v) { return v == "y"; } });
    // NOTE(review): the documented range is not enforced here (NilConstraint).
    Argument<int16_t> borderArg(
        "-b",
        "border (50){0," MacroToString(INT16_MAX) "}",
        { 50 },
        { NilConstraint },
        { [](const auto& v) { return Convert::ToInt(v); } });
    // Anything other than "n" keeps error output enabled.
    Argument<bool> errOutputArg(
        "-e",
        "err output [(y)|n]",
        { true },
        { NilConstraint },
        { [](const auto& v) { return !(v == "n"); } });
    Argument exportPathArg(
        "-ep",
        "export path");
    // NOTE(review): unlike the other descriptions there is no space before the appended
    // option list -- confirm whether ExportFileTypeDesc supplies its own separator.
    Argument<ExportFileType> exportTypeArg(
        "-ef",
        "export type" + ExportFileTypeDesc(ToString(ExportFileType::CSV)),
        { ExportFileType::CSV },
        { NilConstraint },
        { ToExportFileType });
    args.Add(searchStringArg);
    args.Add(searchPathArg);
    args.Add(modeArg);
    args.Add(flagArg);
    args.Add(ignoreCaseArg);
    args.Add(preLoadArg);
    args.Add(borderArg);
    args.Add(errOutputArg);
    args.Add(exportPathArg);
    args.Add(exportTypeArg);
    // Define Debug to let exceptions escape main instead of being reported.
//#define Debug
#ifndef Debug
    try
#endif
    {
#define Value(__arg__) args.Value<decltype(__arg__)::ValueType>(__arg__)
        args.Parse(argc, argv);
        Err = Value(errOutputArg);
        const auto path = Value(searchPathArg);
        // Stays empty unless -ep was given; SearchParams keeps a reference to it.
        SearchParams::ExportFileValueType exportFile = {};
        if (args.Get(exportPathArg).has_value())
        {
            const auto exportPath = Value(exportPathArg);
            switch (Value(exportTypeArg))
            {
            case ExportFileType::Text:
                exportFile = std::ofstream(exportPath);
                break;
            default:
                exportFile = CSV::CsvFile(exportPath);
                break;
            }
        }
        // A directory is walked recursively; anything else is searched as a single file.
        auto func = SearchDirectory;
        if (!std::filesystem::is_directory(path)) func = SearchFile;
        func(
            path,
            Value(searchStringArg),
            {
                Value(modeArg),
                Value(flagArg),
                Value(ignoreCaseArg),
                Value(preLoadArg),
                Value(borderArg),
                exportFile
            });
    }
#ifndef Debug
    catch (const std::exception& e)
    {
        SearchErr(" ", e.what());
        // Signal the failure to the calling shell instead of exiting with 0.
        return 1;
    }
#endif
    return 0;
}