Regex in bash to search and replace
I'm trying to capture some text from many files in a directory into a new
file (preferably a CSV), but I'm stuck on how to do it. The files are HTML,
and the part I want to capture is the part in bold. There is lots of HTML
and other stuff I want to eliminate, so I assume awk, sed, or grep would be
the correct thing to use:
.... (lots of html) ....
id=00000
.... (lots more html) ....
1 = 1/0
.... (more html, then another id=) ....
I'd like the results to look like this in a CSV:
00000 1 1 0
Here is the code for the pages:
<!DOCTYPE html>
<html>
<head>
<title>Regex in bash to search and replace - Stack Overflow</title>
<link rel="shortcut icon"
href="//cdn.sstatic.net/stackoverflow/img/favicon.ico">
<link rel="apple-touch-icon image_src"
href="//cdn.sstatic.net/stackoverflow/img/apple-touch-icon.png">
<link rel="search" type="application/opensearchdescription+xml"
title="Stack Overflow" href="/opensearch.xml">
<script type="text/javascript"
src="//ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
<script type="text/javascript"
src="//cdn.sstatic.net/js/stub.js?v=0ad8a2cff262"></script>
<link rel="stylesheet" type="text/css"
href="//cdn.sstatic.net/stackoverflow/all.css?v=cff37b15495c">
<link rel="canonical"
href="http://stackoverflow.com/questions/18681039/regex-in-bash-to-search-and-replace">
<link rel="alternate" type="application/atom+xml" title="Feed for
question 'Regex in bash to search and replace'"
href="/feeds/question/18681039">
<script type="text/javascript">
// Callback handed to StackExchange.ready. It calls
// postValidation.initOnBlurAndSubmit on #post-form, question.init,
// styleCode() and realtime.subscribeToQuestion, in that order.
StackExchange.ready(function () {
  // Options object passed verbatim to StackExchange.question.init below.
  var questionOptions = {
    votesCast: [],
    autoShowCommentHelp: true,
    showAnswerHelp: true,
    totalCommentCount: 4,
    shownCommentCount: 4,
    highlightColor: '#F4A83D',
    backgroundColor: '#FFF',
    questionId: 18681039
  };

  // The form lookup happens inside the "postValidation" callback.
  StackExchange.using("postValidation", function () {
    var answerForm = $('#post-form');
    StackExchange.postValidation.initOnBlurAndSubmit(answerForm, 2, 'answer');
  });

  StackExchange.question.init(questionOptions);
  styleCode();
  StackExchange.realtime.subscribeToQuestion('1', '18681039');
});
</script>
<script type="text/javascript">
// Page-level configuration passed to StackExchange.init.
// Every string literal is kept on a single physical line: a raw line break
// inside a quoted JavaScript string is a syntax error.
StackExchange.init({
  "stackAuthUrl": "https://stackauth.com",
  "serverTime": 1378622166,
  "styleCode": true,
  "enableUserHovercards": true,
  "site": {
    "name": "Stack Overflow",
    "description": "Q&A for professional and enthusiast programmers",
    "isNoticesTabEnabled": true,
    "recaptchaPublicKey": "6LdchgIAAAAAAJwGpIzRQSOFaO0pU6s44Xt8aTwc",
    "enableSocialMediaInSharePopup": true
  },
  "user": {
    "fkey": "e72538955cb044ce7db3f60a80378a97",
    "isRegistered": true,
    "userType": 3,
    "userId": 2758332,
    "accountId": 3276476,
    "gravatar": "<div class=\"\"><img src=\"https://www.gravatar.com/avatar/3d5c9f6a2a59bdaaf280fcb4bcee236a?s=32&d=identicon&r=PG&f=1\" alt=\"\" width=\"32\" height=\"32\"></div>",
    "profileUrl": "http://stackoverflow.com/users/2758332/charles",
    "notificationsUnviewedCount": 0,
    "inboxUnviewedCount": 0
  }
});
// Map of script paths to version hashes handed to setCacheBreakers.
StackExchange.using.setCacheBreakers({"js/prettify-full.js":"6c261bebf56a","js/moderator.js":"ed7a8238b2f8","js/full-anon.js":"91e39f868c82","js/full.js":"f9e9cb812ede","js/wmd.js":"12e251f7c6c3","js/third-party/jquery.autocomplete.min.js":"e5f01e97f7c3","js/mobile.js":"75f8938d6753","js/help.js":"6e6623243cf6","js/tageditor.js":"450c9e8426fc","js/tageditornew.js":"b6c68ad4c7dd","js/inline-tag-editing.js":"8e84e8a137f7","js/revisions.js":"d3e781ee5528","js/review.js":"947758ba83ea","js/tagsuggestions.js":"aa48ef6154df","js/post-validation.js":"bb996020492a","js/explore-qlist.js":"1c5bbd79b562","js/events.js":"27a33f0b2cad"});
</script>
<script type="text/javascript">
// Request the "gps" module; the callback calls its init with true.
StackExchange.using("gps", function () {
  var initArgument = true;
  StackExchange.gps.init(initArgument);
});
</script>
</head>
<body class="question-page">
<noscript><div id="noscript-padding"></div></noscript>
<div id="notify-container"></div>
<div id="overlay-header"></div>
<div id="custom-header"></div>
<div class="container">
<div id="header" >
<div id="portalLink">
<a class="genu" href="http://stackexchange.com"
onclick="StackExchange.ready(function(){genuwine.click();});return
false;">Stack Exchange</a>
</div>
<div id="topbar">
<div id="hlinks">
<!-- Signed-in user links: marker glyph, profile link and reputation link. -->
<!-- The marker is a numeric character reference so it is not affected by the file's encoding. -->
<span id="hlinks-user"> <span class="profile-triangle">&#9662;</span><a
href="/users/2758332/charles" class="profile-link">Charles</a> <a
href="/users/2758332/charles?tab=reputation"><span
class="reputation-score" title="your reputation; view reputation changes"
dir="ltr">1</span></a>
<span class="lsep">|</span>
</span>
<span id="hlinks-nav"></span>
<span id="hlinks-custom"> <a
href="http://chat.stackoverflow.com">chat</a>
<span class="lsep">|</span>
<a href="http://meta.stackoverflow.com">meta</a>
<span class="lsep">|</span>
<a href="/about">about</a>
<span class="lsep">|</span>
<a href="/help">help</a>
</span>
</div>
<div id="hsearch">
<form id="search" action="/search" method="get"
autocomplete="off">
<div>
<input autocomplete="off" name="q"
class="textbox" placeholder="search"
tabindex="1" type="text" maxlength="240"
size="28" value="">
</div>
</form>
</div>
</div>
<br class="cbt">
<div id="hlogo">
<a href="/">
Stack Overflow
</a>
</div>
<div id="hmenus">
<div class="nav mainnavs ">
<ul>
<li class="youarehere"><a id="nav-questions"
href="/questions">Questions</a></li>
<li><a id="nav-tags" href="/tags">Tags</a></li>
<li><a id="nav-users"
href="/users">Users</a></li>
<li><a id="nav-badges"
href="/help/badges">Badges</a></li>
<li><a id="nav-unanswered"
href="/unanswered">Unanswered</a></li>
</ul>
</div>
<div class="nav askquestion">
<ul>
<li>
<a id="nav-askquestion"
href="/questions/ask">Ask Question</a>
</li>
</ul>
</div>
</div>
</div>
<div id="content">
<div itemscope itemtype="http://schema.org/Article">
<link itemprop="image"
href="//cdn.sstatic.net/stackoverflow/img/apple-touch-icon.png">
<div id="question-header">
<h1 itemprop="name"><a
href="/questions/18681039/regex-in-bash-to-search-and-replace"
class="question-hyperlink">Regex in bash to search and
replace</a></h1>
</div>
<div id="mainbar">
<div class="question" data-questionid="18681039" id="question">
<input type="hidden"
id="18681039-only-allow-inform-moderator-flagging" value="true" />
<script type="text/javascript">
// Reuse the global ad queue if it already exists, otherwise create it.
var ados = ados || {};
ados.run = ados.run || [];

// Queue a callback that registers placement "adzerk1513585107" and sets zone 43.
ados.run.push(function () {
  var placement = ados_add_placement(22, 8277, "adzerk1513585107", 4);
  placement.setZone(43);
});
</script>
<div class="everyonelovesstackoverflow" id="adzerk1513585107">
</div>
<table>
<tr>
<td class="votecell">
<div class="vote">
<input type="hidden" name="_id_" value="18681039">
<a class="vote-up-off" title="This question shows research effort; it
is useful and clear (click again to undo)">up vote</a>
<span class="vote-count-post ">-3</span>
<a class="vote-down-off" title="This question does not show any
research effort; it is unclear or not useful (click again to
undo)">down vote</a>
<a class="star-off" href="#" title="This is a favorite question (click
again to undo)">favorite</a>
<div class="favoritecount"><b></b></div>
</div>
</td>
<td class="postcell">
<div>
<div class="post-text" itemprop="description">
<p>I'm trying to capture some text from many files in a directory
to a new file (preferably a csv) but I'm stuck on how to do it.
The files are html and the part i want to capture is the part in
bold. There is lots of html and other stuff i want to elimate, so
awk, sed, grep is what i assume would be the correct thing to
use:</p>
<p>.... (lots of html) ....</p>
<p><strong>id=00000</strong> </p>
<p>.... (lots more html) ....</p>
<p><strong>1 = 1/0</strong></p>
<p>.... (more html, then another id=) ....</p>
<p>i'd like the results to look like in a csv:</p>
<p><strong>00000 1 1 0</strong></p>
</div>
<div class="post-taglist">
<a href="/questions/tagged/regex" class="post-tag" title="show
questions tagged 'regex'" rel="tag">regex</a> <a
href="/questions/tagged/bash" class="post-tag" title="show
questions tagged 'bash'" rel="tag">bash</a> <a
href="/questions/tagged/sed" class="post-tag" title="show
questions tagged 'sed'" rel="tag">sed</a> <a
href="/questions/tagged/awk" class="post-tag" title="show
questions tagged 'awk'" rel="tag">awk</a> <a
href="/questions/tagged/grep" class="post-tag" title="show
questions tagged 'grep'" rel="tag">grep</a>
</div>
<table class="fw">
<tr>
<td class="vt">
<!-- Post actions for the question: share, edit, delete, flag. -->
<div class="post-menu"><a href="/q/18681039/2758332" title="short
permalink to this question" class="short-link"
id="link-post-18681039">share</a><span class="lsep">|</span><a
href="/posts/18681039/edit" class="edit-post"
title="revise and improve this post">edit</a><span class="lsep">|</span><a href="#"
id="delete-post-18681039"
title="vote to delete this post"
class=""
data-delete-prompt="Delete this post?"
data-undelete-prompt="Undelete this post?">delete</a><span
class="lsep">|</span><a href="#"
class="flag-post-link"
title="flag this post for serious problems or moderator attention"
data-postid="18681039">flag</a></div>
</td>
<td class="post-signature owner">
<div class="user-info ">
<div class="user-action-time">
asked <span title="2013-09-08 06:16:38Z"
class="relativetime">19 mins ago</span>
</div>
<div class="user-gravatar32">
<a href="/users/2758332/charles"><div class=""><img
src="https://www.gravatar.com/avatar/3d5c9f6a2a59bdaaf280fcb4bcee236a?s=32&id=00000"
alt="" width="32" height="32"></div></a>
</div>
<div class="user-details">
<a href="/users/2758332/charles">Charles</a><br>
<span class="reputation-score" title="reputation score"
dir="ltr">1</span>
</div>
</div>
</td>
</tr>
</table>
</div>
</td>
</tr>
<tr>
<td class="votecell">0 0/0</td>
<td>
<div id="comments-18681039" class="comments">
<table>
<tbody>
<tr id="comment-27516323" class="comment" data-commentid="27516323">
<td></td>
<td class="comment-text"><div><span class="comment-copy">Use an
online service or a tool of your choice to test the regex pattern
and then try to use the matched code in a text editor
somehow.</span> – <a href="/users/2695458/daapii"
title="53 reputation" class="comment-user">Daapii</a> <span
class="comment-date" dir="ltr"><span title="2013-09-08 06:21:52Z"
class="relativetime-clean">14 mins ago</span></span></div></td>
</tr>
<tr id="comment-27516332" class="comment" data-commentid="27516332">
<td></td>
<td class="comment-text"><div><span class="comment-copy">i tried
and it didn't work</span> – <a
href="/users/2758332/charles" title="1 reputation"
class="comment-user owner">Charles</a> <span class="comment-date"
dir="ltr"><span title="2013-09-08 06:22:33Z"
class="relativetime-clean">13 mins ago</span></span> <a
class="comment-delete delete-tag" title="delete this
comment"></a></div></td>
</tr>
<tr id="comment-27516365" class="comment" data-commentid="27516365">
<td></td>
<td class="comment-text"><div><span class="comment-copy">Your
question is so vague that it will be hard to help. Post real data
and desired output example. We are here to help out fixing
problems, not to create complete solutions. What have you
tried?</span> – <a href="/users/2341847/jotne"
title="407 reputation" class="comment-user">Jotne</a> <span
class="comment-date" dir="ltr"><span title="2013-09-08 06:24:54Z"
class="relativetime-clean">11 mins ago</span></span><span
class="edited-yes" title="this comment was
edited"></span></div></td>
</tr>
<tr id="comment-27516478" class="comment" data-commentid="27516478">
<td></td>
<td class="comment-text"><div><span class="comment-copy">well
i'm not a computer person, and don't know the terminology.
i guess asking questions here is viewed as negative. sorry i
bothered all of you 'geniuses'. at least i know where not
to ask questions next time. :)</span> – <a
href="/users/2758332/charles" title="1 reputation"
class="comment-user owner">Charles</a> <span class="comment-date"
dir="ltr"><span title="2013-09-08 06:33:56Z"
class="relativetime-clean">2 mins ago</span></span><span
class="edited-yes" title="this comment was edited 2
times"></span> <a class="comment-edit">edit</a> <a
class="comment-delete delete-tag" title="delete this
comment"></a></div><form id="edit-comment-27516478"
class="dno"><div class="dno">well i'm not a computer person,
and don't know the terminology. i guess asking questions here
is viewed as negative. sorry i bothered all of you
'geniuses'. at least i know where not to ask questions
next time. :)</div></form></td>
</tr>
</tbody>
<tfoot>
<tr>
<td></td>
<td class="comment-form"><form id="add-comment-18681039"
data-placeholdertext="Use comments to reply to other users
or notify them of changes. If you are adding new
information, edit your post instead of
commenting."></form></td>
</tr>
</tfoot>
</table>
</div>
<a id="comments-link-18681039" class="comments-link"
data-comments-counts="0" title="Use comments to reply to other users
or notify them of changes. If you are adding new information, edit
your post instead of commenting." href="#">add comment</a>
</td>
</tr> </table>
</div>
<div id="answers" class="no-answers">
<a name="tab-top"></a>
<div id="answers-header">
<div class="subheader answers-subheader">
<h2>
</h2>
<div style="display:none;">
<div id="tabs">
<a
href="/questions/18681039/regex-in-bash-to-search-and-replace?answertab=active#tab-top"
title="Answers with the latest activity first">
active
</a>
<a
href="/questions/18681039/regex-in-bash-to-search-and-replace?answertab=oldest#tab-top"
title="Answers in the order they were provided">
oldest
</a>
<a class="youarehere"
href="/questions/18681039/regex-in-bash-to-search-and-replace?answertab=votes#tab-top"
title="Answers with the highest score first">
votes
</a>
</div>
</div>
</div>
</div>
<h2 class="bottom-notice"
style="padding-top: 8px;
margin-bottom: -8px;">
Know someone who can answer?
Share a link to this <a
href="/q/18681039/2758332">question</a> via <a
href="mailto:?subject=Stack%20Overflow%20Question&body=Regex%20in%20bash%20to%20search%20and%20replace%0Ahttp%3a%2f%2fstackoverflow.com%2fq%2f18681039%2f2758332%3fsem%3d2">email</a>,
<a
href="https://plus.google.com/share?url=http%3a%2f%2fstackoverflow.com%2fq%2f18681039%2f2758332%3fsgp%3d2">Google+</a>,
<a
href="http://twitter.com/share?url=http%3a%2f%2fstackoverflow.com%2fq%2f18681039%2f2758332%3fstw%3d2">Twitter</a>,
or <a
href="http://www.facebook.com/sharer.php?id=00000">Facebook</a>.
</h2>
<a name='new-answer'></a>
<form id="post-form"
action="/questions/18681039/answer/submit"
method="post" class="dno post-form">
<input type="hidden" id="post-id" value="18681039" />
<h2 class="space">Your Answer</h2>
<script type="text/javascript">
// Callback handed to StackExchange.ready: calls initTagRenderer, then prepareEditor.
StackExchange.ready(function () {
  // Settings object passed to prepareEditor.
  var editorSettings = {
    heartbeatType: 'answer',
    bindNavPrevention: true,
    postfix: "",
    onDemand: false,
    discardSelector: ".discard-answer",
    immediatelyShowMarkdownHelp: true
  };

  // "".split(" ") yields [""], so both arguments are one-element arrays.
  var firstTagList = "".split(" ");
  var secondTagList = "".split(" ");

  initTagRenderer(firstTagList, secondTagList);
  prepareEditor(editorSettings);
});
</script>
<div id="post-editor" class="post-editor">
<div style="position: relative;">
<div class="wmd-container">
<div id="wmd-button-bar" class="wmd-button-bar"></div>
<textarea id="wmd-input" class="wmd-input" name="post-text"
cols="92" rows="15" tabindex="101"
data-min-length=""></textarea>
</div>
</div>
<div class="fl" style="margin-top: 8px; height:24px;"> </div>
<div id="draft-saved" class="draft-saved community-option fl"
style="margin-top: 8px; height:24px; display:none;">draft saved</div>
<div id="draft-discarded" class="draft-discarded community-option fl"
style="margin-top: 8px; height:24px; display:none;">draft
discarded</div>
<div id="wmd-preview" class="wmd-preview"></div>
<div></div>
<div class="edit-block">
<input id="fkey" name="fkey" type="hidden"
value="e72538955cb044ce7db3f60a80378a97">
<input id="author" name="author" type="text">
</div>
</div>
<div style="position: relative;">
</div>
<div
class="form-submit
cbt">
<input id="submit-button" type="submit"
value="Post Your Answer" tabindex="110">
<a href="#" class="discard-answer
dno">discard</a>
</div>
</form>
<div id="show-editor-button" class="form-submit">
<input type="button" value="Answer Your Question"
data-confirm-text="Are you sure you want to answer
your own question?
If you're responding to answers left on your question, enter comments
under each answer.
If you need to add details to your question, use the edit link under your
question."/>
</div>
<script type="text/javascript">
// Callback handed to StackExchange.ready: binds the click handler for the
// input inside #show-editor-button.
StackExchange.ready(function () {
  var editorButton = $("#show-editor-button input");

  editorButton.click(function () {
    // Prompt with the clicked element's data-confirm-text; stop if declined.
    var confirmed = confirm($(this).data('confirm-text'));
    if (!confirmed) {
      return;
    }

    // Hide the button container, un-hide the answer form, then init the editor.
    $("#show-editor-button").hide();
    $("#post-form").removeClass("dno");
    StackExchange.editor.finallyInit();
  });
});
</script>
<div>
<h2 class="bottom-notice">
Would you like to have responses to your questions <a
id="inbox-notify-" href="#">sent to you via email</a>?
</h2>
</div>
<script type="text/javascript">
// Callback handed to StackExchange.ready: binds the click handler for #inbox-notify-.
StackExchange.ready(function () {
  // Scrolls to the top, then triggers genuwine.click() and a click on
  // #seTabEmail from a nested StackExchange.ready callback.
  // Returning false is what the original handler did on every click.
  var onInboxNotifyClick = function () {
    $('html, body').animate({ scrollTop: 0 }, 200);
    StackExchange.ready(function () {
      genuwine.click();
      $("#seTabEmail").click();
    });
    return false;
  };

  $("#inbox-notify-").click(onInboxNotifyClick);
});
</script>
</div>
</div>
<div id="sidebar" class="show-votes">
<script type="text/javascript">
// Global flag tested by the later inline script that un-hides #newuser-box; set to false here.
window.showNewUser = false;
</script>
<div class="module newuser newuser-greeting" id="newuser-box"
style="display:none;">
<h4>Hello World!</h4>
<div>
<p>This is a collaboratively edited question and answer site for
<b>professional and enthusiast programmers</b>. It's 100% free, no
registration required.</p>
<p class="ar">
<a href="/about?mnu=1">about »</a>
<a href="/help?mnu=1">help »</a>
</p>
</div>
</div>
<script type="text/javascript">
// When showNewUser is truthy, clear the inline display style on #newuser-box
// (the element is rendered with style="display:none;").
if (showNewUser) {
document.getElementById('newuser-box').style.display = '';
}
</script> <div class="module question-stats">
<p class="label-key">tagged</p>
<div class="tagged"><a href="/questions/tagged/regex"
class="post-tag" title="show questions tagged 'regex'"
rel="tag">regex</a> <span
class="item-multiplier"><span
class="item-multiplier-x">×</span> <span
class="item-multiplier-count">66794</span></span><br>
<a href="/questions/tagged/bash" class="post-tag" title="show questions
tagged 'bash'" rel="tag">bash</a> <span class="item-multiplier"><span
class="item-multiplier-x">×</span> <span
class="item-multiplier-count">25594</span></span><br>
<a href="/questions/tagged/sed" class="post-tag" title="show questions
tagged 'sed'" rel="tag">sed</a> <span class="item-multiplier"><span
class="item-multiplier-x">×</span> <span
class="item-multiplier-count">5491</span></span><br>
<a href="/questions/tagged/awk" class="post-tag" title="show questions
tagged 'awk'" rel="tag">awk</a> <span class="item-multiplier"><span
class="item-multiplier-x">×</span> <span
class="item-multiplier-count">5253</span></span><br>
<a href="/questions/tagged/grep" class="post-tag" title="show questions
tagged 'grep'" rel="tag">grep</a> <span class="item-multiplier"><span
class="item-multiplier-x">×</span> <span
class="item-multiplier-count">3586</span></span><br>
</div>
<table id="qinfo">
<tr>
<td>
<p class="label-key">asked</p>
</td>
<td style="padding-left: 10px">
<p class="label-key" title="2013-09-08
06:16:38Z"><b>today</b></p>
</td>
</tr>
<tr>
<td>
<p class="label-key">viewed</p>
</td>
<td style="padding-left: 10px">
<p class="label-key">
<b>10 times</b>
</p>
</td>
</tr>
</table>
</div>
<div class="module community-bulletin" data-tracker="cb=1">
<h4>Community Bulletin</h4>
<div class="related">
<div class="spacer">
<div class="bulletin-item-type"><a
href="http://blog.stackoverflow.com" class="event-date"
target="_blank">blog</a></div>
<div class="bulletin-item-content">
<a
href="http://blog.stackoverflow.com/2013/09/community-management-by-popular-demand-kevin-chang-joins-the-team/"
class="question-hyperlink" target="_blank">Community
Management by Popular Demand: Kevin Chang Joins The
Team</a>
</div>
<br class="cbt" />
</div>
</div>
</div>
<script type="text/javascript">
// Reuse the global ad queue if it already exists, otherwise create it.
var ados = ados || {};
ados.run = ados.run || [];

// Queue a callback that registers placement "adzerk883613121" and sets zone 45.
ados.run.push(function () {
  var placement = ados_add_placement(22, 8277, "adzerk883613121", 17);
  placement.setZone(45);
});
</script>
<div class="everyonelovesstackoverflow" id="adzerk883613121">
</div>
<div id="hireme">
<script type="text/javascript">
// Globals set before the careers loader is fetched; presumably read by that
// loader script (its source is not visible here).
window.careers_adurl = "http://careers.stackoverflow.com/gethired/js";
window.careers_cssurl = "//cdn-careers.sstatic.net/careers/gethired/sidebar.min.css?v=7ae83073b37e";
window.careers_leaderboardcssurl = "//cdn-careers.sstatic.net/careers/gethired/ninja.min.css?v=78ccbfa5c59f";
// Kept on one line: a raw line break inside a quoted string is a syntax error.
window.careers_adselector = "div.hireme, div#hireme";

// Callback handed to StackExchange.ready: requests the loader as a script with caching enabled.
StackExchange.ready(function () {
  $.ajax({
    url: "//cdn-careers.sstatic.net/careers/gethired/loader.min.js?v=607ab2dda910",
    dataType: "script",
    cache: true
  });
});
</script>
</div>
<div class="module sidebar-related">
<h4 id="h-related">Related</h4>
<div class="related" data-tracker="rq=1">
<div class="spacer">
<a href="/q/5327409" title="Vote score (upvotes - downvotes)">
<div class="answer-votes default">1</div>
</a>
<a href="/questions/5327409/parsing-out-numbers-from-a-string-bash-script"
class="question-hyperlink">Parsing out numbers from a string - BASH
script</a>
</div>
<!-- Related-question entry: vote score link followed by the question link. -->
<div class="spacer">
<a href="/q/6661839" title="Vote score (upvotes - downvotes)">
<div class="answer-votes default">0 0/0</div>
</a>
<a
href="/questions/6661839/parse-clamav-logs-in-bash-script-using-regex-to-insert-in-mysql"
class="question-hyperlink">Parse ClamAV logs in Bash script using Regex to
insert in MySQL</a>
</div>
<div class="spacer">
<a href="/q/9829169" title="Vote score (upvotes - downvotes)">
<div class="answer-votes answered-accepted default">0</div>
</a>
<a href="/questions/9829169/sed-replace-entire-line-with-replacement"
class="question-hyperlink">Sed replace entire line with replacement</a>
No comments:
Post a Comment